lib/Analysis/ScalarEvolution.cpp

   1 //===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file contains the implementation of the scalar evolution analysis
  11 // engine, which is used primarily to analyze expressions involving induction
  12 // variables in loops.
  13 //
  14 // There are several aspects to this library.  First is the representation of
  15 // scalar expressions, which are represented as subclasses of the SCEV class.
  16 // These classes are used to represent certain types of subexpressions that we
  17 // can handle. We only create one SCEV of a particular shape, so
  18 // pointer-comparisons for equality are legal.
  19 //
  20 // One important aspect of the SCEV objects is that they are never cyclic, even
  21 // if there is a cycle in the dataflow for an expression (i.e., a PHI node).  If
  22 // the PHI node is one of the idioms that we can represent (e.g., a polynomial
  23 // recurrence) then we represent it directly as a recurrence node, otherwise we
  24 // represent it as a SCEVUnknown node.
  25 //
  26 // In addition to being able to represent expressions of various types, we also
  27 // have folders that are used to build the *canonical* representation for a
  28 // particular expression.  These folders are capable of using a variety of
  29 // rewrite rules to simplify the expressions.
  30 //
  31 // Once the folders are defined, we can implement the more interesting
  32 // higher-level code, such as the code that recognizes PHI nodes of various
  33 // types, computes the execution count of a loop, etc.
  34 //
  35 // TODO: We should use these routines and value representations to implement
  36 // dependence analysis!
  37 //
  38 //===----------------------------------------------------------------------===//
  39 //
  40 // There are several good references for the techniques used in this analysis.
  41 //
  42 //  Chains of recurrences -- a method to expedite the evaluation
  43 //  of closed-form functions
  44 //  Olaf Bachmann, Paul S. Wang, Eugene V. Zima
  45 //
  46 //  On computational properties of chains of recurrences
  47 //  Eugene V. Zima
  48 //
  49 //  Symbolic Evaluation of Chains of Recurrences for Loop Optimization
  50 //  Robert A. van Engelen
  51 //
  52 //  Efficient Symbolic Analysis for Optimizing Compilers
  53 //  Robert A. van Engelen
  54 //
  55 //  Using the chains of recurrences algebra for data dependence testing and
  56 //  induction variable substitution
  57 //  MS Thesis, Johnie Birch
  58 //
  59 //===----------------------------------------------------------------------===//
  60
  61 #include "llvm/Analysis/ScalarEvolution.h"
  62 #include "llvm/ADT/Optional.h"
  63 #include "llvm/ADT/STLExtras.h"
  64 #include "llvm/ADT/SmallPtrSet.h"
  65 #include "llvm/ADT/Statistic.h"
  66 #include "llvm/Analysis/AssumptionCache.h"
  67 #include "llvm/Analysis/ConstantFolding.h"
  68 #include "llvm/Analysis/InstructionSimplify.h"
  69 #include "llvm/Analysis/LoopInfo.h"
  70 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  71 #include "llvm/Analysis/TargetLibraryInfo.h"
  72 #include "llvm/Analysis/ValueTracking.h"
  73 #include "llvm/IR/ConstantRange.h"
  74 #include "llvm/IR/Constants.h"
  75 #include "llvm/IR/DataLayout.h"
  76 #include "llvm/IR/DerivedTypes.h"
  77 #include "llvm/IR/Dominators.h"
  78 #include "llvm/IR/GetElementPtrTypeIterator.h"
  79 #include "llvm/IR/GlobalAlias.h"
  80 #include "llvm/IR/GlobalVariable.h"
  81 #include "llvm/IR/InstIterator.h"
  82 #include "llvm/IR/Instructions.h"
  83 #include "llvm/IR/LLVMContext.h"
  84 #include "llvm/IR/Metadata.h"
  85 #include "llvm/IR/Operator.h"
  86 #include "llvm/Support/CommandLine.h"
  87 #include "llvm/Support/Debug.h"
  88 #include "llvm/Support/ErrorHandling.h"
  89 #include "llvm/Support/MathExtras.h"
  90 #include "llvm/Support/raw_ostream.h"
  91 #include <algorithm>
  92 using namespace llvm;
  93
  94 #define DEBUG_TYPE "scalar-evolution"
  95
  96 STATISTIC(NumArrayLenItCounts,
  97           "Number of trip counts computed with array length");
  98 STATISTIC(NumTripCountsComputed,
  99           "Number of loops with predictable loop counts");
 100 STATISTIC(NumTripCountsNotComputed,
 101           "Number of loops without predictable loop counts");
 102 STATISTIC(NumBruteForceTripCountsComputed,
 103           "Number of loops with trip counts computed by force");
 104
 105 static cl::opt<unsigned>
 106 MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
 107                         cl::desc("Maximum number of iterations SCEV will "
 108                                  "symbolically execute a constant "
 109                                  "derived loop"),
 110                         cl::init(100));
 111
 112 // FIXME: Enable this with XDEBUG when the test suite is clean.
 113 static cl::opt<bool>
 114 VerifySCEV("verify-scev",
 115            cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
 116
 117 //===----------------------------------------------------------------------===//
 118 //                           SCEV class definitions
 119 //===----------------------------------------------------------------------===//
 120
 121 //===----------------------------------------------------------------------===//
 122 // Implementation of the SCEV class.
 123 //
 124
 125 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 126 void SCEV::dump() const {
 127   print(dbgs());
 128   dbgs() << '\n';
 129 }
 130 #endif
 131
 132 void SCEV::print(raw_ostream &OS) const {
 133   switch (static_cast<SCEVTypes>(getSCEVType())) {
 134   case scConstant:
 135     cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
 136     return;
 137   case scTruncate: {
 138     const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
 139     const SCEV *Op = Trunc->getOperand();
 140     OS << "(trunc " << *Op->getType() << " " << *Op << " to "
 141        << *Trunc->getType() << ")";
 142     return;
 143   }
 144   case scZeroExtend: {
 145     const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
 146     const SCEV *Op = ZExt->getOperand();
 147     OS << "(zext " << *Op->getType() << " " << *Op << " to "
 148        << *ZExt->getType() << ")";
 149     return;
 150   }
 151   case scSignExtend: {
 152     const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
 153     const SCEV *Op = SExt->getOperand();
 154     OS << "(sext " << *Op->getType() << " " << *Op << " to "
 155        << *SExt->getType() << ")";
 156     return;
 157   }
 158   case scAddRecExpr: {
 159     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
 160     OS << "{" << *AR->getOperand(0);
 161     for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
 162       OS << ",+," << *AR->getOperand(i);
 163     OS << "}<";
 164     if (AR->getNoWrapFlags(FlagNUW))
 165       OS << "nuw><";
 166     if (AR->getNoWrapFlags(FlagNSW))
 167       OS << "nsw><";
 168     if (AR->getNoWrapFlags(FlagNW) &&
 169         !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
 170       OS << "nw><";
 171     AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
 172     OS << ">";
 173     return;
 174   }
 175   case scAddExpr:
 176   case scMulExpr:
 177   case scUMaxExpr:
 178   case scSMaxExpr: {
 179     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
 180     const char *OpStr = nullptr;
 181     switch (NAry->getSCEVType()) {
 182     case scAddExpr: OpStr = " + "; break;
 183     case scMulExpr: OpStr = " * "; break;
 184     case scUMaxExpr: OpStr = " umax "; break;
 185     case scSMaxExpr: OpStr = " smax "; break;
 186     }
 187     OS << "(";
 188     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
 189          I != E; ++I) {
 190       OS << **I;
 191       if (std::next(I) != E)
 192         OS << OpStr;
 193     }
 194     OS << ")";
 195     switch (NAry->getSCEVType()) {
 196     case scAddExpr:
 197     case scMulExpr:
 198       if (NAry->getNoWrapFlags(FlagNUW))
 199         OS << "<nuw>";
 200       if (NAry->getNoWrapFlags(FlagNSW))
 201         OS << "<nsw>";
 202     }
 203     return;
 204   }
 205   case scUDivExpr: {
 206     const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
 207     OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
 208     return;
 209   }
 210   case scUnknown: {
 211     const SCEVUnknown *U = cast<SCEVUnknown>(this);
 212     Type *AllocTy;
 213     if (U->isSizeOf(AllocTy)) {
 214       OS << "sizeof(" << *AllocTy << ")";
 215       return;
 216     }
 217     if (U->isAlignOf(AllocTy)) {
 218       OS << "alignof(" << *AllocTy << ")";
 219       return;
 220     }
 221
 222     Type *CTy;
 223     Constant *FieldNo;
 224     if (U->isOffsetOf(CTy, FieldNo)) {
 225       OS << "offsetof(" << *CTy << ", ";
 226       FieldNo->printAsOperand(OS, false);
 227       OS << ")";
 228       return;
 229     }
 230
 231     // Otherwise just print it normally.
 232     U->getValue()->printAsOperand(OS, false);
 233     return;
 234   }
 235   case scCouldNotCompute:
 236     OS << "***COULDNOTCOMPUTE***";
 237     return;
 238   }
 239   llvm_unreachable("Unknown SCEV kind!");
 240 }
 241
 242 Type *SCEV::getType() const {
 243   switch (static_cast<SCEVTypes>(getSCEVType())) {
 244   case scConstant:
 245     return cast<SCEVConstant>(this)->getType();
 246   case scTruncate:
 247   case scZeroExtend:
 248   case scSignExtend:
 249     return cast<SCEVCastExpr>(this)->getType();
 250   case scAddRecExpr:
 251   case scMulExpr:
 252   case scUMaxExpr:
 253   case scSMaxExpr:
 254     return cast<SCEVNAryExpr>(this)->getType();
 255   case scAddExpr:
 256     return cast<SCEVAddExpr>(this)->getType();
 257   case scUDivExpr:
 258     return cast<SCEVUDivExpr>(this)->getType();
 259   case scUnknown:
 260     return cast<SCEVUnknown>(this)->getType();
 261   case scCouldNotCompute:
 262     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
 263   }
 264   llvm_unreachable("Unknown SCEV kind!");
 265 }
 266
 267 bool SCEV::isZero() const {
 268   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
 269     return SC->getValue()->isZero();
 270   return false;
 271 }
 272
 273 bool SCEV::isOne() const {
 274   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
 275     return SC->getValue()->isOne();
 276   return false;
 277 }
 278
 279 bool SCEV::isAllOnesValue() const {
 280   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
 281     return SC->getValue()->isAllOnesValue();
 282   return false;
 283 }
 284
 285 /// isNonConstantNegative - Return true if the specified scev is negated, but
 286 /// not a constant.
 287 bool SCEV::isNonConstantNegative() const {
 288   const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
 289   if (!Mul) return false;
 290
 291   // If there is a constant factor, it will be first.
 292   const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
 293   if (!SC) return false;
 294
 295   // Return true if the value is negative, this matches things like (-42 * V).
 296   return SC->getValue()->getValue().isNegative();
 297 }
 298
 299 SCEVCouldNotCompute::SCEVCouldNotCompute() :
 300   SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
 301
 302 bool SCEVCouldNotCompute::classof(const SCEV *S) {
 303   return S->getSCEVType() == scCouldNotCompute;
 304 }
 305
 306 const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
 307   FoldingSetNodeID ID;
 308   ID.AddInteger(scConstant);
 309   ID.AddPointer(V);
 310   void *IP = nullptr;
 311   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
 312   SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
 313   UniqueSCEVs.InsertNode(S, IP);
 314   return S;
 315 }
 316
 317 const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
 318   return getConstant(ConstantInt::get(getContext(), Val));
 319 }
 320
 321 const SCEV *
 322 ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
 323   IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
 324   return getConstant(ConstantInt::get(ITy, V, isSigned));
 325 }
 326
 327 SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
 328                            unsigned SCEVTy, const SCEV *op, Type *ty)
 329   : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
 330
 331 SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
 332                                    const SCEV *op, Type *ty)
 333   : SCEVCastExpr(ID, scTruncate, op, ty) {
 334   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
 335          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
 336          "Cannot truncate non-integer value!");
 337 }
 338
 339 SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
 340                                        const SCEV *op, Type *ty)
 341   : SCEVCastExpr(ID, scZeroExtend, op, ty) {
 342   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
 343          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
 344          "Cannot zero extend non-integer value!");
 345 }
 346
 347 SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
 348                                        const SCEV *op, Type *ty)
 349   : SCEVCastExpr(ID, scSignExtend, op, ty) {
 350   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
 351          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
 352          "Cannot sign extend non-integer value!");
 353 }
 354
 355 void SCEVUnknown::deleted() {
 356   // Clear this SCEVUnknown from various maps.
 357   SE->forgetMemoizedResults(this);
 358
 359   // Remove this SCEVUnknown from the uniquing map.
 360   SE->UniqueSCEVs.RemoveNode(this);
 361
 362   // Release the value.
 363   setValPtr(nullptr);
 364 }
 365
 366 void SCEVUnknown::allUsesReplacedWith(Value *New) {
 367   // Clear this SCEVUnknown from various maps.
 368   SE->forgetMemoizedResults(this);
 369
 370   // Remove this SCEVUnknown from the uniquing map.
 371   SE->UniqueSCEVs.RemoveNode(this);
 372
 373   // Update this SCEVUnknown to point to the new value. This is needed
 374   // because there may still be outstanding SCEVs which still point to
 375   // this SCEVUnknown.
 376   setValPtr(New);
 377 }
 378
 379 bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
 380   if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
 381     if (VCE->getOpcode() == Instruction::PtrToInt)
 382       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
 383         if (CE->getOpcode() == Instruction::GetElementPtr &&
 384             CE->getOperand(0)->isNullValue() &&
 385             CE->getNumOperands() == 2)
 386           if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
 387             if (CI->isOne()) {
 388               AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
 389                                  ->getElementType();
 390               return true;
 391             }
 392
 393   return false;
 394 }
 395
 396 bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
 397   if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
 398     if (VCE->getOpcode() == Instruction::PtrToInt)
 399       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
 400         if (CE->getOpcode() == Instruction::GetElementPtr &&
 401             CE->getOperand(0)->isNullValue()) {
 402           Type *Ty =
 403             cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
 404           if (StructType *STy = dyn_cast<StructType>(Ty))
 405             if (!STy->isPacked() &&
 406                 CE->getNumOperands() == 3 &&
 407                 CE->getOperand(1)->isNullValue()) {
 408               if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
 409                 if (CI->isOne() &&
 410                     STy->getNumElements() == 2 &&
 411                     STy->getElementType(0)->isIntegerTy(1)) {
 412                   AllocTy = STy->getElementType(1);
 413                   return true;
 414                 }
 415             }
 416         }
 417
 418   return false;
 419 }
 420
 421 bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
 422   if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
 423     if (VCE->getOpcode() == Instruction::PtrToInt)
 424       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
 425         if (CE->getOpcode() == Instruction::GetElementPtr &&
 426             CE->getNumOperands() == 3 &&
 427             CE->getOperand(0)->isNullValue() &&
 428             CE->getOperand(1)->isNullValue()) {
 429           Type *Ty =
 430             cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
 431           // Ignore vector types here so that ScalarEvolutionExpander doesn't
 432           // emit getelementptrs that index into vectors.
 433           if (Ty->isStructTy() || Ty->isArrayTy()) {
 434             CTy = Ty;
 435             FieldNo = CE->getOperand(2);
 436             return true;
 437           }
 438         }
 439
 440   return false;
 441 }
 442
 443 //===----------------------------------------------------------------------===//
 444 //                               SCEV Utilities
 445 //===----------------------------------------------------------------------===//
 446
 447 namespace {
 448   /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
 449   /// than the complexity of the RHS.  This comparator is used to canonicalize
 450   /// expressions.
 451   class SCEVComplexityCompare {
 452     const LoopInfo *const LI;
 453   public:
 454     explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
 455
 456     // Return true or false if LHS is less than, or at least RHS, respectively.
 457     bool operator()(const SCEV *LHS, const SCEV *RHS) const {
 458       return compare(LHS, RHS) < 0;
 459     }
 460
 461     // Return negative, zero, or positive, if LHS is less than, equal to, or
 462     // greater than RHS, respectively. A three-way result allows recursive
 463     // comparisons to be more efficient.
 464     int compare(const SCEV *LHS, const SCEV *RHS) const {
 465       // Fast-path: SCEVs are uniqued so we can do a quick equality check.
 466       if (LHS == RHS)
 467         return 0;
 468
 469       // Primarily, sort the SCEVs by their getSCEVType().
 470       unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
 471       if (LType != RType)
 472         return (int)LType - (int)RType;
 473
 474       // Aside from the getSCEVType() ordering, the particular ordering
 475       // isn't very important except that it's beneficial to be consistent,
 476       // so that (a + b) and (b + a) don't end up as different expressions.
 477       switch (static_cast<SCEVTypes>(LType)) {
 478       case scUnknown: {
 479         const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
 480         const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
 481
 482         // Sort SCEVUnknown values with some loose heuristics. TODO: This is
 483         // not as complete as it could be.
 484         const Value *LV = LU->getValue(), *RV = RU->getValue();
 485
 486         // Order pointer values after integer values. This helps SCEVExpander
 487         // form GEPs.
 488         bool LIsPointer = LV->getType()->isPointerTy(),
 489              RIsPointer = RV->getType()->isPointerTy();
 490         if (LIsPointer != RIsPointer)
 491           return (int)LIsPointer - (int)RIsPointer;
 492
 493         // Compare getValueID values.
 494         unsigned LID = LV->getValueID(),
 495                  RID = RV->getValueID();
 496         if (LID != RID)
 497           return (int)LID - (int)RID;
 498
 499         // Sort arguments by their position.
 500         if (const Argument *LA = dyn_cast<Argument>(LV)) {
 501           const Argument *RA = cast<Argument>(RV);
 502           unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
 503           return (int)LArgNo - (int)RArgNo;
 504         }
 505
 506         // For instructions, compare their loop depth, and their operand
 507         // count.  This is pretty loose.
 508         if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
 509           const Instruction *RInst = cast<Instruction>(RV);
 510
 511           // Compare loop depths.
 512           const BasicBlock *LParent = LInst->getParent(),
 513                            *RParent = RInst->getParent();
 514           if (LParent != RParent) {
 515             unsigned LDepth = LI->getLoopDepth(LParent),
 516                      RDepth = LI->getLoopDepth(RParent);
 517             if (LDepth != RDepth)
 518               return (int)LDepth - (int)RDepth;
 519           }
 520
 521           // Compare the number of operands.
 522           unsigned LNumOps = LInst->getNumOperands(),
 523                    RNumOps = RInst->getNumOperands();
 524           return (int)LNumOps - (int)RNumOps;
 525         }
 526
 527         return 0;
 528       }
 529
 530       case scConstant: {
 531         const SCEVConstant *LC = cast<SCEVConstant>(LHS);
 532         const SCEVConstant *RC = cast<SCEVConstant>(RHS);
 533
 534         // Compare constant values.
 535         const APInt &LA = LC->getValue()->getValue();
 536         const APInt &RA = RC->getValue()->getValue();
 537         unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
 538         if (LBitWidth != RBitWidth)
 539           return (int)LBitWidth - (int)RBitWidth;
 540         return LA.ult(RA) ? -1 : 1;
 541       }
 542
 543       case scAddRecExpr: {
 544         const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
 545         const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
 546
 547         // Compare addrec loop depths.
 548         const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
 549         if (LLoop != RLoop) {
 550           unsigned LDepth = LLoop->getLoopDepth(),
 551                    RDepth = RLoop->getLoopDepth();
 552           if (LDepth != RDepth)
 553             return (int)LDepth - (int)RDepth;
 554         }
 555
 556         // Addrec complexity grows with operand count.
 557         unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
 558         if (LNumOps != RNumOps)
 559           return (int)LNumOps - (int)RNumOps;
 560
 561         // Lexicographically compare.
 562         for (unsigned i = 0; i != LNumOps; ++i) {
 563           long X = compare(LA->getOperand(i), RA->getOperand(i));
 564           if (X != 0)
 565             return X;
 566         }
 567
 568         return 0;
 569       }
 570
 571       case scAddExpr:
 572       case scMulExpr:
 573       case scSMaxExpr:
 574       case scUMaxExpr: {
 575         const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
 576         const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
 577
 578         // Lexicographically compare n-ary expressions.
 579         unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
 580         if (LNumOps != RNumOps)
 581           return (int)LNumOps - (int)RNumOps;
 582
 583         for (unsigned i = 0; i != LNumOps; ++i) {
 584           if (i >= RNumOps)
 585             return 1;
 586           long X = compare(LC->getOperand(i), RC->getOperand(i));
 587           if (X != 0)
 588             return X;
 589         }
 590         return (int)LNumOps - (int)RNumOps;
 591       }
 592
 593       case scUDivExpr: {
 594         const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
 595         const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
 596
 597         // Lexicographically compare udiv expressions.
 598         long X = compare(LC->getLHS(), RC->getLHS());
 599         if (X != 0)
 600           return X;
 601         return compare(LC->getRHS(), RC->getRHS());
 602       }
 603
 604       case scTruncate:
 605       case scZeroExtend:
 606       case scSignExtend: {
 607         const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
 608         const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
 609
 610         // Compare cast expressions by operand.
 611         return compare(LC->getOperand(), RC->getOperand());
 612       }
 613
 614       case scCouldNotCompute:
 615         llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
 616       }
 617       llvm_unreachable("Unknown SCEV kind!");
 618     }
 619   };
 620 }
 621
 622 /// GroupByComplexity - Given a list of SCEV objects, order them by their
 623 /// complexity, and group objects of the same complexity together by value.
 624 /// When this routine is finished, we know that any duplicates in the vector are
 625 /// consecutive and that complexity is monotonically increasing.
 626 ///
 627 /// Note that we go take special precautions to ensure that we get deterministic
 628 /// results from this routine.  In other words, we don't want the results of
 629 /// this to depend on where the addresses of various SCEV objects happened to
 630 /// land in memory.
 631 ///
 632 static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
 633                               LoopInfo *LI) {
 634   if (Ops.size() < 2) return;  // Noop
 635   if (Ops.size() == 2) {
 636     // This is the common case, which also happens to be trivially simple.
 637     // Special case it.
 638     const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
 639     if (SCEVComplexityCompare(LI)(RHS, LHS))
 640       std::swap(LHS, RHS);
 641     return;
 642   }
 643
 644   // Do the rough sort by complexity.
 645   std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
 646
 647   // Now that we are sorted by complexity, group elements of the same
 648   // complexity.  Note that this is, at worst, N^2, but the vector is likely to
 649   // be extremely short in practice.  Note that we take this approach because we
 650   // do not want to depend on the addresses of the objects we are grouping.
 651   for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
 652     const SCEV *S = Ops[i];
 653     unsigned Complexity = S->getSCEVType();
 654
 655     // If there are any objects of the same complexity and same value as this
 656     // one, group them.
 657     for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
 658       if (Ops[j] == S) { // Found a duplicate.
 659         // Move it to immediately after i'th element.
 660         std::swap(Ops[i+1], Ops[j]);
 661         ++i;   // no need to rescan it.
 662         if (i == e-2) return;  // Done!
 663       }
 664     }
 665   }
 666 }
 667
 668 namespace {
 669 struct FindSCEVSize {
 670   int Size;
 671   FindSCEVSize() : Size(0) {}
 672
 673   bool follow(const SCEV *S) {
 674     ++Size;
 675     // Keep looking at all operands of S.
 676     return true;
 677   }
 678   bool isDone() const {
 679     return false;
 680   }
 681 };
 682 }
 683
 684 // Returns the size of the SCEV S.
 685 static inline int sizeOfSCEV(const SCEV *S) {
 686   FindSCEVSize F;
 687   SCEVTraversal<FindSCEVSize> ST(F);
 688   ST.visitAll(S);
 689   return F.Size;
 690 }
 691
 692 namespace {
 693
 694 struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
 695 public:
 696   // Computes the Quotient and Remainder of the division of Numerator by
 697   // Denominator.
 698   static void divide(ScalarEvolution &SE, const SCEV *Numerator,
 699                      const SCEV *Denominator, const SCEV **Quotient,
 700                      const SCEV **Remainder) {
 701     assert(Numerator && Denominator && "Uninitialized SCEV");
 702
 703     SCEVDivision D(SE, Numerator, Denominator);
 704
 705     // Check for the trivial case here to avoid having to check for it in the
 706     // rest of the code.
 707     if (Numerator == Denominator) {
 708       *Quotient = D.One;
 709       *Remainder = D.Zero;
 710       return;
 711     }
 712
 713     if (Numerator->isZero()) {
 714       *Quotient = D.Zero;
 715       *Remainder = D.Zero;
 716       return;
 717     }
 718
 719     // A simple case when N/1. The quotient is N.
 720     if (Denominator->isOne()) {
 721       *Quotient = Numerator;
 722       *Remainder = D.Zero;
 723       return;
 724     }
 725
 726     // Split the Denominator when it is a product.
 727     if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
 728       const SCEV *Q, *R;
 729       *Quotient = Numerator;
 730       for (const SCEV *Op : T->operands()) {
 731         divide(SE, *Quotient, Op, &Q, &R);
 732         *Quotient = Q;
 733
 734         // Bail out when the Numerator is not divisible by one of the terms of
 735         // the Denominator.
 736         if (!R->isZero()) {
 737           *Quotient = D.Zero;
 738           *Remainder = Numerator;
 739           return;
 740         }
 741       }
 742       *Remainder = D.Zero;
 743       return;
 744     }
 745
 746     D.visit(Numerator);
 747     *Quotient = D.Quotient;
 748     *Remainder = D.Remainder;
 749   }
 750
 751   // Except in the trivial case described above, we do not know how to divide
 752   // Expr by Denominator for the following functions with empty implementation.
 753   void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
 754   void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
 755   void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
 756   void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
 757   void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
 758   void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
 759   void visitUnknown(const SCEVUnknown *Numerator) {}
 760   void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
 761
 762   void visitConstant(const SCEVConstant *Numerator) {
 763     if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
 764       APInt NumeratorVal = Numerator->getValue()->getValue();
 765       APInt DenominatorVal = D->getValue()->getValue();
 766       uint32_t NumeratorBW = NumeratorVal.getBitWidth();
 767       uint32_t DenominatorBW = DenominatorVal.getBitWidth();
 768
 769       if (NumeratorBW > DenominatorBW)
 770         DenominatorVal = DenominatorVal.sext(NumeratorBW);
 771       else if (NumeratorBW < DenominatorBW)
 772         NumeratorVal = NumeratorVal.sext(DenominatorBW);
 773
 774       APInt QuotientVal(NumeratorVal.getBitWidth(), 0);
 775       APInt RemainderVal(NumeratorVal.getBitWidth(), 0);
 776       APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal);
 777       Quotient = SE.getConstant(QuotientVal);
 778       Remainder = SE.getConstant(RemainderVal);
 779       return;
 780     }
 781   }
 782
 783   void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
 784     const SCEV *StartQ, *StartR, *StepQ, *StepR;
 785     assert(Numerator->isAffine() && "Numerator should be affine");
 786     divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
 787     divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
 788     // Bail out if the types do not match.
 789     Type *Ty = Denominator->getType();
 790     if (Ty != StartQ->getType() || Ty != StartR->getType() ||
 791         Ty != StepQ->getType() || Ty != StepR->getType()) {
 792       Quotient = Zero;
 793       Remainder = Numerator;
 794       return;
 795     }
 796     Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
 797                                 Numerator->getNoWrapFlags());
 798     Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
 799                                  Numerator->getNoWrapFlags());
 800   }
 801
 802   void visitAddExpr(const SCEVAddExpr *Numerator) {
 803     SmallVector<const SCEV *, 2> Qs, Rs;
 804     Type *Ty = Denominator->getType();
 805
 806     for (const SCEV *Op : Numerator->operands()) {
 807       const SCEV *Q, *R;
 808       divide(SE, Op, Denominator, &Q, &R);
 809
 810       // Bail out if types do not match.
 811       if (Ty != Q->getType() || Ty != R->getType()) {
 812         Quotient = Zero;
 813         Remainder = Numerator;
 814         return;
 815       }
 816
 817       Qs.push_back(Q);
 818       Rs.push_back(R);
 819     }
 820
 821     if (Qs.size() == 1) {
 822       Quotient = Qs[0];
 823       Remainder = Rs[0];
 824       return;
 825     }
 826
 827     Quotient = SE.getAddExpr(Qs);
 828     Remainder = SE.getAddExpr(Rs);
 829   }
 830
 831   void visitMulExpr(const SCEVMulExpr *Numerator) {
 832     SmallVector<const SCEV *, 2> Qs;
 833     Type *Ty = Denominator->getType();
 834
 835     bool FoundDenominatorTerm = false;
 836     for (const SCEV *Op : Numerator->operands()) {
 837       // Bail out if types do not match.
 838       if (Ty != Op->getType()) {
 839         Quotient = Zero;
 840         Remainder = Numerator;
 841         return;
 842       }
 843
 844       if (FoundDenominatorTerm) {
 845         Qs.push_back(Op);
 846         continue;
 847       }
 848
 849       // Check whether Denominator divides one of the product operands.
 850       const SCEV *Q, *R;
 851       divide(SE, Op, Denominator, &Q, &R);
 852       if (!R->isZero()) {
 853         Qs.push_back(Op);
 854         continue;
 855       }
 856
 857       // Bail out if types do not match.
 858       if (Ty != Q->getType()) {
 859         Quotient = Zero;
 860         Remainder = Numerator;
 861         return;
 862       }
 863
 864       FoundDenominatorTerm = true;
 865       Qs.push_back(Q);
 866     }
 867
 868     if (FoundDenominatorTerm) {
 869       Remainder = Zero;
 870       if (Qs.size() == 1)
 871         Quotient = Qs[0];
 872       else
 873         Quotient = SE.getMulExpr(Qs);
 874       return;
 875     }
 876
 877     if (!isa<SCEVUnknown>(Denominator)) {
 878       Quotient = Zero;
 879       Remainder = Numerator;
 880       return;
 881     }
 882
 883     // The Remainder is obtained by replacing Denominator by 0 in Numerator.
 884     ValueToValueMap RewriteMap;
 885     RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
 886         cast<SCEVConstant>(Zero)->getValue();
 887     Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
 888
 889     if (Remainder->isZero()) {
 890       // The Quotient is obtained by replacing Denominator by 1 in Numerator.
 891       RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
 892           cast<SCEVConstant>(One)->getValue();
 893       Quotient =
 894           SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
 895       return;
 896     }
 897
 898     // Quotient is (Numerator - Remainder) divided by Denominator.
 899     const SCEV *Q, *R;
 900     const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
 901     if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
 902       // This SCEV does not seem to simplify: fail the division here.
 903       Quotient = Zero;
 904       Remainder = Numerator;
 905       return;
 906     }
 907     divide(SE, Diff, Denominator, &Q, &R);
 908     assert(R == Zero &&
 909            "(Numerator - Remainder) should evenly divide Denominator");
 910     Quotient = Q;
 911   }
 912
 913 private:
 914   SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
 915                const SCEV *Denominator)
 916       : SE(S), Denominator(Denominator) {
 917     Zero = SE.getConstant(Denominator->getType(), 0);
 918     One = SE.getConstant(Denominator->getType(), 1);
 919
 920     // By default, we don't know how to divide Expr by Denominator.
 921     // Providing the default here simplifies the rest of the code.
 922     Quotient = Zero;
 923     Remainder = Numerator;
 924   }
 925
  // Analysis object used to build the SCEVs produced by the division.
  ScalarEvolution &SE;
  // Denominator: the divisor, fixed at construction.
  // Quotient, Remainder: the current result, overwritten by the visit methods.
  // Zero, One: the constants 0 and 1 in the denominator's type, created once
  // by the constructor.
  const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
 928 };
 929
 930 }
 931
 932 //===----------------------------------------------------------------------===//
 933 //                      Simple SCEV method implementations
 934 //===----------------------------------------------------------------------===//
 935
 936 /// BinomialCoefficient - Compute BC(It, K).  The result has width W.
 937 /// Assume, K > 0.
 938 static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
 939                                        ScalarEvolution &SE,
 940                                        Type *ResultTy) {
 941   // Handle the simplest case efficiently.
 942   if (K == 1)
 943     return SE.getTruncateOrZeroExtend(It, ResultTy);
 944
 945   // We are using the following formula for BC(It, K):
 946   //
 947   //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
 948   //
 949   // Suppose, W is the bitwidth of the return value.  We must be prepared for
 950   // overflow.  Hence, we must assure that the result of our computation is
 951   // equal to the accurate one modulo 2^W.  Unfortunately, division isn't
 952   // safe in modular arithmetic.
 953   //
 954   // However, this code doesn't use exactly that formula; the formula it uses
 955   // is something like the following, where T is the number of factors of 2 in
 956   // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
 957   // exponentiation:
 958   //
 959   //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
 960   //
 961   // This formula is trivially equivalent to the previous formula.  However,
 962   // this formula can be implemented much more efficiently.  The trick is that
 963   // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
 964   // arithmetic.  To do exact division in modular arithmetic, all we have
 965   // to do is multiply by the inverse.  Therefore, this step can be done at
 966   // width W.
 967   //
 968   // The next issue is how to safely do the division by 2^T.  The way this
 969   // is done is by doing the multiplication step at a width of at least W + T
 970   // bits.  This way, the bottom W+T bits of the product are accurate. Then,
 971   // when we perform the division by 2^T (which is equivalent to a right shift
 972   // by T), the bottom W bits are accurate.  Extra bits are okay; they'll get
 973   // truncated out after the division by 2^T.
 974   //
 975   // In comparison to just directly using the first formula, this technique
 976   // is much more efficient; using the first formula requires W * K bits,
 977   // but this formula less than W + K bits. Also, the first formula requires
 978   // a division step, whereas this formula only requires multiplies and shifts.
 979   //
 980   // It doesn't matter whether the subtraction step is done in the calculation
 981   // width or the input iteration count's width; if the subtraction overflows,
 982   // the result must be zero anyway.  We prefer here to do it in the width of
 983   // the induction variable because it helps a lot for certain cases; CodeGen
 984   // isn't smart enough to ignore the overflow, which leads to much less
 985   // efficient code if the width of the subtraction is wider than the native
 986   // register width.
 987   //
 988   // (It's possible to not widen at all by pulling out factors of 2 before
 989   // the multiplication; for example, K=2 can be calculated as
 990   // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
 991   // extra arithmetic, so it's not an obvious win, and it gets
 992   // much more complicated for K > 3.)
 993
 994   // Protection from insane SCEVs; this bound is conservative,
 995   // but it probably doesn't matter.
 996   if (K > 1000)
 997     return SE.getCouldNotCompute();
 998
 999   unsigned W = SE.getTypeSizeInBits(ResultTy);
1000
1001   // Calculate K! / 2^T and T; we divide out the factors of two before
1002   // multiplying for calculating K! / 2^T to avoid overflow.
1003   // Other overflow doesn't matter because we only care about the bottom
1004   // W bits of the result.
1005   APInt OddFactorial(W, 1);
1006   unsigned T = 1;
1007   for (unsigned i = 3; i <= K; ++i) {
1008     APInt Mult(W, i);
1009     unsigned TwoFactors = Mult.countTrailingZeros();
1010     T += TwoFactors;
1011     Mult = Mult.lshr(TwoFactors);
1012     OddFactorial *= Mult;
1013   }
1014
1015   // We need at least W + T bits for the multiplication step
1016   unsigned CalculationBits = W + T;
1017
1018   // Calculate 2^T, at width T+W.
1019   APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
1020
1021   // Calculate the multiplicative inverse of K! / 2^T;
1022   // this multiplication factor will perform the exact division by
1023   // K! / 2^T.
1024   APInt Mod = APInt::getSignedMinValue(W+1);
1025   APInt MultiplyFactor = OddFactorial.zext(W+1);
1026   MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
1027   MultiplyFactor = MultiplyFactor.trunc(W);
1028
1029   // Calculate the product, at width T+W
1030   IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
1031                                                       CalculationBits);
1032   const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
1033   for (unsigned i = 1; i != K; ++i) {
1034     const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
1035     Dividend = SE.getMulExpr(Dividend,
1036                              SE.getTruncateOrZeroExtend(S, CalculationTy));
1037   }
1038
1039   // Divide by 2^T
1040   const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
1041
1042   // Truncate the result, and divide by K! / 2^T.
1043
1044   return SE.getMulExpr(SE.getConstant(MultiplyFactor),
1045                        SE.getTruncateOrZeroExtend(DivResult, ResultTy));
1046 }
1047
1048 /// evaluateAtIteration - Return the value of this chain of recurrences at
1049 /// the specified iteration number.  We can evaluate this recurrence by
1050 /// multiplying each element in the chain by the binomial coefficient
1051 /// corresponding to it.  In other words, we can evaluate {A,+,B,+,C,+,D} as:
1052 ///
1053 ///   A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
1054 ///
1055 /// where BC(It, k) stands for binomial coefficient.
1056 ///
1057 const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
1058                                                 ScalarEvolution &SE) const {
1059   const SCEV *Result = getStart();
1060   for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
1061     // The computation is correct in the face of overflow provided that the
1062     // multiplication is performed _after_ the evaluation of the binomial
1063     // coefficient.
1064     const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
1065     if (isa<SCEVCouldNotCompute>(Coeff))
1066       return Coeff;
1067
1068     Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
1069   }
1070   return Result;
1071 }
1072
1073 //===----------------------------------------------------------------------===//
1074 //                    SCEV Expression folder implementations
1075 //===----------------------------------------------------------------------===//
1076
1077 const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
1078                                              Type *Ty) {
1079   assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
1080          "This is not a truncating conversion!");
1081   assert(isSCEVable(Ty) &&
1082          "This is not a conversion to a SCEVable type!");
1083   Ty = getEffectiveSCEVType(Ty);
1084
1085   FoldingSetNodeID ID;
1086   ID.AddInteger(scTruncate);
1087   ID.AddPointer(Op);
1088   ID.AddPointer(Ty);
1089   void *IP = nullptr;
1090   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
1091
1092   // Fold if the operand is constant.
1093   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
1094     return getConstant(
1095       cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
1096
1097   // trunc(trunc(x)) --> trunc(x)
1098   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
1099     return getTruncateExpr(ST->getOperand(), Ty);
1100
1101   // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
1102   if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
1103     return getTruncateOrSignExtend(SS->getOperand(), Ty);
1104
1105   // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
1106   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
1107     return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
1108
1109   // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
1110   // eliminate all the truncates, or we replace other casts with truncates.
1111   if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
1112     SmallVector<const SCEV *, 4> Operands;
1113     bool hasTrunc = false;
1114     for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
1115       const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
1116       if (!isa<SCEVCastExpr>(SA->getOperand(i)))
1117         hasTrunc = isa<SCEVTruncateExpr>(S);
1118       Operands.push_back(S);
1119     }
1120     if (!hasTrunc)
1121       return getAddExpr(Operands);
1122     UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
1123   }
1124
1125   // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
1126   // eliminate all the truncates, or we replace other casts with truncates.
1127   if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
1128     SmallVector<const SCEV *, 4> Operands;
1129     bool hasTrunc = false;
1130     for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
1131       const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
1132       if (!isa<SCEVCastExpr>(SM->getOperand(i)))
1133         hasTrunc = isa<SCEVTruncateExpr>(S);
1134       Operands.push_back(S);
1135     }
1136     if (!hasTrunc)
1137       return getMulExpr(Operands);
1138     UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
1139   }
1140
1141   // If the input value is a chrec scev, truncate the chrec's operands.
1142   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
1143     SmallVector<const SCEV *, 4> Operands;
1144     for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
1145       Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
1146     return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
1147   }
1148
1149   // The cast wasn't folded; create an explicit cast node. We can reuse
1150   // the existing insert position since if we get here, we won't have
1151   // made any changes which would invalidate it.
1152   SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
1153                                                  Op, Ty);
1154   UniqueSCEVs.InsertNode(S, IP);
1155   return S;
1156 }
1157
1158 // Get the limit of a recurrence such that incrementing by Step cannot cause
1159 // signed overflow as long as the value of the recurrence within the
1160 // loop does not exceed this limit before incrementing.
1161 static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
1162                                                  ICmpInst::Predicate *Pred,
1163                                                  ScalarEvolution *SE) {
1164   unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
1165   if (SE->isKnownPositive(Step)) {
1166     *Pred = ICmpInst::ICMP_SLT;
1167     return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
1168                            SE->getSignedRange(Step).getSignedMax());
1169   }
1170   if (SE->isKnownNegative(Step)) {
1171     *Pred = ICmpInst::ICMP_SGT;
1172     return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
1173                            SE->getSignedRange(Step).getSignedMin());
1174   }
1175   return nullptr;
1176 }
1177
1178 // Get the limit of a recurrence such that incrementing by Step cannot cause
1179 // unsigned overflow as long as the value of the recurrence within the loop does
1180 // not exceed this limit before incrementing.
1181 static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
1182                                                    ICmpInst::Predicate *Pred,
1183                                                    ScalarEvolution *SE) {
1184   unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
1185   *Pred = ICmpInst::ICMP_ULT;
1186
1187   return SE->getConstant(APInt::getMinValue(BitWidth) -
1188                          SE->getUnsignedRange(Step).getUnsignedMax());
1189 }
1190
1191 namespace {
1192
1193 struct ExtendOpTraitsBase {
1194   typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
1195 };
1196
1197 // Used to make code generic over signed and unsigned overflow.
1198 template <typename ExtendOp> struct ExtendOpTraits {
1199   // Members present:
1200   //
1201   // static const SCEV::NoWrapFlags WrapType;
1202   //
1203   // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
1204   //
1205   // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
1206   //                                           ICmpInst::Predicate *Pred,
1207   //                                           ScalarEvolution *SE);
1208 };
1209
1210 template <>
1211 struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
1212   static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;
1213
1214   static const GetExtendExprTy GetExtendExpr;
1215
1216   static const SCEV *getOverflowLimitForStep(const SCEV *Step,
1217                                              ICmpInst::Predicate *Pred,
1218                                              ScalarEvolution *SE) {
1219     return getSignedOverflowLimitForStep(Step, Pred, SE);
1220   }
1221 };
1222
1223 const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
1224     SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
1225
1226 template <>
1227 struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
1228   static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;
1229
1230   static const GetExtendExprTy GetExtendExpr;
1231
1232   static const SCEV *getOverflowLimitForStep(const SCEV *Step,
1233                                              ICmpInst::Predicate *Pred,
1234                                              ScalarEvolution *SE) {
1235     return getUnsignedOverflowLimitForStep(Step, Pred, SE);
1236   }
1237 };
1238
1239 const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
1240     SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
1241 }
1242
1243 // The recurrence AR has been shown to have no signed/unsigned wrap or something
1244 // close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
1245 // easily prove NSW/NUW for its preincrement or postincrement sibling. This
1246 // allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step +
1247 // Start),+,Step} => {(Step + sext/zext(Start),+,Step} As a result, the
1248 // expression "Step + sext/zext(PreIncAR)" is congruent with
1249 // "sext/zext(PostIncAR)"
1250 template <typename ExtendOpTy>
1251 static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
1252                                         ScalarEvolution *SE) {
1253   auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
1254   auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
1255
1256   const Loop *L = AR->getLoop();
1257   const SCEV *Start = AR->getStart();
1258   const SCEV *Step = AR->getStepRecurrence(*SE);
1259
1260   // Check for a simple looking step prior to loop entry.
1261   const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
1262   if (!SA)
1263     return nullptr;
1264
1265   // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
1266   // subtraction is expensive. For this purpose, perform a quick and dirty
1267   // difference, by checking for Step in the operand list.
1268   SmallVector<const SCEV *, 4> DiffOps;
1269   for (const SCEV *Op : SA->operands())
1270     if (Op != Step)
1271       DiffOps.push_back(Op);
1272
1273   if (DiffOps.size() == SA->getNumOperands())
1274     return nullptr;
1275
1276   // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
1277   // `Step`:
1278
1279   // 1. NSW/NUW flags on the step increment.
1280   const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
1281   const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
1282       SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
1283
1284   // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
1285   // "S+X does not sign/unsign-overflow".
1286   //
1287
1288   const SCEV *BECount = SE->getBackedgeTakenCount(L);
1289   if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
1290       !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
1291     return PreStart;
1292
1293   // 2. Direct overflow check on the step operation's expression.
1294   unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
1295   Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
1296   const SCEV *OperandExtendedStart =
1297       SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
1298                      (SE->*GetExtendExpr)(Step, WideTy));
1299   if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
1300     if (PreAR && AR->getNoWrapFlags(WrapType)) {
1301       // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
1302       // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
1303       // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`.  Cache this fact.
1304       const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
1305     }
1306     return PreStart;
1307   }
1308
1309   // 3. Loop precondition.
1310   ICmpInst::Predicate Pred;
1311   const SCEV *OverflowLimit =
1312       ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
1313
1314   if (OverflowLimit &&
1315       SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
1316     return PreStart;
1317   }
1318   return nullptr;
1319 }
1320
1321 // Get the normalized zero or sign extended expression for this AddRec's Start.
1322 template <typename ExtendOpTy>
1323 static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
1324                                         ScalarEvolution *SE) {
1325   auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
1326
1327   const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE);
1328   if (!PreStart)
1329     return (SE->*GetExtendExpr)(AR->getStart(), Ty);
1330
1331   return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty),
1332                         (SE->*GetExtendExpr)(PreStart, Ty));
1333 }
1334
1335 // Try to prove away overflow by looking at "nearby" add recurrences.  A
1336 // motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
1337 // does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
1338 //
1339 // Formally:
1340 //
1341 //     {S,+,X} == {S-T,+,X} + T
1342 //  => Ext({S,+,X}) == Ext({S-T,+,X} + T)
1343 //
1344 // If ({S-T,+,X} + T) does not overflow  ... (1)
1345 //
1346 //  RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
1347 //
1348 // If {S-T,+,X} does not overflow  ... (2)
1349 //
1350 //  RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
1351 //      == {Ext(S-T)+Ext(T),+,Ext(X)}
1352 //
1353 // If (S-T)+T does not overflow  ... (3)
1354 //
1355 //  RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
1356 //      == {Ext(S),+,Ext(X)} == LHS
1357 //
1358 // Thus, if (1), (2) and (3) are true for some T, then
1359 //   Ext({S,+,X}) == {Ext(S),+,Ext(X)}
1360 //
1361 // (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
1362 // does not overflow" restricted to the 0th iteration.  Therefore we only need
1363 // to check for (1) and (2).
1364 //
1365 // In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
1366 // is `Delta` (defined below).
1367 //
1368 template <typename ExtendOpTy>
1369 bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
1370                                                 const SCEV *Step,
1371                                                 const Loop *L) {
1372   auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
1373
1374   // We restrict `Start` to a constant to prevent SCEV from spending too much
1375   // time here.  It is correct (but more expensive) to continue with a
1376   // non-constant `Start` and do a general SCEV subtraction to compute
1377   // `PreStart` below.
1378   //
1379   const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
1380   if (!StartC)
1381     return false;
1382
1383   APInt StartAI = StartC->getValue()->getValue();
1384
1385   for (unsigned Delta : {-2, -1, 1, 2}) {
1386     const SCEV *PreStart = getConstant(StartAI - Delta);
1387
1388     // Give up if we don't already have the add recurrence we need because
1389     // actually constructing an add recurrence is relatively expensive.
1390     const SCEVAddRecExpr *PreAR = [&]() {
1391       FoldingSetNodeID ID;
1392       ID.AddInteger(scAddRecExpr);
1393       ID.AddPointer(PreStart);
1394       ID.AddPointer(Step);
1395       ID.AddPointer(L);
1396       void *IP = nullptr;
1397       return static_cast<SCEVAddRecExpr *>(
1398           this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
1399     }();
1400
1401     if (PreAR && PreAR->getNoWrapFlags(WrapType)) {  // proves (2)
1402       const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
1403       ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
1404       const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
1405           DeltaS, &Pred, this);
1406       if (Limit && isKnownPredicate(Pred, PreAR, Limit))  // proves (1)
1407         return true;
1408     }
1409   }
1410
1411   return false;
1412 }
1413
1414 const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
1415                                                Type *Ty) {
1416   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
1417          "This is not an extending conversion!");
1418   assert(isSCEVable(Ty) &&
1419          "This is not a conversion to a SCEVable type!");
1420   Ty = getEffectiveSCEVType(Ty);
1421
1422   // Fold if the operand is constant.
1423   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
1424     return getConstant(
1425       cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
1426
1427   // zext(zext(x)) --> zext(x)
1428   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
1429     return getZeroExtendExpr(SZ->getOperand(), Ty);
1430
1431   // Before doing any expensive analysis, check to see if we've already
1432   // computed a SCEV for this Op and Ty.
1433   FoldingSetNodeID ID;
1434   ID.AddInteger(scZeroExtend);
1435   ID.AddPointer(Op);
1436   ID.AddPointer(Ty);
1437   void *IP = nullptr;
1438   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
1439
1440   // zext(trunc(x)) --> zext(x) or x or trunc(x)
1441   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
1442     // It's possible the bits taken off by the truncate were all zero bits. If
1443     // so, we should be able to simplify this further.
1444     const SCEV *X = ST->getOperand();
1445     ConstantRange CR = getUnsignedRange(X);
1446     unsigned TruncBits = getTypeSizeInBits(ST->getType());
1447     unsigned NewBits = getTypeSizeInBits(Ty);
1448     if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
1449             CR.zextOrTrunc(NewBits)))
1450       return getTruncateOrZeroExtend(X, Ty);
1451   }
1452
1453   // If the input value is a chrec scev, and we can prove that the value
1454   // did not overflow the old, smaller, value, we can zero extend all of the
1455   // operands (often constants).  This allows analysis of something like
1456   // this:  for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
1457   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
1458     if (AR->isAffine()) {
1459       const SCEV *Start = AR->getStart();
1460       const SCEV *Step = AR->getStepRecurrence(*this);
1461       unsigned BitWidth = getTypeSizeInBits(AR->getType());
1462       const Loop *L = AR->getLoop();
1463
1464       // If we have special knowledge that this addrec won't overflow,
1465       // we don't need to do any further analysis.
1466       if (AR->getNoWrapFlags(SCEV::FlagNUW))
1467         return getAddRecExpr(
1468             getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
1469             getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1470
1471       // Check whether the backedge-taken count is SCEVCouldNotCompute.
1472       // Note that this serves two purposes: It filters out loops that are
1473       // simply not analyzable, and it covers the case where this code is
1474       // being called from within backedge-taken count analysis, such that
1475       // attempting to ask for the backedge-taken count would likely result
1476       // in infinite recursion. In the later case, the analysis code will
1477       // cope with a conservative value, and it will take care to purge
1478       // that value once it has finished.
1479       const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
1480       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
1481         // Manually compute the final value for AR, checking for
1482         // overflow.
1483
1484         // Check whether the backedge-taken count can be losslessly casted to
1485         // the addrec's type. The count is always unsigned.
1486         const SCEV *CastedMaxBECount =
1487           getTruncateOrZeroExtend(MaxBECount, Start->getType());
1488         const SCEV *RecastedMaxBECount =
1489           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
1490         if (MaxBECount == RecastedMaxBECount) {
1491           Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
1492           // Check whether Start+Step*MaxBECount has no unsigned overflow.
1493           const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
1494           const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
1495           const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
1496           const SCEV *WideMaxBECount =
1497             getZeroExtendExpr(CastedMaxBECount, WideTy);
1498           const SCEV *OperandExtendedAdd =
1499             getAddExpr(WideStart,
1500                        getMulExpr(WideMaxBECount,
1501                                   getZeroExtendExpr(Step, WideTy)));
1502           if (ZAdd == OperandExtendedAdd) {
1503             // Cache knowledge of AR NUW, which is propagated to this AddRec.
1504             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
1505             // Return the expression with the addrec on the outside.
1506             return getAddRecExpr(
1507                 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
1508                 getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1509           }
1510           // Similar to above, only this time treat the step value as signed.
1511           // This covers loops that count down.
1512           OperandExtendedAdd =
1513             getAddExpr(WideStart,
1514                        getMulExpr(WideMaxBECount,
1515                                   getSignExtendExpr(Step, WideTy)));
1516           if (ZAdd == OperandExtendedAdd) {
1517             // Cache knowledge of AR NW, which is propagated to this AddRec.
1518             // Negative step causes unsigned wrap, but it still can't self-wrap.
1519             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
1520             // Return the expression with the addrec on the outside.
1521             return getAddRecExpr(
1522                 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
1523                 getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1524           }
1525         }
1526
1527         // If the backedge is guarded by a comparison with the pre-inc value
1528         // the addrec is safe. Also, if the entry is guarded by a comparison
1529         // with the start value and the backedge is guarded by a comparison
1530         // with the post-inc value, the addrec is safe.
1531         if (isKnownPositive(Step)) {
1532           const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
1533                                       getUnsignedRange(Step).getUnsignedMax());
1534           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
1535               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
1536                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
1537                                            AR->getPostIncExpr(*this), N))) {
1538             // Cache knowledge of AR NUW, which is propagated to this AddRec.
1539             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
1540             // Return the expression with the addrec on the outside.
1541             return getAddRecExpr(
1542                 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
1543                 getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1544           }
1545         } else if (isKnownNegative(Step)) {
1546           const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
1547                                       getSignedRange(Step).getSignedMin());
1548           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
1549               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
1550                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
1551                                            AR->getPostIncExpr(*this), N))) {
1552             // Cache knowledge of AR NW, which is propagated to this AddRec.
1553             // Negative step causes unsigned wrap, but it still can't self-wrap.
1554             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
1555             // Return the expression with the addrec on the outside.
1556             return getAddRecExpr(
1557                 getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
1558                 getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1559           }
1560         }
1561       }
1562
1563       if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
1564         const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
1565         return getAddRecExpr(
1566             getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
1567             getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1568       }
1569     }
1570
1571   // The cast wasn't folded; create an explicit cast node.
1572   // Recompute the insert position, as it may have been invalidated.
1573   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
1574   SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
1575                                                    Op, Ty);
1576   UniqueSCEVs.InsertNode(S, IP);
1577   return S;
1578 }
1579
1580 const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
1581                                                Type *Ty) {
1582   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
1583          "This is not an extending conversion!");
1584   assert(isSCEVable(Ty) &&
1585          "This is not a conversion to a SCEVable type!");
1586   Ty = getEffectiveSCEVType(Ty);
1587
1588   // Fold if the operand is constant.
1589   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
1590     return getConstant(
1591       cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
1592
1593   // sext(sext(x)) --> sext(x)
1594   if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
1595     return getSignExtendExpr(SS->getOperand(), Ty);
1596
1597   // sext(zext(x)) --> zext(x)
1598   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
1599     return getZeroExtendExpr(SZ->getOperand(), Ty);
1600
1601   // Before doing any expensive analysis, check to see if we've already
1602   // computed a SCEV for this Op and Ty.
1603   FoldingSetNodeID ID;
1604   ID.AddInteger(scSignExtend);
1605   ID.AddPointer(Op);
1606   ID.AddPointer(Ty);
1607   void *IP = nullptr;
1608   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
1609
1610   // If the input value is provably positive, build a zext instead.
1611   if (isKnownNonNegative(Op))
1612     return getZeroExtendExpr(Op, Ty);
1613
1614   // sext(trunc(x)) --> sext(x) or x or trunc(x)
1615   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
1616     // It's possible the bits taken off by the truncate were all sign bits. If
1617     // so, we should be able to simplify this further.
1618     const SCEV *X = ST->getOperand();
1619     ConstantRange CR = getSignedRange(X);
1620     unsigned TruncBits = getTypeSizeInBits(ST->getType());
1621     unsigned NewBits = getTypeSizeInBits(Ty);
1622     if (CR.truncate(TruncBits).signExtend(NewBits).contains(
1623             CR.sextOrTrunc(NewBits)))
1624       return getTruncateOrSignExtend(X, Ty);
1625   }
1626
1627   // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
1628   if (auto SA = dyn_cast<SCEVAddExpr>(Op)) {
1629     if (SA->getNumOperands() == 2) {
1630       auto SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
1631       auto SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
1632       if (SMul && SC1) {
1633         if (auto SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
1634           const APInt &C1 = SC1->getValue()->getValue();
1635           const APInt &C2 = SC2->getValue()->getValue();
1636           if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
1637               C2.ugt(C1) && C2.isPowerOf2())
1638             return getAddExpr(getSignExtendExpr(SC1, Ty),
1639                               getSignExtendExpr(SMul, Ty));
1640         }
1641       }
1642     }
1643   }
1644   // If the input value is a chrec scev, and we can prove that the value
1645   // did not overflow the old, smaller, value, we can sign extend all of the
1646   // operands (often constants).  This allows analysis of something like
1647   // this:  for (signed char X = 0; X < 100; ++X) { int Y = X; }
1648   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
1649     if (AR->isAffine()) {
1650       const SCEV *Start = AR->getStart();
1651       const SCEV *Step = AR->getStepRecurrence(*this);
1652       unsigned BitWidth = getTypeSizeInBits(AR->getType());
1653       const Loop *L = AR->getLoop();
1654
1655       // If we have special knowledge that this addrec won't overflow,
1656       // we don't need to do any further analysis.
1657       if (AR->getNoWrapFlags(SCEV::FlagNSW))
1658         return getAddRecExpr(
1659             getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
1660             getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);
1661
1662       // Check whether the backedge-taken count is SCEVCouldNotCompute.
1663       // Note that this serves two purposes: It filters out loops that are
1664       // simply not analyzable, and it covers the case where this code is
1665       // being called from within backedge-taken count analysis, such that
1666       // attempting to ask for the backedge-taken count would likely result
1667       // in infinite recursion. In the later case, the analysis code will
1668       // cope with a conservative value, and it will take care to purge
1669       // that value once it has finished.
1670       const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
1671       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
1672         // Manually compute the final value for AR, checking for
1673         // overflow.
1674
1675         // Check whether the backedge-taken count can be losslessly casted to
1676         // the addrec's type. The count is always unsigned.
1677         const SCEV *CastedMaxBECount =
1678           getTruncateOrZeroExtend(MaxBECount, Start->getType());
1679         const SCEV *RecastedMaxBECount =
1680           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
1681         if (MaxBECount == RecastedMaxBECount) {
1682           Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
1683           // Check whether Start+Step*MaxBECount has no signed overflow.
1684           const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
1685           const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
1686           const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
1687           const SCEV *WideMaxBECount =
1688             getZeroExtendExpr(CastedMaxBECount, WideTy);
1689           const SCEV *OperandExtendedAdd =
1690             getAddExpr(WideStart,
1691                        getMulExpr(WideMaxBECount,
1692                                   getSignExtendExpr(Step, WideTy)));
1693           if (SAdd == OperandExtendedAdd) {
1694             // Cache knowledge of AR NSW, which is propagated to this AddRec.
1695             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
1696             // Return the expression with the addrec on the outside.
1697             return getAddRecExpr(
1698                 getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
1699                 getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1700           }
1701           // Similar to above, only this time treat the step value as unsigned.
1702           // This covers loops that count up with an unsigned step.
1703           OperandExtendedAdd =
1704             getAddExpr(WideStart,
1705                        getMulExpr(WideMaxBECount,
1706                                   getZeroExtendExpr(Step, WideTy)));
1707           if (SAdd == OperandExtendedAdd) {
1708             // If AR wraps around then
1709             //
1710             //    abs(Step) * MaxBECount > unsigned-max(AR->getType())
1711             // => SAdd != OperandExtendedAdd
1712             //
1713             // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
1714             // (SAdd == OperandExtendedAdd => AR is NW)
1715
1716             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
1717
1718             // Return the expression with the addrec on the outside.
1719             return getAddRecExpr(
1720                 getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
1721                 getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1722           }
1723         }
1724
1725         // If the backedge is guarded by a comparison with the pre-inc value
1726         // the addrec is safe. Also, if the entry is guarded by a comparison
1727         // with the start value and the backedge is guarded by a comparison
1728         // with the post-inc value, the addrec is safe.
1729         ICmpInst::Predicate Pred;
1730         const SCEV *OverflowLimit =
1731             getSignedOverflowLimitForStep(Step, &Pred, this);
1732         if (OverflowLimit &&
1733             (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
1734              (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
1735               isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
1736                                           OverflowLimit)))) {
1737           // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
1738           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
1739           return getAddRecExpr(
1740               getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
1741               getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1742         }
1743       }
1744       // If Start and Step are constants, check if we can apply this
1745       // transformation:
1746       // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
1747       auto SC1 = dyn_cast<SCEVConstant>(Start);
1748       auto SC2 = dyn_cast<SCEVConstant>(Step);
1749       if (SC1 && SC2) {
1750         const APInt &C1 = SC1->getValue()->getValue();
1751         const APInt &C2 = SC2->getValue()->getValue();
1752         if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
1753             C2.isPowerOf2()) {
1754           Start = getSignExtendExpr(Start, Ty);
1755           const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step,
1756                                             L, AR->getNoWrapFlags());
1757           return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
1758         }
1759       }
1760
1761       if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
1762         const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
1763         return getAddRecExpr(
1764             getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
1765             getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
1766       }
1767     }
1768
1769   // The cast wasn't folded; create an explicit cast node.
1770   // Recompute the insert position, as it may have been invalidated.
1771   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
1772   SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
1773                                                    Op, Ty);
1774   UniqueSCEVs.InsertNode(S, IP);
1775   return S;
1776 }
1777
1778 /// getAnyExtendExpr - Return a SCEV for the given operand extended with
1779 /// unspecified bits out to the given type.
1780 ///
1781 const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
1782                                               Type *Ty) {
1783   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
1784          "This is not an extending conversion!");
1785   assert(isSCEVable(Ty) &&
1786          "This is not a conversion to a SCEVable type!");
1787   Ty = getEffectiveSCEVType(Ty);
1788
1789   // Sign-extend negative constants.
1790   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
1791     if (SC->getValue()->getValue().isNegative())
1792       return getSignExtendExpr(Op, Ty);
1793
1794   // Peel off a truncate cast.
1795   if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
1796     const SCEV *NewOp = T->getOperand();
1797     if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
1798       return getAnyExtendExpr(NewOp, Ty);
1799     return getTruncateOrNoop(NewOp, Ty);
1800   }
1801
1802   // Next try a zext cast. If the cast is folded, use it.
1803   const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
1804   if (!isa<SCEVZeroExtendExpr>(ZExt))
1805     return ZExt;
1806
1807   // Next try a sext cast. If the cast is folded, use it.
1808   const SCEV *SExt = getSignExtendExpr(Op, Ty);
1809   if (!isa<SCEVSignExtendExpr>(SExt))
1810     return SExt;
1811
1812   // Force the cast to be folded into the operands of an addrec.
1813   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
1814     SmallVector<const SCEV *, 4> Ops;
1815     for (const SCEV *Op : AR->operands())
1816       Ops.push_back(getAnyExtendExpr(Op, Ty));
1817     return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
1818   }
1819
1820   // If the expression is obviously signed, use the sext cast value.
1821   if (isa<SCEVSMaxExpr>(Op))
1822     return SExt;
1823
1824   // Absent any other information, use the zext cast value.
1825   return ZExt;
1826 }
1827
1828 /// CollectAddOperandsWithScales - Process the given Ops list, which is
1829 /// a list of operands to be added under the given scale, update the given
1830 /// map. This is a helper function for getAddRecExpr. As an example of
1831 /// what it does, given a sequence of operands that would form an add
1832 /// expression like this:
1833 ///
1834 ///    m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
1835 ///
1836 /// where A and B are constants, update the map with these values:
1837 ///
1838 ///    (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
1839 ///
1840 /// and add 13 + A*B*29 to AccumulatedConstant.
1841 /// This will allow getAddRecExpr to produce this:
1842 ///
1843 ///    13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
1844 ///
1845 /// This form often exposes folding opportunities that are hidden in
1846 /// the original operand list.
1847 ///
1848 /// Return true iff it appears that any interesting folding opportunities
1849 /// may be exposed. This helps getAddRecExpr short-circuit extra work in
1850 /// the common case where no interesting opportunities are present, and
1851 /// is also used as a check to avoid infinite recursion.
1852 ///
1853 static bool
1854 CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
1855                              SmallVectorImpl<const SCEV *> &NewOps,
1856                              APInt &AccumulatedConstant,
1857                              const SCEV *const *Ops, size_t NumOperands,
1858                              const APInt &Scale,
1859                              ScalarEvolution &SE) {
1860   bool Interesting = false;
1861
1862   // Iterate over the add operands. They are sorted, with constants first.
1863   unsigned i = 0;
1864   while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
1865     ++i;
1866     // Pull a buried constant out to the outside.
1867     if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
1868       Interesting = true;
1869     AccumulatedConstant += Scale * C->getValue()->getValue();
1870   }
1871
1872   // Next comes everything else. We're especially interested in multiplies
1873   // here, but they're in the middle, so just visit the rest with one loop.
1874   for (; i != NumOperands; ++i) {
1875     const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
1876     if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
1877       APInt NewScale =
1878         Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
1879       if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
1880         // A multiplication of a constant with another add; recurse.
1881         const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
1882         Interesting |=
1883           CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
1884                                        Add->op_begin(), Add->getNumOperands(),
1885                                        NewScale, SE);
1886       } else {
1887         // A multiplication of a constant with some other value. Update
1888         // the map.
1889         SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
1890         const SCEV *Key = SE.getMulExpr(MulOps);
1891         std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
1892           M.insert(std::make_pair(Key, NewScale));
1893         if (Pair.second) {
1894           NewOps.push_back(Pair.first->first);
1895         } else {
1896           Pair.first->second += NewScale;
1897           // The map already had an entry for this value, which may indicate
1898           // a folding opportunity.
1899           Interesting = true;
1900         }
1901       }
1902     } else {
1903       // An ordinary operand. Update the map.
1904       std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
1905         M.insert(std::make_pair(Ops[i], Scale));
1906       if (Pair.second) {
1907         NewOps.push_back(Pair.first->first);
1908       } else {
1909         Pair.first->second += Scale;
1910         // The map already had an entry for this value, which may indicate
1911         // a folding opportunity.
1912         Interesting = true;
1913       }
1914     }
1915   }
1916
1917   return Interesting;
1918 }
1919
1920 namespace {
1921   struct APIntCompare {
1922     bool operator()(const APInt &LHS, const APInt &RHS) const {
1923       return LHS.ult(RHS);
1924     }
1925   };
1926 }
1927
1928 // We're trying to construct a SCEV of type `Type' with `Ops' as operands and
1929 // `OldFlags' as can't-wrap behavior.  Infer a more aggressive set of
1930 // can't-overflow flags for the operation if possible.
1931 static SCEV::NoWrapFlags
1932 StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
1933                       const SmallVectorImpl<const SCEV *> &Ops,
1934                       SCEV::NoWrapFlags OldFlags) {
1935   using namespace std::placeholders;
1936
1937   bool CanAnalyze =
1938       Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
1939   (void)CanAnalyze;
1940   assert(CanAnalyze && "don't call from other places!");
1941
1942   int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
1943   SCEV::NoWrapFlags SignOrUnsignWrap =
1944       ScalarEvolution::maskFlags(OldFlags, SignOrUnsignMask);
1945
1946   // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
1947   auto IsKnownNonNegative =
1948     std::bind(std::mem_fn(&ScalarEvolution::isKnownNonNegative), SE, _1);
1949
1950   if (SignOrUnsignWrap == SCEV::FlagNSW &&
1951       std::all_of(Ops.begin(), Ops.end(), IsKnownNonNegative))
1952     return ScalarEvolution::setFlags(OldFlags,
1953                                      (SCEV::NoWrapFlags)SignOrUnsignMask);
1954
1955   return OldFlags;
1956 }
1957
1958 /// getAddExpr - Get a canonical add expression, or something simpler if
1959 /// possible.
1960 const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
1961                                         SCEV::NoWrapFlags Flags) {
1962   assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
1963          "only nuw or nsw allowed");
1964   assert(!Ops.empty() && "Cannot get empty add!");
1965   if (Ops.size() == 1) return Ops[0];
1966 #ifndef NDEBUG
1967   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
1968   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
1969     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
1970            "SCEVAddExpr operand types don't match!");
1971 #endif
1972
1973   Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
1974
1975   // Sort by complexity, this groups all similar expression types together.
1976   GroupByComplexity(Ops, &LI);
1977
1978   // If there are any constants, fold them together.
1979   unsigned Idx = 0;
1980   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
1981     ++Idx;
1982     assert(Idx < Ops.size());
1983     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
1984       // We found two constants, fold them together!
1985       Ops[0] = getConstant(LHSC->getValue()->getValue() +
1986                            RHSC->getValue()->getValue());
1987       if (Ops.size() == 2) return Ops[0];
1988       Ops.erase(Ops.begin()+1);  // Erase the folded element
1989       LHSC = cast<SCEVConstant>(Ops[0]);
1990     }
1991
1992     // If we are left with a constant zero being added, strip it off.
1993     if (LHSC->getValue()->isZero()) {
1994       Ops.erase(Ops.begin());
1995       --Idx;
1996     }
1997
1998     if (Ops.size() == 1) return Ops[0];
1999   }
2000
2001   // Okay, check to see if the same value occurs in the operand list more than
2002   // once.  If so, merge them together into an multiply expression.  Since we
2003   // sorted the list, these values are required to be adjacent.
2004   Type *Ty = Ops[0]->getType();
2005   bool FoundMatch = false;
2006   for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
2007     if (Ops[i] == Ops[i+1]) {      //  X + Y + Y  -->  X + Y*2
2008       // Scan ahead to count how many equal operands there are.
2009       unsigned Count = 2;
2010       while (i+Count != e && Ops[i+Count] == Ops[i])
2011         ++Count;
2012       // Merge the values into a multiply.
2013       const SCEV *Scale = getConstant(Ty, Count);
2014       const SCEV *Mul = getMulExpr(Scale, Ops[i]);
2015       if (Ops.size() == Count)
2016         return Mul;
2017       Ops[i] = Mul;
2018       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
2019       --i; e -= Count - 1;
2020       FoundMatch = true;
2021     }
2022   if (FoundMatch)
2023     return getAddExpr(Ops, Flags);
2024
2025   // Check for truncates. If all the operands are truncated from the same
2026   // type, see if factoring out the truncate would permit the result to be
2027   // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
2028   // if the contents of the resulting outer trunc fold to something simple.
2029   for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
2030     const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
2031     Type *DstType = Trunc->getType();
2032     Type *SrcType = Trunc->getOperand()->getType();
2033     SmallVector<const SCEV *, 8> LargeOps;
2034     bool Ok = true;
2035     // Check all the operands to see if they can be represented in the
2036     // source type of the truncate.
2037     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
2038       if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
2039         if (T->getOperand()->getType() != SrcType) {
2040           Ok = false;
2041           break;
2042         }
2043         LargeOps.push_back(T->getOperand());
2044       } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
2045         LargeOps.push_back(getAnyExtendExpr(C, SrcType));
2046       } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
2047         SmallVector<const SCEV *, 8> LargeMulOps;
2048         for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
2049           if (const SCEVTruncateExpr *T =
2050                 dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
2051             if (T->getOperand()->getType() != SrcType) {
2052               Ok = false;
2053               break;
2054             }
2055             LargeMulOps.push_back(T->getOperand());
2056           } else if (const SCEVConstant *C =
2057                        dyn_cast<SCEVConstant>(M->getOperand(j))) {
2058             LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
2059           } else {
2060             Ok = false;
2061             break;
2062           }
2063         }
2064         if (Ok)
2065           LargeOps.push_back(getMulExpr(LargeMulOps));
2066       } else {
2067         Ok = false;
2068         break;
2069       }
2070     }
2071     if (Ok) {
2072       // Evaluate the expression in the larger type.
2073       const SCEV *Fold = getAddExpr(LargeOps, Flags);
2074       // If it folds to something simple, use it. Otherwise, don't.
2075       if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
2076         return getTruncateExpr(Fold, DstType);
2077     }
2078   }
2079
2080   // Skip past any other cast SCEVs.
2081   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
2082     ++Idx;
2083
2084   // If there are add operands they would be next.
2085   if (Idx < Ops.size()) {
2086     bool DeletedAdd = false;
2087     while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
2088       // If we have an add, expand the add operands onto the end of the operands
2089       // list.
2090       Ops.erase(Ops.begin()+Idx);
2091       Ops.append(Add->op_begin(), Add->op_end());
2092       DeletedAdd = true;
2093     }
2094
2095     // If we deleted at least one add, we added operands to the end of the list,
2096     // and they are not necessarily sorted.  Recurse to resort and resimplify
2097     // any operands we just acquired.
2098     if (DeletedAdd)
2099       return getAddExpr(Ops);
2100   }
2101
2102   // Skip over the add expression until we get to a multiply.
2103   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
2104     ++Idx;
2105
2106   // Check to see if there are any folding opportunities present with
2107   // operands multiplied by constant values.
2108   if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
2109     uint64_t BitWidth = getTypeSizeInBits(Ty);
2110     DenseMap<const SCEV *, APInt> M;
2111     SmallVector<const SCEV *, 8> NewOps;
2112     APInt AccumulatedConstant(BitWidth, 0);
2113     if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
2114                                      Ops.data(), Ops.size(),
2115                                      APInt(BitWidth, 1), *this)) {
2116       // Some interesting folding opportunity is present, so its worthwhile to
2117       // re-generate the operands list. Group the operands by constant scale,
2118       // to avoid multiplying by the same constant scale multiple times.
2119       std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
2120       for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(),
2121            E = NewOps.end(); I != E; ++I)
2122         MulOpLists[M.find(*I)->second].push_back(*I);
2123       // Re-generate the operands list.
2124       Ops.clear();
2125       if (AccumulatedConstant != 0)
2126         Ops.push_back(getConstant(AccumulatedConstant));
2127       for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
2128            I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
2129         if (I->first != 0)
2130           Ops.push_back(getMulExpr(getConstant(I->first),
2131                                    getAddExpr(I->second)));
2132       if (Ops.empty())
2133         return getConstant(Ty, 0);
2134       if (Ops.size() == 1)
2135         return Ops[0];
2136       return getAddExpr(Ops);
2137     }
2138   }
2139
2140   // If we are adding something to a multiply expression, make sure the
2141   // something is not already an operand of the multiply.  If so, merge it into
2142   // the multiply.
2143   for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
2144     const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
2145     for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
2146       const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
2147       if (isa<SCEVConstant>(MulOpSCEV))
2148         continue;
2149       for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
2150         if (MulOpSCEV == Ops[AddOp]) {
2151           // Fold W + X + (X * Y * Z)  -->  W + (X * ((Y*Z)+1))
2152           const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
2153           if (Mul->getNumOperands() != 2) {
2154             // If the multiply has more than two operands, we must get the
2155             // Y*Z term.
2156             SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
2157                                                 Mul->op_begin()+MulOp);
2158             MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
2159             InnerMul = getMulExpr(MulOps);
2160           }
2161           const SCEV *One = getConstant(Ty, 1);
2162           const SCEV *AddOne = getAddExpr(One, InnerMul);
2163           const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
2164           if (Ops.size() == 2) return OuterMul;
2165           if (AddOp < Idx) {
2166             Ops.erase(Ops.begin()+AddOp);
2167             Ops.erase(Ops.begin()+Idx-1);
2168           } else {
2169             Ops.erase(Ops.begin()+Idx);
2170             Ops.erase(Ops.begin()+AddOp-1);
2171           }
2172           Ops.push_back(OuterMul);
2173           return getAddExpr(Ops);
2174         }
2175
2176       // Check this multiply against other multiplies being added together.
2177       for (unsigned OtherMulIdx = Idx+1;
2178            OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
2179            ++OtherMulIdx) {
2180         const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
2181         // If MulOp occurs in OtherMul, we can fold the two multiplies
2182         // together.
2183         for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
2184              OMulOp != e; ++OMulOp)
2185           if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
2186             // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
2187             const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
2188             if (Mul->getNumOperands() != 2) {
2189               SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
2190                                                   Mul->op_begin()+MulOp);
2191               MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
2192               InnerMul1 = getMulExpr(MulOps);
2193             }
2194             const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
2195             if (OtherMul->getNumOperands() != 2) {
2196               SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
2197                                                   OtherMul->op_begin()+OMulOp);
2198               MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
2199               InnerMul2 = getMulExpr(MulOps);
2200             }
2201             const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
2202             const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
2203             if (Ops.size() == 2) return OuterMul;
2204             Ops.erase(Ops.begin()+Idx);
2205             Ops.erase(Ops.begin()+OtherMulIdx-1);
2206             Ops.push_back(OuterMul);
2207             return getAddExpr(Ops);
2208           }
2209       }
2210     }
2211   }
2212
2213   // If there are any add recurrences in the operands list, see if any other
2214   // added values are loop invariant.  If so, we can fold them into the
2215   // recurrence.
2216   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
2217     ++Idx;
2218
2219   // Scan over all recurrences, trying to fold loop invariants into them.
2220   for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
2221     // Scan all of the other operands to this add and add them to the vector if
2222     // they are loop invariant w.r.t. the recurrence.
2223     SmallVector<const SCEV *, 8> LIOps;
2224     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
2225     const Loop *AddRecLoop = AddRec->getLoop();
2226     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2227       if (isLoopInvariant(Ops[i], AddRecLoop)) {
2228         LIOps.push_back(Ops[i]);
2229         Ops.erase(Ops.begin()+i);
2230         --i; --e;
2231       }
2232
2233     // If we found some loop invariants, fold them into the recurrence.
2234     if (!LIOps.empty()) {
2235       //  NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}
2236       LIOps.push_back(AddRec->getStart());
2237
2238       SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
2239                                              AddRec->op_end());
2240       AddRecOps[0] = getAddExpr(LIOps);
2241
2242       // Build the new addrec. Propagate the NUW and NSW flags if both the
2243       // outer add and the inner addrec are guaranteed to have no overflow.
2244       // Always propagate NW.
2245       Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
2246       const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
2247
2248       // If all of the other operands were loop invariant, we are done.
2249       if (Ops.size() == 1) return NewRec;
2250
2251       // Otherwise, add the folded AddRec by the non-invariant parts.
2252       for (unsigned i = 0;; ++i)
2253         if (Ops[i] == AddRec) {
2254           Ops[i] = NewRec;
2255           break;
2256         }
2257       return getAddExpr(Ops);
2258     }
2259
2260     // Okay, if there weren't any loop invariants to be folded, check to see if
2261     // there are multiple AddRec's with the same loop induction variable being
2262     // added together.  If so, we can fold them.
2263     for (unsigned OtherIdx = Idx+1;
2264          OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
2265          ++OtherIdx)
2266       if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
2267         // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
2268         SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
2269                                                AddRec->op_end());
2270         for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
2271              ++OtherIdx)
2272           if (const SCEVAddRecExpr *OtherAddRec =
2273                 dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
2274             if (OtherAddRec->getLoop() == AddRecLoop) {
2275               for (unsigned i = 0, e = OtherAddRec->getNumOperands();
2276                    i != e; ++i) {
2277                 if (i >= AddRecOps.size()) {
2278                   AddRecOps.append(OtherAddRec->op_begin()+i,
2279                                    OtherAddRec->op_end());
2280                   break;
2281                 }
2282                 AddRecOps[i] = getAddExpr(AddRecOps[i],
2283                                           OtherAddRec->getOperand(i));
2284               }
2285               Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
2286             }
2287         // Step size has changed, so we cannot guarantee no self-wraparound.
2288         Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
2289         return getAddExpr(Ops);
2290       }
2291
2292     // Otherwise couldn't fold anything into this recurrence.  Move onto the
2293     // next one.
2294   }
2295
2296   // Okay, it looks like we really DO need an add expr.  Check to see if we
2297   // already have one, otherwise create a new one.
2298   FoldingSetNodeID ID;
2299   ID.AddInteger(scAddExpr);
2300   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2301     ID.AddPointer(Ops[i]);
2302   void *IP = nullptr;
2303   SCEVAddExpr *S =
2304     static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
2305   if (!S) {
2306     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
2307     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
2308     S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
2309                                         O, Ops.size());
2310     UniqueSCEVs.InsertNode(S, IP);
2311   }
2312   S->setNoWrapFlags(Flags);
2313   return S;
2314 }
2315
/// Multiply two unsigned 64-bit values and return the product reduced modulo
/// 2^64.  Overflow is set to true when the exact product does not fit in 64
/// bits; it is never reset to false, so a caller can accumulate the flag over
/// a sequence of calls.
static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
  const uint64_t Product = i * j;
  // A multiplier of 0 or 1 can never wrap.  For anything larger, dividing the
  // (possibly wrapped) product by j gives back i exactly when nothing wrapped.
  const bool Wrapped = j > 1 && Product / j != i;
  if (Wrapped)
    Overflow = true;
  return Product;
}
2321
2322 /// Compute the result of "n choose k", the binomial coefficient.  If an
2323 /// intermediate computation overflows, Overflow will be set and the return will
2324 /// be garbage. Overflow is not cleared on absence of overflow.
2325 static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
2326   // We use the multiplicative formula:
2327   //     n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
2328   // At each iteration, we take the n-th term of the numeral and divide by the
2329   // (k-n)th term of the denominator.  This division will always produce an
2330   // integral result, and helps reduce the chance of overflow in the
2331   // intermediate computations. However, we can still overflow even when the
2332   // final result would fit.
2333
2334   if (n == 0 || n == k) return 1;
2335   if (k > n) return 0;
2336
2337   if (k > n/2)
2338     k = n-k;
2339
2340   uint64_t r = 1;
2341   for (uint64_t i = 1; i <= k; ++i) {
2342     r = umul_ov(r, n-(i-1), Overflow);
2343     r /= i;
2344   }
2345   return r;
2346 }
2347
2348 /// Determine if any of the operands in this SCEV are a constant or if
2349 /// any of the add or multiply expressions in this SCEV contain a constant.
2350 static bool containsConstantSomewhere(const SCEV *StartExpr) {
2351   SmallVector<const SCEV *, 4> Ops;
2352   Ops.push_back(StartExpr);
2353   while (!Ops.empty()) {
2354     const SCEV *CurrentExpr = Ops.pop_back_val();
2355     if (isa<SCEVConstant>(*CurrentExpr))
2356       return true;
2357
2358     if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) {
2359       const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr);
2360       Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end());
2361     }
2362   }
2363   return false;
2364 }
2365
2366 /// getMulExpr - Get a canonical multiply expression, or something simpler if
2367 /// possible.
     ///
     /// Ops is the list of factors; it is reordered and rewritten in place while
     /// simplifying.  Flags may contain only FlagNUW and/or FlagNSW (asserted
     /// below).  Several early exits return a simplified expression directly;
     /// otherwise the uniqued SCEVMulExpr over the remaining operands is
     /// returned, with Flags set on it.
2368 const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
2369                                         SCEV::NoWrapFlags Flags) {
2370   assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
2371          "only nuw or nsw allowed");
2372   assert(!Ops.empty() && "Cannot get empty mul!");
       // A single factor is already the whole product.
2373   if (Ops.size() == 1) return Ops[0];
2374 #ifndef NDEBUG
       // Debug builds only: every factor must have the same effective type.
2375   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
2376   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
2377     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
2378            "SCEVMulExpr operand types don't match!");
2379 #endif
2380
2381   Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
2382
2383   // Sort by complexity, this groups all similar expression types together.
2384   GroupByComplexity(Ops, &LI);
2385
2386   // If there are any constants, fold them together.
       // The code below only ever looks for constants at Ops[0] and Ops[1], so it
       // relies on the sort having moved them to the front of Ops.
2387   unsigned Idx = 0;
2388   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
2389
2390     // C1*(C2+V) -> C1*C2 + C1*V
         // Only attempted for exactly two factors: the constant and a
         // two-operand add.
2391     if (Ops.size() == 2)
2392         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
2393           // If any of Add's ops are Adds or Muls with a constant,
2394           // apply this transformation as well.
2395           if (Add->getNumOperands() == 2)
2396             if (containsConstantSomewhere(Add))
2397               return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
2398                                 getMulExpr(LHSC, Add->getOperand(1)));
2399
         // Idx becomes 1 here: keep merging a constant found at Ops[1] into
         // Ops[0].  The loop returns as soon as a single operand is left, so
         // Ops[Idx] is always a valid element when it is tested.
2400     ++Idx;
2401     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
2402       // We found two constants, fold them together!
2403       ConstantInt *Fold = ConstantInt::get(getContext(),
2404                                            LHSC->getValue()->getValue() *
2405                                            RHSC->getValue()->getValue());
2406       Ops[0] = getConstant(Fold);
2407       Ops.erase(Ops.begin()+1);  // Erase the folded element
2408       if (Ops.size() == 1) return Ops[0];
2409       LHSC = cast<SCEVConstant>(Ops[0]);
2410     }
2411
2412     // If we are left with a constant one being multiplied, strip it off.
2413     if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
2414       Ops.erase(Ops.begin());
           // The constant is gone, so the first non-constant operand is back at
           // index 0.
2415       --Idx;
2416     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
2417       // If we have a multiply of zero, it will always be zero.
2418       return Ops[0];
2419     } else if (Ops[0]->isAllOnesValue()) {
2420       // If we have a mul by -1 of an add, try distributing the -1 among the
2421       // add operands.
2422       if (Ops.size() == 2) {
2423         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
2424           SmallVector<const SCEV *, 4> NewOps;
2425           bool AnyFolded = false;
2426           for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
2427                  E = Add->op_end(); I != E; ++I) {
2428             const SCEV *Mul = getMulExpr(Ops[0], *I);
                 // A result that is not itself a mul means -1 * operand
                 // simplified to something else.
2429             if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
2430             NewOps.push_back(Mul);
2431           }
               // Distribute only when at least one term simplified; otherwise
               // fall through and keep -1 * (add) as it is.
2432           if (AnyFolded)
2433             return getAddExpr(NewOps);
2434         }
2435         else if (const SCEVAddRecExpr *
2436                  AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
2437           // Negation preserves a recurrence's no self-wrap property.
               // Multiply every operand of the recurrence by -1 and rebuild it
               // over the same loop, asking the original for FlagNW only.
2438           SmallVector<const SCEV *, 4> Operands;
2439           for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
2440                  E = AddRec->op_end(); I != E; ++I) {
2441             Operands.push_back(getMulExpr(Ops[0], *I));
2442           }
2443           return getAddRecExpr(Operands, AddRec->getLoop(),
2444                                AddRec->getNoWrapFlags(SCEV::FlagNW));
2445         }
2446       }
2447     }
2448
         // Stripping a constant one above may have left a single operand.
2449     if (Ops.size() == 1)
2450       return Ops[0];
2451   }
2452
2453   // Skip over operands whose SCEV type sorts before scMulExpr (e.g. add
       // expressions) until we get to a multiply.
2454   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
2455     ++Idx;
2456
2457   // If there are mul operands inline them all into this expression.
2458   if (Idx < Ops.size()) {
2459     bool DeletedMul = false;
2460     while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
2461       // If we have an mul, expand the mul operands onto the end of the operands
2462       // list.
2463       Ops.erase(Ops.begin()+Idx);
2464       Ops.append(Mul->op_begin(), Mul->op_end());
2465       DeletedMul = true;
2466     }
2467
2468     // If we deleted at least one mul, we added operands to the end of the list,
2469     // and they are not necessarily sorted.  Recurse to resort and resimplify
2470     // any operands we just acquired.
         // Note that Flags is not forwarded to the recursive call.
2471     if (DeletedMul)
2472       return getMulExpr(Ops);
2473   }
2474
2475   // If there are any add recurrences in the operands list, see if any other
2476   // multiplied values are loop invariant.  If so, we can fold them into the
2477   // recurrence.
2478   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
2479     ++Idx;
2480
2481   // Scan over all recurrences, trying to fold loop invariants into them.
2482   for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
2483     // Scan all of the other operands to this mul and add them to the vector if
2484     // they are loop invariant w.r.t. the recurrence.
2485     SmallVector<const SCEV *, 8> LIOps;
2486     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
2487     const Loop *AddRecLoop = AddRec->getLoop();
2488     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2489       if (isLoopInvariant(Ops[i], AddRecLoop)) {
2490         LIOps.push_back(Ops[i]);
2491         Ops.erase(Ops.begin()+i);
             // Step back so the element that slid into slot i is examined too.
2492         --i; --e;
2493       }
2494
2495     // If we found some loop invariants, fold them into the recurrence.
2496     if (!LIOps.empty()) {
2497       //  NLI * LI * {Start,+,Step}  -->  NLI * {LI*Start,+,LI*Step}
2498       SmallVector<const SCEV *, 4> NewOps;
2499       NewOps.reserve(AddRec->getNumOperands());
2500       const SCEV *Scale = getMulExpr(LIOps);
2501       for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
2502         NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
2503
2504       // Build the new addrec. Propagate the NUW and NSW flags if both the
2505       // outer mul and the inner addrec are guaranteed to have no overflow.
2506       //
2507       // No self-wrap cannot be guaranteed after changing the step size, but
2508       // will be inferred if either NUW or NSW is true.
2509       Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
2510       const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
2511
2512       // If all of the other operands were loop invariant, we are done.
2513       if (Ops.size() == 1) return NewRec;
2514
2515       // Otherwise, multiply the folded AddRec by the non-invariant parts.
           // The erasures above shifted positions, so the old addrec is located
           // by pointer identity rather than through Idx.  The search has no
           // bound: it assumes AddRec is still present in Ops.
2516       for (unsigned i = 0;; ++i)
2517         if (Ops[i] == AddRec) {
2518           Ops[i] = NewRec;
2519           break;
2520         }
2521       return getMulExpr(Ops);
2522     }
2523
2524     // Okay, if there weren't any loop invariants to be folded, check to see if
2525     // there are multiple AddRec's with the same loop induction variable being
2526     // multiplied together.  If so, we can fold them.
2527
2528     // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
2529     // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
2530     //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
2531     //   ]]],+,...up to x=2n}.
2532     // Note that the arguments to choose() are always integers with values
2533     // known at compile time, never SCEV objects.
2534     //
2535     // The implementation avoids pointless extra computations when the two
2536     // addrec's are of different length (mathematically, it's equivalent to
2537     // an infinite stream of zeros on the right).
         //
         // NOTE(review): the first coefficient computed below is
         // Choose(x, 2x-y), whereas the formula above is written with
         // choose(x, 2x) -- confirm which form the comment is meant to show.
2538     bool OpsModified = false;
2539     for (unsigned OtherIdx = Idx+1;
2540          OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
2541          ++OtherIdx) {
2542       const SCEVAddRecExpr *OtherAddRec =
2543         dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
           // Only recurrences over the same loop can be multiplied together.
2544       if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
2545         continue;
2546
           // Overflow is set by Choose/umul_ov when a coefficient cannot be
           // computed in 64 bits; every loop below stops once it is set.
2547       bool Overflow = false;
2548       Type *Ty = AddRec->getType();
2549       bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
2550       SmallVector<const SCEV*, 7> AddRecOps;
           // The product has (operands of AddRec + operands of OtherAddRec - 1)
           // operands; x indexes the operand being built.
2551       for (int x = 0, xe = AddRec->getNumOperands() +
2552              OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
2553         const SCEV *Term = getConstant(Ty, 0);
2554         for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
2555           uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
               // The bounds on z keep both y-z and z inside the operand lists
               // of AddRec and OtherAddRec respectively.
2556           for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
2557                  ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
2558                z < ze && !Overflow; ++z) {
2559             uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
2560             uint64_t Coeff;
                 // The multiply is only checked for overflow when the type is
                 // wider than 64 bits; for narrower types the product is
                 // allowed to wrap in uint64_t (presumably because the constant
                 // is reduced to the type's width anyway -- TODO confirm).
2561             if (LargerThan64Bits)
2562               Coeff = umul_ov(Coeff1, Coeff2, Overflow);
2563             else
2564               Coeff = Coeff1*Coeff2;
2565             const SCEV *CoeffTerm = getConstant(Ty, Coeff);
2566             const SCEV *Term1 = AddRec->getOperand(y-z);
2567             const SCEV *Term2 = OtherAddRec->getOperand(z);
2568             Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
2569           }
2570         }
2571         AddRecOps.push_back(Term);
2572       }
           // On overflow this pair is left unfolded and the scan moves on.
2573       if (!Overflow) {
2574         const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
2575                                               SCEV::FlagAnyWrap);
2576         if (Ops.size() == 2) return NewAddRec;
             // Replace the first recurrence with the product and drop the
             // second; stepping OtherIdx back re-tests the slot that was just
             // filled by the following operand.
2577         Ops[Idx] = NewAddRec;
2578         Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
2579         OpsModified = true;
             // getAddRecExpr may have returned something that is not a
             // recurrence; if so there is nothing left to fold into.
2580         AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
2581         if (!AddRec)
2582           break;
2583       }
2584     }
         // Any successful fold changed Ops, so start over on the new list
         // (again without forwarding Flags).
2585     if (OpsModified)
2586       return getMulExpr(Ops);
2587
2588     // Otherwise couldn't fold anything into this recurrence.  Move onto the
2589     // next one.
2590   }
2591
2592   // Okay, it looks like we really DO need an mul expr.  Check to see if we
2593   // already have one, otherwise create a new one.
       // The lookup key is the expression kind followed by the operand pointers
       // in their current order.
2594   FoldingSetNodeID ID;
2595   ID.AddInteger(scMulExpr);
2596   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2597     ID.AddPointer(Ops[i]);
2598   void *IP = nullptr;
2599   SCEVMulExpr *S =
2600     static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
2601   if (!S) {
         // Not found: copy the operands into SCEVAllocator storage, build the
         // node there, and insert it at the position found by the lookup.
2602     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
2603     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
2604     S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
2605                                         O, Ops.size());
2606     UniqueSCEVs.InsertNode(S, IP);
2607   }
       // Flags are applied to the node whether it was found or newly created.
2608   S->setNoWrapFlags(Flags);
2609   return S;
2610 }
2611
2612 /// getUDivExpr - Get a canonical unsigned division expression, or something
2613 /// simpler if possible.
     ///
     /// LHS is the dividend and RHS the divisor; both must have the same
     /// effective SCEV type (asserted below).  All simplifications are attempted
     /// only when RHS is a constant; a divisor of one returns LHS, and the other
     /// folds are additionally skipped for a zero divisor.  When nothing
     /// applies, the uniqued SCEVUDivExpr for (LHS, RHS) is returned.
2614 const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
2615                                          const SCEV *RHS) {
2616   assert(getEffectiveSCEVType(LHS->getType()) ==
2617          getEffectiveSCEVType(RHS->getType()) &&
2618          "SCEVUDivExpr operand types don't match!");
2619
2620   if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
2621     if (RHSC->getValue()->equalsInt(1))
2622       return LHS;                               // X udiv 1 --> x
2623     // If the denominator is zero, the result of the udiv is undefined. Don't
2624     // try to analyze it, because the resolution chosen here may differ from
2625     // the resolution chosen in other parts of the compiler.
2626     if (!RHSC->getValue()->isZero()) {
2627       // Determine if the division can be folded into the operands of
2628       // LHS.
2629       // TODO: Generalize this to non-constants by using known-bits information.
2630       Type *Ty = LHS->getType();
           // LZ is the number of leading zero bits of the divisor constant;
           // MaxShiftAmt starts as (width of Ty) - LZ - 1.
2631       unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
2632       unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
2633       // For non-power-of-two values, effectively round the value up to the
2634       // nearest power of two.
2635       if (!RHSC->getValue()->getValue().isPowerOf2())
2636         ++MaxShiftAmt;
           // ExtTy is an integer type MaxShiftAmt bits wider than Ty.  Each
           // fold below first compares the zero-extension to ExtTy of the whole
           // expression against the same expression rebuilt from zero-extended
           // operands, and proceeds only if the two are the same SCEV (this is
           // the "if safe" part of the comments below).
2637       IntegerType *ExtTy =
2638         IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
2639       if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
2640         if (const SCEVConstant *Step =
2641             dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
2642           // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
2643           const APInt &StepInt = Step->getValue()->getValue();
2644           const APInt &DivInt = RHSC->getValue()->getValue();
               // Requires the step to be an exact multiple of the divisor.
2645           if (!StepInt.urem(DivInt) &&
2646               getZeroExtendExpr(AR, ExtTy) ==
2647               getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
2648                             getZeroExtendExpr(Step, ExtTy),
2649                             AR->getLoop(), SCEV::FlagAnyWrap)) {
2650             SmallVector<const SCEV *, 4> Operands;
2651             for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
2652               Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
2653             return getAddRecExpr(Operands, AR->getLoop(),
2654                                  SCEV::FlagNW);
2655           }
2656           /// Get a canonical UDivExpr for a recurrence.
2657           /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
2658           // We can currently only fold X%N if X is constant.
               // Unlike the fold above, this one does not return: it only
               // replaces LHS and lets the code below continue with the new
               // dividend.
2659           const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
2660           if (StartC && !DivInt.urem(StepInt) &&
2661               getZeroExtendExpr(AR, ExtTy) ==
2662               getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
2663                             getZeroExtendExpr(Step, ExtTy),
2664                             AR->getLoop(), SCEV::FlagAnyWrap)) {
2665             const APInt &StartInt = StartC->getValue()->getValue();
2666             const APInt &StartRem = StartInt.urem(StepInt);
2667             if (StartRem != 0)
2668               LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
2669                                   AR->getLoop(), SCEV::FlagNW);
2670           }
2671         }
2672       // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
2673       if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
2674         SmallVector<const SCEV *, 4> Operands;
2675         for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
2676           Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
2677         if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
2678           // Find an operand that's safely divisible.
               // "Safely divisible" here means the quotient folded to something
               // other than a udiv node and multiplying it back by the divisor
               // gives exactly the original operand.  The first such operand
               // is replaced by its quotient.
2679           for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
2680             const SCEV *Op = M->getOperand(i);
2681             const SCEV *Div = getUDivExpr(Op, RHSC);
2682             if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
2683               Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
2684                                                       M->op_end());
2685               Operands[i] = Div;
2686               return getMulExpr(Operands);
2687             }
2688           }
2689       }
2690       // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
2691       if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
2692         SmallVector<const SCEV *, 4> Operands;
2693         for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
2694           Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
2695         if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
               // Reuse Operands for the quotients.  The loop stops at the first
               // operand that does not divide exactly, so the size check after
               // it succeeds only when every operand was divisible.
2696           Operands.clear();
2697           for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
2698             const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
2699             if (isa<SCEVUDivExpr>(Op) ||
2700                 getMulExpr(Op, RHS) != A->getOperand(i))
2701               break;
2702             Operands.push_back(Op);
2703           }
2704           if (Operands.size() == A->getNumOperands())
2705             return getAddExpr(Operands);
2706         }
2707       }
2708
2709       // Fold if both operands are constant.
2710       if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
2711         Constant *LHSCV = LHSC->getValue();
2712         Constant *RHSCV = RHSC->getValue();
2713         return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
2714                                                                    RHSCV)));
2715       }
2716     }
2717   }
2718
       // No simplification applied: return the existing udiv node for this
       // (LHS, RHS) pair if UniqueSCEVs has one, otherwise create and insert it.
       // LHS may have been rewritten above.
2719   FoldingSetNodeID ID;
2720   ID.AddInteger(scUDivExpr);
2721   ID.AddPointer(LHS);
2722   ID.AddPointer(RHS);
2723   void *IP = nullptr;
2724   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
2725   SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
2726                                              LHS, RHS);
2727   UniqueSCEVs.InsertNode(S, IP);
2728   return S;
2729 }
2730
2731 static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
2732   APInt A = C1->getValue()->getValue().abs();
2733   APInt B = C2->getValue()->getValue().abs();
2734   uint32_t ABW = A.getBitWidth();
2735   uint32_t BBW = B.getBitWidth();
2736
2737   if (ABW > BBW)
2738     B = B.zext(ABW);
2739   else if (ABW < BBW)
2740     A = A.zext(BBW);
2741
2742   return APIntOps::GreatestCommonDivisor(A, B);
2743 }
2744
2745 /// getUDivExactExpr - Get a canonical unsigned division expression, or
2746 /// something simpler if possible. There is no representation for an exact udiv
2747 /// in SCEV IR, but we can attempt to remove factors from the LHS and RHS.
2748 /// We can't do this when it's not exact because the udiv may be clearing bits.
2749 const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
2750                                               const SCEV *RHS) {
2751   // TODO: we could try to find factors in all sorts of things, but for now we
2752   // just deal with u/exact (multiply, constant). See SCEVDivision towards the
2753   // end of this file for inspiration.
2754
2755   const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
2756   if (!Mul)
2757     return getUDivExpr(LHS, RHS);
2758
2759   if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
2760     // If the mulexpr multiplies by a constant, then that constant must be the
2761     // first element of the mulexpr.
2762     if (const SCEVConstant *LHSCst =
2763             dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
2764       if (LHSCst == RHSCst) {
2765         SmallVector<const SCEV *, 2> Operands;
2766         Operands.append(Mul->op_begin() + 1, Mul->op_end());
2767         return getMulExpr(Operands);
2768       }
2769
2770       // We can't just assume that LHSCst divides RHSCst cleanly, it could be
2771       // that there's a factor provided by one of the other terms. We need to
2772       // check.
2773       APInt Factor = gcd(LHSCst, RHSCst);
2774       if (!Factor.isIntN(1)) {
2775         LHSCst = cast<SCEVConstant>(
2776             getConstant(LHSCst->getValue()->getValue().udiv(Factor)));
2777         RHSCst = cast<SCEVConstant>(
2778             getConstant(RHSCst->getValue()->getValue().udiv(Factor)));
2779         SmallVector<const SCEV *, 2> Operands;
2780         Operands.push_back(LHSCst);
2781         Operands.append(Mul->op_begin() + 1, Mul->op_end());
2782         LHS = getMulExpr(Operands);
2783         RHS = RHSCst;
2784         Mul = dyn_cast<SCEVMulExpr>(LHS);
2785         if (!Mul)
2786           return getUDivExactExpr(LHS, RHS);
2787       }
2788     }
2789   }
2790
2791   for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
2792     if (Mul->getOperand(i) == RHS) {
2793       SmallVector<const SCEV *, 2> Operands;
2794       Operands.append(Mul->op_begin(), Mul->op_begin() + i);
2795       Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
2796       return getMulExpr(Operands);
2797     }
2798   }
2799
2800   return getUDivExpr(LHS, RHS);
2801 }
2802
2803 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
2804 /// Simplify the expression as much as possible.
2805 const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
2806                                            const Loop *L,
2807                                            SCEV::NoWrapFlags Flags) {
2808   SmallVector<const SCEV *, 4> Operands;
2809   Operands.push_back(Start);
2810   if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
2811     if (StepChrec->getLoop() == L) {
2812       Operands.append(StepChrec->op_begin(), StepChrec->op_end());
2813       return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
2814     }
2815
2816   Operands.push_back(Step);
2817   return getAddRecExpr(Operands, L, Flags);
2818 }
2819
2820 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
2821 /// Simplify the expression as much as possible.
2822 const SCEV *
2823 ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
2824                                const Loop *L, SCEV::NoWrapFlags Flags) {
2825   if (Operands.size() == 1) return Operands[0];
2826 #ifndef NDEBUG
2827   Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
2828   for (unsigned i = 1, e = Operands.size(); i != e; ++i)
2829     assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
2830            "SCEVAddRecExpr operand types don't match!");
2831   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
2832     assert(isLoopInvariant(Operands[i], L) &&
2833            "SCEVAddRecExpr operand is not loop-invariant!");
2834 #endif
2835
2836   if (Operands.back()->isZero()) {
2837     Operands.pop_back();
2838     return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0}  -->  X
2839   }
2840
2841   // It's tempting to want to call getMaxBackedgeTakenCount count here and
2842   // use that information to infer NUW and NSW flags. However, computing a
2843   // BE count requires calling getAddRecExpr, so we may not yet have a
2844   // meaningful BE count at this point (and if we don't, we'd be stuck
2845   // with a SCEVCouldNotCompute as the cached BE count).
2846
2847   Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
2848
2849   // Canonicalize nested AddRecs in by nesting them in order of loop depth.
2850   if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
2851     const Loop *NestedLoop = NestedAR->getLoop();
2852     if (L->contains(NestedLoop)
2853             ? (L->getLoopDepth() < NestedLoop->getLoopDepth())
2854             : (!NestedLoop->contains(L) &&
2855                DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
2856       SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
2857                                                   NestedAR->op_end());
2858       Operands[0] = NestedAR->getStart();
2859       // AddRecs require their operands be loop-invariant with respect to their
2860       // loops. Don't perform this transformation if it would break this
2861       // requirement.
2862       bool AllInvariant = true;
2863       for (unsigned i = 0, e = Operands.size(); i != e; ++i)
2864         if (!isLoopInvariant(Operands[i], L)) {
2865           AllInvariant = false;
2866           break;
2867         }
2868       if (AllInvariant) {
2869         // Create a recurrence for the outer loop with the same step size.
2870         //
2871         // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
2872         // inner recurrence has the same property.
2873         SCEV::NoWrapFlags OuterFlags =
2874           maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
2875
2876         NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
2877         AllInvariant = true;
2878         for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
2879           if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
2880             AllInvariant = false;
2881             break;
2882           }
2883         if (AllInvariant) {
2884           // Ok, both add recurrences are valid after the transformation.
2885           //
2886           // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
2887           // the outer recurrence has the same property.
2888           SCEV::NoWrapFlags InnerFlags =
2889             maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
2890           return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
2891         }
2892       }
2893       // Reset Operands to its original state.
2894       Operands[0] = NestedAR;
2895     }
2896   }
2897
2898   // Okay, it looks like we really DO need an addrec expr.  Check to see if we
2899   // already have one, otherwise create a new one.
2900   FoldingSetNodeID ID;
2901   ID.AddInteger(scAddRecExpr);
2902   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
2903     ID.AddPointer(Operands[i]);
2904   ID.AddPointer(L);
2905   void *IP = nullptr;
2906   SCEVAddRecExpr *S =
2907     static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
2908   if (!S) {
2909     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
2910     std::uninitialized_copy(Operands.begin(), Operands.end(), O);
2911     S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
2912                                            O, Operands.size(), L);
2913     UniqueSCEVs.InsertNode(S, IP);
2914   }
2915   S->setNoWrapFlags(Flags);
2916   return S;
2917 }
2918
2919 const SCEV *
2920 ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
2921                             const SmallVectorImpl<const SCEV *> &IndexExprs,
2922                             bool InBounds) {
2923   // getSCEV(Base)->getType() has the same address space as Base->getType()
2924   // because SCEV::getType() preserves the address space.
2925   Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType());
2926   // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
2927   // instruction to its SCEV, because the Instruction may be guarded by control
2928   // flow and the no-overflow bits may not be valid for the expression in any
2929   // context. This can be fixed similarly to how these flags are handled for
2930   // adds.
2931   SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
2932
2933   const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
2934   // The address space is unimportant. The first thing we do on CurTy is getting
2935   // its element type.
2936   Type *CurTy = PointerType::getUnqual(PointeeType);
2937   for (const SCEV *IndexExpr : IndexExprs) {
2938     // Compute the (potentially symbolic) offset in bytes for this index.
2939     if (StructType *STy = dyn_cast<StructType>(CurTy)) {
2940       // For a struct, add the member offset.
2941       ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
2942       unsigned FieldNo = Index->getZExtValue();
2943       const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
2944
2945       // Add the field offset to the running total offset.
2946       TotalOffset = getAddExpr(TotalOffset, FieldOffset);
2947
2948       // Update CurTy to the type of the field at Index.
2949       CurTy = STy->getTypeAtIndex(Index);
2950     } else {
2951       // Update CurTy to its element type.
2952       CurTy = cast<SequentialType>(CurTy)->getElementType();
2953       // For an array, add the element offset, explicitly scaled.
2954       const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy);
2955       // Getelementptr indices are signed.
2956       IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy);
2957
2958       // Multiply the index by the element size to compute the element offset.
2959       const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap);
2960
2961       // Add the element offset to the running total offset.
2962       TotalOffset = getAddExpr(TotalOffset, LocalOffset);
2963     }
2964   }
2965
2966   // Add the total offset from all the GEP indices to the base.
2967   return getAddExpr(BaseExpr, TotalOffset, Wrap);
2968 }
2969
2970 const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
2971                                          const SCEV *RHS) {
2972   SmallVector<const SCEV *, 2> Ops;
2973   Ops.push_back(LHS);
2974   Ops.push_back(RHS);
2975   return getSMaxExpr(Ops);
2976 }
2977
2978 const SCEV *
2979 ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
2980   assert(!Ops.empty() && "Cannot get empty smax!");
2981   if (Ops.size() == 1) return Ops[0];
2982 #ifndef NDEBUG
2983   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
2984   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
2985     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
2986            "SCEVSMaxExpr operand types don't match!");
2987 #endif
2988
2989   // Sort by complexity, this groups all similar expression types together.
2990   GroupByComplexity(Ops, &LI);
2991
2992   // If there are any constants, fold them together.
2993   unsigned Idx = 0;
2994   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
2995     ++Idx;
2996     assert(Idx < Ops.size());
2997     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
2998       // We found two constants, fold them together!
2999       ConstantInt *Fold = ConstantInt::get(getContext(),
3000                               APIntOps::smax(LHSC->getValue()->getValue(),
3001                                              RHSC->getValue()->getValue()));
3002       Ops[0] = getConstant(Fold);
3003       Ops.erase(Ops.begin()+1);  // Erase the folded element
3004       if (Ops.size() == 1) return Ops[0];
3005       LHSC = cast<SCEVConstant>(Ops[0]);
3006     }
3007
3008     // If we are left with a constant minimum-int, strip it off.
3009     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
3010       Ops.erase(Ops.begin());
3011       --Idx;
3012     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
3013       // If we have an smax with a constant maximum-int, it will always be
3014       // maximum-int.
3015       return Ops[0];
3016     }
3017
3018     if (Ops.size() == 1) return Ops[0];
3019   }
3020
3021   // Find the first SMax
3022   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
3023     ++Idx;
3024
3025   // Check to see if one of the operands is an SMax. If so, expand its operands
3026   // onto our operand list, and recurse to simplify.
3027   if (Idx < Ops.size()) {
3028     bool DeletedSMax = false;
3029     while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
3030       Ops.erase(Ops.begin()+Idx);
3031       Ops.append(SMax->op_begin(), SMax->op_end());
3032       DeletedSMax = true;
3033     }
3034
3035     if (DeletedSMax)
3036       return getSMaxExpr(Ops);
3037   }
3038
3039   // Okay, check to see if the same value occurs in the operand list twice.  If
3040   // so, delete one.  Since we sorted the list, these values are required to
3041   // be adjacent.
3042   for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
3043     //  X smax Y smax Y  -->  X smax Y
3044     //  X smax Y         -->  X, if X is always greater than Y
3045     if (Ops[i] == Ops[i+1] ||
3046         isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
3047       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
3048       --i; --e;
3049     } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
3050       Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
3051       --i; --e;
3052     }
3053
3054   if (Ops.size() == 1) return Ops[0];
3055
3056   assert(!Ops.empty() && "Reduced smax down to nothing!");
3057
3058   // Okay, it looks like we really DO need an smax expr.  Check to see if we
3059   // already have one, otherwise create a new one.
3060   FoldingSetNodeID ID;
3061   ID.AddInteger(scSMaxExpr);
3062   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
3063     ID.AddPointer(Ops[i]);
3064   void *IP = nullptr;
3065   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
3066   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
3067   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
3068   SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
3069                                              O, Ops.size());
3070   UniqueSCEVs.InsertNode(S, IP);
3071   return S;
3072 }
3073
3074 const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
3075                                          const SCEV *RHS) {
3076   SmallVector<const SCEV *, 2> Ops;
3077   Ops.push_back(LHS);
3078   Ops.push_back(RHS);
3079   return getUMaxExpr(Ops);
3080 }
3081
3082 const SCEV *
3083 ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
3084   assert(!Ops.empty() && "Cannot get empty umax!");
3085   if (Ops.size() == 1) return Ops[0];
3086 #ifndef NDEBUG
3087   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
3088   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
3089     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
3090            "SCEVUMaxExpr operand types don't match!");
3091 #endif
3092
3093   // Sort by complexity, this groups all similar expression types together.
3094   GroupByComplexity(Ops, &LI);
3095
3096   // If there are any constants, fold them together.
3097   unsigned Idx = 0;
3098   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
3099     ++Idx;
3100     assert(Idx < Ops.size());
3101     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
3102       // We found two constants, fold them together!
3103       ConstantInt *Fold = ConstantInt::get(getContext(),
3104                               APIntOps::umax(LHSC->getValue()->getValue(),
3105                                              RHSC->getValue()->getValue()));
3106       Ops[0] = getConstant(Fold);
3107       Ops.erase(Ops.begin()+1);  // Erase the folded element
3108       if (Ops.size() == 1) return Ops[0];
3109       LHSC = cast<SCEVConstant>(Ops[0]);
3110     }
3111
3112     // If we are left with a constant minimum-int, strip it off.
3113     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
3114       Ops.erase(Ops.begin());
3115       --Idx;
3116     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
3117       // If we have an umax with a constant maximum-int, it will always be
3118       // maximum-int.
3119       return Ops[0];
3120     }
3121
3122     if (Ops.size() == 1) return Ops[0];
3123   }
3124
3125   // Find the first UMax
3126   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
3127     ++Idx;
3128
3129   // Check to see if one of the operands is a UMax. If so, expand its operands
3130   // onto our operand list, and recurse to simplify.
3131   if (Idx < Ops.size()) {
3132     bool DeletedUMax = false;
3133     while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
3134       Ops.erase(Ops.begin()+Idx);
3135       Ops.append(UMax->op_begin(), UMax->op_end());
3136       DeletedUMax = true;
3137     }
3138
3139     if (DeletedUMax)
3140       return getUMaxExpr(Ops);
3141   }
3142
3143   // Okay, check to see if the same value occurs in the operand list twice.  If
3144   // so, delete one.  Since we sorted the list, these values are required to
3145   // be adjacent.
3146   for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
3147     //  X umax Y umax Y  -->  X umax Y
3148     //  X umax Y         -->  X, if X is always greater than Y
3149     if (Ops[i] == Ops[i+1] ||
3150         isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
3151       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
3152       --i; --e;
3153     } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
3154       Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
3155       --i; --e;
3156     }
3157
3158   if (Ops.size() == 1) return Ops[0];
3159
3160   assert(!Ops.empty() && "Reduced umax down to nothing!");
3161
3162   // Okay, it looks like we really DO need a umax expr.  Check to see if we
3163   // already have one, otherwise create a new one.
3164   FoldingSetNodeID ID;
3165   ID.AddInteger(scUMaxExpr);
3166   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
3167     ID.AddPointer(Ops[i]);
3168   void *IP = nullptr;
3169   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
3170   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
3171   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
3172   SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
3173                                              O, Ops.size());
3174   UniqueSCEVs.InsertNode(S, IP);
3175   return S;
3176 }
3177
3178 const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
3179                                          const SCEV *RHS) {
3180   // ~smax(~x, ~y) == smin(x, y).
3181   return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
3182 }
3183
3184 const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
3185                                          const SCEV *RHS) {
3186   // ~umax(~x, ~y) == umin(x, y)
3187   return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
3188 }
3189
3190 const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
3191   // We can bypass creating a target-independent
3192   // constant expression and then folding it back into a ConstantInt.
3193   // This is just a compile-time optimization.
3194   return getConstant(IntTy,
3195                      F.getParent()->getDataLayout().getTypeAllocSize(AllocTy));
3196 }
3197
3198 const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
3199                                              StructType *STy,
3200                                              unsigned FieldNo) {
3201   // We can bypass creating a target-independent
3202   // constant expression and then folding it back into a ConstantInt.
3203   // This is just a compile-time optimization.
3204   return getConstant(
3205       IntTy,
3206       F.getParent()->getDataLayout().getStructLayout(STy)->getElementOffset(
3207           FieldNo));
3208 }
3209
3210 const SCEV *ScalarEvolution::getUnknown(Value *V) {
3211   // Don't attempt to do anything other than create a SCEVUnknown object
3212   // here.  createSCEV only calls getUnknown after checking for all other
3213   // interesting possibilities, and any other code that calls getUnknown
3214   // is doing so in order to hide a value from SCEV canonicalization.
3215
3216   FoldingSetNodeID ID;
3217   ID.AddInteger(scUnknown);
3218   ID.AddPointer(V);
3219   void *IP = nullptr;
3220   if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
3221     assert(cast<SCEVUnknown>(S)->getValue() == V &&
3222            "Stale SCEVUnknown in uniquing map!");
3223     return S;
3224   }
3225   SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
3226                                             FirstUnknown);
3227   FirstUnknown = cast<SCEVUnknown>(S);
3228   UniqueSCEVs.InsertNode(S, IP);
3229   return S;
3230 }
3231
3232 //===----------------------------------------------------------------------===//
3233 //            Basic SCEV Analysis and PHI Idiom Recognition Code
3234 //
3235
3236 /// isSCEVable - Test if values of the given type are analyzable within
3237 /// the SCEV framework. This primarily includes integer types, and it
3238 /// can optionally include pointer types if the ScalarEvolution class
3239 /// has access to target-specific information.
3240 bool ScalarEvolution::isSCEVable(Type *Ty) const {
3241   // Integers and pointers are always SCEVable.
3242   return Ty->isIntegerTy() || Ty->isPointerTy();
3243 }
3244
3245 /// getTypeSizeInBits - Return the size in bits of the specified type,
3246 /// for which isSCEVable must return true.
3247 uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
3248   assert(isSCEVable(Ty) && "Type is not SCEVable!");
3249   return F.getParent()->getDataLayout().getTypeSizeInBits(Ty);
3250 }
3251
3252 /// getEffectiveSCEVType - Return a type with the same bitwidth as
3253 /// the given type and which represents how SCEV will treat the given
3254 /// type, for which isSCEVable must return true. For pointer types,
3255 /// this is the pointer-sized integer type.
3256 Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
3257   assert(isSCEVable(Ty) && "Type is not SCEVable!");
3258
3259   if (Ty->isIntegerTy()) {
3260     return Ty;
3261   }
3262
3263   // The only other support type is pointer.
3264   assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
3265   return F.getParent()->getDataLayout().getIntPtrType(Ty);
3266 }
3267
3268 const SCEV *ScalarEvolution::getCouldNotCompute() {
3269   return CouldNotCompute.get();
3270 }
3271
3272 namespace {
3273   // Helper class working with SCEVTraversal to figure out if a SCEV contains
3274   // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne
3275   // is set iff if find such SCEVUnknown.
3276   //
3277   struct FindInvalidSCEVUnknown {
3278     bool FindOne;
3279     FindInvalidSCEVUnknown() { FindOne = false; }
3280     bool follow(const SCEV *S) {
3281       switch (static_cast<SCEVTypes>(S->getSCEVType())) {
3282       case scConstant:
3283         return false;
3284       case scUnknown:
3285         if (!cast<SCEVUnknown>(S)->getValue())
3286           FindOne = true;
3287         return false;
3288       default:
3289         return true;
3290       }
3291     }
3292     bool isDone() const { return FindOne; }
3293   };
3294 }
3295
3296 bool ScalarEvolution::checkValidity(const SCEV *S) const {
3297   FindInvalidSCEVUnknown F;
3298   SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
3299   ST.visitAll(S);
3300
3301   return !F.FindOne;
3302 }
3303
3304 /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
3305 /// expression and create a new one.
3306 const SCEV *ScalarEvolution::getSCEV(Value *V) {
3307   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
3308
3309   const SCEV *S = getExistingSCEV(V);
3310   if (S == nullptr) {
3311     S = createSCEV(V);
3312     ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
3313   }
3314   return S;
3315 }
3316
3317 const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
3318   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
3319
3320   ValueExprMapType::iterator I = ValueExprMap.find_as(V);
3321   if (I != ValueExprMap.end()) {
3322     const SCEV *S = I->second;
3323     if (checkValidity(S))
3324       return S;
3325     ValueExprMap.erase(I);
3326   }
3327   return nullptr;
3328 }
3329
3330 /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
3331 ///
3332 const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
3333                                              SCEV::NoWrapFlags Flags) {
3334   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
3335     return getConstant(
3336                cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
3337
3338   Type *Ty = V->getType();
3339   Ty = getEffectiveSCEVType(Ty);
3340   return getMulExpr(
3341       V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
3342 }
3343
3344 /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
3345 const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
3346   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
3347     return getConstant(
3348                 cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
3349
3350   Type *Ty = V->getType();
3351   Ty = getEffectiveSCEVType(Ty);
3352   const SCEV *AllOnes =
3353                    getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
3354   return getMinusSCEV(AllOnes, V);
3355 }
3356
3357 /// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1.
3358 const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
3359                                           SCEV::NoWrapFlags Flags) {
3360   // Fast path: X - X --> 0.
3361   if (LHS == RHS)
3362     return getConstant(LHS->getType(), 0);
3363
3364   // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
3365   // makes it so that we cannot make much use of NUW.
3366   auto AddFlags = SCEV::FlagAnyWrap;
3367   const bool RHSIsNotMinSigned =
3368       !getSignedRange(RHS).getSignedMin().isMinSignedValue();
3369   if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
3370     // Let M be the minimum representable signed value. Then (-1)*RHS
3371     // signed-wraps if and only if RHS is M. That can happen even for
3372     // a NSW subtraction because e.g. (-1)*M signed-wraps even though
3373     // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
3374     // (-1)*RHS, we need to prove that RHS != M.
3375     //
3376     // If LHS is non-negative and we know that LHS - RHS does not
3377     // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
3378     // either by proving that RHS > M or that LHS >= 0.
3379     if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
3380       AddFlags = SCEV::FlagNSW;
3381     }
3382   }
3383
3384   // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
3385   // RHS is NSW and LHS >= 0.
3386   //
3387   // The difficulty here is that the NSW flag may have been proven
3388   // relative to a loop that is to be found in a recurrence in LHS and
3389   // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
3390   // larger scope than intended.
3391   auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
3392
3393   return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);
3394 }
3395
3396 /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
3397 /// input value to the specified type.  If the type must be extended, it is zero
3398 /// extended.
3399 const SCEV *
3400 ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
3401   Type *SrcTy = V->getType();
3402   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
3403          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
3404          "Cannot truncate or zero extend with non-integer arguments!");
3405   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
3406     return V;  // No conversion
3407   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
3408     return getTruncateExpr(V, Ty);
3409   return getZeroExtendExpr(V, Ty);
3410 }
3411
3412 /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
3413 /// input value to the specified type.  If the type must be extended, it is sign
3414 /// extended.
3415 const SCEV *
3416 ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
3417                                          Type *Ty) {
3418   Type *SrcTy = V->getType();
3419   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
3420          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
3421          "Cannot truncate or zero extend with non-integer arguments!");
3422   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
3423     return V;  // No conversion
3424   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
3425     return getTruncateExpr(V, Ty);
3426   return getSignExtendExpr(V, Ty);
3427 }
3428
3429 /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
3430 /// input value to the specified type.  If the type must be extended, it is zero
3431 /// extended.  The conversion must not be narrowing.
3432 const SCEV *
3433 ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
3434   Type *SrcTy = V->getType();
3435   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
3436          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
3437          "Cannot noop or zero extend with non-integer arguments!");
3438   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
3439          "getNoopOrZeroExtend cannot truncate!");
3440   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
3441     return V;  // No conversion
3442   return getZeroExtendExpr(V, Ty);
3443 }
3444
3445 /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
3446 /// input value to the specified type.  If the type must be extended, it is sign
3447 /// extended.  The conversion must not be narrowing.
3448 const SCEV *
3449 ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
3450   Type *SrcTy = V->getType();
3451   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
3452          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
3453          "Cannot noop or sign extend with non-integer arguments!");
3454   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
3455          "getNoopOrSignExtend cannot truncate!");
3456   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
3457     return V;  // No conversion
3458   return getSignExtendExpr(V, Ty);
3459 }
3460
3461 /// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
3462 /// the input value to the specified type. If the type must be extended,
3463 /// it is extended with unspecified bits. The conversion must not be
3464 /// narrowing.
3465 const SCEV *
3466 ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
3467   Type *SrcTy = V->getType();
3468   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
3469          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
3470          "Cannot noop or any extend with non-integer arguments!");
3471   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
3472          "getNoopOrAnyExtend cannot truncate!");
3473   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
3474     return V;  // No conversion
3475   return getAnyExtendExpr(V, Ty);
3476 }
3477
3478 /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
3479 /// input value to the specified type.  The conversion must not be widening.
3480 const SCEV *
3481 ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
3482   Type *SrcTy = V->getType();
3483   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
3484          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
3485          "Cannot truncate or noop with non-integer arguments!");
3486   assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
3487          "getTruncateOrNoop cannot extend!");
3488   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
3489     return V;  // No conversion
3490   return getTruncateExpr(V, Ty);
3491 }
3492
3493 /// getUMaxFromMismatchedTypes - Promote the operands to the wider of
3494 /// the types using zero-extension, and then perform a umax operation
3495 /// with them.
3496 const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
3497                                                         const SCEV *RHS) {
3498   const SCEV *PromotedLHS = LHS;
3499   const SCEV *PromotedRHS = RHS;
3500
3501   if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
3502     PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
3503   else
3504     PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
3505
3506   return getUMaxExpr(PromotedLHS, PromotedRHS);
3507 }
3508
3509 /// getUMinFromMismatchedTypes - Promote the operands to the wider of
3510 /// the types using zero-extension, and then perform a umin operation
3511 /// with them.
3512 const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
3513                                                         const SCEV *RHS) {
3514   const SCEV *PromotedLHS = LHS;
3515   const SCEV *PromotedRHS = RHS;
3516
3517   if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
3518     PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
3519   else
3520     PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
3521
3522   return getUMinExpr(PromotedLHS, PromotedRHS);
3523 }
3524
3525 /// getPointerBase - Transitively follow the chain of pointer-type operands
3526 /// until reaching a SCEV that does not have a single pointer operand. This
3527 /// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
3528 /// but corner cases do exist.
3529 const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
3530   // A pointer operand may evaluate to a nonpointer expression, such as null.
3531   if (!V->getType()->isPointerTy())
3532     return V;
3533
3534   if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
3535     return getPointerBase(Cast->getOperand());
3536   }
3537   else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
3538     const SCEV *PtrOp = nullptr;
3539     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
3540          I != E; ++I) {
3541       if ((*I)->getType()->isPointerTy()) {
3542         // Cannot find the base of an expression with multiple pointer operands.
3543         if (PtrOp)
3544           return V;
3545         PtrOp = *I;
3546       }
3547     }
3548     if (!PtrOp)
3549       return V;
3550     return getPointerBase(PtrOp);
3551   }
3552   return V;
3553 }
3554
3555 /// PushDefUseChildren - Push users of the given Instruction
3556 /// onto the given Worklist.
3557 static void
3558 PushDefUseChildren(Instruction *I,
3559                    SmallVectorImpl<Instruction *> &Worklist) {
3560   // Push the def-use children onto the Worklist stack.
3561   for (User *U : I->users())
3562     Worklist.push_back(cast<Instruction>(U));
3563 }
3564
3565 /// ForgetSymbolicValue - This looks up computed SCEV values for all
3566 /// instructions that depend on the given instruction and removes them from
3567 /// the ValueExprMapType map if they reference SymName. This is used during PHI
3568 /// resolution.
3569 void
3570 ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
3571   SmallVector<Instruction *, 16> Worklist;
3572   PushDefUseChildren(PN, Worklist);
3573
3574   SmallPtrSet<Instruction *, 8> Visited;
3575   Visited.insert(PN);
3576   while (!Worklist.empty()) {
3577     Instruction *I = Worklist.pop_back_val();
3578     if (!Visited.insert(I).second)
3579       continue;
3580
3581     ValueExprMapType::iterator It =
3582       ValueExprMap.find_as(static_cast<Value *>(I));
3583     if (It != ValueExprMap.end()) {
3584       const SCEV *Old = It->second;
3585
3586       // Short-circuit the def-use traversal if the symbolic name
3587       // ceases to appear in expressions.
3588       if (Old != SymName && !hasOperand(Old, SymName))
3589         continue;
3590
3591       // SCEVUnknown for a PHI either means that it has an unrecognized
3592       // structure, it's a PHI that's in the progress of being computed
3593       // by createNodeForPHI, or it's a single-value PHI. In the first case,
3594       // additional loop trip count information isn't going to change anything.
3595       // In the second case, createNodeForPHI will perform the necessary
3596       // updates on its own when it gets to that point. In the third, we do
3597       // want to forget the SCEVUnknown.
3598       if (!isa<PHINode>(I) ||
3599           !isa<SCEVUnknown>(Old) ||
3600           (I != PN && Old == SymName)) {
3601         forgetMemoizedResults(Old);
3602         ValueExprMap.erase(It);
3603       }
3604     }
3605
3606     PushDefUseChildren(I, Worklist);
3607   }
3608 }
3609
/// createNodeForPHI - PHI nodes have two cases.  Either the PHI node exists in
/// a loop header, making it a potential recurrence, or it doesn't.
///
/// For a PHI in the header of its innermost loop that has one distinct
/// incoming value from inside the loop and one distinct incoming value from
/// outside it, this tries to express the PHI as an add recurrence over that
/// loop.  While the backedge value is analyzed the PHI is mapped to a
/// SCEVUnknown placeholder in ValueExprMap; on success the users of the
/// placeholder are purged with ForgetSymbolicName, the add recurrence is
/// stored in ValueExprMap for the PHI, and it is returned.
///
/// In every other case the PHI is passed to SimplifyInstruction.  If that
/// yields a value that LI.replacementPreservesLCSSAForm accepts, the SCEV of
/// that value is returned; otherwise the result is getUnknown(PN).
///
const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
  if (const Loop *L = LI.getLoopFor(PN->getParent()))
    if (L->getHeader() == PN->getParent()) {
      // The loop may have multiple entrances or multiple exits; we can analyze
      // this phi as an addrec if it has a unique entry value and a unique
      // backedge value.
      Value *BEValueV = nullptr, *StartValueV = nullptr;
      // Values arriving from blocks inside L are backedge candidates, all
      // others are start candidates.  A second, different value on either side
      // resets that side to null and ends the scan, which makes the check
      // below fail.
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
        Value *V = PN->getIncomingValue(i);
        if (L->contains(PN->getIncomingBlock(i))) {
          if (!BEValueV) {
            BEValueV = V;
          } else if (BEValueV != V) {
            BEValueV = nullptr;
            break;
          }
        } else if (!StartValueV) {
          StartValueV = V;
        } else if (StartValueV != V) {
          StartValueV = nullptr;
          break;
        }
      }
      if (BEValueV && StartValueV) {
        // While we are analyzing this PHI node, handle its value symbolically.
        const SCEV *SymbolicName = getUnknown(PN);
        assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
               "PHI node already processed?");
        ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));

        // Using this symbolic name for the PHI, analyze the value coming around
        // the back-edge.
        const SCEV *BEValue = getSCEV(BEValueV);

        // NOTE: If BEValue is loop invariant, we know that the PHI node just
        // has a special value for the first iteration of the loop.

        // If the value coming around the backedge is an add with the symbolic
        // value we just inserted, then we found a simple induction variable!
        if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
          // If there is a single occurrence of the symbolic value, replace it
          // with a recurrence.
          // FoundIndex keeps the value getNumOperands() when SymbolicName is
          // not an operand of the add; otherwise it is the index of the first
          // operand equal to SymbolicName.
          unsigned FoundIndex = Add->getNumOperands();
          for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
            if (Add->getOperand(i) == SymbolicName)
              if (FoundIndex == e) {
                FoundIndex = i;
                break;
              }

          if (FoundIndex != Add->getNumOperands()) {
            // Create an add with everything but the specified operand.
            SmallVector<const SCEV *, 8> Ops;
            for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
              if (i != FoundIndex)
                Ops.push_back(Add->getOperand(i));
            const SCEV *Accum = getAddExpr(Ops);

            // This is not a valid addrec if the step amount is varying each
            // loop iteration, but is not itself an addrec in this loop.
            if (isLoopInvariant(Accum, L) ||
                (isa<SCEVAddRecExpr>(Accum) &&
                 cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
              SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;

              // If the increment doesn't overflow, then neither the addrec nor
              // the post-increment will overflow.
              // The IR flags are only taken over when the PHI is the first
              // operand of the add.
              if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
                if (OBO->getOperand(0) == PN) {
                  if (OBO->hasNoUnsignedWrap())
                    Flags = setFlags(Flags, SCEV::FlagNUW);
                  if (OBO->hasNoSignedWrap())
                    Flags = setFlags(Flags, SCEV::FlagNSW);
                }
              } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
                // If the increment is an inbounds GEP, then we know the address
                // space cannot be wrapped around. We cannot make any guarantee
                // about signed or unsigned overflow because pointers are
                // unsigned but we may have a negative index from the base
                // pointer. We can guarantee that no unsigned wrap occurs if the
                // indices form a positive value.
                if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
                  Flags = setFlags(Flags, SCEV::FlagNW);

                  // The offset added by the GEP is the difference between the
                  // GEP's SCEV and the SCEV of its pointer operand.
                  const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
                  if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
                    Flags = setFlags(Flags, SCEV::FlagNUW);
                }

                // We cannot transfer nuw and nsw flags from subtraction
                // operations -- sub nuw X, Y is not the same as add nuw X, -Y
                // for instance.
              }

              const SCEV *StartVal = getSCEV(StartValueV);
              const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);

              // Since the no-wrap flags are on the increment, they apply to the
              // post-incremented value as well.
              // The result is deliberately discarded (note the (void) cast);
              // presumably the call exists to attach Flags to the
              // post-increment addrec expression -- confirm in getAddRecExpr.
              if (isLoopInvariant(Accum, L))
                (void)getAddRecExpr(getAddExpr(StartVal, Accum),
                                    Accum, L, Flags);

              // Okay, for the entire analysis of this edge we assumed the PHI
              // to be symbolic.  We now need to go back and purge all of the
              // entries for the scalars that use the symbolic expression.
              ForgetSymbolicName(PN, SymbolicName);
              ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
              return PHISCEV;
            }
          }
        } else if (const SCEVAddRecExpr *AddRec =
                     dyn_cast<SCEVAddRecExpr>(BEValue)) {
          // Otherwise, this could be a loop like this:
          //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
          // In this case, j = {1,+,1}  and BEValue is j.
          // Because the other in-value of i (0) fits the evolution of BEValue
          // i really is an addrec evolution.
          if (AddRec->getLoop() == L && AddRec->isAffine()) {
            const SCEV *StartVal = getSCEV(StartValueV);

            // If StartVal = j.start - j.stride, we can use StartVal as the
            // initial step of the addrec evolution.
            if (StartVal == getMinusSCEV(AddRec->getOperand(0),
                                         AddRec->getOperand(1))) {
              // FIXME: For constant StartVal, we should be able to infer
              // no-wrap flags.
              const SCEV *PHISCEV =
                getAddRecExpr(StartVal, AddRec->getOperand(1), L,
                              SCEV::FlagAnyWrap);

              // Okay, for the entire analysis of this edge we assumed the PHI
              // to be symbolic.  We now need to go back and purge all of the
              // entries for the scalars that use the symbolic expression.
              ForgetSymbolicName(PN, SymbolicName);
              ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
              return PHISCEV;
            }
          }
        }
        // No recurrence was recognized: control falls through to the generic
        // handling below.
      }
    }

  // If the PHI has a single incoming value, follow that value, unless the
  // PHI's incoming blocks are in a different loop, in which case doing so
  // risks breaking LCSSA form. Instcombine would normally zap these, but
  // it doesn't have DominatorTree information, so it may miss cases.
  if (Value *V = SimplifyInstruction(PN, F.getParent()->getDataLayout(), &TLI,
                                     &DT, &AC))
    if (LI.replacementPreservesLCSSAForm(PN, V))
      return getSCEV(V);

  // If it's not a loop phi, we can't handle it yet.
  return getUnknown(PN);
}
3768
3769 /// createNodeForGEP - Expand GEP instructions into add and multiply
3770 /// operations. This allows them to be analyzed by regular SCEV code.
3771 ///
3772 const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
3773   Value *Base = GEP->getOperand(0);
3774   // Don't attempt to analyze GEPs over unsized objects.
3775   if (!Base->getType()->getPointerElementType()->isSized())
3776     return getUnknown(GEP);
3777
3778   SmallVector<const SCEV *, 4> IndexExprs;
3779   for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
3780     IndexExprs.push_back(getSCEV(*Index));
3781   return getGEPExpr(GEP->getSourceElementType(), getSCEV(Base), IndexExprs,
3782                     GEP->isInBounds());
3783 }
3784
3785 /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
3786 /// guaranteed to end in (at every loop iteration).  It is, at the same time,
3787 /// the minimum number of times S is divisible by 2.  For example, given {4,+,8}
3788 /// it returns 2.  If S is guaranteed to be 0, it returns the bitwidth of S.
3789 uint32_t
3790 ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
3791   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
3792     return C->getValue()->getValue().countTrailingZeros();
3793
3794   if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
3795     return std::min(GetMinTrailingZeros(T->getOperand()),
3796                     (uint32_t)getTypeSizeInBits(T->getType()));
3797
3798   if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
3799     uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
3800     return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
3801              getTypeSizeInBits(E->getType()) : OpRes;
3802   }
3803
3804   if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
3805     uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
3806     return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
3807              getTypeSizeInBits(E->getType()) : OpRes;
3808   }
3809
3810   if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
3811     // The result is the min of all operands results.
3812     uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
3813     for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
3814       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
3815     return MinOpRes;
3816   }
3817
3818   if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
3819     // The result is the sum of all operands results.
3820     uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
3821     uint32_t BitWidth = getTypeSizeInBits(M->getType());
3822     for (unsigned i = 1, e = M->getNumOperands();
3823          SumOpRes != BitWidth && i != e; ++i)
3824       SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
3825                           BitWidth);
3826     return SumOpRes;
3827   }
3828
3829   if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
3830     // The result is the min of all operands results.
3831     uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
3832     for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
3833       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
3834     return MinOpRes;
3835   }
3836
3837   if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
3838     // The result is the min of all operands results.
3839     uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
3840     for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
3841       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
3842     return MinOpRes;
3843   }
3844
3845   if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
3846     // The result is the min of all operands results.
3847     uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
3848     for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
3849       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
3850     return MinOpRes;
3851   }
3852
3853   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
3854     // For a SCEVUnknown, ask ValueTracking.
3855     unsigned BitWidth = getTypeSizeInBits(U->getType());
3856     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
3857     computeKnownBits(U->getValue(), Zeros, Ones, F.getParent()->getDataLayout(),
3858                      0, &AC, nullptr, &DT);
3859     return Zeros.countTrailingOnes();
3860   }
3861
3862   // SCEVUDivExpr
3863   return 0;
3864 }
3865
3866 /// GetRangeFromMetadata - Helper method to assign a range to V from
3867 /// metadata present in the IR.
3868 static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
3869   if (Instruction *I = dyn_cast<Instruction>(V)) {
3870     if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) {
3871       ConstantRange TotalRange(
3872           cast<IntegerType>(I->getType())->getBitWidth(), false);
3873
3874       unsigned NumRanges = MD->getNumOperands() / 2;
3875       assert(NumRanges >= 1);
3876
3877       for (unsigned i = 0; i < NumRanges; ++i) {
3878         ConstantInt *Lower =
3879             mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 0));
3880         ConstantInt *Upper =
3881             mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 1));
3882         ConstantRange Range(Lower->getValue(), Upper->getValue());
3883         TotalRange = TotalRange.unionWith(Range);
3884       }
3885
3886       return TotalRange;
3887     }
3888   }
3889
3890   return None;
3891 }
3892
/// getRange - Determine the range for a particular SCEV.  If SignHint is
/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
/// with a "cleaner" unsigned (resp. signed) representation.
///
/// A previously computed range is returned straight from the cache selected
/// by SignHint (UnsignedRanges or SignedRanges).  Otherwise the range is
/// derived from the kind of S, always intersected with a conservative bound
/// obtained from the known trailing zero bits of S, and passed through
/// setRange before being returned (setRange presumably records the result in
/// the matching cache -- confirm at its definition).
///
ConstantRange
ScalarEvolution::getRange(const SCEV *S,
                          ScalarEvolution::RangeSignHint SignHint) {
  DenseMap<const SCEV *, ConstantRange> &Cache =
      SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
                                                       : SignedRanges;

  // See if we've computed this range already.
  DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
  if (I != Cache.end())
    return I->second;

  // A constant has a single-element range.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
    return setRange(C, SignHint, ConstantRange(C->getValue()->getValue()));

  unsigned BitWidth = getTypeSizeInBits(S->getType());
  ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);

  // If the value has known zeros, the maximum value will have those known zeros
  // as well.
  // The upper bound is the largest unsigned (resp. signed) value with its low
  // TZ bits cleared; the lower bound stays at the type's minimum.
  uint32_t TZ = GetMinTrailingZeros(S);
  if (TZ != 0) {
    if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
      ConservativeResult =
          ConstantRange(APInt::getMinValue(BitWidth),
                        APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
    else
      ConservativeResult = ConstantRange(
          APInt::getSignedMinValue(BitWidth),
          APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
  }

  // For the n-ary arithmetic and min/max expressions below, the operand ranges
  // are combined left to right with the matching ConstantRange operation.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    ConstantRange X = getRange(Add->getOperand(0), SignHint);
    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
      X = X.add(getRange(Add->getOperand(i), SignHint));
    return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    ConstantRange X = getRange(Mul->getOperand(0), SignHint);
    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
      X = X.multiply(getRange(Mul->getOperand(i), SignHint));
    return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
    ConstantRange X = getRange(SMax->getOperand(0), SignHint);
    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
      X = X.smax(getRange(SMax->getOperand(i), SignHint));
    return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
    ConstantRange X = getRange(UMax->getOperand(0), SignHint);
    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
      X = X.umax(getRange(UMax->getOperand(i), SignHint));
    return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
  }

  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
    ConstantRange X = getRange(UDiv->getLHS(), SignHint);
    ConstantRange Y = getRange(UDiv->getRHS(), SignHint);
    return setRange(UDiv, SignHint,
                    ConservativeResult.intersectWith(X.udiv(Y)));
  }

  // Casts: convert the operand's range to the width of the result type.
  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
    ConstantRange X = getRange(ZExt->getOperand(), SignHint);
    return setRange(ZExt, SignHint,
                    ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
  }

  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
    ConstantRange X = getRange(SExt->getOperand(), SignHint);
    return setRange(SExt, SignHint,
                    ConservativeResult.intersectWith(X.signExtend(BitWidth)));
  }

  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
    ConstantRange X = getRange(Trunc->getOperand(), SignHint);
    return setRange(Trunc, SignHint,
                    ConservativeResult.intersectWith(X.truncate(BitWidth)));
  }

  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
    // If there's no unsigned wrap, the value will never be less than its
    // initial value.
    // Only a non-zero constant start is used; the range built here has the
    // start as its lower bound and 0 as its (exclusive, wrapped) upper bound.
    if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
        if (!C->getValue()->isZero())
          ConservativeResult =
            ConservativeResult.intersectWith(
              ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));

    // If there's no signed wrap, and all the operands have the same sign or
    // zero, the value won't ever change sign.
    if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
      bool AllNonNeg = true;
      bool AllNonPos = true;
      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
        if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
        if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
      }
      // The range from 0 up to (excluding) the signed minimum holds the
      // non-negative values; the range from the signed minimum up to
      // (excluding) 1 holds the non-positive values.
      if (AllNonNeg)
        ConservativeResult = ConservativeResult.intersectWith(
          ConstantRange(APInt(BitWidth, 0),
                        APInt::getSignedMinValue(BitWidth)));
      else if (AllNonPos)
        ConservativeResult = ConservativeResult.intersectWith(
          ConstantRange(APInt::getSignedMinValue(BitWidth),
                        APInt(BitWidth, 1)));
    }

    // TODO: non-affine addrec
    if (AddRec->isAffine()) {
      Type *Ty = AddRec->getType();
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
      // The refinement below is only attempted when a maximum backedge-taken
      // count is available and is no wider than the addrec itself.
      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {

        // Check for overflow.  This must be done with ConstantRange arithmetic
        // because we could be called from within the ScalarEvolution overflow
        // checking code.

        // The end value Start + MaxBECount * Step is computed twice: once at
        // BitWidth and once at BitWidth * 2 + 1 bits.  The narrow result is
        // only used when, after extension, it equals the wide result.
        MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
        ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
        ConstantRange ZExtMaxBECountRange =
            MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1);

        const SCEV *Start = AddRec->getStart();
        const SCEV *Step = AddRec->getStepRecurrence(*this);
        ConstantRange StepSRange = getSignedRange(Step);
        ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1);

        ConstantRange StartURange = getUnsignedRange(Start);
        ConstantRange EndURange =
            StartURange.add(MaxBECountRange.multiply(StepSRange));

        // Check for unsigned overflow.
        ConstantRange ZExtStartURange =
            StartURange.zextOrTrunc(BitWidth * 2 + 1);
        ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1);
        if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
            ZExtEndURange) {
          // Bound the addrec by the extremes of its start and end ranges.
          APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
                                     EndURange.getUnsignedMin());
          APInt Max = APIntOps::umax(StartURange.getUnsignedMax(),
                                     EndURange.getUnsignedMax());
          // Skip the intersection when the bounds span the whole type.
          bool IsFullRange = Min.isMinValue() && Max.isMaxValue();
          if (!IsFullRange)
            ConservativeResult =
                ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
        }

        ConstantRange StartSRange = getSignedRange(Start);
        ConstantRange EndSRange =
            StartSRange.add(MaxBECountRange.multiply(StepSRange));

        // Check for signed overflow. This must be done with ConstantRange
        // arithmetic because we could be called from within the ScalarEvolution
        // overflow checking code.
        ConstantRange SExtStartSRange =
            StartSRange.sextOrTrunc(BitWidth * 2 + 1);
        ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1);
        if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
            SExtEndSRange) {
          // Same as the unsigned case, using signed extremes.
          APInt Min = APIntOps::smin(StartSRange.getSignedMin(),
                                     EndSRange.getSignedMin());
          APInt Max = APIntOps::smax(StartSRange.getSignedMax(),
                                     EndSRange.getSignedMax());
          bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue();
          if (!IsFullRange)
            ConservativeResult =
                ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
        }
      }
    }

    return setRange(AddRec, SignHint, ConservativeResult);
  }

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    // Check if the IR explicitly contains !range metadata.
    Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
    if (MDRange.hasValue())
      ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());

    // Split here to avoid paying the compile-time cost of calling both
    // computeKnownBits and ComputeNumSignBits.  This restriction can be lifted
    // if needed.
    const DataLayout &DL = F.getParent()->getDataLayout();
    if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
      // For a SCEVUnknown, ask ValueTracking.
      // The smallest possible value has exactly the known-one bits set and
      // the largest has every bit set that is not known to be zero.  The
      // intersection is skipped when those bounds would make lower == upper.
      APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
      computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT);
      if (Ones != ~Zeros + 1)
        ConservativeResult =
            ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
    } else {
      assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
             "generalize as needed!");
      // With NS known sign bits, the signed bounds are the type's signed
      // minimum and maximum arithmetically shifted right by NS - 1.
      unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
      if (NS > 1)
        ConservativeResult = ConservativeResult.intersectWith(
            ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
                          APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
    }

    return setRange(U, SignHint, ConservativeResult);
  }

  // Any other kind of expression only gets the trailing-zeros bound.
  return setRange(S, SignHint, ConservativeResult);
}
4111
4112 SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
4113   if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
4114   const BinaryOperator *BinOp = cast<BinaryOperator>(V);
4115
4116   // Return early if there are no flags to propagate to the SCEV.
4117   SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
4118   if (BinOp->hasNoUnsignedWrap())
4119     Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
4120   if (BinOp->hasNoSignedWrap())
4121     Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
4122   if (Flags == SCEV::FlagAnyWrap) {
4123     return SCEV::FlagAnyWrap;
4124   }
4125
4126   // Here we check that BinOp is in the header of the innermost loop
4127   // containing BinOp, since we only deal with instructions in the loop
4128   // header. The actual loop we need to check later will come from an add
4129   // recurrence, but getting that requires computing the SCEV of the operands,
4130   // which can be expensive. This check we can do cheaply to rule out some
4131   // cases early.
4132   Loop *innermostContainingLoop = LI.getLoopFor(BinOp->getParent());
4133   if (innermostContainingLoop == nullptr ||
4134       innermostContainingLoop->getHeader() != BinOp->getParent())
4135     return SCEV::FlagAnyWrap;
4136
4137   // Only proceed if we can prove that BinOp does not yield poison.
4138   if (!isKnownNotFullPoison(BinOp)) return SCEV::FlagAnyWrap;
4139
4140   // At this point we know that if V is executed, then it does not wrap
4141   // according to at least one of NSW or NUW. If V is not executed, then we do
4142   // not know if the calculation that V represents would wrap. Multiple
4143   // instructions can map to the same SCEV. If we apply NSW or NUW from V to
4144   // the SCEV, we must guarantee no wrapping for that SCEV also when it is
4145   // derived from other instructions that map to the same SCEV. We cannot make
4146   // that guarantee for cases where V is not executed. So we need to find the
4147   // loop that V is considered in relation to and prove that V is executed for
4148   // every iteration of that loop. That implies that the value that V
4149   // calculates does not wrap anywhere in the loop, so then we can apply the
4150   // flags to the SCEV.
4151   //
4152   // We check isLoopInvariant to disambiguate in case we are adding two
4153   // recurrences from different loops, so that we know which loop to prove
4154   // that V is executed in.
4155   for (int OpIndex = 0; OpIndex < 2; ++OpIndex) {
4156     const SCEV *Op = getSCEV(BinOp->getOperand(OpIndex));
4157     if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
4158       const int OtherOpIndex = 1 - OpIndex;
4159       const SCEV *OtherOp = getSCEV(BinOp->getOperand(OtherOpIndex));
4160       if (isLoopInvariant(OtherOp, AddRec->getLoop()) &&
4161           isGuaranteedToExecuteForEveryIteration(BinOp, AddRec->getLoop()))
4162         return Flags;
4163     }
4164   }
4165   return SCEV::FlagAnyWrap;
4166 }
4167
4168 /// createSCEV - We know that there is no SCEV for the specified value.  Analyze
4169 /// the expression.
4170 ///
4171 const SCEV *ScalarEvolution::createSCEV(Value *V) {
4172   if (!isSCEVable(V->getType()))
4173     return getUnknown(V);
4174
4175   unsigned Opcode = Instruction::UserOp1;
4176   if (Instruction *I = dyn_cast<Instruction>(V)) {
4177     Opcode = I->getOpcode();
4178
4179     // Don't attempt to analyze instructions in blocks that aren't
4180     // reachable. Such instructions don't matter, and they aren't required
4181     // to obey basic rules for definitions dominating uses which this
4182     // analysis depends on.
4183     if (!DT.isReachableFromEntry(I->getParent()))
4184       return getUnknown(V);
4185   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
4186     Opcode = CE->getOpcode();
4187   else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
4188     return getConstant(CI);
4189   else if (isa<ConstantPointerNull>(V))
4190     return getConstant(V->getType(), 0);
4191   else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
4192     return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
4193   else
4194     return getUnknown(V);
4195
4196   Operator *U = cast<Operator>(V);
4197   switch (Opcode) {
4198   case Instruction::Add: {
4199     // The simple thing to do would be to just call getSCEV on both operands
4200     // and call getAddExpr with the result. However if we're looking at a
4201     // bunch of things all added together, this can be quite inefficient,
4202     // because it leads to N-1 getAddExpr calls for N ultimate operands.
4203     // Instead, gather up all the operands and make a single getAddExpr call.
4204     // LLVM IR canonical form means we need only traverse the left operands.
4205     SmallVector<const SCEV *, 4> AddOps;
4206     for (Value *Op = U;; Op = U->getOperand(0)) {
4207       U = dyn_cast<Operator>(Op);
4208       unsigned Opcode = U ? U->getOpcode() : 0;
4209       if (!U || (Opcode != Instruction::Add && Opcode != Instruction::Sub)) {
4210         assert(Op != V && "V should be an add");
4211         AddOps.push_back(getSCEV(Op));
4212         break;
4213       }
4214
4215       if (auto *OpSCEV = getExistingSCEV(U)) {
4216         AddOps.push_back(OpSCEV);
4217         break;
4218       }
4219
4220       // If a NUW or NSW flag can be applied to the SCEV for this
4221       // addition, then compute the SCEV for this addition by itself
4222       // with a separate call to getAddExpr. We need to do that
4223       // instead of pushing the operands of the addition onto AddOps,
4224       // since the flags are only known to apply to this particular
4225       // addition - they may not apply to other additions that can be
4226       // formed with operands from AddOps.
4227       const SCEV *RHS = getSCEV(U->getOperand(1));
4228       SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
4229       if (Flags != SCEV::FlagAnyWrap) {
4230         const SCEV *LHS = getSCEV(U->getOperand(0));
4231         if (Opcode == Instruction::Sub)
4232           AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
4233         else
4234           AddOps.push_back(getAddExpr(LHS, RHS, Flags));
4235         break;
4236       }
4237
4238       if (Opcode == Instruction::Sub)
4239         AddOps.push_back(getNegativeSCEV(RHS));
4240       else
4241         AddOps.push_back(RHS);
4242     }
4243     return getAddExpr(AddOps);
4244   }
4245
4246   case Instruction::Mul: {
4247     SmallVector<const SCEV *, 4> MulOps;
4248     for (Value *Op = U;; Op = U->getOperand(0)) {
4249       U = dyn_cast<Operator>(Op);
4250       if (!U || U->getOpcode() != Instruction::Mul) {
4251         assert(Op != V && "V should be a mul");
4252         MulOps.push_back(getSCEV(Op));
4253         break;
4254       }
4255
4256       if (auto *OpSCEV = getExistingSCEV(U)) {
4257         MulOps.push_back(OpSCEV);
4258         break;
4259       }
4260
4261       SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
4262       if (Flags != SCEV::FlagAnyWrap) {
4263         MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)),
4264                                     getSCEV(U->getOperand(1)), Flags));
4265         break;
4266       }
4267
4268       MulOps.push_back(getSCEV(U->getOperand(1)));
4269     }
4270     return getMulExpr(MulOps);
4271   }
4272   case Instruction::UDiv:
4273     return getUDivExpr(getSCEV(U->getOperand(0)),
4274                        getSCEV(U->getOperand(1)));
4275   case Instruction::Sub:
4276     return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)),
4277                         getNoWrapFlagsFromUB(U));
4278   case Instruction::And:
4279     // For an expression like x&255 that merely masks off the high bits,
4280     // use zext(trunc(x)) as the SCEV expression.
4281     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
4282       if (CI->isNullValue())
4283         return getSCEV(U->getOperand(1));
4284       if (CI->isAllOnesValue())
4285         return getSCEV(U->getOperand(0));
4286       const APInt &A = CI->getValue();
4287
4288       // Instcombine's ShrinkDemandedConstant may strip bits out of
4289       // constants, obscuring what would otherwise be a low-bits mask.
4290       // Use computeKnownBits to compute what ShrinkDemandedConstant
4291       // knew about to reconstruct a low-bits mask value.
4292       unsigned LZ = A.countLeadingZeros();
4293       unsigned TZ = A.countTrailingZeros();
4294       unsigned BitWidth = A.getBitWidth();
4295       APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
4296       computeKnownBits(U->getOperand(0), KnownZero, KnownOne,
4297                        F.getParent()->getDataLayout(), 0, &AC, nullptr, &DT);
4298
4299       APInt EffectiveMask =
4300           APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
4301       if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) {
4302         const SCEV *MulCount = getConstant(
4303             ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, TZ)));
4304         return getMulExpr(
4305             getZeroExtendExpr(
4306                 getTruncateExpr(
4307                     getUDivExactExpr(getSCEV(U->getOperand(0)), MulCount),
4308                     IntegerType::get(getContext(), BitWidth - LZ - TZ)),
4309                 U->getType()),
4310             MulCount);
4311       }
4312     }
4313     break;
4314
4315   case Instruction::Or:
4316     // If the RHS of the Or is a constant, we may have something like:
4317     // X*4+1 which got turned into X*4|1.  Handle this as an Add so loop
4318     // optimizations will transparently handle this case.
4319     //
4320     // In order for this transformation to be safe, the LHS must be of the
4321     // form X*(2^n) and the Or constant must be less than 2^n.
4322     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
4323       const SCEV *LHS = getSCEV(U->getOperand(0));
4324       const APInt &CIVal = CI->getValue();
4325       if (GetMinTrailingZeros(LHS) >=
4326           (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
4327         // Build a plain add SCEV.
4328         const SCEV *S = getAddExpr(LHS, getSCEV(CI));
4329         // If the LHS of the add was an addrec and it has no-wrap flags,
4330         // transfer the no-wrap flags, since an or won't introduce a wrap.
4331         if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
4332           const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
4333           const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
4334             OldAR->getNoWrapFlags());
4335         }
4336         return S;
4337       }
4338     }
4339     break;
4340   case Instruction::Xor:
4341     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
4342       // If the RHS of the xor is a signbit, then this is just an add.
4343       // Instcombine turns add of signbit into xor as a strength reduction step.
4344       if (CI->getValue().isSignBit())
4345         return getAddExpr(getSCEV(U->getOperand(0)),
4346                           getSCEV(U->getOperand(1)));
4347
4348       // If the RHS of xor is -1, then this is a not operation.
4349       if (CI->isAllOnesValue())
4350         return getNotSCEV(getSCEV(U->getOperand(0)));
4351
4352       // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
4353       // This is a variant of the check for xor with -1, and it handles
4354       // the case where instcombine has trimmed non-demanded bits out
4355       // of an xor with -1.
4356       if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
4357         if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
4358           if (BO->getOpcode() == Instruction::And &&
4359               LCI->getValue() == CI->getValue())
4360             if (const SCEVZeroExtendExpr *Z =
4361                   dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
4362               Type *UTy = U->getType();
4363               const SCEV *Z0 = Z->getOperand();
4364               Type *Z0Ty = Z0->getType();
4365               unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
4366
4367               // If C is a low-bits mask, the zero extend is serving to
4368               // mask off the high bits. Complement the operand and
4369               // re-apply the zext.
4370               if (APIntOps::isMask(Z0TySize, CI->getValue()))
4371                 return getZeroExtendExpr(getNotSCEV(Z0), UTy);
4372
4373               // If C is a single bit, it may be in the sign-bit position
4374               // before the zero-extend. In this case, represent the xor
4375               // using an add, which is equivalent, and re-apply the zext.
4376               APInt Trunc = CI->getValue().trunc(Z0TySize);
4377               if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
4378                   Trunc.isSignBit())
4379                 return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
4380                                          UTy);
4381             }
4382     }
4383     break;
4384
4385   case Instruction::Shl:
4386     // Turn shift left of a constant amount into a multiply.
4387     if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
4388       uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
4389
4390       // If the shift count is not less than the bitwidth, the result of
4391       // the shift is undefined. Don't try to analyze it, because the
4392       // resolution chosen here may differ from the resolution chosen in
4393       // other parts of the compiler.
4394       if (SA->getValue().uge(BitWidth))
4395         break;
4396
4397       // It is currently not resolved how to interpret NSW for left
4398       // shift by BitWidth - 1, so we avoid applying flags in that
4399       // case. Remove this check (or this comment) once the situation
4400       // is resolved. See
4401       // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html
4402       // and http://reviews.llvm.org/D8890 .
4403       auto Flags = SCEV::FlagAnyWrap;
4404       if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U);
4405
4406       Constant *X = ConstantInt::get(getContext(),
4407         APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
4408       return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags);
4409     }
4410     break;
4411
4412   case Instruction::LShr:
4413     // Turn logical shift right of a constant into a unsigned divide.
4414     if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
4415       uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
4416
4417       // If the shift count is not less than the bitwidth, the result of
4418       // the shift is undefined. Don't try to analyze it, because the
4419       // resolution chosen here may differ from the resolution chosen in
4420       // other parts of the compiler.
4421       if (SA->getValue().uge(BitWidth))
4422         break;
4423
4424       Constant *X = ConstantInt::get(getContext(),
4425         APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
4426       return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
4427     }
4428     break;
4429
4430   case Instruction::AShr:
4431     // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
4432     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
4433       if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
4434         if (L->getOpcode() == Instruction::Shl &&
4435             L->getOperand(1) == U->getOperand(1)) {
4436           uint64_t BitWidth = getTypeSizeInBits(U->getType());
4437
4438           // If the shift count is not less than the bitwidth, the result of
4439           // the shift is undefined. Don't try to analyze it, because the
4440           // resolution chosen here may differ from the resolution chosen in
4441           // other parts of the compiler.
4442           if (CI->getValue().uge(BitWidth))
4443             break;
4444
4445           uint64_t Amt = BitWidth - CI->getZExtValue();
4446           if (Amt == BitWidth)
4447             return getSCEV(L->getOperand(0));       // shift by zero --> noop
4448           return
4449             getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
4450                                               IntegerType::get(getContext(),
4451                                                                Amt)),
4452                               U->getType());
4453         }
4454     break;
4455
4456   case Instruction::Trunc:
4457     return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
4458
4459   case Instruction::ZExt:
4460     return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
4461
4462   case Instruction::SExt:
4463     return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
4464
4465   case Instruction::BitCast:
4466     // BitCasts are no-op casts so we just eliminate the cast.
4467     if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
4468       return getSCEV(U->getOperand(0));
4469     break;
4470
4471   // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
4472   // lead to pointer expressions which cannot safely be expanded to GEPs,
4473   // because ScalarEvolution doesn't respect the GEP aliasing rules when
4474   // simplifying integer expressions.
4475
4476   case Instruction::GetElementPtr:
4477     return createNodeForGEP(cast<GEPOperator>(U));
4478
4479   case Instruction::PHI:
4480     return createNodeForPHI(cast<PHINode>(U));
4481
4482   case Instruction::Select:
4483     // This could be a smax or umax that was lowered earlier.
4484     // Try to recover it.
4485     if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
4486       Value *LHS = ICI->getOperand(0);
4487       Value *RHS = ICI->getOperand(1);
4488       switch (ICI->getPredicate()) {
4489       case ICmpInst::ICMP_SLT:
4490       case ICmpInst::ICMP_SLE:
4491         std::swap(LHS, RHS);
4492         // fall through
4493       case ICmpInst::ICMP_SGT:
4494       case ICmpInst::ICMP_SGE:
4495         // a >s b ? a+x : b+x  ->  smax(a, b)+x
4496         // a >s b ? b+x : a+x  ->  smin(a, b)+x
4497         if (getTypeSizeInBits(LHS->getType()) <=
4498             getTypeSizeInBits(U->getType())) {
4499           const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType());
4500           const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType());
4501           const SCEV *LA = getSCEV(U->getOperand(1));
4502           const SCEV *RA = getSCEV(U->getOperand(2));
4503           const SCEV *LDiff = getMinusSCEV(LA, LS);
4504           const SCEV *RDiff = getMinusSCEV(RA, RS);
4505           if (LDiff == RDiff)
4506             return getAddExpr(getSMaxExpr(LS, RS), LDiff);
4507           LDiff = getMinusSCEV(LA, RS);
4508           RDiff = getMinusSCEV(RA, LS);
4509           if (LDiff == RDiff)
4510             return getAddExpr(getSMinExpr(LS, RS), LDiff);
4511         }
4512         break;
4513       case ICmpInst::ICMP_ULT:
4514       case ICmpInst::ICMP_ULE:
4515         std::swap(LHS, RHS);
4516         // fall through
4517       case ICmpInst::ICMP_UGT:
4518       case ICmpInst::ICMP_UGE:
4519         // a >u b ? a+x : b+x  ->  umax(a, b)+x
4520         // a >u b ? b+x : a+x  ->  umin(a, b)+x
4521         if (getTypeSizeInBits(LHS->getType()) <=
4522             getTypeSizeInBits(U->getType())) {
4523           const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
4524           const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType());
4525           const SCEV *LA = getSCEV(U->getOperand(1));
4526           const SCEV *RA = getSCEV(U->getOperand(2));
4527           const SCEV *LDiff = getMinusSCEV(LA, LS);
4528           const SCEV *RDiff = getMinusSCEV(RA, RS);
4529           if (LDiff == RDiff)
4530             return getAddExpr(getUMaxExpr(LS, RS), LDiff);
4531           LDiff = getMinusSCEV(LA, RS);
4532           RDiff = getMinusSCEV(RA, LS);
4533           if (LDiff == RDiff)
4534             return getAddExpr(getUMinExpr(LS, RS), LDiff);
4535         }
4536         break;
4537       case ICmpInst::ICMP_NE:
4538         // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x
4539         if (getTypeSizeInBits(LHS->getType()) <=
4540                 getTypeSizeInBits(U->getType()) &&
4541             isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
4542           const SCEV *One = getConstant(U->getType(), 1);
4543           const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
4544           const SCEV *LA = getSCEV(U->getOperand(1));
4545           const SCEV *RA = getSCEV(U->getOperand(2));
4546           const SCEV *LDiff = getMinusSCEV(LA, LS);
4547           const SCEV *RDiff = getMinusSCEV(RA, One);
4548           if (LDiff == RDiff)
4549             return getAddExpr(getUMaxExpr(One, LS), LDiff);
4550         }
4551         break;
4552       case ICmpInst::ICMP_EQ:
4553         // n == 0 ? 1+x : n+x  ->  umax(n, 1)+x
4554         if (getTypeSizeInBits(LHS->getType()) <=
4555                 getTypeSizeInBits(U->getType()) &&
4556             isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
4557           const SCEV *One = getConstant(U->getType(), 1);
4558           const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
4559           const SCEV *LA = getSCEV(U->getOperand(1));
4560           const SCEV *RA = getSCEV(U->getOperand(2));
4561           const SCEV *LDiff = getMinusSCEV(LA, One);
4562           const SCEV *RDiff = getMinusSCEV(RA, LS);
4563           if (LDiff == RDiff)
4564             return getAddExpr(getUMaxExpr(One, LS), LDiff);
4565         }
4566         break;
4567       default:
4568         break;
4569       }
4570     }
4571
4572   default: // We cannot analyze this expression.
4573     break;
4574   }
4575
4576   return getUnknown(V);
4577 }
4578
4579
4580
4581 //===----------------------------------------------------------------------===//
4582 //                   Iteration Count Computation Code
4583 //
4584
4585 unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
4586   if (BasicBlock *ExitingBB = L->getExitingBlock())
4587     return getSmallConstantTripCount(L, ExitingBB);
4588
4589   // No trip count information for multiple exits.
4590   return 0;
4591 }
4592
4593 /// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
4594 /// normal unsigned value. Returns 0 if the trip count is unknown or not
4595 /// constant. Will also return 0 if the maximum trip count is very large (>=
4596 /// 2^32).
4597 ///
4598 /// This "trip count" assumes that control exits via ExitingBlock. More
4599 /// precisely, it is the number of times that control may reach ExitingBlock
4600 /// before taking the branch. For loops with multiple exits, it may not be the
4601 /// number times that the loop header executes because the loop may exit
4602 /// prematurely via another branch.
4603 unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
4604                                                     BasicBlock *ExitingBlock) {
4605   assert(ExitingBlock && "Must pass a non-null exiting block!");
4606   assert(L->isLoopExiting(ExitingBlock) &&
4607          "Exiting block must actually branch out of the loop!");
4608   const SCEVConstant *ExitCount =
4609       dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
4610   if (!ExitCount)
4611     return 0;
4612
4613   ConstantInt *ExitConst = ExitCount->getValue();
4614
4615   // Guard against huge trip counts.
4616   if (ExitConst->getValue().getActiveBits() > 32)
4617     return 0;
4618
4619   // In case of integer overflow, this returns 0, which is correct.
4620   return ((unsigned)ExitConst->getZExtValue()) + 1;
4621 }
4622
4623 unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
4624   if (BasicBlock *ExitingBB = L->getExitingBlock())
4625     return getSmallConstantTripMultiple(L, ExitingBB);
4626
4627   // No trip multiple information for multiple exits.
4628   return 0;
4629 }
4630
4631 /// getSmallConstantTripMultiple - Returns the largest constant divisor of the
4632 /// trip count of this loop as a normal unsigned value, if possible. This
4633 /// means that the actual trip count is always a multiple of the returned
4634 /// value (don't forget the trip count could very well be zero as well!).
4635 ///
4636 /// Returns 1 if the trip count is unknown or not guaranteed to be the
4637 /// multiple of a constant (which is also the case if the trip count is simply
4638 /// constant, use getSmallConstantTripCount for that case), Will also return 1
4639 /// if the trip count is very large (>= 2^32).
4640 ///
4641 /// As explained in the comments for getSmallConstantTripCount, this assumes
4642 /// that control exits the loop via ExitingBlock.
4643 unsigned
4644 ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
4645                                               BasicBlock *ExitingBlock) {
4646   assert(ExitingBlock && "Must pass a non-null exiting block!");
4647   assert(L->isLoopExiting(ExitingBlock) &&
4648          "Exiting block must actually branch out of the loop!");
4649   const SCEV *ExitCount = getExitCount(L, ExitingBlock);
4650   if (ExitCount == getCouldNotCompute())
4651     return 1;
4652
4653   // Get the trip count from the BE count by adding 1.
4654   const SCEV *TCMul = getAddExpr(ExitCount,
4655                                  getConstant(ExitCount->getType(), 1));
4656   // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
4657   // to factor simple cases.
4658   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
4659     TCMul = Mul->getOperand(0);
4660
4661   const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
4662   if (!MulC)
4663     return 1;
4664
4665   ConstantInt *Result = MulC->getValue();
4666
4667   // Guard against huge trip counts (this requires checking
4668   // for zero to handle the case where the trip count == -1 and the
4669   // addition wraps).
4670   if (!Result || Result->getValue().getActiveBits() > 32 ||
4671       Result->getValue().getActiveBits() == 0)
4672     return 1;
4673
4674   return (unsigned)Result->getZExtValue();
4675 }
4676
4677 // getExitCount - Get the expression for the number of loop iterations for which
4678 // this loop is guaranteed not to exit via ExitingBlock. Otherwise return
4679 // SCEVCouldNotCompute.
4680 const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
4681   return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
4682 }
4683
4684 /// getBackedgeTakenCount - If the specified loop has a predictable
4685 /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
4686 /// object. The backedge-taken count is the number of times the loop header
4687 /// will be branched to from within the loop. This is one less than the
4688 /// trip count of the loop, since it doesn't count the first iteration,
4689 /// when the header is branched to from outside the loop.
4690 ///
4691 /// Note that it is not valid to call this method on a loop without a
4692 /// loop-invariant backedge-taken count (see
4693 /// hasLoopInvariantBackedgeTakenCount).
4694 ///
4695 const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
4696   return getBackedgeTakenInfo(L).getExact(this);
4697 }
4698
4699 /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
4700 /// return the least SCEV value that is known never to be less than the
4701 /// actual backedge taken count.
4702 const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
4703   return getBackedgeTakenInfo(L).getMax(this);
4704 }
4705
4706 /// PushLoopPHIs - Push PHI nodes in the header of the given loop
4707 /// onto the given Worklist.
4708 static void
4709 PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
4710   BasicBlock *Header = L->getHeader();
4711
4712   // Push all Loop-header PHIs onto the Worklist stack.
4713   for (BasicBlock::iterator I = Header->begin();
4714        PHINode *PN = dyn_cast<PHINode>(I); ++I)
4715     Worklist.push_back(PN);
4716 }
4717
4718 const ScalarEvolution::BackedgeTakenInfo &
4719 ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
4720   // Initially insert an invalid entry for this loop. If the insertion
4721   // succeeds, proceed to actually compute a backedge-taken count and
4722   // update the value. The temporary CouldNotCompute value tells SCEV
4723   // code elsewhere that it shouldn't attempt to request a new
4724   // backedge-taken count, which could result in infinite recursion.
4725   std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
4726     BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo()));
4727   if (!Pair.second)
4728     return Pair.first->second;
4729
4730   // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it
4731   // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
4732   // must be cleared in this scope.
4733   BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L);
4734
4735   if (Result.getExact(this) != getCouldNotCompute()) {
4736     assert(isLoopInvariant(Result.getExact(this), L) &&
4737            isLoopInvariant(Result.getMax(this), L) &&
4738            "Computed backedge-taken count isn't loop invariant for loop!");
4739     ++NumTripCountsComputed;
4740   }
4741   else if (Result.getMax(this) == getCouldNotCompute() &&
4742            isa<PHINode>(L->getHeader()->begin())) {
4743     // Only count loops that have phi nodes as not being computable.
4744     ++NumTripCountsNotComputed;
4745   }
4746
4747   // Now that we know more about the trip count for this loop, forget any
4748   // existing SCEV values for PHI nodes in this loop since they are only
4749   // conservative estimates made without the benefit of trip count
4750   // information. This is similar to the code in forgetLoop, except that
4751   // it handles SCEVUnknown PHI nodes specially.
4752   if (Result.hasAnyInfo()) {
4753     SmallVector<Instruction *, 16> Worklist;
4754     PushLoopPHIs(L, Worklist);
4755
4756     SmallPtrSet<Instruction *, 8> Visited;
4757     while (!Worklist.empty()) {
4758       Instruction *I = Worklist.pop_back_val();
4759       if (!Visited.insert(I).second)
4760         continue;
4761
4762       ValueExprMapType::iterator It =
4763         ValueExprMap.find_as(static_cast<Value *>(I));
4764       if (It != ValueExprMap.end()) {
4765         const SCEV *Old = It->second;
4766
4767         // SCEVUnknown for a PHI either means that it has an unrecognized
4768         // structure, or it's a PHI that's in the progress of being computed
4769         // by createNodeForPHI.  In the former case, additional loop trip
4770         // count information isn't going to change anything. In the later
4771         // case, createNodeForPHI will perform the necessary updates on its
4772         // own when it gets to that point.
4773         if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
4774           forgetMemoizedResults(Old);
4775           ValueExprMap.erase(It);
4776         }
4777         if (PHINode *PN = dyn_cast<PHINode>(I))
4778           ConstantEvolutionLoopExitValue.erase(PN);
4779       }
4780
4781       PushDefUseChildren(I, Worklist);
4782     }
4783   }
4784
4785   // Re-lookup the insert position, since the call to
4786   // ComputeBackedgeTakenCount above could result in a
4787   // recusive call to getBackedgeTakenInfo (on a different
4788   // loop), which would invalidate the iterator computed
4789   // earlier.
4790   return BackedgeTakenCounts.find(L)->second = Result;
4791 }
4792
4793 /// forgetLoop - This method should be called by the client when it has
4794 /// changed a loop in a way that may effect ScalarEvolution's ability to
4795 /// compute a trip count, or if the loop is deleted.
4796 void ScalarEvolution::forgetLoop(const Loop *L) {
4797   // Drop any stored trip count value.
4798   DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
4799     BackedgeTakenCounts.find(L);
4800   if (BTCPos != BackedgeTakenCounts.end()) {
4801     BTCPos->second.clear();
4802     BackedgeTakenCounts.erase(BTCPos);
4803   }
4804
4805   // Drop information about expressions based on loop-header PHIs.
4806   SmallVector<Instruction *, 16> Worklist;
4807   PushLoopPHIs(L, Worklist);
4808
4809   SmallPtrSet<Instruction *, 8> Visited;
4810   while (!Worklist.empty()) {
4811     Instruction *I = Worklist.pop_back_val();
4812     if (!Visited.insert(I).second)
4813       continue;
4814
4815     ValueExprMapType::iterator It =
4816       ValueExprMap.find_as(static_cast<Value *>(I));
4817     if (It != ValueExprMap.end()) {
4818       forgetMemoizedResults(It->second);
4819       ValueExprMap.erase(It);
4820       if (PHINode *PN = dyn_cast<PHINode>(I))
4821         ConstantEvolutionLoopExitValue.erase(PN);
4822     }
4823
4824     PushDefUseChildren(I, Worklist);
4825   }
4826
4827   // Forget all contained loops too, to avoid dangling entries in the
4828   // ValuesAtScopes map.
4829   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
4830     forgetLoop(*I);
4831 }
4832
4833 /// forgetValue - This method should be called by the client when it has
4834 /// changed a value in a way that may effect its value, or which may
4835 /// disconnect it from a def-use chain linking it to a loop.
4836 void ScalarEvolution::forgetValue(Value *V) {
4837   Instruction *I = dyn_cast<Instruction>(V);
4838   if (!I) return;
4839
4840   // Drop information about expressions based on loop-header PHIs.
4841   SmallVector<Instruction *, 16> Worklist;
4842   Worklist.push_back(I);
4843
4844   SmallPtrSet<Instruction *, 8> Visited;
4845   while (!Worklist.empty()) {
4846     I = Worklist.pop_back_val();
4847     if (!Visited.insert(I).second)
4848       continue;
4849
4850     ValueExprMapType::iterator It =
4851       ValueExprMap.find_as(static_cast<Value *>(I));
4852     if (It != ValueExprMap.end()) {
4853       forgetMemoizedResults(It->second);
4854       ValueExprMap.erase(It);
4855       if (PHINode *PN = dyn_cast<PHINode>(I))
4856         ConstantEvolutionLoopExitValue.erase(PN);
4857     }
4858
4859     PushDefUseChildren(I, Worklist);
4860   }
4861 }
4862
4863 /// getExact - Get the exact loop backedge taken count considering all loop
4864 /// exits. A computable result can only be returned for loops with a single
4865 /// exit.  Returning the minimum taken count among all exits is incorrect
4866 /// because one of the loop's exit limit's may have been skipped. HowFarToZero
4867 /// assumes that the limit of each loop test is never skipped. This is a valid
4868 /// assumption as long as the loop exits via that test. For precise results, it
4869 /// is the caller's responsibility to specify the relevant loop exit using
4870 /// getExact(ExitingBlock, SE).
4871 const SCEV *
4872 ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
4873   // If any exits were not computable, the loop is not computable.
4874   if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
4875
4876   // We need exactly one computable exit.
4877   if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
4878   assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
4879
4880   const SCEV *BECount = nullptr;
4881   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
4882        ENT != nullptr; ENT = ENT->getNextExit()) {
4883
4884     assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
4885
4886     if (!BECount)
4887       BECount = ENT->ExactNotTaken;
4888     else if (BECount != ENT->ExactNotTaken)
4889       return SE->getCouldNotCompute();
4890   }
4891   assert(BECount && "Invalid not taken count for loop exit");
4892   return BECount;
4893 }
4894
4895 /// getExact - Get the exact not taken count for this loop exit.
4896 const SCEV *
4897 ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
4898                                              ScalarEvolution *SE) const {
4899   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
4900        ENT != nullptr; ENT = ENT->getNextExit()) {
4901
4902     if (ENT->ExitingBlock == ExitingBlock)
4903       return ENT->ExactNotTaken;
4904   }
4905   return SE->getCouldNotCompute();
4906 }
4907
4908 /// getMax - Get the max backedge taken count for the loop.
4909 const SCEV *
4910 ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
4911   return Max ? Max : SE->getCouldNotCompute();
4912 }
4913
4914 bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
4915                                                     ScalarEvolution *SE) const {
4916   if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
4917     return true;
4918
4919   if (!ExitNotTaken.ExitingBlock)
4920     return false;
4921
4922   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
4923        ENT != nullptr; ENT = ENT->getNextExit()) {
4924
4925     if (ENT->ExactNotTaken != SE->getCouldNotCompute()
4926         && SE->hasOperand(ENT->ExactNotTaken, S)) {
4927       return true;
4928     }
4929   }
4930   return false;
4931 }
4932
4933 /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
4934 /// computable exit into a persistent ExitNotTakenInfo array.
4935 ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
4936   SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
4937   bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
4938
4939   if (!Complete)
4940     ExitNotTaken.setIncomplete();
4941
4942   unsigned NumExits = ExitCounts.size();
4943   if (NumExits == 0) return;
4944
4945   ExitNotTaken.ExitingBlock = ExitCounts[0].first;
4946   ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
4947   if (NumExits == 1) return;
4948
4949   // Handle the rare case of multiple computable exits.
4950   ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
4951
4952   ExitNotTakenInfo *PrevENT = &ExitNotTaken;
4953   for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
4954     PrevENT->setNextExit(ENT);
4955     ENT->ExitingBlock = ExitCounts[i].first;
4956     ENT->ExactNotTaken = ExitCounts[i].second;
4957   }
4958 }
4959
4960 /// clear - Invalidate this result and free the ExitNotTakenInfo array.
4961 void ScalarEvolution::BackedgeTakenInfo::clear() {
4962   ExitNotTaken.ExitingBlock = nullptr;
4963   ExitNotTaken.ExactNotTaken = nullptr;
4964   delete[] ExitNotTaken.getNextExit();
4965 }
4966
4967 /// ComputeBackedgeTakenCount - Compute the number of times the backedge
4968 /// of the specified loop will execute.
4969 ScalarEvolution::BackedgeTakenInfo
4970 ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
4971   SmallVector<BasicBlock *, 8> ExitingBlocks;
4972   L->getExitingBlocks(ExitingBlocks);
4973
4974   SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
4975   bool CouldComputeBECount = true;
4976   BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
4977   const SCEV *MustExitMaxBECount = nullptr;
4978   const SCEV *MayExitMaxBECount = nullptr;
4979
4980   // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
4981   // and compute maxBECount.
4982   for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
4983     BasicBlock *ExitBB = ExitingBlocks[i];
4984     ExitLimit EL = ComputeExitLimit(L, ExitBB);
4985
4986     // 1. For each exit that can be computed, add an entry to ExitCounts.
4987     // CouldComputeBECount is true only if all exits can be computed.
4988     if (EL.Exact == getCouldNotCompute())
4989       // We couldn't compute an exact value for this exit, so
4990       // we won't be able to compute an exact value for the loop.
4991       CouldComputeBECount = false;
4992     else
4993       ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact));
4994
4995     // 2. Derive the loop's MaxBECount from each exit's max number of
4996     // non-exiting iterations. Partition the loop exits into two kinds:
4997     // LoopMustExits and LoopMayExits.
4998     //
4999     // If the exit dominates the loop latch, it is a LoopMustExit otherwise it
5000     // is a LoopMayExit.  If any computable LoopMustExit is found, then
5001     // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise,
5002     // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
5003     // considered greater than any computable EL.Max.
5004     if (EL.Max != getCouldNotCompute() && Latch &&
5005         DT.dominates(ExitBB, Latch)) {
5006       if (!MustExitMaxBECount)
5007         MustExitMaxBECount = EL.Max;
5008       else {
5009         MustExitMaxBECount =
5010           getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max);
5011       }
5012     } else if (MayExitMaxBECount != getCouldNotCompute()) {
5013       if (!MayExitMaxBECount || EL.Max == getCouldNotCompute())
5014         MayExitMaxBECount = EL.Max;
5015       else {
5016         MayExitMaxBECount =
5017           getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max);
5018       }
5019     }
5020   }
5021   const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
5022     (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
5023   return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
5024 }
5025
5026 /// ComputeExitLimit - Compute the number of times the backedge of the specified
5027 /// loop will execute if it exits via the specified block.
5028 ScalarEvolution::ExitLimit
5029 ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
5030
5031   // Okay, we've chosen an exiting block.  See what condition causes us to
5032   // exit at this block and remember the exit block and whether all other targets
5033   // lead to the loop header.
5034   bool MustExecuteLoopHeader = true;
5035   BasicBlock *Exit = nullptr;
5036   for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock);
5037        SI != SE; ++SI)
5038     if (!L->contains(*SI)) {
5039       if (Exit) // Multiple exit successors.
5040         return getCouldNotCompute();
5041       Exit = *SI;
5042     } else if (*SI != L->getHeader()) {
5043       MustExecuteLoopHeader = false;
5044     }
5045
5046   // At this point, we know we have a conditional branch that determines whether
5047   // the loop is exited.  However, we don't know if the branch is executed each
5048   // time through the loop.  If not, then the execution count of the branch will
5049   // not be equal to the trip count of the loop.
5050   //
5051   // Currently we check for this by checking to see if the Exit branch goes to
5052   // the loop header.  If so, we know it will always execute the same number of
5053   // times as the loop.  We also handle the case where the exit block *is* the
5054   // loop header.  This is common for un-rotated loops.
5055   //
5056   // If both of those tests fail, walk up the unique predecessor chain to the
5057   // header, stopping if there is an edge that doesn't exit the loop. If the
5058   // header is reached, the execution count of the branch will be equal to the
5059   // trip count of the loop.
5060   //
5061   //  More extensive analysis could be done to handle more cases here.
5062   //
5063   if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) {
5064     // The simple checks failed, try climbing the unique predecessor chain
5065     // up to the header.
5066     bool Ok = false;
5067     for (BasicBlock *BB = ExitingBlock; BB; ) {
5068       BasicBlock *Pred = BB->getUniquePredecessor();
5069       if (!Pred)
5070         return getCouldNotCompute();
5071       TerminatorInst *PredTerm = Pred->getTerminator();
5072       for (const BasicBlock *PredSucc : PredTerm->successors()) {
5073         if (PredSucc == BB)
5074           continue;
5075         // If the predecessor has a successor that isn't BB and isn't
5076         // outside the loop, assume the worst.
5077         if (L->contains(PredSucc))
5078           return getCouldNotCompute();
5079       }
5080       if (Pred == L->getHeader()) {
5081         Ok = true;
5082         break;
5083       }
5084       BB = Pred;
5085     }
5086     if (!Ok)
5087       return getCouldNotCompute();
5088   }
5089
5090   bool IsOnlyExit = (L->getExitingBlock() != nullptr);
5091   TerminatorInst *Term = ExitingBlock->getTerminator();
5092   if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
5093     assert(BI->isConditional() && "If unconditional, it can't be in loop!");
5094     // Proceed to the next level to examine the exit condition expression.
5095     return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
5096                                     BI->getSuccessor(1),
5097                                     /*ControlsExit=*/IsOnlyExit);
5098   }
5099
5100   if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
5101     return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit,
5102                                                 /*ControlsExit=*/IsOnlyExit);
5103
5104   return getCouldNotCompute();
5105 }
5106
5107 /// ComputeExitLimitFromCond - Compute the number of times the
5108 /// backedge of the specified loop will execute if its exit condition
5109 /// were a conditional branch of ExitCond, TBB, and FBB.
5110 ///
5111 /// @param ControlsExit is true if ExitCond directly controls the exit
5112 /// branch. In this case, we can assume that the loop exits only if the
5113 /// condition is true and can infer that failing to meet the condition prior to
5114 /// integer wraparound results in undefined behavior.
5115 ScalarEvolution::ExitLimit
5116 ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
5117                                           Value *ExitCond,
5118                                           BasicBlock *TBB,
5119                                           BasicBlock *FBB,
5120                                           bool ControlsExit) {
5121   // Check if the controlling expression for this loop is an And or Or.
5122   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
5123     if (BO->getOpcode() == Instruction::And) {
5124       // Recurse on the operands of the and.
5125       bool EitherMayExit = L->contains(TBB);
5126       ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
5127                                                ControlsExit && !EitherMayExit);
5128       ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
5129                                                ControlsExit && !EitherMayExit);
5130       const SCEV *BECount = getCouldNotCompute();
5131       const SCEV *MaxBECount = getCouldNotCompute();
5132       if (EitherMayExit) {
5133         // Both conditions must be true for the loop to continue executing.
5134         // Choose the less conservative count.
5135         if (EL0.Exact == getCouldNotCompute() ||
5136             EL1.Exact == getCouldNotCompute())
5137           BECount = getCouldNotCompute();
5138         else
5139           BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
5140         if (EL0.Max == getCouldNotCompute())
5141           MaxBECount = EL1.Max;
5142         else if (EL1.Max == getCouldNotCompute())
5143           MaxBECount = EL0.Max;
5144         else
5145           MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
5146       } else {
5147         // Both conditions must be true at the same time for the loop to exit.
5148         // For now, be conservative.
5149         assert(L->contains(FBB) && "Loop block has no successor in loop!");
5150         if (EL0.Max == EL1.Max)
5151           MaxBECount = EL0.Max;
5152         if (EL0.Exact == EL1.Exact)
5153           BECount = EL0.Exact;
5154       }
5155
5156       return ExitLimit(BECount, MaxBECount);
5157     }
5158     if (BO->getOpcode() == Instruction::Or) {
5159       // Recurse on the operands of the or.
5160       bool EitherMayExit = L->contains(FBB);
5161       ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
5162                                                ControlsExit && !EitherMayExit);
5163       ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
5164                                                ControlsExit && !EitherMayExit);
5165       const SCEV *BECount = getCouldNotCompute();
5166       const SCEV *MaxBECount = getCouldNotCompute();
5167       if (EitherMayExit) {
5168         // Both conditions must be false for the loop to continue executing.
5169         // Choose the less conservative count.
5170         if (EL0.Exact == getCouldNotCompute() ||
5171             EL1.Exact == getCouldNotCompute())
5172           BECount = getCouldNotCompute();
5173         else
5174           BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
5175         if (EL0.Max == getCouldNotCompute())
5176           MaxBECount = EL1.Max;
5177         else if (EL1.Max == getCouldNotCompute())
5178           MaxBECount = EL0.Max;
5179         else
5180           MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
5181       } else {
5182         // Both conditions must be false at the same time for the loop to exit.
5183         // For now, be conservative.
5184         assert(L->contains(TBB) && "Loop block has no successor in loop!");
5185         if (EL0.Max == EL1.Max)
5186           MaxBECount = EL0.Max;
5187         if (EL0.Exact == EL1.Exact)
5188           BECount = EL0.Exact;
5189       }
5190
5191       return ExitLimit(BECount, MaxBECount);
5192     }
5193   }
5194
5195   // With an icmp, it may be feasible to compute an exact backedge-taken count.
5196   // Proceed to the next level to examine the icmp.
5197   if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
5198     return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
5199
5200   // Check for a constant condition. These are normally stripped out by
5201   // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
5202   // preserve the CFG and is temporarily leaving constant conditions
5203   // in place.
5204   if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
5205     if (L->contains(FBB) == !CI->getZExtValue())
5206       // The backedge is always taken.
5207       return getCouldNotCompute();
5208     else
5209       // The backedge is never taken.
5210       return getConstant(CI->getType(), 0);
5211   }
5212
5213   // If it's not an integer or pointer comparison then compute it the hard way.
5214   return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
5215 }
5216
5217 /// ComputeExitLimitFromICmp - Compute the number of times the
5218 /// backedge of the specified loop will execute if its exit condition
5219 /// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
5220 ScalarEvolution::ExitLimit
5221 ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
5222                                           ICmpInst *ExitCond,
5223                                           BasicBlock *TBB,
5224                                           BasicBlock *FBB,
5225                                           bool ControlsExit) {
5226
5227   // If the condition was exit on true, convert the condition to exit on false
5228   ICmpInst::Predicate Cond;
5229   if (!L->contains(FBB))
5230     Cond = ExitCond->getPredicate();
5231   else
5232     Cond = ExitCond->getInversePredicate();
5233
5234   // Handle common loops like: for (X = "string"; *X; ++X)
5235   if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
5236     if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
5237       ExitLimit ItCnt =
5238         ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
5239       if (ItCnt.hasAnyInfo())
5240         return ItCnt;
5241     }
5242
5243   const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
5244   const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
5245
5246   // Try to evaluate any dependencies out of the loop.
5247   LHS = getSCEVAtScope(LHS, L);
5248   RHS = getSCEVAtScope(RHS, L);
5249
5250   // At this point, we would like to compute how many iterations of the
5251   // loop the predicate will return true for these inputs.
5252   if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
5253     // If there is a loop-invariant, force it into the RHS.
5254     std::swap(LHS, RHS);
5255     Cond = ICmpInst::getSwappedPredicate(Cond);
5256   }
5257
5258   // Simplify the operands before analyzing them.
5259   (void)SimplifyICmpOperands(Cond, LHS, RHS);
5260
5261   // If we have a comparison of a chrec against a constant, try to use value
5262   // ranges to answer this query.
5263   if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
5264     if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
5265       if (AddRec->getLoop() == L) {
5266         // Form the constant range.
5267         ConstantRange CompRange(
5268             ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
5269
5270         const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
5271         if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
5272       }
5273
5274   switch (Cond) {
5275   case ICmpInst::ICMP_NE: {                     // while (X != Y)
5276     // Convert to: while (X-Y != 0)
5277     ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
5278     if (EL.hasAnyInfo()) return EL;
5279     break;
5280   }
5281   case ICmpInst::ICMP_EQ: {                     // while (X == Y)
5282     // Convert to: while (X-Y == 0)
5283     ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
5284     if (EL.hasAnyInfo()) return EL;
5285     break;
5286   }
5287   case ICmpInst::ICMP_SLT:
5288   case ICmpInst::ICMP_ULT: {                    // while (X < Y)
5289     bool IsSigned = Cond == ICmpInst::ICMP_SLT;
5290     ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit);
5291     if (EL.hasAnyInfo()) return EL;
5292     break;
5293   }
5294   case ICmpInst::ICMP_SGT:
5295   case ICmpInst::ICMP_UGT: {                    // while (X > Y)
5296     bool IsSigned = Cond == ICmpInst::ICMP_SGT;
5297     ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit);
5298     if (EL.hasAnyInfo()) return EL;
5299     break;
5300   }
5301   default:
5302 #if 0
5303     dbgs() << "ComputeBackedgeTakenCount ";
5304     if (ExitCond->getOperand(0)->getType()->isUnsigned())
5305       dbgs() << "[unsigned] ";
5306     dbgs() << *LHS << "   "
5307          << Instruction::getOpcodeName(Instruction::ICmp)
5308          << "   " << *RHS << "\n";
5309 #endif
5310     break;
5311   }
5312   return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
5313 }
5314
5315 ScalarEvolution::ExitLimit
5316 ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L,
5317                                                       SwitchInst *Switch,
5318                                                       BasicBlock *ExitingBlock,
5319                                                       bool ControlsExit) {
5320   assert(!L->contains(ExitingBlock) && "Not an exiting block!");
5321
5322   // Give up if the exit is the default dest of a switch.
5323   if (Switch->getDefaultDest() == ExitingBlock)
5324     return getCouldNotCompute();
5325
5326   assert(L->contains(Switch->getDefaultDest()) &&
5327          "Default case must not exit the loop!");
5328   const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
5329   const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
5330
5331   // while (X != Y) --> while (X-Y != 0)
5332   ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
5333   if (EL.hasAnyInfo())
5334     return EL;
5335
5336   return getCouldNotCompute();
5337 }
5338
5339 static ConstantInt *
5340 EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
5341                                 ScalarEvolution &SE) {
5342   const SCEV *InVal = SE.getConstant(C);
5343   const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
5344   assert(isa<SCEVConstant>(Val) &&
5345          "Evaluation of SCEV at constant didn't fold correctly?");
5346   return cast<SCEVConstant>(Val)->getValue();
5347 }
5348
5349 /// ComputeLoadConstantCompareExitLimit - Given an exit condition of
5350 /// 'icmp op load X, cst', try to see if we can compute the backedge
5351 /// execution count.
5352 ScalarEvolution::ExitLimit
5353 ScalarEvolution::ComputeLoadConstantCompareExitLimit(
5354   LoadInst *LI,
5355   Constant *RHS,
5356   const Loop *L,
5357   ICmpInst::Predicate predicate) {
5358
5359   if (LI->isVolatile()) return getCouldNotCompute();
5360
5361   // Check to see if the loaded pointer is a getelementptr of a global.
5362   // TODO: Use SCEV instead of manually grubbing with GEPs.
5363   GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
5364   if (!GEP) return getCouldNotCompute();
5365
5366   // Make sure that it is really a constant global we are gepping, with an
5367   // initializer, and make sure the first IDX is really 0.
5368   GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
5369   if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
5370       GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
5371       !cast<Constant>(GEP->getOperand(1))->isNullValue())
5372     return getCouldNotCompute();
5373
5374   // Okay, we allow one non-constant index into the GEP instruction.
5375   Value *VarIdx = nullptr;
5376   std::vector<Constant*> Indexes;
5377   unsigned VarIdxNum = 0;
5378   for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
5379     if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
5380       Indexes.push_back(CI);
5381     } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
5382       if (VarIdx) return getCouldNotCompute();  // Multiple non-constant idx's.
5383       VarIdx = GEP->getOperand(i);
5384       VarIdxNum = i-2;
5385       Indexes.push_back(nullptr);
5386     }
5387
5388   // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
5389   if (!VarIdx)
5390     return getCouldNotCompute();
5391
5392   // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
5393   // Check to see if X is a loop variant variable value now.
5394   const SCEV *Idx = getSCEV(VarIdx);
5395   Idx = getSCEVAtScope(Idx, L);
5396
5397   // We can only recognize very limited forms of loop index expressions, in
5398   // particular, only affine AddRec's like {C1,+,C2}.
5399   const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
5400   if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
5401       !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
5402       !isa<SCEVConstant>(IdxExpr->getOperand(1)))
5403     return getCouldNotCompute();
5404
5405   unsigned MaxSteps = MaxBruteForceIterations;
5406   for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
5407     ConstantInt *ItCst = ConstantInt::get(
5408                            cast<IntegerType>(IdxExpr->getType()), IterationNum);
5409     ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
5410
5411     // Form the GEP offset.
5412     Indexes[VarIdxNum] = Val;
5413
5414     Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
5415                                                          Indexes);
5416     if (!Result) break;  // Cannot compute!
5417
5418     // Evaluate the condition for this iteration.
5419     Result = ConstantExpr::getICmp(predicate, Result, RHS);
5420     if (!isa<ConstantInt>(Result)) break;  // Couldn't decide for sure
5421     if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
5422 #if 0
5423       dbgs() << "\n***\n*** Computed loop count " << *ItCst
5424              << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
5425              << "***\n";
5426 #endif
5427       ++NumArrayLenItCounts;
5428       return getConstant(ItCst);   // Found terminating iteration!
5429     }
5430   }
5431   return getCouldNotCompute();
5432 }
5433
5434
5435 /// CanConstantFold - Return true if we can constant fold an instruction of the
5436 /// specified type, assuming that all operands were constants.
5437 static bool CanConstantFold(const Instruction *I) {
5438   if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
5439       isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
5440       isa<LoadInst>(I))
5441     return true;
5442
5443   if (const CallInst *CI = dyn_cast<CallInst>(I))
5444     if (const Function *F = CI->getCalledFunction())
5445       return canConstantFoldCallTo(F);
5446   return false;
5447 }
5448
5449 /// Determine whether this instruction can constant evolve within this loop
5450 /// assuming its operands can all constant evolve.
5451 static bool canConstantEvolve(Instruction *I, const Loop *L) {
5452   // An instruction outside of the loop can't be derived from a loop PHI.
5453   if (!L->contains(I)) return false;
5454
5455   if (isa<PHINode>(I)) {
5456     // We don't currently keep track of the control flow needed to evaluate
5457     // PHIs, so we cannot handle PHIs inside of loops.
5458     return L->getHeader() == I->getParent();
5459   }
5460
5461   // If we won't be able to constant fold this expression even if the operands
5462   // are constants, bail early.
5463   return CanConstantFold(I);
5464 }
5465
5466 /// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
5467 /// recursing through each instruction operand until reaching a loop header phi.
5468 static PHINode *
5469 getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
5470                                DenseMap<Instruction *, PHINode *> &PHIMap) {
5471
5472   // Otherwise, we can evaluate this instruction if all of its operands are
5473   // constant or derived from a PHI node themselves.
5474   PHINode *PHI = nullptr;
5475   for (Instruction::op_iterator OpI = UseInst->op_begin(),
5476          OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
5477
5478     if (isa<Constant>(*OpI)) continue;
5479
5480     Instruction *OpInst = dyn_cast<Instruction>(*OpI);
5481     if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
5482
5483     PHINode *P = dyn_cast<PHINode>(OpInst);
5484     if (!P)
5485       // If this operand is already visited, reuse the prior result.
5486       // We may have P != PHI if this is the deepest point at which the
5487       // inconsistent paths meet.
5488       P = PHIMap.lookup(OpInst);
5489     if (!P) {
5490       // Recurse and memoize the results, whether a phi is found or not.
5491       // This recursive call invalidates pointers into PHIMap.
5492       P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
5493       PHIMap[OpInst] = P;
5494     }
5495     if (!P)
5496       return nullptr;  // Not evolving from PHI
5497     if (PHI && PHI != P)
5498       return nullptr;  // Evolving from multiple different PHIs.
5499     PHI = P;
5500   }
5501   // This is a expression evolving from a constant PHI!
5502   return PHI;
5503 }
5504
5505 /// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
5506 /// in the loop that V is derived from.  We allow arbitrary operations along the
5507 /// way, but the operands of an operation must either be constants or a value
5508 /// derived from a constant PHI.  If this expression does not fit with these
5509 /// constraints, return null.
5510 static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
5511   Instruction *I = dyn_cast<Instruction>(V);
5512   if (!I || !canConstantEvolve(I, L)) return nullptr;
5513
5514   if (PHINode *PN = dyn_cast<PHINode>(I)) {
5515     return PN;
5516   }
5517
5518   // Record non-constant instructions contained by the loop.
5519   DenseMap<Instruction *, PHINode *> PHIMap;
5520   return getConstantEvolvingPHIOperands(I, L, PHIMap);
5521 }
5522
5523 /// EvaluateExpression - Given an expression that passes the
5524 /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
5525 /// in the loop has the value PHIVal.  If we can't fold this expression for some
5526 /// reason, return null.
5527 static Constant *EvaluateExpression(Value *V, const Loop *L,
5528                                     DenseMap<Instruction *, Constant *> &Vals,
5529                                     const DataLayout &DL,
5530                                     const TargetLibraryInfo *TLI) {
5531   // Convenient constant check, but redundant for recursive calls.
5532   if (Constant *C = dyn_cast<Constant>(V)) return C;
5533   Instruction *I = dyn_cast<Instruction>(V);
5534   if (!I) return nullptr;
5535
5536   if (Constant *C = Vals.lookup(I)) return C;
5537
5538   // An instruction inside the loop depends on a value outside the loop that we
5539   // weren't given a mapping for, or a value such as a call inside the loop.
5540   if (!canConstantEvolve(I, L)) return nullptr;
5541
5542   // An unmapped PHI can be due to a branch or another loop inside this loop,
5543   // or due to this not being the initial iteration through a loop where we
5544   // couldn't compute the evolution of this particular PHI last time.
5545   if (isa<PHINode>(I)) return nullptr;
5546
5547   std::vector<Constant*> Operands(I->getNumOperands());
5548
5549   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
5550     Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
5551     if (!Operand) {
5552       Operands[i] = dyn_cast<Constant>(I->getOperand(i));
5553       if (!Operands[i]) return nullptr;
5554       continue;
5555     }
5556     Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
5557     Vals[Operand] = C;
5558     if (!C) return nullptr;
5559     Operands[i] = C;
5560   }
5561
5562   if (CmpInst *CI = dyn_cast<CmpInst>(I))
5563     return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
5564                                            Operands[1], DL, TLI);
5565   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
5566     if (!LI->isVolatile())
5567       return ConstantFoldLoadFromConstPtr(Operands[0], DL);
5568   }
5569   return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, DL,
5570                                   TLI);
5571 }
5572
5573 /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
5574 /// in the header of its containing loop, we know the loop executes a
5575 /// constant number of times, and the PHI node is just a recurrence
5576 /// involving constants, fold it.
5577 Constant *
5578 ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
5579                                                    const APInt &BEs,
5580                                                    const Loop *L) {
5581   DenseMap<PHINode*, Constant*>::const_iterator I =
5582     ConstantEvolutionLoopExitValue.find(PN);
5583   if (I != ConstantEvolutionLoopExitValue.end())
5584     return I->second;
5585
5586   if (BEs.ugt(MaxBruteForceIterations))
5587     return ConstantEvolutionLoopExitValue[PN] = nullptr;  // Not going to evaluate it.
5588
5589   Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
5590
5591   DenseMap<Instruction *, Constant *> CurrentIterVals;
5592   BasicBlock *Header = L->getHeader();
5593   assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
5594
5595   // Since the loop is canonicalized, the PHI node must have two entries.  One
5596   // entry must be a constant (coming in from outside of the loop), and the
5597   // second must be derived from the same PHI.
5598   bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
5599   PHINode *PHI = nullptr;
5600   for (BasicBlock::iterator I = Header->begin();
5601        (PHI = dyn_cast<PHINode>(I)); ++I) {
5602     Constant *StartCST =
5603       dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
5604     if (!StartCST) continue;
5605     CurrentIterVals[PHI] = StartCST;
5606   }
5607   if (!CurrentIterVals.count(PN))
5608     return RetVal = nullptr;
5609
5610   Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
5611
5612   // Execute the loop symbolically to determine the exit value.
5613   if (BEs.getActiveBits() >= 32)
5614     return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it!
5615
5616   unsigned NumIterations = BEs.getZExtValue(); // must be in range
5617   unsigned IterationNum = 0;
5618   const DataLayout &DL = F.getParent()->getDataLayout();
5619   for (; ; ++IterationNum) {
5620     if (IterationNum == NumIterations)
5621       return RetVal = CurrentIterVals[PN];  // Got exit value!
5622
5623     // Compute the value of the PHIs for the next iteration.
5624     // EvaluateExpression adds non-phi values to the CurrentIterVals map.
5625     DenseMap<Instruction *, Constant *> NextIterVals;
5626     Constant *NextPHI =
5627         EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
5628     if (!NextPHI)
5629       return nullptr;        // Couldn't evaluate!
5630     NextIterVals[PN] = NextPHI;
5631
5632     bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
5633
5634     // Also evaluate the other PHI nodes.  However, we don't get to stop if we
5635     // cease to be able to evaluate one of them or if they stop evolving,
5636     // because that doesn't necessarily prevent us from computing PN.
5637     SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
5638     for (DenseMap<Instruction *, Constant *>::const_iterator
5639            I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
5640       PHINode *PHI = dyn_cast<PHINode>(I->first);
5641       if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
5642       PHIsToCompute.push_back(std::make_pair(PHI, I->second));
5643     }
5644     // We use two distinct loops because EvaluateExpression may invalidate any
5645     // iterators into CurrentIterVals.
5646     for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator
5647              I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) {
5648       PHINode *PHI = I->first;
5649       Constant *&NextPHI = NextIterVals[PHI];
5650       if (!NextPHI) {   // Not already computed.
5651         Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
5652         NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
5653       }
5654       if (NextPHI != I->second)
5655         StoppedEvolving = false;
5656     }
5657
5658     // If all entries in CurrentIterVals == NextIterVals then we can stop
5659     // iterating, the loop can't continue to change.
5660     if (StoppedEvolving)
5661       return RetVal = CurrentIterVals[PN];
5662
5663     CurrentIterVals.swap(NextIterVals);
5664   }
5665 }
5666
5667 /// ComputeExitCountExhaustively - If the loop is known to execute a
5668 /// constant number of times (the condition evolves only from constants),
5669 /// try to evaluate a few iterations of the loop until we get the exit
5670 /// condition gets a value of ExitWhen (true or false).  If we cannot
5671 /// evaluate the trip count of the loop, return getCouldNotCompute().
5672 const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
5673                                                           Value *Cond,
5674                                                           bool ExitWhen) {
5675   PHINode *PN = getConstantEvolvingPHI(Cond, L);
5676   if (!PN) return getCouldNotCompute();
5677
5678   // If the loop is canonicalized, the PHI will have exactly two entries.
5679   // That's the only form we support here.
5680   if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
5681
5682   DenseMap<Instruction *, Constant *> CurrentIterVals;
5683   BasicBlock *Header = L->getHeader();
5684   assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
5685
5686   // One entry must be a constant (coming in from outside of the loop), and the
5687   // second must be derived from the same PHI.
5688   bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
5689   PHINode *PHI = nullptr;
5690   for (BasicBlock::iterator I = Header->begin();
5691        (PHI = dyn_cast<PHINode>(I)); ++I) {
5692     Constant *StartCST =
5693       dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
5694     if (!StartCST) continue;
5695     CurrentIterVals[PHI] = StartCST;
5696   }
5697   if (!CurrentIterVals.count(PN))
5698     return getCouldNotCompute();
5699
5700   // Okay, we find a PHI node that defines the trip count of this loop.  Execute
5701   // the loop symbolically to determine when the condition gets a value of
5702   // "ExitWhen".
5703   unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
5704   const DataLayout &DL = F.getParent()->getDataLayout();
5705   for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
5706     ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>(
5707         EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));
5708
5709     // Couldn't symbolically evaluate.
5710     if (!CondVal) return getCouldNotCompute();
5711
5712     if (CondVal->getValue() == uint64_t(ExitWhen)) {
5713       ++NumBruteForceTripCountsComputed;
5714       return getConstant(Type::getInt32Ty(getContext()), IterationNum);
5715     }
5716
5717     // Update all the PHI nodes for the next iteration.
5718     DenseMap<Instruction *, Constant *> NextIterVals;
5719
5720     // Create a list of which PHIs we need to compute. We want to do this before
5721     // calling EvaluateExpression on them because that may invalidate iterators
5722     // into CurrentIterVals.
5723     SmallVector<PHINode *, 8> PHIsToCompute;
5724     for (DenseMap<Instruction *, Constant *>::const_iterator
5725            I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
5726       PHINode *PHI = dyn_cast<PHINode>(I->first);
5727       if (!PHI || PHI->getParent() != Header) continue;
5728       PHIsToCompute.push_back(PHI);
5729     }
5730     for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(),
5731              E = PHIsToCompute.end(); I != E; ++I) {
5732       PHINode *PHI = *I;
5733       Constant *&NextPHI = NextIterVals[PHI];
5734       if (NextPHI) continue;    // Already computed!
5735
5736       Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
5737       NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
5738     }
5739     CurrentIterVals.swap(NextIterVals);
5740   }
5741
5742   // Too many iterations were needed to evaluate.
5743   return getCouldNotCompute();
5744 }
5745
5746 /// getSCEVAtScope - Return a SCEV expression for the specified value
5747 /// at the specified scope in the program.  The L value specifies a loop
5748 /// nest to evaluate the expression at, where null is the top-level or a
5749 /// specified loop is immediately inside of the loop.
5750 ///
5751 /// This method can be used to compute the exit value for a variable defined
5752 /// in a loop by querying what the value will hold in the parent loop.
5753 ///
5754 /// In the case that a relevant loop exit value cannot be computed, the
5755 /// original value V is returned.
5756 const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
5757   // Check to see if we've folded this expression at this loop before.
5758   SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = ValuesAtScopes[V];
5759   for (unsigned u = 0; u < Values.size(); u++) {
5760     if (Values[u].first == L)
5761       return Values[u].second ? Values[u].second : V;
5762   }
5763   Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr)));
5764   // Otherwise compute it.
5765   const SCEV *C = computeSCEVAtScope(V, L);
5766   SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V];
5767   for (unsigned u = Values2.size(); u > 0; u--) {
5768     if (Values2[u - 1].first == L) {
5769       Values2[u - 1].second = C;
5770       break;
5771     }
5772   }
5773   return C;
5774 }
5775
5776 /// This builds up a Constant using the ConstantExpr interface.  That way, we
5777 /// will return Constants for objects which aren't represented by a
5778 /// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
5779 /// Returns NULL if the SCEV isn't representable as a Constant.
5780 static Constant *BuildConstantFromSCEV(const SCEV *V) {
5781   switch (static_cast<SCEVTypes>(V->getSCEVType())) {
5782     case scCouldNotCompute:
5783     case scAddRecExpr:
5784       break;
5785     case scConstant:
5786       return cast<SCEVConstant>(V)->getValue();
5787     case scUnknown:
5788       return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
5789     case scSignExtend: {
5790       const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
5791       if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
5792         return ConstantExpr::getSExt(CastOp, SS->getType());
5793       break;
5794     }
5795     case scZeroExtend: {
5796       const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
5797       if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
5798         return ConstantExpr::getZExt(CastOp, SZ->getType());
5799       break;
5800     }
5801     case scTruncate: {
5802       const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
5803       if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
5804         return ConstantExpr::getTrunc(CastOp, ST->getType());
5805       break;
5806     }
5807     case scAddExpr: {
5808       const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
5809       if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
5810         if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
5811           unsigned AS = PTy->getAddressSpace();
5812           Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
5813           C = ConstantExpr::getBitCast(C, DestPtrTy);
5814         }
5815         for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
5816           Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
5817           if (!C2) return nullptr;
5818
5819           // First pointer!
5820           if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
5821             unsigned AS = C2->getType()->getPointerAddressSpace();
5822             std::swap(C, C2);
5823             Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
5824             // The offsets have been converted to bytes.  We can add bytes to an
5825             // i8* by GEP with the byte count in the first index.
5826             C = ConstantExpr::getBitCast(C, DestPtrTy);
5827           }
5828
5829           // Don't bother trying to sum two pointers. We probably can't
5830           // statically compute a load that results from it anyway.
5831           if (C2->getType()->isPointerTy())
5832             return nullptr;
5833
5834           if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
5835             if (PTy->getElementType()->isStructTy())
5836               C2 = ConstantExpr::getIntegerCast(
5837                   C2, Type::getInt32Ty(C->getContext()), true);
5838             C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
5839           } else
5840             C = ConstantExpr::getAdd(C, C2);
5841         }
5842         return C;
5843       }
5844       break;
5845     }
5846     case scMulExpr: {
5847       const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
5848       if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
5849         // Don't bother with pointers at all.
5850         if (C->getType()->isPointerTy()) return nullptr;
5851         for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
5852           Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
5853           if (!C2 || C2->getType()->isPointerTy()) return nullptr;
5854           C = ConstantExpr::getMul(C, C2);
5855         }
5856         return C;
5857       }
5858       break;
5859     }
5860     case scUDivExpr: {
5861       const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
5862       if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
5863         if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
5864           if (LHS->getType() == RHS->getType())
5865             return ConstantExpr::getUDiv(LHS, RHS);
5866       break;
5867     }
5868     case scSMaxExpr:
5869     case scUMaxExpr:
5870       break; // TODO: smax, umax.
5871   }
5872   return nullptr;
5873 }
5874
5875 const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
5876   if (isa<SCEVConstant>(V)) return V;
5877
5878   // If this instruction is evolved from a constant-evolving PHI, compute the
5879   // exit value from the loop without using SCEVs.
5880   if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
5881     if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
5882       const Loop *LI = this->LI[I->getParent()];
5883       if (LI && LI->getParentLoop() == L)  // Looking for loop exit value.
5884         if (PHINode *PN = dyn_cast<PHINode>(I))
5885           if (PN->getParent() == LI->getHeader()) {
5886             // Okay, there is no closed form solution for the PHI node.  Check
5887             // to see if the loop that contains it has a known backedge-taken
5888             // count.  If so, we may be able to force computation of the exit
5889             // value.
5890             const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
5891             if (const SCEVConstant *BTCC =
5892                   dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
5893               // Okay, we know how many times the containing loop executes.  If
5894               // this is a constant evolving PHI node, get the final value at
5895               // the specified iteration number.
5896               Constant *RV = getConstantEvolutionLoopExitValue(PN,
5897                                                    BTCC->getValue()->getValue(),
5898                                                                LI);
5899               if (RV) return getSCEV(RV);
5900             }
5901           }
5902
5903       // Okay, this is an expression that we cannot symbolically evaluate
5904       // into a SCEV.  Check to see if it's possible to symbolically evaluate
5905       // the arguments into constants, and if so, try to constant propagate the
5906       // result.  This is particularly useful for computing loop exit values.
5907       if (CanConstantFold(I)) {
5908         SmallVector<Constant *, 4> Operands;
5909         bool MadeImprovement = false;
5910         for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
5911           Value *Op = I->getOperand(i);
5912           if (Constant *C = dyn_cast<Constant>(Op)) {
5913             Operands.push_back(C);
5914             continue;
5915           }
5916
5917           // If any of the operands is non-constant and if they are
5918           // non-integer and non-pointer, don't even try to analyze them
5919           // with scev techniques.
5920           if (!isSCEVable(Op->getType()))
5921             return V;
5922
5923           const SCEV *OrigV = getSCEV(Op);
5924           const SCEV *OpV = getSCEVAtScope(OrigV, L);
5925           MadeImprovement |= OrigV != OpV;
5926
5927           Constant *C = BuildConstantFromSCEV(OpV);
5928           if (!C) return V;
5929           if (C->getType() != Op->getType())
5930             C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
5931                                                               Op->getType(),
5932                                                               false),
5933                                       C, Op->getType());
5934           Operands.push_back(C);
5935         }
5936
5937         // Check to see if getSCEVAtScope actually made an improvement.
5938         if (MadeImprovement) {
5939           Constant *C = nullptr;
5940           const DataLayout &DL = F.getParent()->getDataLayout();
5941           if (const CmpInst *CI = dyn_cast<CmpInst>(I))
5942             C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
5943                                                 Operands[1], DL, &TLI);
5944           else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
5945             if (!LI->isVolatile())
5946               C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
5947           } else
5948             C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands,
5949                                          DL, &TLI);
5950           if (!C) return V;
5951           return getSCEV(C);
5952         }
5953       }
5954     }
5955
5956     // This is some other type of SCEVUnknown, just return it.
5957     return V;
5958   }
5959
5960   if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
5961     // Avoid performing the look-up in the common case where the specified
5962     // expression has no loop-variant portions.
5963     for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
5964       const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
5965       if (OpAtScope != Comm->getOperand(i)) {
5966         // Okay, at least one of these operands is loop variant but might be
5967         // foldable.  Build a new instance of the folded commutative expression.
5968         SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
5969                                             Comm->op_begin()+i);
5970         NewOps.push_back(OpAtScope);
5971
5972         for (++i; i != e; ++i) {
5973           OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
5974           NewOps.push_back(OpAtScope);
5975         }
5976         if (isa<SCEVAddExpr>(Comm))
5977           return getAddExpr(NewOps);
5978         if (isa<SCEVMulExpr>(Comm))
5979           return getMulExpr(NewOps);
5980         if (isa<SCEVSMaxExpr>(Comm))
5981           return getSMaxExpr(NewOps);
5982         if (isa<SCEVUMaxExpr>(Comm))
5983           return getUMaxExpr(NewOps);
5984         llvm_unreachable("Unknown commutative SCEV type!");
5985       }
5986     }
5987     // If we got here, all operands are loop invariant.
5988     return Comm;
5989   }
5990
5991   if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
5992     const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
5993     const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
5994     if (LHS == Div->getLHS() && RHS == Div->getRHS())
5995       return Div;   // must be loop invariant
5996     return getUDivExpr(LHS, RHS);
5997   }
5998
5999   // If this is a loop recurrence for a loop that does not contain L, then we
6000   // are dealing with the final value computed by the loop.
6001   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
6002     // First, attempt to evaluate each operand.
6003     // Avoid performing the look-up in the common case where the specified
6004     // expression has no loop-variant portions.
6005     for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
6006       const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
6007       if (OpAtScope == AddRec->getOperand(i))
6008         continue;
6009
6010       // Okay, at least one of these operands is loop variant but might be
6011       // foldable.  Build a new instance of the folded commutative expression.
6012       SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
6013                                           AddRec->op_begin()+i);
6014       NewOps.push_back(OpAtScope);
6015       for (++i; i != e; ++i)
6016         NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
6017
6018       const SCEV *FoldedRec =
6019         getAddRecExpr(NewOps, AddRec->getLoop(),
6020                       AddRec->getNoWrapFlags(SCEV::FlagNW));
6021       AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
6022       // The addrec may be folded to a nonrecurrence, for example, if the
6023       // induction variable is multiplied by zero after constant folding. Go
6024       // ahead and return the folded value.
6025       if (!AddRec)
6026         return FoldedRec;
6027       break;
6028     }
6029
6030     // If the scope is outside the addrec's loop, evaluate it by using the
6031     // loop exit value of the addrec.
6032     if (!AddRec->getLoop()->contains(L)) {
6033       // To evaluate this recurrence, we need to know how many times the AddRec
6034       // loop iterates.  Compute this now.
6035       const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
6036       if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
6037
6038       // Then, evaluate the AddRec.
6039       return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
6040     }
6041
6042     return AddRec;
6043   }
6044
6045   if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
6046     const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
6047     if (Op == Cast->getOperand())
6048       return Cast;  // must be loop invariant
6049     return getZeroExtendExpr(Op, Cast->getType());
6050   }
6051
6052   if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
6053     const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
6054     if (Op == Cast->getOperand())
6055       return Cast;  // must be loop invariant
6056     return getSignExtendExpr(Op, Cast->getType());
6057   }
6058
6059   if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
6060     const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
6061     if (Op == Cast->getOperand())
6062       return Cast;  // must be loop invariant
6063     return getTruncateExpr(Op, Cast->getType());
6064   }
6065
6066   llvm_unreachable("Unknown SCEV type!");
6067 }
6068
6069 /// getSCEVAtScope - This is a convenience function which does
6070 /// getSCEVAtScope(getSCEV(V), L).
6071 const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
6072   return getSCEVAtScope(getSCEV(V), L);
6073 }
6074
6075 /// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the
6076 /// following equation:
6077 ///
6078 ///     A * X = B (mod N)
6079 ///
6080 /// where N = 2^BW and BW is the common bit width of A and B. The signedness of
6081 /// A and B isn't important.
6082 ///
6083 /// If the equation does not have a solution, SCEVCouldNotCompute is returned.
6084 static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
6085                                                ScalarEvolution &SE) {
6086   uint32_t BW = A.getBitWidth();
6087   assert(BW == B.getBitWidth() && "Bit widths must be the same.");
6088   assert(A != 0 && "A must be non-zero.");
6089
6090   // 1. D = gcd(A, N)
6091   //
6092   // The gcd of A and N may have only one prime factor: 2. The number of
6093   // trailing zeros in A is its multiplicity
6094   uint32_t Mult2 = A.countTrailingZeros();
6095   // D = 2^Mult2
6096
6097   // 2. Check if B is divisible by D.
6098   //
6099   // B is divisible by D if and only if the multiplicity of prime factor 2 for B
6100   // is not less than multiplicity of this prime factor for D.
6101   if (B.countTrailingZeros() < Mult2)
6102     return SE.getCouldNotCompute();
6103
6104   // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
6105   // modulo (N / D).
6106   //
6107   // (N / D) may need BW+1 bits in its representation.  Hence, we'll use this
6108   // bit width during computations.
6109   APInt AD = A.lshr(Mult2).zext(BW + 1);  // AD = A / D
6110   APInt Mod(BW + 1, 0);
6111   Mod.setBit(BW - Mult2);  // Mod = N / D
6112   APInt I = AD.multiplicativeInverse(Mod);
6113
6114   // 4. Compute the minimum unsigned root of the equation:
6115   // I * (B / D) mod (N / D)
6116   APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod);
6117
6118   // The result is guaranteed to be less than 2^BW so we may truncate it to BW
6119   // bits.
6120   return SE.getConstant(Result.trunc(BW));
6121 }
6122
6123 /// SolveQuadraticEquation - Find the roots of the quadratic equation for the
6124 /// given quadratic chrec {L,+,M,+,N}.  This returns either the two roots (which
6125 /// might be the same) or two SCEVCouldNotCompute objects.
6126 ///
6127 static std::pair<const SCEV *,const SCEV *>
6128 SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
6129   assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
6130   const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
6131   const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
6132   const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
6133
6134   // We currently can only solve this if the coefficients are constants.
6135   if (!LC || !MC || !NC) {
6136     const SCEV *CNC = SE.getCouldNotCompute();
6137     return std::make_pair(CNC, CNC);
6138   }
6139
6140   uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
6141   const APInt &L = LC->getValue()->getValue();
6142   const APInt &M = MC->getValue()->getValue();
6143   const APInt &N = NC->getValue()->getValue();
6144   APInt Two(BitWidth, 2);
6145   APInt Four(BitWidth, 4);
6146
6147   {
6148     using namespace APIntOps;
6149     const APInt& C = L;
6150     // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
6151     // The B coefficient is M-N/2
6152     APInt B(M);
6153     B -= sdiv(N,Two);
6154
6155     // The A coefficient is N/2
6156     APInt A(N.sdiv(Two));
6157
6158     // Compute the B^2-4ac term.
6159     APInt SqrtTerm(B);
6160     SqrtTerm *= B;
6161     SqrtTerm -= Four * (A * C);
6162
6163     if (SqrtTerm.isNegative()) {
6164       // The loop is provably infinite.
6165       const SCEV *CNC = SE.getCouldNotCompute();
6166       return std::make_pair(CNC, CNC);
6167     }
6168
6169     // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
6170     // integer value or else APInt::sqrt() will assert.
6171     APInt SqrtVal(SqrtTerm.sqrt());
6172
6173     // Compute the two solutions for the quadratic formula.
6174     // The divisions must be performed as signed divisions.
6175     APInt NegB(-B);
6176     APInt TwoA(A << 1);
6177     if (TwoA.isMinValue()) {
6178       const SCEV *CNC = SE.getCouldNotCompute();
6179       return std::make_pair(CNC, CNC);
6180     }
6181
6182     LLVMContext &Context = SE.getContext();
6183
6184     ConstantInt *Solution1 =
6185       ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
6186     ConstantInt *Solution2 =
6187       ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
6188
6189     return std::make_pair(SE.getConstant(Solution1),
6190                           SE.getConstant(Solution2));
6191   } // end APIntOps namespace
6192 }
6193
6194 /// HowFarToZero - Return the number of times a backedge comparing the specified
6195 /// value to zero will execute.  If not computable, return CouldNotCompute.
6196 ///
6197 /// This is only used for loops with a "x != y" exit test. The exit condition is
6198 /// now expressed as a single expression, V = x-y. So the exit test is
6199 /// effectively V != 0.  We know and take advantage of the fact that this
6200 /// expression only being used in a comparison by zero context.
6201 ScalarEvolution::ExitLimit
6202 ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
6203   // If the value is a constant
6204   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
6205     // If the value is already zero, the branch will execute zero times.
6206     if (C->getValue()->isZero()) return C;
6207     return getCouldNotCompute();  // Otherwise it will loop infinitely.
6208   }
6209
6210   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
6211   if (!AddRec || AddRec->getLoop() != L)
6212     return getCouldNotCompute();
6213
6214   // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
6215   // the quadratic equation to solve it.
6216   if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
6217     std::pair<const SCEV *,const SCEV *> Roots =
6218       SolveQuadraticEquation(AddRec, *this);
6219     const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
6220     const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
6221     if (R1 && R2) {
6222 #if 0
6223       dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
6224              << "  sol#2: " << *R2 << "\n";
6225 #endif
6226       // Pick the smallest positive root value.
6227       if (ConstantInt *CB =
6228           dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
6229                                                       R1->getValue(),
6230                                                       R2->getValue()))) {
6231         if (!CB->getZExtValue())
6232           std::swap(R1, R2);   // R1 is the minimum root now.
6233
6234         // We can only use this value if the chrec ends up with an exact zero
6235         // value at this index.  When solving for "X*X != 5", for example, we
6236         // should not accept a root of 2.
6237         const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
6238         if (Val->isZero())
6239           return R1;  // We found a quadratic root!
6240       }
6241     }
6242     return getCouldNotCompute();
6243   }
6244
6245   // Otherwise we can only handle this if it is affine.
6246   if (!AddRec->isAffine())
6247     return getCouldNotCompute();
6248
6249   // If this is an affine expression, the execution count of this branch is
6250   // the minimum unsigned root of the following equation:
6251   //
6252   //     Start + Step*N = 0 (mod 2^BW)
6253   //
6254   // equivalent to:
6255   //
6256   //             Step*N = -Start (mod 2^BW)
6257   //
6258   // where BW is the common bit width of Start and Step.
6259
6260   // Get the initial value for the loop.
6261   const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
6262   const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
6263
6264   // For now we handle only constant steps.
6265   //
6266   // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
6267   // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
6268   // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
6269   // We have not yet seen any such cases.
6270   const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
6271   if (!StepC || StepC->getValue()->equalsInt(0))
6272     return getCouldNotCompute();
6273
6274   // For positive steps (counting up until unsigned overflow):
6275   //   N = -Start/Step (as unsigned)
6276   // For negative steps (counting down to zero):
6277   //   N = Start/-Step
6278   // First compute the unsigned distance from zero in the direction of Step.
6279   bool CountDown = StepC->getValue()->getValue().isNegative();
6280   const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
6281
6282   // Handle unitary steps, which cannot wraparound.
6283   // 1*N = -Start; -1*N = Start (mod 2^BW), so:
6284   //   N = Distance (as unsigned)
6285   if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
6286     ConstantRange CR = getUnsignedRange(Start);
6287     const SCEV *MaxBECount;
6288     if (!CountDown && CR.getUnsignedMin().isMinValue())
6289       // When counting up, the worst starting value is 1, not 0.
6290       MaxBECount = CR.getUnsignedMax().isMinValue()
6291         ? getConstant(APInt::getMinValue(CR.getBitWidth()))
6292         : getConstant(APInt::getMaxValue(CR.getBitWidth()));
6293     else
6294       MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
6295                                          : -CR.getUnsignedMin());
6296     return ExitLimit(Distance, MaxBECount);
6297   }
6298
6299   // As a special case, handle the instance where Step is a positive power of
6300   // two. In this case, determining whether Step divides Distance evenly can be
6301   // done by counting and comparing the number of trailing zeros of Step and
6302   // Distance.
6303   if (!CountDown) {
6304     const APInt &StepV = StepC->getValue()->getValue();
6305     // StepV.isPowerOf2() returns true if StepV is an positive power of two.  It
6306     // also returns true if StepV is maximally negative (eg, INT_MIN), but that
6307     // case is not handled as this code is guarded by !CountDown.
6308     if (StepV.isPowerOf2() &&
6309         GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros())
6310       return getUDivExactExpr(Distance, Step);
6311   }
6312
6313   // If the condition controls loop exit (the loop exits only if the expression
6314   // is true) and the addition is no-wrap we can use unsigned divide to
6315   // compute the backedge count.  In this case, the step may not divide the
6316   // distance, but we don't care because if the condition is "missed" the loop
6317   // will have undefined behavior due to wrapping.
6318   if (ControlsExit && AddRec->getNoWrapFlags(SCEV::FlagNW)) {
6319     const SCEV *Exact =
6320         getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
6321     return ExitLimit(Exact, Exact);
6322   }
6323
6324   // Then, try to solve the above equation provided that Start is constant.
6325   if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
6326     return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
6327                                         -StartC->getValue()->getValue(),
6328                                         *this);
6329   return getCouldNotCompute();
6330 }
6331
6332 /// HowFarToNonZero - Return the number of times a backedge checking the
6333 /// specified value for nonzero will execute.  If not computable, return
6334 /// CouldNotCompute
6335 ScalarEvolution::ExitLimit
6336 ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
6337   // Loops that look like: while (X == 0) are very strange indeed.  We don't
6338   // handle them yet except for the trivial case.  This could be expanded in the
6339   // future as needed.
6340
6341   // If the value is a constant, check to see if it is known to be non-zero
6342   // already.  If so, the backedge will execute zero times.
6343   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
6344     if (!C->getValue()->isNullValue())
6345       return getConstant(C->getType(), 0);
6346     return getCouldNotCompute();  // Otherwise it will loop infinitely.
6347   }
6348
6349   // We could implement others, but I really doubt anyone writes loops like
6350   // this, and if they did, they would already be constant folded.
6351   return getCouldNotCompute();
6352 }
6353
6354 /// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
6355 /// (which may not be an immediate predecessor) which has exactly one
6356 /// successor from which BB is reachable, or null if no such block is
6357 /// found.
6358 ///
6359 std::pair<BasicBlock *, BasicBlock *>
6360 ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
6361   // If the block has a unique predecessor, then there is no path from the
6362   // predecessor to the block that does not go through the direct edge
6363   // from the predecessor to the block.
6364   if (BasicBlock *Pred = BB->getSinglePredecessor())
6365     return std::make_pair(Pred, BB);
6366
6367   // A loop's header is defined to be a block that dominates the loop.
6368   // If the header has a unique predecessor outside the loop, it must be
6369   // a block that has exactly one successor that can reach the loop.
6370   if (Loop *L = LI.getLoopFor(BB))
6371     return std::make_pair(L->getLoopPredecessor(), L->getHeader());
6372
6373   return std::pair<BasicBlock *, BasicBlock *>();
6374 }
6375
6376 /// HasSameValue - SCEV structural equivalence is usually sufficient for
6377 /// testing whether two expressions are equal, however for the purposes of
6378 /// looking for a condition guarding a loop, it can be useful to be a little
6379 /// more general, since a front-end may have replicated the controlling
6380 /// expression.
6381 ///
6382 static bool HasSameValue(const SCEV *A, const SCEV *B) {
6383   // Quick check to see if they are the same SCEV.
6384   if (A == B) return true;
6385
6386   // Otherwise, if they're both SCEVUnknown, it's possible that they hold
6387   // two different instructions with the same value. Check for this case.
6388   if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
6389     if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
6390       if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
6391         if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
6392           if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
6393             return true;
6394
6395   // Otherwise assume they may have a different value.
6396   return false;
6397 }
6398
6399 /// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
6400 /// predicate Pred. Return true iff any changes were made.
6401 ///
6402 bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
6403                                            const SCEV *&LHS, const SCEV *&RHS,
6404                                            unsigned Depth) {
6405   bool Changed = false;
6406
6407   // If we hit the max recursion limit bail out.
6408   if (Depth >= 3)
6409     return false;
6410
6411   // Canonicalize a constant to the right side.
6412   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
6413     // Check for both operands constant.
6414     if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
6415       if (ConstantExpr::getICmp(Pred,
6416                                 LHSC->getValue(),
6417                                 RHSC->getValue())->isNullValue())
6418         goto trivially_false;
6419       else
6420         goto trivially_true;
6421     }
6422     // Otherwise swap the operands to put the constant on the right.
6423     std::swap(LHS, RHS);
6424     Pred = ICmpInst::getSwappedPredicate(Pred);
6425     Changed = true;
6426   }
6427
6428   // If we're comparing an addrec with a value which is loop-invariant in the
6429   // addrec's loop, put the addrec on the left. Also make a dominance check,
6430   // as both operands could be addrecs loop-invariant in each other's loop.
6431   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
6432     const Loop *L = AR->getLoop();
6433     if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
6434       std::swap(LHS, RHS);
6435       Pred = ICmpInst::getSwappedPredicate(Pred);
6436       Changed = true;
6437     }
6438   }
6439
6440   // If there's a constant operand, canonicalize comparisons with boundary
6441   // cases, and canonicalize *-or-equal comparisons to regular comparisons.
6442   if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
6443     const APInt &RA = RC->getValue()->getValue();
6444     switch (Pred) {
6445     default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
6446     case ICmpInst::ICMP_EQ:
6447     case ICmpInst::ICMP_NE:
6448       // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
6449       if (!RA)
6450         if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
6451           if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
6452             if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
6453                 ME->getOperand(0)->isAllOnesValue()) {
6454               RHS = AE->getOperand(1);
6455               LHS = ME->getOperand(1);
6456               Changed = true;
6457             }
6458       break;
6459     case ICmpInst::ICMP_UGE:
6460       if ((RA - 1).isMinValue()) {
6461         Pred = ICmpInst::ICMP_NE;
6462         RHS = getConstant(RA - 1);
6463         Changed = true;
6464         break;
6465       }
6466       if (RA.isMaxValue()) {
6467         Pred = ICmpInst::ICMP_EQ;
6468         Changed = true;
6469         break;
6470       }
6471       if (RA.isMinValue()) goto trivially_true;
6472
6473       Pred = ICmpInst::ICMP_UGT;
6474       RHS = getConstant(RA - 1);
6475       Changed = true;
6476       break;
6477     case ICmpInst::ICMP_ULE:
6478       if ((RA + 1).isMaxValue()) {
6479         Pred = ICmpInst::ICMP_NE;
6480         RHS = getConstant(RA + 1);
6481         Changed = true;
6482         break;
6483       }
6484       if (RA.isMinValue()) {
6485         Pred = ICmpInst::ICMP_EQ;
6486         Changed = true;
6487         break;
6488       }
6489       if (RA.isMaxValue()) goto trivially_true;
6490
6491       Pred = ICmpInst::ICMP_ULT;
6492       RHS = getConstant(RA + 1);
6493       Changed = true;
6494       break;
6495     case ICmpInst::ICMP_SGE:
6496       if ((RA - 1).isMinSignedValue()) {
6497         Pred = ICmpInst::ICMP_NE;
6498         RHS = getConstant(RA - 1);
6499         Changed = true;
6500         break;
6501       }
6502       if (RA.isMaxSignedValue()) {
6503         Pred = ICmpInst::ICMP_EQ;
6504         Changed = true;
6505         break;
6506       }
6507       if (RA.isMinSignedValue()) goto trivially_true;
6508
6509       Pred = ICmpInst::ICMP_SGT;
6510       RHS = getConstant(RA - 1);
6511       Changed = true;
6512       break;
6513     case ICmpInst::ICMP_SLE:
6514       if ((RA + 1).isMaxSignedValue()) {
6515         Pred = ICmpInst::ICMP_NE;
6516         RHS = getConstant(RA + 1);
6517         Changed = true;
6518         break;
6519       }
6520       if (RA.isMinSignedValue()) {
6521         Pred = ICmpInst::ICMP_EQ;
6522         Changed = true;
6523         break;
6524       }
6525       if (RA.isMaxSignedValue()) goto trivially_true;
6526
6527       Pred = ICmpInst::ICMP_SLT;
6528       RHS = getConstant(RA + 1);
6529       Changed = true;
6530       break;
6531     case ICmpInst::ICMP_UGT:
6532       if (RA.isMinValue()) {
6533         Pred = ICmpInst::ICMP_NE;
6534         Changed = true;
6535         break;
6536       }
6537       if ((RA + 1).isMaxValue()) {
6538         Pred = ICmpInst::ICMP_EQ;
6539         RHS = getConstant(RA + 1);
6540         Changed = true;
6541         break;
6542       }
6543       if (RA.isMaxValue()) goto trivially_false;
6544       break;
6545     case ICmpInst::ICMP_ULT:
6546       if (RA.isMaxValue()) {
6547         Pred = ICmpInst::ICMP_NE;
6548         Changed = true;
6549         break;
6550       }
6551       if ((RA - 1).isMinValue()) {
6552         Pred = ICmpInst::ICMP_EQ;
6553         RHS = getConstant(RA - 1);
6554         Changed = true;
6555         break;
6556       }
6557       if (RA.isMinValue()) goto trivially_false;
6558       break;
6559     case ICmpInst::ICMP_SGT:
6560       if (RA.isMinSignedValue()) {
6561         Pred = ICmpInst::ICMP_NE;
6562         Changed = true;
6563         break;
6564       }
6565       if ((RA + 1).isMaxSignedValue()) {
6566         Pred = ICmpInst::ICMP_EQ;
6567         RHS = getConstant(RA + 1);
6568         Changed = true;
6569         break;
6570       }
6571       if (RA.isMaxSignedValue()) goto trivially_false;
6572       break;
6573     case ICmpInst::ICMP_SLT:
6574       if (RA.isMaxSignedValue()) {
6575         Pred = ICmpInst::ICMP_NE;
6576         Changed = true;
6577         break;
6578       }
6579       if ((RA - 1).isMinSignedValue()) {
6580        Pred = ICmpInst::ICMP_EQ;
6581        RHS = getConstant(RA - 1);
6582         Changed = true;
6583        break;
6584       }
6585       if (RA.isMinSignedValue()) goto trivially_false;
6586       break;
6587     }
6588   }
6589
6590   // Check for obvious equality.
6591   if (HasSameValue(LHS, RHS)) {
6592     if (ICmpInst::isTrueWhenEqual(Pred))
6593       goto trivially_true;
6594     if (ICmpInst::isFalseWhenEqual(Pred))
6595       goto trivially_false;
6596   }
6597
6598   // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
6599   // adding or subtracting 1 from one of the operands.
6600   switch (Pred) {
6601   case ICmpInst::ICMP_SLE:
6602     if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
6603       RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
6604                        SCEV::FlagNSW);
6605       Pred = ICmpInst::ICMP_SLT;
6606       Changed = true;
6607     } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
6608       LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
6609                        SCEV::FlagNSW);
6610       Pred = ICmpInst::ICMP_SLT;
6611       Changed = true;
6612     }
6613     break;
6614   case ICmpInst::ICMP_SGE:
6615     if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
6616       RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
6617                        SCEV::FlagNSW);
6618       Pred = ICmpInst::ICMP_SGT;
6619       Changed = true;
6620     } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
6621       LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
6622                        SCEV::FlagNSW);
6623       Pred = ICmpInst::ICMP_SGT;
6624       Changed = true;
6625     }
6626     break;
6627   case ICmpInst::ICMP_ULE:
6628     if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
6629       RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
6630                        SCEV::FlagNUW);
6631       Pred = ICmpInst::ICMP_ULT;
6632       Changed = true;
6633     } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
6634       LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
6635                        SCEV::FlagNUW);
6636       Pred = ICmpInst::ICMP_ULT;
6637       Changed = true;
6638     }
6639     break;
6640   case ICmpInst::ICMP_UGE:
6641     if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
6642       RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
6643                        SCEV::FlagNUW);
6644       Pred = ICmpInst::ICMP_UGT;
6645       Changed = true;
6646     } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
6647       LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
6648                        SCEV::FlagNUW);
6649       Pred = ICmpInst::ICMP_UGT;
6650       Changed = true;
6651     }
6652     break;
6653   default:
6654     break;
6655   }
6656
6657   // TODO: More simplifications are possible here.
6658
6659   // Recursively simplify until we either hit a recursion limit or nothing
6660   // changes.
6661   if (Changed)
6662     return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
6663
6664   return Changed;
6665
6666 trivially_true:
6667   // Return 0 == 0.
6668   LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
6669   Pred = ICmpInst::ICMP_EQ;
6670   return true;
6671
6672 trivially_false:
6673   // Return 0 != 0.
6674   LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
6675   Pred = ICmpInst::ICMP_NE;
6676   return true;
6677 }
6678
6679 bool ScalarEvolution::isKnownNegative(const SCEV *S) {
6680   return getSignedRange(S).getSignedMax().isNegative();
6681 }
6682
6683 bool ScalarEvolution::isKnownPositive(const SCEV *S) {
6684   return getSignedRange(S).getSignedMin().isStrictlyPositive();
6685 }
6686
6687 bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
6688   return !getSignedRange(S).getSignedMin().isNegative();
6689 }
6690
6691 bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
6692   return !getSignedRange(S).getSignedMax().isStrictlyPositive();
6693 }
6694
6695 bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
6696   return isKnownNegative(S) || isKnownPositive(S);
6697 }
6698
6699 bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
6700                                        const SCEV *LHS, const SCEV *RHS) {
6701   // Canonicalize the inputs first.
6702   (void)SimplifyICmpOperands(Pred, LHS, RHS);
6703
6704   // If LHS or RHS is an addrec, check to see if the condition is true in
6705   // every iteration of the loop.
6706   // If LHS and RHS are both addrec, both conditions must be true in
6707   // every iteration of the loop.
6708   const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
6709   const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
6710   bool LeftGuarded = false;
6711   bool RightGuarded = false;
6712   if (LAR) {
6713     const Loop *L = LAR->getLoop();
6714     if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) &&
6715         isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) {
6716       if (!RAR) return true;
6717       LeftGuarded = true;
6718     }
6719   }
6720   if (RAR) {
6721     const Loop *L = RAR->getLoop();
6722     if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) &&
6723         isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) {
6724       if (!LAR) return true;
6725       RightGuarded = true;
6726     }
6727   }
6728   if (LeftGuarded && RightGuarded)
6729     return true;
6730
6731   // Otherwise see what can be done with known constant ranges.
6732   return isKnownPredicateWithRanges(Pred, LHS, RHS);
6733 }
6734
6735 bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS,
6736                                            ICmpInst::Predicate Pred,
6737                                            bool &Increasing) {
6738   bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing);
6739
6740 #ifndef NDEBUG
6741   // Verify an invariant: inverting the predicate should turn a monotonically
6742   // increasing change to a monotonically decreasing one, and vice versa.
6743   bool IncreasingSwapped;
6744   bool ResultSwapped = isMonotonicPredicateImpl(
6745       LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped);
6746
6747   assert(Result == ResultSwapped && "should be able to analyze both!");
6748   if (ResultSwapped)
6749     assert(Increasing == !IncreasingSwapped &&
6750            "monotonicity should flip as we flip the predicate");
6751 #endif
6752
6753   return Result;
6754 }
6755
6756 bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
6757                                                ICmpInst::Predicate Pred,
6758                                                bool &Increasing) {
6759
6760   // A zero step value for LHS means the induction variable is essentially a
6761   // loop invariant value. We don't really depend on the predicate actually
6762   // flipping from false to true (for increasing predicates, and the other way
6763   // around for decreasing predicates), all we care about is that *if* the
6764   // predicate changes then it only changes from false to true.
6765   //
6766   // A zero step value in itself is not very useful, but there may be places
6767   // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
6768   // as general as possible.
6769
6770   switch (Pred) {
6771   default:
6772     return false; // Conservative answer
6773
6774   case ICmpInst::ICMP_UGT:
6775   case ICmpInst::ICMP_UGE:
6776   case ICmpInst::ICMP_ULT:
6777   case ICmpInst::ICMP_ULE:
6778     if (!LHS->getNoWrapFlags(SCEV::FlagNUW))
6779       return false;
6780
6781     Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE;
6782     return true;
6783
6784   case ICmpInst::ICMP_SGT:
6785   case ICmpInst::ICMP_SGE:
6786   case ICmpInst::ICMP_SLT:
6787   case ICmpInst::ICMP_SLE: {
6788     if (!LHS->getNoWrapFlags(SCEV::FlagNSW))
6789       return false;
6790
6791     const SCEV *Step = LHS->getStepRecurrence(*this);
6792
6793     if (isKnownNonNegative(Step)) {
6794       Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE;
6795       return true;
6796     }
6797
6798     if (isKnownNonPositive(Step)) {
6799       Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE;
6800       return true;
6801     }
6802
6803     return false;
6804   }
6805
6806   }
6807
6808   llvm_unreachable("switch has default clause!");
6809 }
6810
6811 bool ScalarEvolution::isLoopInvariantPredicate(
6812     ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
6813     ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
6814     const SCEV *&InvariantRHS) {
6815
6816   // If there is a loop-invariant, force it into the RHS, otherwise bail out.
6817   if (!isLoopInvariant(RHS, L)) {
6818     if (!isLoopInvariant(LHS, L))
6819       return false;
6820
6821     std::swap(LHS, RHS);
6822     Pred = ICmpInst::getSwappedPredicate(Pred);
6823   }
6824
6825   const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS);
6826   if (!ArLHS || ArLHS->getLoop() != L)
6827     return false;
6828
6829   bool Increasing;
6830   if (!isMonotonicPredicate(ArLHS, Pred, Increasing))
6831     return false;
6832
6833   // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to
6834   // true as the loop iterates, and the backedge is control dependent on
6835   // "ArLHS `Pred` RHS" == true then we can reason as follows:
6836   //
6837   //   * if the predicate was false in the first iteration then the predicate
6838   //     is never evaluated again, since the loop exits without taking the
6839   //     backedge.
6840   //   * if the predicate was true in the first iteration then it will
6841   //     continue to be true for all future iterations since it is
6842   //     monotonically increasing.
6843   //
6844   // For both the above possibilities, we can replace the loop varying
6845   // predicate with its value on the first iteration of the loop (which is
6846   // loop invariant).
6847   //
6848   // A similar reasoning applies for a monotonically decreasing predicate, by
6849   // replacing true with false and false with true in the above two bullets.
6850
6851   auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred);
6852
6853   if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS))
6854     return false;
6855
6856   InvariantPred = Pred;
6857   InvariantLHS = ArLHS->getStart();
6858   InvariantRHS = RHS;
6859   return true;
6860 }
6861
6862 bool
6863 ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
6864                                             const SCEV *LHS, const SCEV *RHS) {
6865   if (HasSameValue(LHS, RHS))
6866     return ICmpInst::isTrueWhenEqual(Pred);
6867
6868   // This code is split out from isKnownPredicate because it is called from
6869   // within isLoopEntryGuardedByCond.
6870   switch (Pred) {
6871   default:
6872     llvm_unreachable("Unexpected ICmpInst::Predicate value!");
6873   case ICmpInst::ICMP_SGT:
6874     std::swap(LHS, RHS);
6875   case ICmpInst::ICMP_SLT: {
6876     ConstantRange LHSRange = getSignedRange(LHS);
6877     ConstantRange RHSRange = getSignedRange(RHS);
6878     if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
6879       return true;
6880     if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
6881       return false;
6882     break;
6883   }
6884   case ICmpInst::ICMP_SGE:
6885     std::swap(LHS, RHS);
6886   case ICmpInst::ICMP_SLE: {
6887     ConstantRange LHSRange = getSignedRange(LHS);
6888     ConstantRange RHSRange = getSignedRange(RHS);
6889     if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
6890       return true;
6891     if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
6892       return false;
6893     break;
6894   }
6895   case ICmpInst::ICMP_UGT:
6896     std::swap(LHS, RHS);
6897   case ICmpInst::ICMP_ULT: {
6898     ConstantRange LHSRange = getUnsignedRange(LHS);
6899     ConstantRange RHSRange = getUnsignedRange(RHS);
6900     if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
6901       return true;
6902     if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
6903       return false;
6904     break;
6905   }
6906   case ICmpInst::ICMP_UGE:
6907     std::swap(LHS, RHS);
6908   case ICmpInst::ICMP_ULE: {
6909     ConstantRange LHSRange = getUnsignedRange(LHS);
6910     ConstantRange RHSRange = getUnsignedRange(RHS);
6911     if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
6912       return true;
6913     if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
6914       return false;
6915     break;
6916   }
6917   case ICmpInst::ICMP_NE: {
6918     if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
6919       return true;
6920     if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
6921       return true;
6922
6923     const SCEV *Diff = getMinusSCEV(LHS, RHS);
6924     if (isKnownNonZero(Diff))
6925       return true;
6926     break;
6927   }
6928   case ICmpInst::ICMP_EQ:
6929     // The check at the top of the function catches the case where
6930     // the values are known to be equal.
6931     break;
6932   }
6933   return false;
6934 }
6935
6936 /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
6937 /// protected by a conditional between LHS and RHS.  This is used to
6938 /// to eliminate casts.
6939 bool
6940 ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
6941                                              ICmpInst::Predicate Pred,
6942                                              const SCEV *LHS, const SCEV *RHS) {
6943   // Interpret a null as meaning no loop, where there is obviously no guard
6944   // (interprocedural conditions notwithstanding).
6945   if (!L) return true;
6946
6947   if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true;
6948
6949   BasicBlock *Latch = L->getLoopLatch();
6950   if (!Latch)
6951     return false;
6952
6953   BranchInst *LoopContinuePredicate =
6954     dyn_cast<BranchInst>(Latch->getTerminator());
6955   if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
6956       isImpliedCond(Pred, LHS, RHS,
6957                     LoopContinuePredicate->getCondition(),
6958                     LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
6959     return true;
6960
6961   // Check conditions due to any @llvm.assume intrinsics.
6962   for (auto &AssumeVH : AC.assumptions()) {
6963     if (!AssumeVH)
6964       continue;
6965     auto *CI = cast<CallInst>(AssumeVH);
6966     if (!DT.dominates(CI, Latch->getTerminator()))
6967       continue;
6968
6969     if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
6970       return true;
6971   }
6972
6973   struct ClearWalkingBEDominatingCondsOnExit {
6974     ScalarEvolution &SE;
6975
6976     explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE)
6977         : SE(SE){}
6978
6979     ~ClearWalkingBEDominatingCondsOnExit() {
6980       SE.WalkingBEDominatingConds = false;
6981     }
6982   };
6983
6984   // We don't want more than one activation of the following loop on the stack
6985   // -- that can lead to O(n!) time complexity.
6986   if (WalkingBEDominatingConds)
6987     return false;
6988
6989   WalkingBEDominatingConds = true;
6990   ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this);
6991
6992   // If the loop is not reachable from the entry block, we risk running into an
6993   // infinite loop as we walk up into the dom tree.  These loops do not matter
6994   // anyway, so we just return a conservative answer when we see them.
6995   if (!DT.isReachableFromEntry(L->getHeader()))
6996     return false;
6997
6998   for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
6999        DTN != HeaderDTN; DTN = DTN->getIDom()) {
7000
7001     assert(DTN && "should reach the loop header before reaching the root!");
7002
7003     BasicBlock *BB = DTN->getBlock();
7004     BasicBlock *PBB = BB->getSinglePredecessor();
7005     if (!PBB)
7006       continue;
7007
7008     BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
7009     if (!ContinuePredicate || !ContinuePredicate->isConditional())
7010       continue;
7011
7012     Value *Condition = ContinuePredicate->getCondition();
7013
7014     // If we have an edge `E` within the loop body that dominates the only
7015     // latch, the condition guarding `E` also guards the backedge.  This
7016     // reasoning works only for loops with a single latch.
7017
7018     BasicBlockEdge DominatingEdge(PBB, BB);
7019     if (DominatingEdge.isSingleEdge()) {
7020       // We're constructively (and conservatively) enumerating edges within the
7021       // loop body that dominate the latch.  The dominator tree better agree
7022       // with us on this:
7023       assert(DT.dominates(DominatingEdge, Latch) && "should be!");
7024
7025       if (isImpliedCond(Pred, LHS, RHS, Condition,
7026                         BB != ContinuePredicate->getSuccessor(0)))
7027         return true;
7028     }
7029   }
7030
7031   return false;
7032 }
7033
7034 /// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
7035 /// by a conditional between LHS and RHS.  This is used to help avoid max
7036 /// expressions in loop trip counts, and to eliminate casts.
7037 bool
7038 ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
7039                                           ICmpInst::Predicate Pred,
7040                                           const SCEV *LHS, const SCEV *RHS) {
7041   // Interpret a null as meaning no loop, where there is obviously no guard
7042   // (interprocedural conditions notwithstanding).
7043   if (!L) return false;
7044
7045   if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true;
7046
7047   // Starting at the loop predecessor, climb up the predecessor chain, as long
7048   // as there are predecessors that can be found that have unique successors
7049   // leading to the original header.
7050   for (std::pair<BasicBlock *, BasicBlock *>
7051          Pair(L->getLoopPredecessor(), L->getHeader());
7052        Pair.first;
7053        Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
7054
7055     BranchInst *LoopEntryPredicate =
7056       dyn_cast<BranchInst>(Pair.first->getTerminator());
7057     if (!LoopEntryPredicate ||
7058         LoopEntryPredicate->isUnconditional())
7059       continue;
7060
7061     if (isImpliedCond(Pred, LHS, RHS,
7062                       LoopEntryPredicate->getCondition(),
7063                       LoopEntryPredicate->getSuccessor(0) != Pair.second))
7064       return true;
7065   }
7066
7067   // Check conditions due to any @llvm.assume intrinsics.
7068   for (auto &AssumeVH : AC.assumptions()) {
7069     if (!AssumeVH)
7070       continue;
7071     auto *CI = cast<CallInst>(AssumeVH);
7072     if (!DT.dominates(CI, L->getHeader()))
7073       continue;
7074
7075     if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
7076       return true;
7077   }
7078
7079   return false;
7080 }
7081
7082 /// RAII wrapper to prevent recursive application of isImpliedCond.
7083 /// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
7084 /// currently evaluating isImpliedCond.
7085 struct MarkPendingLoopPredicate {
7086   Value *Cond;
7087   DenseSet<Value*> &LoopPreds;
7088   bool Pending;
7089
7090   MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP)
7091     : Cond(C), LoopPreds(LP) {
7092     Pending = !LoopPreds.insert(Cond).second;
7093   }
7094   ~MarkPendingLoopPredicate() {
7095     if (!Pending)
7096       LoopPreds.erase(Cond);
7097   }
7098 };
7099
7100 /// isImpliedCond - Test whether the condition described by Pred, LHS,
7101 /// and RHS is true whenever the given Cond value evaluates to true.
7102 bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
7103                                     const SCEV *LHS, const SCEV *RHS,
7104                                     Value *FoundCondValue,
7105                                     bool Inverse) {
7106   MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates);
7107   if (Mark.Pending)
7108     return false;
7109
7110   // Recursively handle And and Or conditions.
7111   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
7112     if (BO->getOpcode() == Instruction::And) {
7113       if (!Inverse)
7114         return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
7115                isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
7116     } else if (BO->getOpcode() == Instruction::Or) {
7117       if (Inverse)
7118         return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
7119                isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
7120     }
7121   }
7122
7123   ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
7124   if (!ICI) return false;
7125
7126   // Now that we found a conditional branch that dominates the loop or controls
7127   // the loop latch. Check to see if it is the comparison we are looking for.
7128   ICmpInst::Predicate FoundPred;
7129   if (Inverse)
7130     FoundPred = ICI->getInversePredicate();
7131   else
7132     FoundPred = ICI->getPredicate();
7133
7134   const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
7135   const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
7136
7137   // Balance the types.
7138   if (getTypeSizeInBits(LHS->getType()) <
7139       getTypeSizeInBits(FoundLHS->getType())) {
7140     if (CmpInst::isSigned(Pred)) {
7141       LHS = getSignExtendExpr(LHS, FoundLHS->getType());
7142       RHS = getSignExtendExpr(RHS, FoundLHS->getType());
7143     } else {
7144       LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
7145       RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
7146     }
7147   } else if (getTypeSizeInBits(LHS->getType()) >
7148       getTypeSizeInBits(FoundLHS->getType())) {
7149     if (CmpInst::isSigned(FoundPred)) {
7150       FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
7151       FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
7152     } else {
7153       FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
7154       FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
7155     }
7156   }
7157
7158   // Canonicalize the query to match the way instcombine will have
7159   // canonicalized the comparison.
7160   if (SimplifyICmpOperands(Pred, LHS, RHS))
7161     if (LHS == RHS)
7162       return CmpInst::isTrueWhenEqual(Pred);
7163   if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
7164     if (FoundLHS == FoundRHS)
7165       return CmpInst::isFalseWhenEqual(FoundPred);
7166
7167   // Check to see if we can make the LHS or RHS match.
7168   if (LHS == FoundRHS || RHS == FoundLHS) {
7169     if (isa<SCEVConstant>(RHS)) {
7170       std::swap(FoundLHS, FoundRHS);
7171       FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
7172     } else {
7173       std::swap(LHS, RHS);
7174       Pred = ICmpInst::getSwappedPredicate(Pred);
7175     }
7176   }
7177
7178   // Check whether the found predicate is the same as the desired predicate.
7179   if (FoundPred == Pred)
7180     return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
7181
7182   // Check whether swapping the found predicate makes it the same as the
7183   // desired predicate.
7184   if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
7185     if (isa<SCEVConstant>(RHS))
7186       return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
7187     else
7188       return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
7189                                    RHS, LHS, FoundLHS, FoundRHS);
7190   }
7191
7192   // Check if we can make progress by sharpening ranges.
7193   if (FoundPred == ICmpInst::ICMP_NE &&
7194       (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
7195
7196     const SCEVConstant *C = nullptr;
7197     const SCEV *V = nullptr;
7198
7199     if (isa<SCEVConstant>(FoundLHS)) {
7200       C = cast<SCEVConstant>(FoundLHS);
7201       V = FoundRHS;
7202     } else {
7203       C = cast<SCEVConstant>(FoundRHS);
7204       V = FoundLHS;
7205     }
7206
7207     // The guarding predicate tells us that C != V. If the known range
7208     // of V is [C, t), we can sharpen the range to [C + 1, t).  The
7209     // range we consider has to correspond to same signedness as the
7210     // predicate we're interested in folding.
7211
7212     APInt Min = ICmpInst::isSigned(Pred) ?
7213         getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin();
7214
7215     if (Min == C->getValue()->getValue()) {
7216       // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
7217       // This is true even if (Min + 1) wraps around -- in case of
7218       // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
7219
7220       APInt SharperMin = Min + 1;
7221
7222       switch (Pred) {
7223         case ICmpInst::ICMP_SGE:
7224         case ICmpInst::ICMP_UGE:
7225           // We know V `Pred` SharperMin.  If this implies LHS `Pred`
7226           // RHS, we're done.
7227           if (isImpliedCondOperands(Pred, LHS, RHS, V,
7228                                     getConstant(SharperMin)))
7229             return true;
7230
7231         case ICmpInst::ICMP_SGT:
7232         case ICmpInst::ICMP_UGT:
7233           // We know from the range information that (V `Pred` Min ||
7234           // V == Min).  We know from the guarding condition that !(V
7235           // == Min).  This gives us
7236           //
7237           //       V `Pred` Min || V == Min && !(V == Min)
7238           //   =>  V `Pred` Min
7239           //
7240           // If V `Pred` Min implies LHS `Pred` RHS, we're done.
7241
7242           if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min)))
7243             return true;
7244
7245         default:
7246           // No change
7247           break;
7248       }
7249     }
7250   }
7251
7252   // Check whether the actual condition is beyond sufficient.
7253   if (FoundPred == ICmpInst::ICMP_EQ)
7254     if (ICmpInst::isTrueWhenEqual(Pred))
7255       if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
7256         return true;
7257   if (Pred == ICmpInst::ICMP_NE)
7258     if (!ICmpInst::isTrueWhenEqual(FoundPred))
7259       if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
7260         return true;
7261
7262   // Otherwise assume the worst.
7263   return false;
7264 }
7265
7266 /// isImpliedCondOperands - Test whether the condition described by Pred,
7267 /// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
7268 /// and FoundRHS is true.
7269 bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
7270                                             const SCEV *LHS, const SCEV *RHS,
7271                                             const SCEV *FoundLHS,
7272                                             const SCEV *FoundRHS) {
7273   if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
7274     return true;
7275
7276   return isImpliedCondOperandsHelper(Pred, LHS, RHS,
7277                                      FoundLHS, FoundRHS) ||
7278          // ~x < ~y --> x > y
7279          isImpliedCondOperandsHelper(Pred, LHS, RHS,
7280                                      getNotSCEV(FoundRHS),
7281                                      getNotSCEV(FoundLHS));
7282 }
7283
7284
7285 /// If Expr computes ~A, return A else return nullptr
7286 static const SCEV *MatchNotExpr(const SCEV *Expr) {
7287   const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
7288   if (!Add || Add->getNumOperands() != 2) return nullptr;
7289
7290   const SCEVConstant *AddLHS = dyn_cast<SCEVConstant>(Add->getOperand(0));
7291   if (!(AddLHS && AddLHS->getValue()->getValue().isAllOnesValue()))
7292     return nullptr;
7293
7294   const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
7295   if (!AddRHS || AddRHS->getNumOperands() != 2) return nullptr;
7296
7297   const SCEVConstant *MulLHS = dyn_cast<SCEVConstant>(AddRHS->getOperand(0));
7298   if (!(MulLHS && MulLHS->getValue()->getValue().isAllOnesValue()))
7299     return nullptr;
7300
7301   return AddRHS->getOperand(1);
7302 }
7303
7304
7305 /// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
7306 template<typename MaxExprType>
7307 static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
7308                               const SCEV *Candidate) {
7309   const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
7310   if (!MaxExpr) return false;
7311
7312   auto It = std::find(MaxExpr->op_begin(), MaxExpr->op_end(), Candidate);
7313   return It != MaxExpr->op_end();
7314 }
7315
7316
7317 /// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
7318 template<typename MaxExprType>
7319 static bool IsMinConsistingOf(ScalarEvolution &SE,
7320                               const SCEV *MaybeMinExpr,
7321                               const SCEV *Candidate) {
7322   const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
7323   if (!MaybeMaxExpr)
7324     return false;
7325
7326   return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
7327 }
7328
7329 static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
7330                                            ICmpInst::Predicate Pred,
7331                                            const SCEV *LHS, const SCEV *RHS) {
7332
7333   // If both sides are affine addrecs for the same loop, with equal
7334   // steps, and we know the recurrences don't wrap, then we only
7335   // need to check the predicate on the starting values.
7336
7337   if (!ICmpInst::isRelational(Pred))
7338     return false;
7339
7340   const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
7341   if (!LAR)
7342     return false;
7343   const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
7344   if (!RAR)
7345     return false;
7346   if (LAR->getLoop() != RAR->getLoop())
7347     return false;
7348   if (!LAR->isAffine() || !RAR->isAffine())
7349     return false;
7350
7351   if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE))
7352     return false;
7353
7354   SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ?
7355                          SCEV::FlagNSW : SCEV::FlagNUW;
7356   if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW))
7357     return false;
7358
7359   return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart());
7360 }
7361
7362 /// Is LHS `Pred` RHS true on the virtue of LHS or RHS being a Min or Max
7363 /// expression?
7364 static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
7365                                         ICmpInst::Predicate Pred,
7366                                         const SCEV *LHS, const SCEV *RHS) {
7367   switch (Pred) {
7368   default:
7369     return false;
7370
7371   case ICmpInst::ICMP_SGE:
7372     std::swap(LHS, RHS);
7373     // fall through
7374   case ICmpInst::ICMP_SLE:
7375     return
7376       // min(A, ...) <= A
7377       IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
7378       // A <= max(A, ...)
7379       IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
7380
7381   case ICmpInst::ICMP_UGE:
7382     std::swap(LHS, RHS);
7383     // fall through
7384   case ICmpInst::ICMP_ULE:
7385     return
7386       // min(A, ...) <= A
7387       IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
7388       // A <= max(A, ...)
7389       IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
7390   }
7391
7392   llvm_unreachable("covered switch fell through?!");
7393 }
7394
7395 /// isImpliedCondOperandsHelper - Test whether the condition described by
7396 /// Pred, LHS, and RHS is true whenever the condition described by Pred,
7397 /// FoundLHS, and FoundRHS is true.
7398 bool
7399 ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
7400                                              const SCEV *LHS, const SCEV *RHS,
7401                                              const SCEV *FoundLHS,
7402                                              const SCEV *FoundRHS) {
7403   auto IsKnownPredicateFull =
7404       [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
7405     return isKnownPredicateWithRanges(Pred, LHS, RHS) ||
7406         IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
7407         IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS);
7408   };
7409
7410   switch (Pred) {
7411   default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
7412   case ICmpInst::ICMP_EQ:
7413   case ICmpInst::ICMP_NE:
7414     if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
7415       return true;
7416     break;
7417   case ICmpInst::ICMP_SLT:
7418   case ICmpInst::ICMP_SLE:
7419     if (IsKnownPredicateFull(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
7420         IsKnownPredicateFull(ICmpInst::ICMP_SGE, RHS, FoundRHS))
7421       return true;
7422     break;
7423   case ICmpInst::ICMP_SGT:
7424   case ICmpInst::ICMP_SGE:
7425     if (IsKnownPredicateFull(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
7426         IsKnownPredicateFull(ICmpInst::ICMP_SLE, RHS, FoundRHS))
7427       return true;
7428     break;
7429   case ICmpInst::ICMP_ULT:
7430   case ICmpInst::ICMP_ULE:
7431     if (IsKnownPredicateFull(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
7432         IsKnownPredicateFull(ICmpInst::ICMP_UGE, RHS, FoundRHS))
7433       return true;
7434     break;
7435   case ICmpInst::ICMP_UGT:
7436   case ICmpInst::ICMP_UGE:
7437     if (IsKnownPredicateFull(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
7438         IsKnownPredicateFull(ICmpInst::ICMP_ULE, RHS, FoundRHS))
7439       return true;
7440     break;
7441   }
7442
7443   return false;
7444 }
7445
7446 /// isImpliedCondOperandsViaRanges - helper function for isImpliedCondOperands.
7447 /// Tries to get cases like "X `sgt` 0 => X - 1 `sgt` -1".
7448 bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
7449                                                      const SCEV *LHS,
7450                                                      const SCEV *RHS,
7451                                                      const SCEV *FoundLHS,
7452                                                      const SCEV *FoundRHS) {
7453   if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
7454     // The restriction on `FoundRHS` be lifted easily -- it exists only to
7455     // reduce the compile time impact of this optimization.
7456     return false;
7457
7458   const SCEVAddExpr *AddLHS = dyn_cast<SCEVAddExpr>(LHS);
7459   if (!AddLHS || AddLHS->getOperand(1) != FoundLHS ||
7460       !isa<SCEVConstant>(AddLHS->getOperand(0)))
7461     return false;
7462
7463   APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getValue()->getValue();
7464
7465   // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
7466   // antecedent "`FoundLHS` `Pred` `FoundRHS`".
7467   ConstantRange FoundLHSRange =
7468       ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);
7469
7470   // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
7471   // for `LHS`:
7472   APInt Addend =
7473       cast<SCEVConstant>(AddLHS->getOperand(0))->getValue()->getValue();
7474   ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
7475
7476   // We can also compute the range of values for `LHS` that satisfy the
7477   // consequent, "`LHS` `Pred` `RHS`":
7478   APInt ConstRHS = cast<SCEVConstant>(RHS)->getValue()->getValue();
7479   ConstantRange SatisfyingLHSRange =
7480       ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
7481
7482   // The antecedent implies the consequent if every value of `LHS` that
7483   // satisfies the antecedent also satisfies the consequent.
7484   return SatisfyingLHSRange.contains(LHSRange);
7485 }
7486
7487 // Verify if an linear IV with positive stride can overflow when in a
7488 // less-than comparison, knowing the invariant term of the comparison, the
7489 // stride and the knowledge of NSW/NUW flags on the recurrence.
7490 bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
7491                                          bool IsSigned, bool NoWrap) {
7492   if (NoWrap) return false;
7493
7494   unsigned BitWidth = getTypeSizeInBits(RHS->getType());
7495   const SCEV *One = getConstant(Stride->getType(), 1);
7496
7497   if (IsSigned) {
7498     APInt MaxRHS = getSignedRange(RHS).getSignedMax();
7499     APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
7500     APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
7501                                 .getSignedMax();
7502
7503     // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
7504     return (MaxValue - MaxStrideMinusOne).slt(MaxRHS);
7505   }
7506
7507   APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
7508   APInt MaxValue = APInt::getMaxValue(BitWidth);
7509   APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
7510                               .getUnsignedMax();
7511
7512   // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
7513   return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
7514 }
7515
7516 // Verify if an linear IV with negative stride can overflow when in a
7517 // greater-than comparison, knowing the invariant term of the comparison,
7518 // the stride and the knowledge of NSW/NUW flags on the recurrence.
7519 bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
7520                                          bool IsSigned, bool NoWrap) {
7521   if (NoWrap) return false;
7522
7523   unsigned BitWidth = getTypeSizeInBits(RHS->getType());
7524   const SCEV *One = getConstant(Stride->getType(), 1);
7525
7526   if (IsSigned) {
7527     APInt MinRHS = getSignedRange(RHS).getSignedMin();
7528     APInt MinValue = APInt::getSignedMinValue(BitWidth);
7529     APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
7530                                .getSignedMax();
7531
7532     // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
7533     return (MinValue + MaxStrideMinusOne).sgt(MinRHS);
7534   }
7535
7536   APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
7537   APInt MinValue = APInt::getMinValue(BitWidth);
7538   APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
7539                             .getUnsignedMax();
7540
7541   // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
7542   return (MinValue + MaxStrideMinusOne).ugt(MinRHS);
7543 }
7544
7545 // Compute the backedge taken count knowing the interval difference, the
7546 // stride and presence of the equality in the comparison.
7547 const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
7548                                             bool Equality) {
7549   const SCEV *One = getConstant(Step->getType(), 1);
7550   Delta = Equality ? getAddExpr(Delta, Step)
7551                    : getAddExpr(Delta, getMinusSCEV(Step, One));
7552   return getUDivExpr(Delta, Step);
7553 }
7554
7555 /// HowManyLessThans - Return the number of times a backedge containing the
7556 /// specified less-than comparison will execute.  If not computable, return
7557 /// CouldNotCompute.
7558 ///
7559 /// @param ControlsExit is true when the LHS < RHS condition directly controls
7560 /// the branch (loops exits only if condition is true). In this case, we can use
7561 /// NoWrapFlags to skip overflow checks.
7562 ScalarEvolution::ExitLimit
7563 ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
7564                                   const Loop *L, bool IsSigned,
7565                                   bool ControlsExit) {
7566   // We handle only IV < Invariant
7567   if (!isLoopInvariant(RHS, L))
7568     return getCouldNotCompute();
7569
7570   const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
7571
7572   // Avoid weird loops
7573   if (!IV || IV->getLoop() != L || !IV->isAffine())
7574     return getCouldNotCompute();
7575
7576   bool NoWrap = ControlsExit &&
7577                 IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
7578
7579   const SCEV *Stride = IV->getStepRecurrence(*this);
7580
7581   // Avoid negative or zero stride values
7582   if (!isKnownPositive(Stride))
7583     return getCouldNotCompute();
7584
7585   // Avoid proven overflow cases: this will ensure that the backedge taken count
7586   // will not generate any unsigned overflow. Relaxed no-overflow conditions
7587   // exploit NoWrapFlags, allowing to optimize in presence of undefined
7588   // behaviors like the case of C language.
7589   if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
7590     return getCouldNotCompute();
7591
7592   ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
7593                                       : ICmpInst::ICMP_ULT;
7594   const SCEV *Start = IV->getStart();
7595   const SCEV *End = RHS;
7596   if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) {
7597     const SCEV *Diff = getMinusSCEV(RHS, Start);
7598     // If we have NoWrap set, then we can assume that the increment won't
7599     // overflow, in which case if RHS - Start is a constant, we don't need to
7600     // do a max operation since we can just figure it out statically
7601     if (NoWrap && isa<SCEVConstant>(Diff)) {
7602       APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue();
7603       if (D.isNegative())
7604         End = Start;
7605     } else
7606       End = IsSigned ? getSMaxExpr(RHS, Start)
7607                      : getUMaxExpr(RHS, Start);
7608   }
7609
7610   const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
7611
7612   APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
7613                             : getUnsignedRange(Start).getUnsignedMin();
7614
7615   APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
7616                              : getUnsignedRange(Stride).getUnsignedMin();
7617
7618   unsigned BitWidth = getTypeSizeInBits(LHS->getType());
7619   APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
7620                          : APInt::getMaxValue(BitWidth) - (MinStride - 1);
7621
7622   // Although End can be a MAX expression we estimate MaxEnd considering only
7623   // the case End = RHS. This is safe because in the other case (End - Start)
7624   // is zero, leading to a zero maximum backedge taken count.
7625   APInt MaxEnd =
7626     IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
7627              : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
7628
7629   const SCEV *MaxBECount;
7630   if (isa<SCEVConstant>(BECount))
7631     MaxBECount = BECount;
7632   else
7633     MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
7634                                 getConstant(MinStride), false);
7635
7636   if (isa<SCEVCouldNotCompute>(MaxBECount))
7637     MaxBECount = BECount;
7638
7639   return ExitLimit(BECount, MaxBECount);
7640 }
7641
7642 ScalarEvolution::ExitLimit
7643 ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
7644                                      const Loop *L, bool IsSigned,
7645                                      bool ControlsExit) {
7646   // We handle only IV > Invariant
7647   if (!isLoopInvariant(RHS, L))
7648     return getCouldNotCompute();
7649
7650   const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
7651
7652   // Avoid weird loops
7653   if (!IV || IV->getLoop() != L || !IV->isAffine())
7654     return getCouldNotCompute();
7655
7656   bool NoWrap = ControlsExit &&
7657                 IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
7658
7659   const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
7660
7661   // Avoid negative or zero stride values
7662   if (!isKnownPositive(Stride))
7663     return getCouldNotCompute();
7664
7665   // Avoid proven overflow cases: this will ensure that the backedge taken count
7666   // will not generate any unsigned overflow. Relaxed no-overflow conditions
7667   // exploit NoWrapFlags, allowing to optimize in presence of undefined
7668   // behaviors like the case of C language.
7669   if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
7670     return getCouldNotCompute();
7671
7672   ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT
7673                                       : ICmpInst::ICMP_UGT;
7674
7675   const SCEV *Start = IV->getStart();
7676   const SCEV *End = RHS;
7677   if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) {
7678     const SCEV *Diff = getMinusSCEV(RHS, Start);
7679     // If we have NoWrap set, then we can assume that the increment won't
7680     // overflow, in which case if RHS - Start is a constant, we don't need to
7681     // do a max operation since we can just figure it out statically
7682     if (NoWrap && isa<SCEVConstant>(Diff)) {
7683       APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue();
7684       if (!D.isNegative())
7685         End = Start;
7686     } else
7687       End = IsSigned ? getSMinExpr(RHS, Start)
7688                      : getUMinExpr(RHS, Start);
7689   }
7690
7691   const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
7692
7693   APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax()
7694                             : getUnsignedRange(Start).getUnsignedMax();
7695
7696   APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
7697                              : getUnsignedRange(Stride).getUnsignedMin();
7698
7699   unsigned BitWidth = getTypeSizeInBits(LHS->getType());
7700   APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
7701                          : APInt::getMinValue(BitWidth) + (MinStride - 1);
7702
7703   // Although End can be a MIN expression we estimate MinEnd considering only
7704   // the case End = RHS. This is safe because in the other case (Start - End)
7705   // is zero, leading to a zero maximum backedge taken count.
7706   APInt MinEnd =
7707     IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit)
7708              : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit);
7709
7710
7711   const SCEV *MaxBECount = getCouldNotCompute();
7712   if (isa<SCEVConstant>(BECount))
7713     MaxBECount = BECount;
7714   else
7715     MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
7716                                 getConstant(MinStride), false);
7717
7718   if (isa<SCEVCouldNotCompute>(MaxBECount))
7719     MaxBECount = BECount;
7720
7721   return ExitLimit(BECount, MaxBECount);
7722 }
7723
7724 /// getNumIterationsInRange - Return the number of iterations of this loop that
7725 /// produce values in the specified constant range.  Another way of looking at
7726 /// this is that it returns the first iteration number where the value is not in
7727 /// the condition, thus computing the exit count. If the iteration count can't
7728 /// be computed, an instance of SCEVCouldNotCompute is returned.
7729 const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
7730                                                     ScalarEvolution &SE) const {
7731   if (Range.isFullSet())  // Infinite loop.
7732     return SE.getCouldNotCompute();
7733
7734   // If the start is a non-zero constant, shift the range to simplify things.
7735   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
7736     if (!SC->getValue()->isZero()) {
7737       SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
7738       Operands[0] = SE.getConstant(SC->getType(), 0);
7739       const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
7740                                              getNoWrapFlags(FlagNW));
7741       if (const SCEVAddRecExpr *ShiftedAddRec =
7742             dyn_cast<SCEVAddRecExpr>(Shifted))
7743         return ShiftedAddRec->getNumIterationsInRange(
7744                            Range.subtract(SC->getValue()->getValue()), SE);
7745       // This is strange and shouldn't happen.
7746       return SE.getCouldNotCompute();
7747     }
7748
7749   // The only time we can solve this is when we have all constant indices.
7750   // Otherwise, we cannot determine the overflow conditions.
7751   for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
7752     if (!isa<SCEVConstant>(getOperand(i)))
7753       return SE.getCouldNotCompute();
7754
7755
7756   // Okay at this point we know that all elements of the chrec are constants and
7757   // that the start element is zero.
7758
7759   // First check to see if the range contains zero.  If not, the first
7760   // iteration exits.
7761   unsigned BitWidth = SE.getTypeSizeInBits(getType());
7762   if (!Range.contains(APInt(BitWidth, 0)))
7763     return SE.getConstant(getType(), 0);
7764
7765   if (isAffine()) {
7766     // If this is an affine expression then we have this situation:
7767     //   Solve {0,+,A} in Range  ===  Ax in Range
7768
7769     // We know that zero is in the range.  If A is positive then we know that
7770     // the upper value of the range must be the first possible exit value.
7771     // If A is negative then the lower of the range is the last possible loop
7772     // value.  Also note that we already checked for a full range.
7773     APInt One(BitWidth,1);
7774     APInt A     = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
7775     APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
7776
7777     // The exit value should be (End+A)/A.
7778     APInt ExitVal = (End + A).udiv(A);
7779     ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
7780
7781     // Evaluate at the exit value.  If we really did fall out of the valid
7782     // range, then we computed our trip count, otherwise wrap around or other
7783     // things must have happened.
7784     ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
7785     if (Range.contains(Val->getValue()))
7786       return SE.getCouldNotCompute();  // Something strange happened
7787
7788     // Ensure that the previous value is in the range.  This is a sanity check.
7789     assert(Range.contains(
7790            EvaluateConstantChrecAtConstant(this,
7791            ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
7792            "Linear scev computation is off in a bad way!");
7793     return SE.getConstant(ExitValue);
7794   } else if (isQuadratic()) {
7795     // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
7796     // quadratic equation to solve it.  To do this, we must frame our problem in
7797     // terms of figuring out when zero is crossed, instead of when
7798     // Range.getUpper() is crossed.
7799     SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
7800     NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
7801     const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
7802                                              // getNoWrapFlags(FlagNW)
7803                                              FlagAnyWrap);
7804
7805     // Next, solve the constructed addrec
7806     std::pair<const SCEV *,const SCEV *> Roots =
7807       SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
7808     const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
7809     const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
7810     if (R1) {
7811       // Pick the smallest positive root value.
7812       if (ConstantInt *CB =
7813           dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
7814                          R1->getValue(), R2->getValue()))) {
7815         if (!CB->getZExtValue())
7816           std::swap(R1, R2);   // R1 is the minimum root now.
7817
7818         // Make sure the root is not off by one.  The returned iteration should
7819         // not be in the range, but the previous one should be.  When solving
7820         // for "X*X < 5", for example, we should not return a root of 2.
7821         ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this,
7822                                                              R1->getValue(),
7823                                                              SE);
7824         if (Range.contains(R1Val->getValue())) {
7825           // The next iteration must be out of the range...
7826           ConstantInt *NextVal =
7827                 ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
7828
7829           R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
7830           if (!Range.contains(R1Val->getValue()))
7831             return SE.getConstant(NextVal);
7832           return SE.getCouldNotCompute();  // Something strange happened
7833         }
7834
7835         // If R1 was not in the range, then it is a good return value.  Make
7836         // sure that R1-1 WAS in the range though, just in case.
7837         ConstantInt *NextVal =
7838                ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
7839         R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
7840         if (Range.contains(R1Val->getValue()))
7841           return R1;
7842         return SE.getCouldNotCompute();  // Something strange happened
7843       }
7844     }
7845   }
7846
7847   return SE.getCouldNotCompute();
7848 }
7849
7850 namespace {
7851 struct FindUndefs {
7852   bool Found;
7853   FindUndefs() : Found(false) {}
7854
7855   bool follow(const SCEV *S) {
7856     if (const SCEVUnknown *C = dyn_cast<SCEVUnknown>(S)) {
7857       if (isa<UndefValue>(C->getValue()))
7858         Found = true;
7859     } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
7860       if (isa<UndefValue>(C->getValue()))
7861         Found = true;
7862     }
7863
7864     // Keep looking if we haven't found it yet.
7865     return !Found;
7866   }
7867   bool isDone() const {
7868     // Stop recursion if we have found an undef.
7869     return Found;
7870   }
7871 };
7872 }
7873
7874 // Return true when S contains at least an undef value.
7875 static inline bool
7876 containsUndefs(const SCEV *S) {
7877   FindUndefs F;
7878   SCEVTraversal<FindUndefs> ST(F);
7879   ST.visitAll(S);
7880
7881   return F.Found;
7882 }
7883
7884 namespace {
7885 // Collect all steps of SCEV expressions.
7886 struct SCEVCollectStrides {
7887   ScalarEvolution &SE;
7888   SmallVectorImpl<const SCEV *> &Strides;
7889
7890   SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
7891       : SE(SE), Strides(S) {}
7892
7893   bool follow(const SCEV *S) {
7894     if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
7895       Strides.push_back(AR->getStepRecurrence(SE));
7896     return true;
7897   }
7898   bool isDone() const { return false; }
7899 };
7900
7901 // Collect all SCEVUnknown and SCEVMulExpr expressions.
7902 struct SCEVCollectTerms {
7903   SmallVectorImpl<const SCEV *> &Terms;
7904
7905   SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T)
7906       : Terms(T) {}
7907
7908   bool follow(const SCEV *S) {
7909     if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) {
7910       if (!containsUndefs(S))
7911         Terms.push_back(S);
7912
7913       // Stop recursion: once we collected a term, do not walk its operands.
7914       return false;
7915     }
7916
7917     // Keep looking.
7918     return true;
7919   }
7920   bool isDone() const { return false; }
7921 };
7922 }
7923
7924 /// Find parametric terms in this SCEVAddRecExpr.
7925 void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
7926     SmallVectorImpl<const SCEV *> &Terms) {
7927   SmallVector<const SCEV *, 4> Strides;
7928   SCEVCollectStrides StrideCollector(*this, Strides);
7929   visitAll(Expr, StrideCollector);
7930
7931   DEBUG({
7932       dbgs() << "Strides:\n";
7933       for (const SCEV *S : Strides)
7934         dbgs() << *S << "\n";
7935     });
7936
7937   for (const SCEV *S : Strides) {
7938     SCEVCollectTerms TermCollector(Terms);
7939     visitAll(S, TermCollector);
7940   }
7941
7942   DEBUG({
7943       dbgs() << "Terms:\n";
7944       for (const SCEV *T : Terms)
7945         dbgs() << *T << "\n";
7946     });
7947 }
7948
7949 static bool findArrayDimensionsRec(ScalarEvolution &SE,
7950                                    SmallVectorImpl<const SCEV *> &Terms,
7951                                    SmallVectorImpl<const SCEV *> &Sizes) {
7952   int Last = Terms.size() - 1;
7953   const SCEV *Step = Terms[Last];
7954
7955   // End of recursion.
7956   if (Last == 0) {
7957     if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
7958       SmallVector<const SCEV *, 2> Qs;
7959       for (const SCEV *Op : M->operands())
7960         if (!isa<SCEVConstant>(Op))
7961           Qs.push_back(Op);
7962
7963       Step = SE.getMulExpr(Qs);
7964     }
7965
7966     Sizes.push_back(Step);
7967     return true;
7968   }
7969
7970   for (const SCEV *&Term : Terms) {
7971     // Normalize the terms before the next call to findArrayDimensionsRec.
7972     const SCEV *Q, *R;
7973     SCEVDivision::divide(SE, Term, Step, &Q, &R);
7974
7975     // Bail out when GCD does not evenly divide one of the terms.
7976     if (!R->isZero())
7977       return false;
7978
7979     Term = Q;
7980   }
7981
7982   // Remove all SCEVConstants.
7983   Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) {
7984                 return isa<SCEVConstant>(E);
7985               }),
7986               Terms.end());
7987
7988   if (Terms.size() > 0)
7989     if (!findArrayDimensionsRec(SE, Terms, Sizes))
7990       return false;
7991
7992   Sizes.push_back(Step);
7993   return true;
7994 }
7995
7996 namespace {
7997 struct FindParameter {
7998   bool FoundParameter;
7999   FindParameter() : FoundParameter(false) {}
8000
8001   bool follow(const SCEV *S) {
8002     if (isa<SCEVUnknown>(S)) {
8003       FoundParameter = true;
8004       // Stop recursion: we found a parameter.
8005       return false;
8006     }
8007     // Keep looking.
8008     return true;
8009   }
8010   bool isDone() const {
8011     // Stop recursion if we have found a parameter.
8012     return FoundParameter;
8013   }
8014 };
8015 }
8016
8017 // Returns true when S contains at least a SCEVUnknown parameter.
8018 static inline bool
8019 containsParameters(const SCEV *S) {
8020   FindParameter F;
8021   SCEVTraversal<FindParameter> ST(F);
8022   ST.visitAll(S);
8023
8024   return F.FoundParameter;
8025 }
8026
8027 // Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
8028 static inline bool
8029 containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
8030   for (const SCEV *T : Terms)
8031     if (containsParameters(T))
8032       return true;
8033   return false;
8034 }
8035
8036 // Return the number of product terms in S.
8037 static inline int numberOfTerms(const SCEV *S) {
8038   if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
8039     return Expr->getNumOperands();
8040   return 1;
8041 }
8042
8043 static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
8044   if (isa<SCEVConstant>(T))
8045     return nullptr;
8046
8047   if (isa<SCEVUnknown>(T))
8048     return T;
8049
8050   if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
8051     SmallVector<const SCEV *, 2> Factors;
8052     for (const SCEV *Op : M->operands())
8053       if (!isa<SCEVConstant>(Op))
8054         Factors.push_back(Op);
8055
8056     return SE.getMulExpr(Factors);
8057   }
8058
8059   return T;
8060 }
8061
8062 /// Return the size of an element read or written by Inst.
8063 const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
8064   Type *Ty;
8065   if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
8066     Ty = Store->getValueOperand()->getType();
8067   else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
8068     Ty = Load->getType();
8069   else
8070     return nullptr;
8071
8072   Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty));
8073   return getSizeOfExpr(ETy, Ty);
8074 }
8075
8076 /// Second step of delinearization: compute the array dimensions Sizes from the
8077 /// set of Terms extracted from the memory access function of this SCEVAddRec.
8078 void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
8079                                           SmallVectorImpl<const SCEV *> &Sizes,
8080                                           const SCEV *ElementSize) const {
8081
8082   if (Terms.size() < 1 || !ElementSize)
8083     return;
8084
8085   // Early return when Terms do not contain parameters: we do not delinearize
8086   // non parametric SCEVs.
8087   if (!containsParameters(Terms))
8088     return;
8089
8090   DEBUG({
8091       dbgs() << "Terms:\n";
8092       for (const SCEV *T : Terms)
8093         dbgs() << *T << "\n";
8094     });
8095
8096   // Remove duplicates.
8097   std::sort(Terms.begin(), Terms.end());
8098   Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
8099
8100   // Put larger terms first.
8101   std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
8102     return numberOfTerms(LHS) > numberOfTerms(RHS);
8103   });
8104
8105   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
8106
8107   // Divide all terms by the element size.
8108   for (const SCEV *&Term : Terms) {
8109     const SCEV *Q, *R;
8110     SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
8111     Term = Q;
8112   }
8113
8114   SmallVector<const SCEV *, 4> NewTerms;
8115
8116   // Remove constant factors.
8117   for (const SCEV *T : Terms)
8118     if (const SCEV *NewT = removeConstantFactors(SE, T))
8119       NewTerms.push_back(NewT);
8120
8121   DEBUG({
8122       dbgs() << "Terms after sorting:\n";
8123       for (const SCEV *T : NewTerms)
8124         dbgs() << *T << "\n";
8125     });
8126
8127   if (NewTerms.empty() ||
8128       !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
8129     Sizes.clear();
8130     return;
8131   }
8132
8133   // The last element to be pushed into Sizes is the size of an element.
8134   Sizes.push_back(ElementSize);
8135
8136   DEBUG({
8137       dbgs() << "Sizes:\n";
8138       for (const SCEV *S : Sizes)
8139         dbgs() << *S << "\n";
8140     });
8141 }
8142
8143 /// Third step of delinearization: compute the access functions for the
8144 /// Subscripts based on the dimensions in Sizes.
8145 void ScalarEvolution::computeAccessFunctions(
8146     const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
8147     SmallVectorImpl<const SCEV *> &Sizes) {
8148
8149   // Early exit in case this SCEV is not an affine multivariate function.
8150   if (Sizes.empty())
8151     return;
8152
8153   if (auto AR = dyn_cast<SCEVAddRecExpr>(Expr))
8154     if (!AR->isAffine())
8155       return;
8156
8157   const SCEV *Res = Expr;
8158   int Last = Sizes.size() - 1;
8159   for (int i = Last; i >= 0; i--) {
8160     const SCEV *Q, *R;
8161     SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R);
8162
8163     DEBUG({
8164         dbgs() << "Res: " << *Res << "\n";
8165         dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
8166         dbgs() << "Res divided by Sizes[i]:\n";
8167         dbgs() << "Quotient: " << *Q << "\n";
8168         dbgs() << "Remainder: " << *R << "\n";
8169       });
8170
8171     Res = Q;
8172
8173     // Do not record the last subscript corresponding to the size of elements in
8174     // the array.
8175     if (i == Last) {
8176
8177       // Bail out if the remainder is too complex.
8178       if (isa<SCEVAddRecExpr>(R)) {
8179         Subscripts.clear();
8180         Sizes.clear();
8181         return;
8182       }
8183
8184       continue;
8185     }
8186
8187     // Record the access function for the current subscript.
8188     Subscripts.push_back(R);
8189   }
8190
8191   // Also push in last position the remainder of the last division: it will be
8192   // the access function of the innermost dimension.
8193   Subscripts.push_back(Res);
8194
8195   std::reverse(Subscripts.begin(), Subscripts.end());
8196
8197   DEBUG({
8198       dbgs() << "Subscripts:\n";
8199       for (const SCEV *S : Subscripts)
8200         dbgs() << *S << "\n";
8201     });
8202 }
8203
/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
/// sizes of an array access.  The results are delivered through the Subscripts
/// and Sizes output parameters; the function itself returns nothing.  The
/// SCEV->delinearize algorithm computes the multiples of SCEV coefficients:
/// that is, it pattern-matches sub-expressions in the stride and base of a
/// SCEV, which corresponds to computing a GCD (greatest common divisor) of
/// base and stride.  When SCEV->delinearize fails, Subscripts receives no
/// entries.
8211 ///
8212 /// For example: when analyzing the memory access A[i][j][k] in this loop nest
8213 ///
8214 ///  void foo(long n, long m, long o, double A[n][m][o]) {
8215 ///
8216 ///    for (long i = 0; i < n; i++)
8217 ///      for (long j = 0; j < m; j++)
8218 ///        for (long k = 0; k < o; k++)
8219 ///          A[i][j][k] = 1.0;
8220 ///  }
8221 ///
8222 /// the delinearization input is the following AddRec SCEV:
8223 ///
8224 ///  AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
8225 ///
8226 /// From this SCEV, we are able to say that the base offset of the access is %A
8227 /// because it appears as an offset that does not divide any of the strides in
8228 /// the loops:
8229 ///
8230 ///  CHECK: Base offset: %A
8231 ///
8232 /// and then SCEV->delinearize determines the size of some of the dimensions of
8233 /// the array as these are the multiples by which the strides are happening:
8234 ///
8235 ///  CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
8236 ///
/// Note that the outermost dimension remains UnknownSize because there are
/// no strides that would help identify the size of that dimension: when
/// the array has been statically allocated, one could compute the size of that
/// dimension by dividing the overall size of the array by the size of the known
/// dimensions: %m * %o * 8.
8242 ///
8243 /// Finally delinearize provides the access functions for the array reference
8244 /// that does correspond to A[i][j][k] of the above C testcase:
8245 ///
8246 ///  CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
8247 ///
8248 /// The testcases are checking the output of a function pass:
8249 /// DelinearizationPass that walks through all loads and stores of a function
8250 /// asking for the SCEV of the memory access with respect to all enclosing
8251 /// loops, calling SCEV->delinearize on that and printing the results.
8252
8253 void ScalarEvolution::delinearize(const SCEV *Expr,
8254                                  SmallVectorImpl<const SCEV *> &Subscripts,
8255                                  SmallVectorImpl<const SCEV *> &Sizes,
8256                                  const SCEV *ElementSize) {
8257   // First step: collect parametric terms.
8258   SmallVector<const SCEV *, 4> Terms;
8259   collectParametricTerms(Expr, Terms);
8260
8261   if (Terms.empty())
8262     return;
8263
8264   // Second step: find subscript sizes.
8265   findArrayDimensions(Terms, Sizes, ElementSize);
8266
8267   if (Sizes.empty())
8268     return;
8269
8270   // Third step: compute the access functions for each subscript.
8271   computeAccessFunctions(Expr, Subscripts, Sizes);
8272
8273   if (Subscripts.empty())
8274     return;
8275
8276   DEBUG({
8277       dbgs() << "succeeded to delinearize " << *Expr << "\n";
8278       dbgs() << "ArrayDecl[UnknownSize]";
8279       for (const SCEV *S : Sizes)
8280         dbgs() << "[" << *S << "]";
8281
8282       dbgs() << "\nArrayRef";
8283       for (const SCEV *S : Subscripts)
8284         dbgs() << "[" << *S << "]";
8285       dbgs() << "\n";
8286     });
8287 }
8288
8289 //===----------------------------------------------------------------------===//
8290 //                   SCEVCallbackVH Class Implementation
8291 //===----------------------------------------------------------------------===//
8292
8293 void ScalarEvolution::SCEVCallbackVH::deleted() {
8294   assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
8295   if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
8296     SE->ConstantEvolutionLoopExitValue.erase(PN);
8297   SE->ValueExprMap.erase(getValPtr());
8298   // this now dangles!
8299 }
8300
8301 void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
8302   assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
8303
8304   // Forget all the expressions associated with users of the old value,
8305   // so that future queries will recompute the expressions using the new
8306   // value.
8307   Value *Old = getValPtr();
8308   SmallVector<User *, 16> Worklist(Old->user_begin(), Old->user_end());
8309   SmallPtrSet<User *, 8> Visited;
8310   while (!Worklist.empty()) {
8311     User *U = Worklist.pop_back_val();
8312     // Deleting the Old value will cause this to dangle. Postpone
8313     // that until everything else is done.
8314     if (U == Old)
8315       continue;
8316     if (!Visited.insert(U).second)
8317       continue;
8318     if (PHINode *PN = dyn_cast<PHINode>(U))
8319       SE->ConstantEvolutionLoopExitValue.erase(PN);
8320     SE->ValueExprMap.erase(U);
8321     Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
8322   }
8323   // Delete the Old value.
8324   if (PHINode *PN = dyn_cast<PHINode>(Old))
8325     SE->ConstantEvolutionLoopExitValue.erase(PN);
8326   SE->ValueExprMap.erase(Old);
8327   // this now dangles!
8328 }
8329
/// Construct a callback handle tracking V.  The ScalarEvolution pointer is
/// stored in SE; deleted() and allUsesReplacedWith() use it to reach the
/// caches they have to update.
ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
  : CallbackVH(V), SE(se) {}
8332
8333 //===----------------------------------------------------------------------===//
8334 //                   ScalarEvolution Class Implementation
8335 //===----------------------------------------------------------------------===//
8336
/// Build a ScalarEvolution for function F, initializing the F, TLI, AC, DT
/// and LI members from the arguments.  A fresh SCEVCouldNotCompute object is
/// allocated here, and the chain of SCEVUnknown nodes (FirstUnknown) starts
/// out empty.
// NOTE(review): the 64 passed to ValuesAtScopes, LoopDispositions and
// BlockDispositions is presumably an initial size hint for those maps --
// confirm against the member declarations.
ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
                                 AssumptionCache &AC, DominatorTree &DT,
                                 LoopInfo &LI)
    : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
      CouldNotCompute(new SCEVCouldNotCompute()),
      WalkingBEDominatingConds(false), ValuesAtScopes(64), LoopDispositions(64),
      BlockDispositions(64), FirstUnknown(nullptr) {}
8344
/// Move constructor: takes over the caches, the allocator and the chain of
/// SCEVUnknown nodes of Arg.  WalkingBEDominatingConds is not copied from Arg;
/// it always starts out false.
ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
    : F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI),
      CouldNotCompute(std::move(Arg.CouldNotCompute)),
      ValueExprMap(std::move(Arg.ValueExprMap)),
      WalkingBEDominatingConds(false),
      BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
      ConstantEvolutionLoopExitValue(
          std::move(Arg.ConstantEvolutionLoopExitValue)),
      ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
      LoopDispositions(std::move(Arg.LoopDispositions)),
      BlockDispositions(std::move(Arg.BlockDispositions)),
      UnsignedRanges(std::move(Arg.UnsignedRanges)),
      SignedRanges(std::move(Arg.SignedRanges)),
      UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
      SCEVAllocator(std::move(Arg.SCEVAllocator)),
      FirstUnknown(Arg.FirstUnknown) {
  // Detach the SCEVUnknown chain from Arg so that its destructor, which walks
  // the chain starting at FirstUnknown, does not destroy the nodes now owned
  // by this object.
  Arg.FirstUnknown = nullptr;
}
8363
8364 ScalarEvolution::~ScalarEvolution() {
8365   // Iterate through all the SCEVUnknown instances and call their
8366   // destructors, so that they release their references to their values.
8367   for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
8368     U->~SCEVUnknown();
8369   FirstUnknown = nullptr;
8370
8371   ValueExprMap.clear();
8372
8373   // Free any extra memory created for ExitNotTakenInfo in the unlikely event
8374   // that a loop had multiple computable exits.
8375   for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
8376          BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
8377        I != E; ++I) {
8378     I->second.clear();
8379   }
8380
8381   assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
8382   assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
8383 }
8384
8385 bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
8386   return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
8387 }
8388
8389 static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
8390                           const Loop *L) {
8391   // Print all inner loops first
8392   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
8393     PrintLoopInfo(OS, SE, *I);
8394
8395   OS << "Loop ";
8396   L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
8397   OS << ": ";
8398
8399   SmallVector<BasicBlock *, 8> ExitBlocks;
8400   L->getExitBlocks(ExitBlocks);
8401   if (ExitBlocks.size() != 1)
8402     OS << "<multiple exits> ";
8403
8404   if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
8405     OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
8406   } else {
8407     OS << "Unpredictable backedge-taken count. ";
8408   }
8409
8410   OS << "\n"
8411         "Loop ";
8412   L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
8413   OS << ": ";
8414
8415   if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
8416     OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
8417   } else {
8418     OS << "Unpredictable max backedge-taken count. ";
8419   }
8420
8421   OS << "\n";
8422 }
8423
8424 void ScalarEvolution::print(raw_ostream &OS) const {
8425   // ScalarEvolution's implementation of the print method is to print
8426   // out SCEV values of all instructions that are interesting. Doing
8427   // this potentially causes it to create new SCEV objects though,
8428   // which technically conflicts with the const qualifier. This isn't
8429   // observable from outside the class though, so casting away the
8430   // const isn't dangerous.
8431   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
8432
8433   OS << "Classifying expressions for: ";
8434   F.printAsOperand(OS, /*PrintType=*/false);
8435   OS << "\n";
8436   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
8437     if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
8438       OS << *I << '\n';
8439       OS << "  -->  ";
8440       const SCEV *SV = SE.getSCEV(&*I);
8441       SV->print(OS);
8442       if (!isa<SCEVCouldNotCompute>(SV)) {
8443         OS << " U: ";
8444         SE.getUnsignedRange(SV).print(OS);
8445         OS << " S: ";
8446         SE.getSignedRange(SV).print(OS);
8447       }
8448
8449       const Loop *L = LI.getLoopFor((*I).getParent());
8450
8451       const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
8452       if (AtUse != SV) {
8453         OS << "  -->  ";
8454         AtUse->print(OS);
8455         if (!isa<SCEVCouldNotCompute>(AtUse)) {
8456           OS << " U: ";
8457           SE.getUnsignedRange(AtUse).print(OS);
8458           OS << " S: ";
8459           SE.getSignedRange(AtUse).print(OS);
8460         }
8461       }
8462
8463       if (L) {
8464         OS << "\t\t" "Exits: ";
8465         const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
8466         if (!SE.isLoopInvariant(ExitValue, L)) {
8467           OS << "<<Unknown>>";
8468         } else {
8469           OS << *ExitValue;
8470         }
8471       }
8472
8473       OS << "\n";
8474     }
8475
8476   OS << "Determining loop execution counts for: ";
8477   F.printAsOperand(OS, /*PrintType=*/false);
8478   OS << "\n";
8479   for (LoopInfo::iterator I = LI.begin(), E = LI.end(); I != E; ++I)
8480     PrintLoopInfo(OS, &SE, *I);
8481 }
8482
8483 ScalarEvolution::LoopDisposition
8484 ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
8485   auto &Values = LoopDispositions[S];
8486   for (auto &V : Values) {
8487     if (V.getPointer() == L)
8488       return V.getInt();
8489   }
8490   Values.emplace_back(L, LoopVariant);
8491   LoopDisposition D = computeLoopDisposition(S, L);
8492   auto &Values2 = LoopDispositions[S];
8493   for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
8494     if (V.getPointer() == L) {
8495       V.setInt(D);
8496       break;
8497     }
8498   }
8499   return D;
8500 }
8501
8502 ScalarEvolution::LoopDisposition
8503 ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
8504   switch (static_cast<SCEVTypes>(S->getSCEVType())) {
8505   case scConstant:
8506     return LoopInvariant;
8507   case scTruncate:
8508   case scZeroExtend:
8509   case scSignExtend:
8510     return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
8511   case scAddRecExpr: {
8512     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
8513
8514     // If L is the addrec's loop, it's computable.
8515     if (AR->getLoop() == L)
8516       return LoopComputable;
8517
8518     // Add recurrences are never invariant in the function-body (null loop).
8519     if (!L)
8520       return LoopVariant;
8521
8522     // This recurrence is variant w.r.t. L if L contains AR's loop.
8523     if (L->contains(AR->getLoop()))
8524       return LoopVariant;
8525
8526     // This recurrence is invariant w.r.t. L if AR's loop contains L.
8527     if (AR->getLoop()->contains(L))
8528       return LoopInvariant;
8529
8530     // This recurrence is variant w.r.t. L if any of its operands
8531     // are variant.
8532     for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
8533          I != E; ++I)
8534       if (!isLoopInvariant(*I, L))
8535         return LoopVariant;
8536
8537     // Otherwise it's loop-invariant.
8538     return LoopInvariant;
8539   }
8540   case scAddExpr:
8541   case scMulExpr:
8542   case scUMaxExpr:
8543   case scSMaxExpr: {
8544     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
8545     bool HasVarying = false;
8546     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
8547          I != E; ++I) {
8548       LoopDisposition D = getLoopDisposition(*I, L);
8549       if (D == LoopVariant)
8550         return LoopVariant;
8551       if (D == LoopComputable)
8552         HasVarying = true;
8553     }
8554     return HasVarying ? LoopComputable : LoopInvariant;
8555   }
8556   case scUDivExpr: {
8557     const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
8558     LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
8559     if (LD == LoopVariant)
8560       return LoopVariant;
8561     LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
8562     if (RD == LoopVariant)
8563       return LoopVariant;
8564     return (LD == LoopInvariant && RD == LoopInvariant) ?
8565            LoopInvariant : LoopComputable;
8566   }
8567   case scUnknown:
8568     // All non-instruction values are loop invariant.  All instructions are loop
8569     // invariant if they are not contained in the specified loop.
8570     // Instructions are never considered invariant in the function body
8571     // (null loop) because they are defined within the "loop".
8572     if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
8573       return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
8574     return LoopInvariant;
8575   case scCouldNotCompute:
8576     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
8577   }
8578   llvm_unreachable("Unknown SCEV kind!");
8579 }
8580
8581 bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
8582   return getLoopDisposition(S, L) == LoopInvariant;
8583 }
8584
8585 bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
8586   return getLoopDisposition(S, L) == LoopComputable;
8587 }
8588
8589 ScalarEvolution::BlockDisposition
8590 ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
8591   auto &Values = BlockDispositions[S];
8592   for (auto &V : Values) {
8593     if (V.getPointer() == BB)
8594       return V.getInt();
8595   }
8596   Values.emplace_back(BB, DoesNotDominateBlock);
8597   BlockDisposition D = computeBlockDisposition(S, BB);
8598   auto &Values2 = BlockDispositions[S];
8599   for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
8600     if (V.getPointer() == BB) {
8601       V.setInt(D);
8602       break;
8603     }
8604   }
8605   return D;
8606 }
8607
8608 ScalarEvolution::BlockDisposition
8609 ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
8610   switch (static_cast<SCEVTypes>(S->getSCEVType())) {
8611   case scConstant:
8612     return ProperlyDominatesBlock;
8613   case scTruncate:
8614   case scZeroExtend:
8615   case scSignExtend:
8616     return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
8617   case scAddRecExpr: {
8618     // This uses a "dominates" query instead of "properly dominates" query
8619     // to test for proper dominance too, because the instruction which
8620     // produces the addrec's value is a PHI, and a PHI effectively properly
8621     // dominates its entire containing block.
8622     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
8623     if (!DT.dominates(AR->getLoop()->getHeader(), BB))
8624       return DoesNotDominateBlock;
8625   }
8626   // FALL THROUGH into SCEVNAryExpr handling.
8627   case scAddExpr:
8628   case scMulExpr:
8629   case scUMaxExpr:
8630   case scSMaxExpr: {
8631     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
8632     bool Proper = true;
8633     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
8634          I != E; ++I) {
8635       BlockDisposition D = getBlockDisposition(*I, BB);
8636       if (D == DoesNotDominateBlock)
8637         return DoesNotDominateBlock;
8638       if (D == DominatesBlock)
8639         Proper = false;
8640     }
8641     return Proper ? ProperlyDominatesBlock : DominatesBlock;
8642   }
8643   case scUDivExpr: {
8644     const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
8645     const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
8646     BlockDisposition LD = getBlockDisposition(LHS, BB);
8647     if (LD == DoesNotDominateBlock)
8648       return DoesNotDominateBlock;
8649     BlockDisposition RD = getBlockDisposition(RHS, BB);
8650     if (RD == DoesNotDominateBlock)
8651       return DoesNotDominateBlock;
8652     return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
8653       ProperlyDominatesBlock : DominatesBlock;
8654   }
8655   case scUnknown:
8656     if (Instruction *I =
8657           dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
8658       if (I->getParent() == BB)
8659         return DominatesBlock;
8660       if (DT.properlyDominates(I->getParent(), BB))
8661         return ProperlyDominatesBlock;
8662       return DoesNotDominateBlock;
8663     }
8664     return ProperlyDominatesBlock;
8665   case scCouldNotCompute:
8666     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
8667   }
8668   llvm_unreachable("Unknown SCEV kind!");
8669 }
8670
8671 bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
8672   return getBlockDisposition(S, BB) >= DominatesBlock;
8673 }
8674
8675 bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
8676   return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
8677 }
8678
8679 namespace {
8680 // Search for a SCEV expression node within an expression tree.
8681 // Implements SCEVTraversal::Visitor.
8682 struct SCEVSearch {
8683   const SCEV *Node;
8684   bool IsFound;
8685
8686   SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
8687
8688   bool follow(const SCEV *S) {
8689     IsFound |= (S == Node);
8690     return !IsFound;
8691   }
8692   bool isDone() const { return IsFound; }
8693 };
8694 }
8695
8696 bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
8697   SCEVSearch Search(Op);
8698   visitAll(S, Search);
8699   return Search.IsFound;
8700 }
8701
8702 void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
8703   ValuesAtScopes.erase(S);
8704   LoopDispositions.erase(S);
8705   BlockDispositions.erase(S);
8706   UnsignedRanges.erase(S);
8707   SignedRanges.erase(S);
8708
8709   for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
8710          BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
8711     BackedgeTakenInfo &BEInfo = I->second;
8712     if (BEInfo.hasOperand(S, this)) {
8713       BEInfo.clear();
8714       BackedgeTakenCounts.erase(I++);
8715     }
8716     else
8717       ++I;
8718   }
8719 }
8720
8721 typedef DenseMap<const Loop *, std::string> VerifyMap;
8722
8723 /// replaceSubString - Replaces all occurrences of From in Str with To.
8724 static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
8725   size_t Pos = 0;
8726   while ((Pos = Str.find(From, Pos)) != std::string::npos) {
8727     Str.replace(Pos, From.size(), To.data(), To.size());
8728     Pos += To.size();
8729   }
8730 }
8731
8732 /// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
8733 static void
8734 getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
8735   for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) {
8736     getLoopBackedgeTakenCounts(*I, Map, SE); // recurse.
8737
8738     std::string &S = Map[L];
8739     if (S.empty()) {
8740       raw_string_ostream OS(S);
8741       SE.getBackedgeTakenCount(L)->print(OS);
8742
8743       // false and 0 are semantically equivalent. This can happen in dead loops.
8744       replaceSubString(OS.str(), "false", "0");
8745       // Remove wrap flags, their use in SCEV is highly fragile.
8746       // FIXME: Remove this when SCEV gets smarter about them.
8747       replaceSubString(OS.str(), "<nw>", "");
8748       replaceSubString(OS.str(), "<nsw>", "");
8749       replaceSubString(OS.str(), "<nuw>", "");
8750     }
8751   }
8752 }
8753
8754 void ScalarEvolution::verify() const {
8755   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
8756
8757   // Gather stringified backedge taken counts for all loops using SCEV's caches.
8758   // FIXME: It would be much better to store actual values instead of strings,
8759   //        but SCEV pointers will change if we drop the caches.
8760   VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
8761   for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
8762     getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
8763
8764   // Gather stringified backedge taken counts for all loops using a fresh
8765   // ScalarEvolution object.
8766   ScalarEvolution SE2(F, TLI, AC, DT, LI);
8767   for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
8768     getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2);
8769
8770   // Now compare whether they're the same with and without caches. This allows
8771   // verifying that no pass changed the cache.
8772   assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() &&
8773          "New loops suddenly appeared!");
8774
8775   for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(),
8776                            OldE = BackedgeDumpsOld.end(),
8777                            NewI = BackedgeDumpsNew.begin();
8778        OldI != OldE; ++OldI, ++NewI) {
8779     assert(OldI->first == NewI->first && "Loop order changed!");
8780
8781     // Compare the stringified SCEVs. We don't care if undef backedgetaken count
8782     // changes.
8783     // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This
8784     // means that a pass is buggy or SCEV has to learn a new pattern but is
8785     // usually not harmful.
8786     if (OldI->second != NewI->second &&
8787         OldI->second.find("undef") == std::string::npos &&
8788         NewI->second.find("undef") == std::string::npos &&
8789         OldI->second != "***COULDNOTCOMPUTE***" &&
8790         NewI->second != "***COULDNOTCOMPUTE***") {
8791       dbgs() << "SCEVValidator: SCEV for loop '"
8792              << OldI->first->getHeader()->getName()
8793              << "' changed from '" << OldI->second
8794              << "' to '" << NewI->second << "'!\n";
8795       std::abort();
8796     }
8797   }
8798
8799   // TODO: Verify more things.
8800 }
8801
8802 char ScalarEvolutionAnalysis::PassID;
8803
8804 ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
8805                                              AnalysisManager<Function> *AM) {
8806   return ScalarEvolution(F, AM->getResult<TargetLibraryAnalysis>(F),
8807                          AM->getResult<AssumptionAnalysis>(F),
8808                          AM->getResult<DominatorTreeAnalysis>(F),
8809                          AM->getResult<LoopAnalysis>(F));
8810 }
8811
8812 PreservedAnalyses
8813 ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> *AM) {
8814   AM->getResult<ScalarEvolutionAnalysis>(F).print(OS);
8815   return PreservedAnalyses::all();
8816 }
8817
8818 INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
8819                       "Scalar Evolution Analysis", false, true)
8820 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
8821 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
8822 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
8823 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
8824 INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
8825                     "Scalar Evolution Analysis", false, true)
8826 char ScalarEvolutionWrapperPass::ID = 0;
8827
8828 ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
8829   initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
8830 }
8831
8832 bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
8833   SE.reset(new ScalarEvolution(
8834       F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
8835       getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
8836       getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
8837       getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
8838   return false;
8839 }
8840
8841 void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }
8842
8843 void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
8844   SE->print(OS);
8845 }
8846
8847 void ScalarEvolutionWrapperPass::verifyAnalysis() const {
8848   if (!VerifySCEV)
8849     return;
8850
8851   SE->verify();
8852 }
8853
8854 void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
8855   AU.setPreservesAll();
8856   AU.addRequiredTransitive<AssumptionCacheTracker>();
8857   AU.addRequiredTransitive<LoopInfoWrapperPass>();
8858   AU.addRequiredTransitive<DominatorTreeWrapperPass>();
8859   AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
8860 }