[AA] Use CallSite cast idiom. No functionality change.

[oota-llvm.git] / lib / Analysis / ScalarEvolutionExpander.cpp
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp

index b778c6e3467a909e4dfd8d4fcaa7c6426f5305fd..fee2a2d0d1830ffd2422a6e9dab387e22ea224dc 100644 (file)
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -16,15 +16,20 @@
  #include "llvm/Analysis/ScalarEvolutionExpander.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Dominators.h"
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
  #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
  
  using namespace llvm;
+using namespace PatternMatch;
  
  /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
  /// reusing an existing cast if a suitable one exists, moving an existing
@@ -44,12 +49,10 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
    // not allowed to move it.
    BasicBlock::iterator BIP = Builder.GetInsertPoint();
  
-  Instruction *Ret = NULL;
+  Instruction *Ret = nullptr;
  
    // Check to see if there is already a cast!
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
-       UI != E; ++UI) {
-    User *U = *UI;
+  for (User *U : V->users())
      if (U->getType() == Ty)
        if (CastInst *CI = dyn_cast<CastInst>(U))
          if (CI->getOpcode() == Op) {
@@ -69,7 +72,6 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
            Ret = CI;
            break;
          }
-  }
  
    // Create a new cast.
    if (!Ret)
@@ -206,11 +208,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
  /// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
  /// unnecessary; in its place, just signed-divide Ops[i] by the scale and
  /// check to see if the divide was folded.
-static bool FactorOutConstant(const SCEV *&S,
-                              const SCEV *&Remainder,
-                              const SCEV *Factor,
-                              ScalarEvolution &SE,
-                              const DataLayout *DL) {
+static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
+                              const SCEV *Factor, ScalarEvolution &SE,
+                              const DataLayout &DL) {
    // Everything is divisible by one.
    if (Factor->isOne())
      return true;
@@ -250,35 +250,17 @@ static bool FactorOutConstant(const SCEV *&S,
    // In a Mul, check if there is a constant operand which is a multiple
    // of the given factor.
    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
-    if (DL) {
-      // With DataLayout, the size is known. Check if there is a constant
-      // operand which is a multiple of the given factor. If so, we can
-      // factor it.
-      const SCEVConstant *FC = cast<SCEVConstant>(Factor);
-      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
-        if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
-          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
-          NewMulOps[0] =
-            SE.getConstant(C->getValue()->getValue().sdiv(
-                                                   FC->getValue()->getValue()));
-          S = SE.getMulExpr(NewMulOps);
-          return true;
-        }
-    } else {
-      // Without DataLayout, check if Factor can be factored out of any of the
-      // Mul's operands. If so, we can just remove it.
-      for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
-        const SCEV *SOp = M->getOperand(i);
-        const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
-        if (FactorOutConstant(SOp, Remainder, Factor, SE, DL) &&
-            Remainder->isZero()) {
-          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
-          NewMulOps[i] = SOp;
-          S = SE.getMulExpr(NewMulOps);
-          return true;
-        }
+    // Size is known, check if there is a constant operand which is a multiple
+    // of the given factor. If so, we can factor it.
+    const SCEVConstant *FC = cast<SCEVConstant>(Factor);
+    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+      if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+        SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+        NewMulOps[0] = SE.getConstant(
+            C->getValue()->getValue().sdiv(FC->getValue()->getValue()));
+        S = SE.getMulExpr(NewMulOps);
+        return true;
        }
-    }
    }
  
    // In an AddRec, check if both start and step are divisible.
@@ -395,7 +377,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
                                      PointerType *PTy,
                                      Type *Ty,
                                      Value *V) {
-  Type *ElTy = PTy->getElementType();
+  Type *OriginalElTy = PTy->getElementType();
+  Type *ElTy = OriginalElTy;
    SmallVector<Value *, 4> GepIndices;
    SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
    bool AnyNonZeroIndices = false;
@@ -404,9 +387,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
    // without the other.
    SplitAddRecs(Ops, Ty, SE);
  
-  Type *IntPtrTy = SE.DL
-                 ? SE.DL->getIntPtrType(PTy)
-                 : Type::getInt64Ty(PTy->getContext());
+  Type *IntPtrTy = DL.getIntPtrType(PTy);
  
    // Descend down the pointer's type and attempt to convert the other
    // operands into GEP indices, at each level. The first index in a GEP
@@ -424,7 +405,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
          for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
            const SCEV *Op = Ops[i];
            const SCEV *Remainder = SE.getConstant(Ty, 0);
-          if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.DL)) {
+          if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
              // Op now has ElSize factored out.
              ScaledOps.push_back(Op);
              if (!Remainder->isZero())
@@ -458,43 +439,25 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
        bool FoundFieldNo = false;
        // An empty struct has no fields.
        if (STy->getNumElements() == 0) break;
-      if (SE.DL) {
-        // With DataLayout, field offsets are known. See if a constant offset
-        // falls within any of the struct fields.
-        if (Ops.empty()) break;
-        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
-          if (SE.getTypeSizeInBits(C->getType()) <= 64) {
-            const StructLayout &SL = *SE.DL->getStructLayout(STy);
-            uint64_t FullOffset = C->getValue()->getZExtValue();
-            if (FullOffset < SL.getSizeInBytes()) {
-              unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
-              GepIndices.push_back(
-                  ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
-              ElTy = STy->getTypeAtIndex(ElIdx);
-              Ops[0] =
+      // Field offsets are known. See if a constant offset falls within any of
+      // the struct fields.
+      if (Ops.empty())
+        break;
+      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+        if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+          const StructLayout &SL = *DL.getStructLayout(STy);
+          uint64_t FullOffset = C->getValue()->getZExtValue();
+          if (FullOffset < SL.getSizeInBytes()) {
+            unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+            GepIndices.push_back(
+                ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+            ElTy = STy->getTypeAtIndex(ElIdx);
+            Ops[0] =
                  SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
-              AnyNonZeroIndices = true;
-              FoundFieldNo = true;
-            }
-          }
-      } else {
-        // Without DataLayout, just check for an offsetof expression of the
-        // appropriate struct type.
-        for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-          if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
-            Type *CTy;
-            Constant *FieldNo;
-            if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
-              GepIndices.push_back(FieldNo);
-              ElTy =
-                STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue());
-              Ops[i] = SE.getConstant(Ty, 0);
-              AnyNonZeroIndices = true;
-              FoundFieldNo = true;
-              break;
-            }
+            AnyNonZeroIndices = true;
+            FoundFieldNo = true;
            }
-      }
+        }
        // If no struct field offsets were found, tentatively assume that
        // field zero was selected (since the zero offset would obviously
        // be folded away).
@@ -528,7 +491,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
      // Fold a GEP with constant operands.
      if (Constant *CLHS = dyn_cast<Constant>(V))
        if (Constant *CRHS = dyn_cast<Constant>(Idx))
-        return ConstantExpr::getGetElementPtr(CLHS, CRHS);
+        return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()),
+                                              CLHS, CRHS);
  
      // Do a quick scan to see if we have this GEP nearby.  If so, reuse it.
      unsigned ScanLimit = 6;
@@ -563,7 +527,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
      }
  
      // Emit a GEP.
-    Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
+    Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
      rememberInstruction(GEP);
  
      return GEP;
@@ -599,7 +563,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
    Value *Casted = V;
    if (V->getType() != PTy)
      Casted = InsertNoopCastOfTo(Casted, PTy);
-  Value *GEP = Builder.CreateGEP(Casted,
+  Value *GEP = Builder.CreateGEP(OriginalElTy, Casted,
                                   GepIndices,
                                   "scevgep");
    Ops.push_back(SE.getUnknown(GEP));
@@ -630,21 +594,21 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
  const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
    // Test whether we've already computed the most relevant loop for this SCEV.
    std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
-    RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
+    RelevantLoops.insert(std::make_pair(S, nullptr));
    if (!Pair.second)
      return Pair.first->second;
  
    if (isa<SCEVConstant>(S))
      // A constant has no relevant loops.
-    return 0;
+    return nullptr;
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
        return Pair.first->second = SE.LI->getLoopFor(I->getParent());
      // A non-instruction has no relevant loops.
-    return 0;
+    return nullptr;
    }
    if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
-    const Loop *L = 0;
+    const Loop *L = nullptr;
      if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
        L = AR->getLoop();
      for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
@@ -719,7 +683,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  
    // Emit instructions to add all the operands. Hoist as much as possible
    // out of loops, and form meaningful getelementptrs where possible.
-  Value *Sum = 0;
+  Value *Sum = nullptr;
    for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
         I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
      const Loop *CurLoop = I->first;
@@ -787,27 +751,32 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  
    // Emit instructions to mul all the operands. Hoist as much as possible
    // out of loops.
-  Value *Prod = 0;
+  Value *Prod = nullptr;
    for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
-       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ++I) {
      const SCEV *Op = I->second;
      if (!Prod) {
        // This is the first operand. Just expand it.
        Prod = expand(Op);
-      ++I;
      } else if (Op->isAllOnesValue()) {
        // Instead of doing a multiply by negative one, just do a negate.
        Prod = InsertNoopCastOfTo(Prod, Ty);
        Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
-      ++I;
      } else {
        // A simple mul.
        Value *W = expandCodeFor(Op, Ty);
        Prod = InsertNoopCastOfTo(Prod, Ty);
        // Canonicalize a constant to the RHS.
        if (isa<Constant>(Prod)) std::swap(Prod, W);
-      Prod = InsertBinop(Instruction::Mul, Prod, W);
-      ++I;
+      const APInt *RHS;
+      if (match(W, m_Power2(RHS))) {
+        // Canonicalize Prod*(1<<C) to Prod<<C.
+        assert(!Ty->isVectorTy() && "vector types are not SCEVable");
+        Prod = InsertBinop(Instruction::Shl, Prod,
+                           ConstantInt::get(Ty, RHS->logBase2()));
+      } else {
+        Prod = InsertBinop(Instruction::Mul, Prod, W);
+      }
      }
    }
  
@@ -895,18 +864,18 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
                                             Instruction *InsertPos,
                                             bool allowScale) {
    if (IncV == InsertPos)
-    return NULL;
+    return nullptr;
  
    switch (IncV->getOpcode()) {
    default:
-    return NULL;
+    return nullptr;
    // Check for a simple Add/Sub or GEP of a loop invariant step.
    case Instruction::Add:
    case Instruction::Sub: {
      Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
      if (!OInst || SE.DT->dominates(OInst, InsertPos))
        return dyn_cast<Instruction>(IncV->getOperand(0));
-    return NULL;
+    return nullptr;
    }
    case Instruction::BitCast:
      return dyn_cast<Instruction>(IncV->getOperand(0));
@@ -917,7 +886,7 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
          continue;
        if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
          if (!SE.DT->dominates(OInst, InsertPos))
-          return NULL;
+          return nullptr;
        }
        if (allowScale) {
          // allow any kind of GEP as long as it can be hoisted.
@@ -928,11 +897,11 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
        // have 2 operands. i1* is used by the expander to represent an
        // address-size element.
        if (IncV->getNumOperands() != 2)
-        return NULL;
+        return nullptr;
        unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
        if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
            && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
-        return NULL;
+        return nullptr;
        break;
      }
      return dyn_cast<Instruction>(IncV->getOperand(0));
@@ -1065,6 +1034,34 @@ static bool canBeCheaplyTransformed(ScalarEvolution &SE,
    return false;
  }
  
+static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+  if (!isa<IntegerType>(AR->getType()))
+    return false; // Only integer-typed addrecs are analyzed.
+  // Compare sext(AR + Step) against sext(AR) + sext(Step) at twice the width.
+  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+  const SCEV *Step = AR->getStepRecurrence(SE);
+  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
+                                            SE.getSignExtendExpr(AR, WideTy));
+  const SCEV *ExtendAfterOp =
+    SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+  return ExtendAfterOp == OpAfterExtend; // Same SCEV => AR + Step is NSW.
+}
+
+static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+  if (!isa<IntegerType>(AR->getType()))
+    return false; // Only integer-typed addrecs are analyzed.
+  // Compare zext(AR + Step) against zext(AR) + zext(Step) at twice the width.
+  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+  const SCEV *Step = AR->getStepRecurrence(SE);
+  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
+                                            SE.getZeroExtendExpr(AR, WideTy));
+  const SCEV *ExtendAfterOp =
+    SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+  return ExtendAfterOp == OpAfterExtend; // Same SCEV => AR + Step is NUW.
+}
+
  /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
  /// the base addrec, which is the addrec without any non-loop-dominating
  /// values, and return the PHI.
@@ -1080,9 +1077,9 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
    // Reuse a previously-inserted PHI, if present.
    BasicBlock *LatchBlock = L->getLoopLatch();
    if (LatchBlock) {
-    PHINode *AddRecPhiMatch = 0;
-    Instruction *IncV = 0;
-    TruncTy = 0;
+    PHINode *AddRecPhiMatch = nullptr;
+    Instruction *IncV = nullptr;
+    TruncTy = nullptr;
      InvertStep = false;
  
      // Only try partially matching scevs that need truncation and/or
@@ -1123,7 +1120,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
        // Stop if we have found an exact match SCEV.
        if (IsMatchingSCEV) {
          IncV = TempIncV;
-        TruncTy = 0;
+        TruncTy = nullptr;
          InvertStep = false;
          AddRecPhiMatch = PN;
          break;
@@ -1190,6 +1187,12 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
    // Expand the step somewhere that dominates the loop header.
    Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
  
+  // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
+  // we actually do emit an addition.  It does not apply if we emit a
+  // subtraction.
+  bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
+  bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
+
    // Create the PHI.
    BasicBlock *Header = L->getHeader();
    Builder.SetInsertPoint(Header, Header->begin());
@@ -1215,10 +1218,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
        IVIncInsertPos : Pred->getTerminator();
      Builder.SetInsertPoint(InsertPos);
      Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+
      if (isa<OverflowingBinaryOperator>(IncV)) {
-      if (Normalized->getNoWrapFlags(SCEV::FlagNUW))
+      if (IncrementIsNUW)
          cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
-      if (Normalized->getNoWrapFlags(SCEV::FlagNSW))
+      if (IncrementIsNSW)
          cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
      }
      PN->addIncoming(IncV, Pred);
@@ -1246,13 +1250,13 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
      PostIncLoopSet Loops;
      Loops.insert(L);
      Normalized =
-      cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
-                                                  Loops, SE, *SE.DT));
+      cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, nullptr,
+                                                  nullptr, Loops, SE, *SE.DT));
    }
  
    // Strip off any non-loop-dominating component from the addrec start.
    const SCEV *Start = Normalized->getStart();
-  const SCEV *PostLoopOffset = 0;
+  const SCEV *PostLoopOffset = nullptr;
    if (!SE.properlyDominates(Start, L->getHeader())) {
      PostLoopOffset = Start;
      Start = SE.getConstant(Normalized->getType(), 0);
@@ -1264,7 +1268,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
  
    // Strip off any non-loop-dominating component from the addrec step.
    const SCEV *Step = Normalized->getStepRecurrence(SE);
-  const SCEV *PostLoopScale = 0;
+  const SCEV *PostLoopScale = nullptr;
    if (!SE.dominates(Step, L->getHeader())) {
      PostLoopScale = Step;
      Step = SE.getConstant(Normalized->getType(), 1);
@@ -1279,7 +1283,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
    Type *ExpandTy = PostLoopScale ? IntTy : STy;
    // In some cases, we decide to reuse an existing phi node but need to truncate
    // it and/or invert the step.
-  Type *TruncTy = 0;
+  Type *TruncTy = nullptr;
    bool InvertStep = false;
    PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy,
                                            TruncTy, InvertStep);
@@ -1375,7 +1379,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
    const Loop *L = S->getLoop();
  
    // First check for an existing canonical IV in a suitable type.
-  PHINode *CanonicalIV = 0;
+  PHINode *CanonicalIV = nullptr;
    if (PHINode *PN = L->getCanonicalInductionVariable())
      if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
        CanonicalIV = PN;
@@ -1391,12 +1395,12 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
      Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
                                         S->getNoWrapFlags(SCEV::FlagNW)));
      BasicBlock::iterator NewInsertPt =
-      llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
+      std::next(BasicBlock::iterator(cast<Instruction>(V)));
      BuilderType::InsertPointGuard Guard(Builder);
      while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
             isa<LandingPadInst>(NewInsertPt))
        ++NewInsertPt;
-    V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
+    V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
                        NewInsertPt);
      return V;
    }
@@ -1445,8 +1449,12 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
      Constant *One = ConstantInt::get(Ty, 1);
      for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
        BasicBlock *HP = *HPI;
-      if (!PredSeen.insert(HP))
+      if (!PredSeen.insert(HP).second) {
+        // There must be an incoming value for each predecessor, even the
+        // duplicates!
+        CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
          continue;
+      }
  
        if (L->contains(HP)) {
          // Insert a unit add instruction right before the terminator
@@ -1619,7 +1627,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
        while (InsertPt != Builder.GetInsertPoint()
               && (isInsertedInstruction(InsertPt)
                   || isa<DbgInfoIntrinsic>(InsertPt))) {
-        InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
+        InsertPt = std::next(BasicBlock::iterator(InsertPt));
        }
        break;
      }
@@ -1669,20 +1677,12 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
  
    // Emit code for it.
    BuilderType::InsertPointGuard Guard(Builder);
-  PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
+  PHINode *V = cast<PHINode>(expandCodeFor(H, nullptr,
+                                           L->getHeader()->begin()));
  
    return V;
  }
  
-/// Sort values by integer width for replaceCongruentIVs.
-static bool width_descending(Value *lhs, Value *rhs) {
-  // Put pointers at the back and make sure pointer < pointer = false.
-  if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy())
-    return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy();
-  return rhs->getType()->getPrimitiveSizeInBits()
-    < lhs->getType()->getPrimitiveSizeInBits();
-}
-
  /// replaceCongruentIVs - Check for congruent phis in this loop header and
  /// replace them with their most canonical representative. Return the number of
  /// phis eliminated.
@@ -1699,11 +1699,17 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
      Phis.push_back(Phi);
    }
    if (TTI)
-    std::sort(Phis.begin(), Phis.end(), width_descending);
+    std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
+      // Put pointers at the back and make sure pointer < pointer = false.
+      if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+        return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
+      return RHS->getType()->getPrimitiveSizeInBits() <
+             LHS->getType()->getPrimitiveSizeInBits();
+    });
  
    unsigned NumElim = 0;
    DenseMap<const SCEV *, PHINode *> ExprToIVMap;
-  // Process phis from wide to narrow. Mapping wide phis to the their truncation
+  // Process phis from wide to narrow. Map wide phis to their truncation
    // so narrow phis can reuse them.
    for (SmallVectorImpl<PHINode*>::const_iterator PIter = Phis.begin(),
           PEnd = Phis.end(); PIter != PEnd; ++PIter) {
@@ -1711,9 +1717,9 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
  
      // Fold constant phis. They may be congruent to other constant phis and
      // would confuse the logic below that expects proper IVs.
-    if (Value *V = Phi->hasConstantValue()) {
+    if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) {
        Phi->replaceAllUsesWith(V);
-      DeadInsts.push_back(Phi);
+      DeadInsts.emplace_back(Phi);
        ++NumElim;
        DEBUG_WITH_TYPE(DebugType, dbgs()
                        << "INDVARS: Eliminated constant iv: " << *Phi << '\n');
@@ -1776,16 +1782,19 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
                          << *IsomorphicInc << '\n');
          Value *NewInc = OrigInc;
          if (OrigInc->getType() != IsomorphicInc->getType()) {
-          Instruction *IP = isa<PHINode>(OrigInc)
-            ? (Instruction*)L->getHeader()->getFirstInsertionPt()
-            : OrigInc->getNextNode();
+          Instruction *IP = nullptr;
+          if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
+            IP = PN->getParent()->getFirstInsertionPt();
+          else
+            IP = OrigInc->getNextNode();
+
            IRBuilder<> Builder(IP);
            Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
            NewInc = Builder.
              CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName);
          }
          IsomorphicInc->replaceAllUsesWith(NewInc);
-        DeadInsts.push_back(IsomorphicInc);
+        DeadInsts.emplace_back(IsomorphicInc);
        }
      }
      DEBUG_WITH_TYPE(DebugType, dbgs()
@@ -1798,11 +1807,93 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
        NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
      }
      Phi->replaceAllUsesWith(NewIV);
-    DeadInsts.push_back(Phi);
+    DeadInsts.emplace_back(Phi);
    }
    return NumElim;
  }
  
+bool SCEVExpander::isHighCostExpansionHelper(
+    const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) {
+
+  // Zero/One operand expressions
+  switch (S->getSCEVType()) {
+  case scUnknown:
+  case scConstant:
+    return false;
+  case scTruncate:
+    return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), L,
+                                     Processed);
+  case scZeroExtend:
+    return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(),
+                                     L, Processed);
+  case scSignExtend:
+    return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(),
+                                     L, Processed);
+  }
+
+  if (!Processed.insert(S).second)
+    return false; // S was already visited; do not examine it again.
+
+  if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
+    // If the divisor is a power of two and the SCEV type fits in a native
+    // integer, consider the division cheap irrespective of whether it occurs in
+    // the user code since it can be lowered into a right shift.
+    if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
+      if (SC->getValue()->getValue().isPowerOf2()) {
+        const DataLayout &DL =
+            L->getHeader()->getParent()->getParent()->getDataLayout();
+        unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
+        return DL.isIllegalInteger(Width);
+      }
+
+    // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
+    // HowManyLessThans produced to compute a precise expression, rather than a
+    // UDiv from the user's code. If we can't find a UDiv in the code with some
+    // simple searching, assume the former and consider UDivExpr expensive to
+    // compute.
+    BasicBlock *ExitingBB = L->getExitingBlock();
+    if (!ExitingBB)
+      return true;
+
+    BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+    if (!ExitingBI || !ExitingBI->isConditional())
+      return true;
+
+    ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition());
+    if (!OrigCond)
+      return true;
+
+    const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1));
+    RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1));
+    if (RHS != S) {
+      const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0));
+      LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1));
+      if (LHS != S)
+        return true;
+    }
+  }
+
+  // HowManyLessThans uses a Max expression whenever the loop is not guarded by
+  // the exit condition.
+  if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+    return true;
+
+  // Recurse past nary expressions, which commonly occur in the
+  // BackedgeTakenCount. They may already exist in program code, and if not,
+  // they are not too expensive to rematerialize.
+  if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
+    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+         I != E; ++I) {
+      if (isHighCostExpansionHelper(*I, L, Processed))
+        return true;
+    }
+  }
+
+  // If we haven't recognized an expensive SCEV pattern, assume it's an
+  // expression produced by program code.
+  return false;
+}
+
  namespace {
  // Search for a SCEV subexpression that is not safe to expand.  Any expression
  // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely