[RewriteStatepointsForGC] Minor indentation and naming [NFC]

[oota-llvm.git] / lib / Transforms / Scalar / IndVarSimplify.cpp
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp

index dfdd99f027e3cd805253a09a21bae5ddb314580d..ba44bec0793bbcfdc21451f2d8323ad42b104ba6 100644 (file)
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -24,32 +24,34 @@
  //
  //===----------------------------------------------------------------------===//
  
-#define DEBUG_TYPE "indvars"
  #include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Type.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
  #include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
  using namespace llvm;
  
+#define DEBUG_TYPE "indvars"
+
  STATISTIC(NumWidened     , "Number of indvars widened");
  STATISTIC(NumReplaced    , "Number of exit values replaced");
  STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
@@ -63,29 +65,32 @@ static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars"));
  
+static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
+  cl::desc("Reduce live induction variables."));
+
  namespace {
    class IndVarSimplify : public LoopPass {
-    LoopInfo        *LI;
-    ScalarEvolution *SE;
-    DominatorTree   *DT;
-    DataLayout      *TD;
-    TargetLibraryInfo *TLI;
+    LoopInfo                  *LI;
+    ScalarEvolution           *SE;
+    DominatorTree             *DT;
+    TargetLibraryInfo         *TLI;
+    const TargetTransformInfo *TTI;
  
      SmallVector<WeakVH, 16> DeadInsts;
      bool Changed;
    public:
  
      static char ID; // Pass identification, replacement for typeid
-    IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), TD(0),
-                       Changed(false) {
+    IndVarSimplify()
+        : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
        initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
      }
  
-    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+    bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<DominatorTree>();
-      AU.addRequired<LoopInfo>();
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<DominatorTreeWrapperPass>();
+      AU.addRequired<LoopInfoWrapperPass>();
        AU.addRequired<ScalarEvolution>();
        AU.addRequiredID(LoopSimplifyID);
        AU.addRequiredID(LCSSAID);
@@ -96,7 +101,7 @@ namespace {
      }
  
    private:
-    virtual void releaseMemory() {
+    void releaseMemory() override {
        DeadInsts.clear();
      }
  
@@ -119,8 +124,8 @@ namespace {
  char IndVarSimplify::ID = 0;
  INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
                  "Induction Variable Simplification", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -193,7 +198,7 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
    if (!PHI)
      return User;
  
-  Instruction *InsertPt = 0;
+  Instruction *InsertPt = nullptr;
    for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
      if (PHI->getIncomingValue(i) != Def)
        continue;
@@ -254,34 +259,34 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
    // an add or increment value can not be represented by an integer.
    BinaryOperator *Incr =
      dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
-  if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
+  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
  
    // If this is not an add of the PHI with a constantfp, or if the constant fp
    // is not an integer, bail out.
    ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
    int64_t IncValue;
-  if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
        !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
      return;
  
    // Check Incr uses. One user is PN and the other user is an exit condition
    // used by the conditional terminator.
-  Value::use_iterator IncrUse = Incr->use_begin();
+  Value::user_iterator IncrUse = Incr->user_begin();
    Instruction *U1 = cast<Instruction>(*IncrUse++);
-  if (IncrUse == Incr->use_end()) return;
+  if (IncrUse == Incr->user_end()) return;
    Instruction *U2 = cast<Instruction>(*IncrUse++);
-  if (IncrUse != Incr->use_end()) return;
+  if (IncrUse != Incr->user_end()) return;
  
    // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
    // only used by a branch, we can't transform it.
    FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
    if (!Compare)
      Compare = dyn_cast<FCmpInst>(U2);
-  if (Compare == 0 || !Compare->hasOneUse() ||
-      !isa<BranchInst>(Compare->use_back()))
+  if (!Compare || !Compare->hasOneUse() ||
+      !isa<BranchInst>(Compare->user_back()))
      return;
  
-  BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
  
    // We need to verify that the branch actually controls the iteration count
    // of the loop.  If not, the new IV can overflow and no one will notice.
@@ -298,7 +303,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
    // transform it.
    ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
    int64_t ExitValue;
-  if (ExitValueVal == 0 ||
+  if (ExitValueVal == nullptr ||
        !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
      return;
  
@@ -494,6 +499,21 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  
      unsigned NumPreds = PN->getNumIncomingValues();
  
+    // We would like to be able to RAUW single-incoming value PHI nodes. We
+    // have to be certain this is safe even when this is an LCSSA PHI node.
+    // While the computed exit value is no longer varying in *this* loop, the
+    // exit block may be an exit block for an outer containing loop as well,
+    // the exit value may be varying in the outer loop, and thus it may still
+    // require an LCSSA PHI node. The safe case is when this is a
+    // single-predecessor PHI node (LCSSA) and the exit block containing it is
+    // part of the enclosing loop, or this is the outermost loop of the nest.
+    // In either case the exit value could (at most) be varying in the same
+    // loop body as the phi node itself. Thus if it is in turn used outside of
+    // an enclosing loop it will only be via a separate LCSSA node.
+    bool LCSSASafePhiForRAUW =
+        NumPreds == 1 &&
+        (!L->getParentLoop() || L->getParentLoop() == LI->getLoopFor(ExitBB));
+
      // Iterate over all of the PHI nodes.
      BasicBlock::iterator BBI = ExitBB->begin();
      while ((PN = dyn_cast<PHINode>(BBI++))) {
@@ -532,9 +552,49 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
          // and varies predictably *inside* the loop.  Evaluate the value it
          // contains when the loop exits, if possible.
          const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
-        if (!SE->isLoopInvariant(ExitValue, L))
+        if (!SE->isLoopInvariant(ExitValue, L) ||
+            !isSafeToExpand(ExitValue, *SE))
            continue;
  
+        // Computing the value outside of the loop brings no benefit if :
+        //  - it is definitely used inside the loop in a way which can not be
+        //    optimized away.
+        //  - no use outside of the loop can take advantage of hoisting the
+        //    computation out of the loop
+        if (ExitValue->getSCEVType()>=scMulExpr) {
+          unsigned NumHardInternalUses = 0;
+          unsigned NumSoftExternalUses = 0;
+          unsigned NumUses = 0;
+          for (auto IB = Inst->user_begin(), IE = Inst->user_end();
+               IB != IE && NumUses <= 6; ++IB) {
+            Instruction *UseInstr = cast<Instruction>(*IB);
+            unsigned Opc = UseInstr->getOpcode();
+            NumUses++;
+            if (L->contains(UseInstr)) {
+              if (Opc == Instruction::Call || Opc == Instruction::Ret)
+                NumHardInternalUses++;
+            } else {
+              if (Opc == Instruction::PHI) {
+                // Do not count the Phi as a use. LCSSA may have inserted
+                // plenty of trivial ones.
+                NumUses--;
+                for (auto PB = UseInstr->user_begin(),
+                          PE = UseInstr->user_end();
+                     PB != PE && NumUses <= 6; ++PB, ++NumUses) {
+                  unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
+                  if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
+                    NumSoftExternalUses++;
+                }
+                continue;
+              }
+              if (Opc != Instruction::Call && Opc != Instruction::Ret)
+                NumSoftExternalUses++;
+            }
+          }
+          if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
+            continue;
+        }
+
          Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
  
          DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
@@ -554,17 +614,18 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
          if (isInstructionTriviallyDead(Inst, TLI))
            DeadInsts.push_back(Inst);
  
-        if (NumPreds == 1) {
-          // Completely replace a single-pred PHI. This is safe, because the
-          // NewVal won't be variant in the loop, so we don't need an LCSSA phi
-          // node anymore.
+        // If we determined that this PHI is safe to replace even if an LCSSA
+        // PHI, do so.
+        if (LCSSASafePhiForRAUW) {
            PN->replaceAllUsesWith(ExitVal);
            PN->eraseFromParent();
          }
        }
-      if (NumPreds != 1) {
-        // Clone the PHI and delete the original one. This lets IVUsers and
-        // any other maps purge the original user from their records.
+
+      // If we were unable to completely replace the PHI node, clone the PHI
+      // and delete the original one. This lets IVUsers and any other maps
+      // purge the original user from their records.
+      if (!LCSSASafePhiForRAUW) {
          PHINode *NewPN = cast<PHINode>(PN->clone());
          NewPN->takeName(PN);
          NewPN->insertBefore(PN);
@@ -590,39 +651,39 @@ namespace {
    struct WideIVInfo {
      PHINode *NarrowIV;
      Type *WidestNativeType; // Widest integer type created [sz]ext
-    bool IsSigned;          // Was an sext user seen before a zext?
+    bool IsSigned;          // Was a sext user seen before a zext?
  
-    WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
-  };
-
-  class WideIVVisitor : public IVVisitor {
-    ScalarEvolution *SE;
-    const DataLayout *TD;
-
-  public:
-    WideIVInfo WI;
-
-    WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
-                  const DataLayout *TData) :
-      SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
-
-    // Implement the interface used by simplifyUsersOfIV.
-    virtual void visitCast(CastInst *Cast);
+    WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
+                   IsSigned(false) {}
    };
  }
  
  /// visitCast - Update information about the induction variable that is
  /// extended by this sign or zero extend operation. This is used to determine
  /// the final width of the IV before actually widening it.
-void WideIVVisitor::visitCast(CastInst *Cast) {
+static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
+                        const TargetTransformInfo *TTI) {
    bool IsSigned = Cast->getOpcode() == Instruction::SExt;
    if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
      return;
  
    Type *Ty = Cast->getType();
    uint64_t Width = SE->getTypeSizeInBits(Ty);
-  if (TD && !TD->isLegalInteger(Width))
+  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
+    return;
+
+  // Cast is either an sext or zext up to this point.
+  // We should not widen an indvar if arithmetics on the wider indvar are more
+  // expensive than those on the narrower indvar. We check only the cost of ADD
+  // because at least an ADD is required to increment the induction variable. We
+  // could compute more comprehensively the cost of all instructions on the
+  // induction variable when necessary.
+  if (TTI &&
+      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
+          TTI->getArithmeticInstrCost(Instruction::Add,
+                                      Cast->getOperand(0)->getType())) {
      return;
+  }
  
    if (!WI.WidestNativeType) {
      WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
@@ -648,7 +709,7 @@ struct NarrowIVDefUse {
    Instruction *NarrowUse;
    Instruction *WideDef;
  
-  NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {}
+  NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {}
  
    NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
      NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
@@ -691,9 +752,9 @@ public:
      L(LI->getLoopFor(OrigPhi->getParent())),
      SE(SEv),
      DT(DTree),
-    WidePhi(0),
-    WideInc(0),
-    WideIncExpr(0),
+    WidePhi(nullptr),
+    WideInc(nullptr),
+    WideIncExpr(nullptr),
      DeadInsts(DI) {
      assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    }
@@ -710,8 +771,13 @@ protected:
  
    const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
  
+  const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+                              unsigned OpCode) const;
+
    Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
  
+  bool WidenLoopCompare(NarrowIVDefUse DU);
+
    void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
  };
  } // anonymous namespace
@@ -748,7 +814,7 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
    unsigned Opcode = DU.NarrowUse->getOpcode();
    switch (Opcode) {
    default:
-    return 0;
+    return nullptr;
    case Instruction::Add:
    case Instruction::Mul:
    case Instruction::UDiv:
@@ -786,21 +852,38 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
    }
  }
  
+const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+                                     unsigned OpCode) const {
+  if (OpCode == Instruction::Add)
+    return SE->getAddExpr(LHS, RHS);
+  if (OpCode == Instruction::Sub)
+    return SE->getMinusSCEV(LHS, RHS);
+  if (OpCode == Instruction::Mul)
+    return SE->getMulExpr(LHS, RHS);
+
+  llvm_unreachable("Unsupported opcode.");
+}
+
  /// No-wrap operations can transfer sign extension of their result to their
  /// operands. Generate the SCEV value for the widened operation without
  /// actually modifying the IR yet. If the expression after extending the
  /// operands is an AddRec for this loop, return it.
  const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
+
    // Handle the common case of add<nsw/nuw>
-  if (DU.NarrowUse->getOpcode() != Instruction::Add)
-    return 0;
+  const unsigned OpCode = DU.NarrowUse->getOpcode();
+  // Only Add/Sub/Mul instructions are supported so far.
+  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+      OpCode != Instruction::Mul)
+    return nullptr;
  
    // One operand (NarrowDef) has already been extended to WideDef. Now determine
    // if extending the other will lead to a recurrence.
-  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
+  const unsigned ExtendOperIdx =
+      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
    assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
  
-  const SCEV *ExtendOperExpr = 0;
+  const SCEV *ExtendOperExpr = nullptr;
    const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
    if (IsSigned && OBO->hasNoSignedWrap())
@@ -810,18 +893,25 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
      ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
    else
-    return 0;
+    return nullptr;
  
-  // When creating this AddExpr, don't apply the current operations NSW or NUW
+  // When creating this SCEV expr, don't apply the current operations NSW or NUW
    // flags. This instruction may be guarded by control flow that the no-wrap
    // behavior depends on. Non-control-equivalent instructions can be mapped to
    // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
    // semantics to those operations.
-  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
-    SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
+  const SCEV *lhs = SE->getSCEV(DU.WideDef);
+  const SCEV *rhs = ExtendOperExpr;
+
+  // Let's swap operands to the initial order for the case of non-commutative
+  // operations, like SUB. See PR21014.
+  if (ExtendOperIdx == 0)
+    std::swap(lhs, rhs);
+  const SCEVAddRecExpr *AddRec =
+      dyn_cast<SCEVAddRecExpr>(GetSCEVByOpCode(lhs, rhs, OpCode));
  
    if (!AddRec || AddRec->getLoop() != L)
-    return 0;
+    return nullptr;
    return AddRec;
  }
  
@@ -832,14 +922,14 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
  /// recurrence. Otherwise return NULL.
  const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
    if (!SE->isSCEVable(NarrowUse->getType()))
-    return 0;
+    return nullptr;
  
    const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
    if (SE->getTypeSizeInBits(NarrowExpr->getType())
        >= SE->getTypeSizeInBits(WideType)) {
      // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
      // index. So don't follow this use.
-    return 0;
+    return nullptr;
    }
  
    const SCEV *WideExpr = IsSigned ?
@@ -847,19 +937,76 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
      SE->getZeroExtendExpr(NarrowExpr, WideType);
    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
    if (!AddRec || AddRec->getLoop() != L)
-    return 0;
+    return nullptr;
    return AddRec;
  }
  
+/// This IV user cannot be widened. Replace this use of the original narrow IV
+/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
+static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
+  DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
+        << " for user " << *DU.NarrowUse << "\n");
+  IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
+  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
+}
+
+/// If the narrow use is a compare instruction, then widen the compare
+/// (and possibly the other operand).  The extend operation is hoisted into the
+/// loop preheader as far as possible.
+bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) {
+  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
+  if (!Cmp)
+    return false;
+
+  // Sign of IV user and compare must match.
+  if (IsSigned != CmpInst::isSigned(Cmp->getPredicate()))
+    return false;
+
+  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
+  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
+  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+  assert (CastWidth <= IVWidth && "Unexpected width while widening compare.");
+
+  // Widen the compare instruction.
+  IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
+
+  // Widen the other operand of the compare, if necessary.
+  if (CastWidth < IVWidth) {
+    Value *ExtOp = getExtend(Op, WideType, IsSigned, Cmp);
+    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
+  }
+  return true;
+}
+
  /// WidenIVUse - Determine whether an individual user of the narrow IV can be
  /// widened. If so, return the wide clone of the user.
  Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  
    // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
-  if (isa<PHINode>(DU.NarrowUse) &&
-      LI->getLoopFor(DU.NarrowUse->getParent()) != L)
-    return 0;
-
+  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
+    if (LI->getLoopFor(UsePhi->getParent()) != L) {
+      // For LCSSA phis, sink the truncate outside the loop.
+      // After SimplifyCFG most loop exit targets have a single predecessor.
+      // Otherwise fall back to a truncate within the loop.
+      if (UsePhi->getNumOperands() != 1)
+        truncateIVUse(DU, DT);
+      else {
+        PHINode *WidePhi =
+          PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
+                          UsePhi);
+        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
+        IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt());
+        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
+        UsePhi->replaceAllUsesWith(Trunc);
+        DeadInsts.push_back(UsePhi);
+        DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
+              << " to " << *WidePhi << "\n");
+      }
+      return nullptr;
+    }
+  }
    // Our raison d'etre! Eliminate sign and zero extension.
    if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
      Value *NewDef = DU.WideDef;
@@ -895,22 +1042,25 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
      // push the uses of WideDef here.
  
      // No further widening is needed. The deceased [sz]ext had done it for us.
-    return 0;
+    return nullptr;
    }
  
    // Does this user itself evaluate to a recurrence after widening?
    const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
+  if (!WideAddRec)
+    WideAddRec = GetExtendedOperandRecurrence(DU);
+
    if (!WideAddRec) {
-      WideAddRec = GetExtendedOperandRecurrence(DU);
-  }
-  if (!WideAddRec) {
+    // If use is a loop condition, try to promote the condition instead of
+    // truncating the IV first.
+    if (WidenLoopCompare(DU))
+      return nullptr;
+
      // This user does not evaluate to a recurence after widening, so don't
      // follow it. Instead insert a Trunc to kill off the original use,
      // eventually isolating the original narrow IV so it can be removed.
-    IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
-    Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
-    DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
-    return 0;
+    truncateIVUse(DU, DT);
+    return nullptr;
    }
    // Assume block terminators cannot evaluate to a recurrence. We can't to
    // insert a Trunc after a terminator if there happens to be a critical edge.
@@ -919,14 +1069,14 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  
    // Reuse the IV increment that SCEVExpander created as long as it dominates
    // NarrowUse.
-  Instruction *WideUse = 0;
+  Instruction *WideUse = nullptr;
    if (WideAddRec == WideIncExpr
        && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
      WideUse = WideInc;
    else {
      WideUse = CloneIVUser(DU);
      if (!WideUse)
-      return 0;
+      return nullptr;
    }
    // Evaluation of WideAddRec ensured that the narrow expression could be
    // extended outside the loop without overflow. This suggests that the wide use
@@ -937,7 +1087,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
      DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
            << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
      DeadInsts.push_back(WideUse);
-    return 0;
+    return nullptr;
    }
  
    // Returning WideUse pushes it on the worklist.
@@ -947,15 +1097,14 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  /// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
  ///
  void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
-  for (Value::use_iterator UI = NarrowDef->use_begin(),
-         UE = NarrowDef->use_end(); UI != UE; ++UI) {
-    Instruction *NarrowUse = cast<Instruction>(*UI);
+  for (User *U : NarrowDef->users()) {
+    Instruction *NarrowUser = cast<Instruction>(U);
  
      // Handle data flow merges and bizarre phi cycles.
-    if (!Widened.insert(NarrowUse))
+    if (!Widened.insert(NarrowUser).second)
        continue;
  
-    NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef));
+    NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUser, WideDef));
    }
  }
  
@@ -973,7 +1122,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
    // Is this phi an induction variable?
    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
    if (!AddRec)
-    return NULL;
+    return nullptr;
  
    // Widen the induction variable expression.
    const SCEV *WideIVExpr = IsSigned ?
@@ -986,7 +1135,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
    // Can the IV be extended outside the loop without overflow?
    AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
    if (!AddRec || AddRec->getLoop() != L)
-    return NULL;
+    return nullptr;
  
    // An AddRec must have loop-invariant operands. Since this AddRec is
    // materialized by a loop header phi, the expression cannot have any post-loop
@@ -1039,10 +1188,38 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
    return WidePhi;
  }
  
+//===----------------------------------------------------------------------===//
+//  Live IV Reduction - Minimize IVs live across the loop.
+//===----------------------------------------------------------------------===//
+
+
  //===----------------------------------------------------------------------===//
  //  Simplification of IV users based on SCEV evaluation.
  //===----------------------------------------------------------------------===//
  
+namespace {
+  class IndVarSimplifyVisitor : public IVVisitor {
+    ScalarEvolution *SE;
+    const TargetTransformInfo *TTI;
+    PHINode *IVPhi;
+
+  public:
+    WideIVInfo WI;
+
+    IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
+                          const TargetTransformInfo *TTI,
+                          const DominatorTree *DTree)
+        : SE(SCEV), TTI(TTI), IVPhi(IV) {
+      DT = DTree;
+      WI.NarrowIV = IVPhi;
+      if (ReduceLiveIVs)
+        setSplitOverflowIntrinsics();
+    }
+
+    // Implement the interface used by simplifyUsersOfIV.
+    void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
+  };
+}
  
  /// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
  /// users. Each successive simplification may push more users which may
@@ -1074,12 +1251,12 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
        PHINode *CurrIV = LoopPhis.pop_back_val();
  
        // Information about sign/zero extensions of CurrIV.
-      WideIVVisitor WIV(CurrIV, SE, TD);
+      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
  
-      Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
+      Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor);
  
-      if (WIV.WI.WidestNativeType) {
-        WideIVs.push_back(WIV.WI);
+      if (Visitor.WI.WidestNativeType) {
+        WideIVs.push_back(Visitor.WI);
        }
      } while(!LoopPhis.empty());
  
@@ -1101,9 +1278,9 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
  /// BackedgeTakenInfo. If these expressions have not been reduced, then
  /// expanding them may incur additional cost (albeit in the loop preheader).
  static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
-                                SmallPtrSet<const SCEV*, 8> &Processed,
+                                SmallPtrSetImpl<const SCEV*> &Processed,
                                  ScalarEvolution *SE) {
-  if (!Processed.insert(S))
+  if (!Processed.insert(S).second)
      return false;
  
    // If the backedge-taken count is a UDiv, it's very likely a UDiv that
@@ -1185,7 +1362,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
  static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
    Instruction *IncI = dyn_cast<Instruction>(IncV);
    if (!IncI)
-    return 0;
+    return nullptr;
  
    switch (IncI->getOpcode()) {
    case Instruction::Add:
@@ -1196,17 +1373,17 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
      if (IncI->getNumOperands() == 2)
        break;
    default:
-    return 0;
+    return nullptr;
    }
  
    PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
    if (Phi && Phi->getParent() == L->getHeader()) {
      if (isLoopInvariant(IncI->getOperand(1), L, DT))
        return Phi;
-    return 0;
+    return nullptr;
    }
    if (IncI->getOpcode() == Instruction::GetElementPtr)
-    return 0;
+    return nullptr;
  
    // Allow add/sub to be commuted.
    Phi = dyn_cast<PHINode>(IncI->getOperand(1));
@@ -1214,7 +1391,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
      if (isLoopInvariant(IncI->getOperand(0), L, DT))
        return Phi;
    }
-  return 0;
+  return nullptr;
  }
  
  /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
@@ -1224,7 +1401,7 @@ static ICmpInst *getLoopTest(Loop *L) {
    BasicBlock *LatchBlock = L->getLoopLatch();
    // Don't bother with LFTR if the loop is not properly simplified.
    if (!LatchBlock)
-    return 0;
+    return nullptr;
  
    BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
    assert(BI && "expected exit branch");
@@ -1274,7 +1451,7 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) {
  /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
  /// down to checking that all operands are constant and listing instructions
  /// that may hide undef.
-static bool hasConcreteDefImpl(Value *V, SmallPtrSet<Value*, 8> &Visited,
+static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                                 unsigned Depth) {
    if (isa<Constant>(V))
      return !isa<UndefValue>(V);
@@ -1294,7 +1471,7 @@ static bool hasConcreteDefImpl(Value *V, SmallPtrSet<Value*, 8> &Visited,
  
    // Optimistically handle other instructions.
    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
-    if (!Visited.insert(*OI))
+    if (!Visited.insert(*OI).second)
        continue;
      if (!hasConcreteDefImpl(*OI, Visited, Depth+1))
        return false;
@@ -1319,15 +1496,11 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
    int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
    Value *IncV = Phi->getIncomingValue(LatchIdx);
  
-  for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
-       UI != UE; ++UI) {
-    if (*UI != Cond && *UI != IncV) return false;
-  }
+  for (User *U : Phi->users())
+    if (U != Cond && U != IncV) return false;
  
-  for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
-       UI != UE; ++UI) {
-    if (*UI != Cond && *UI != Phi) return false;
-  }
+  for (User *U : IncV->users())
+    if (U != Cond && U != Phi) return false;
    return true;
  }
  
@@ -1344,17 +1517,16 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
  /// This is difficult in general for SCEV because of potential overflow. But we
  /// could at least handle constant BECounts.
-static PHINode *
-FindLoopCounter(Loop *L, const SCEV *BECount,
-                ScalarEvolution *SE, DominatorTree *DT, const DataLayout *TD) {
+static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
+                                ScalarEvolution *SE, DominatorTree *DT) {
    uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
  
    Value *Cond =
      cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
  
    // Loop over all of the PHI nodes, looking for a simple counter.
-  PHINode *BestPhi = 0;
-  const SCEV *BestInit = 0;
+  PHINode *BestPhi = nullptr;
+  const SCEV *BestInit = nullptr;
    BasicBlock *LatchBlock = L->getLoopLatch();
    assert(LatchBlock && "needsLFTR should guarantee a loop latch");
  
@@ -1375,7 +1547,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
      // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
      // AR may not be a narrower type, or we may never exit.
      uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
-    if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
+    if (PhiWidth < BCWidth ||
+        !L->getHeader()->getModule()->getDataLayout().isLegalInteger(PhiWidth))
        continue;
  
      const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
@@ -1428,8 +1601,7 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
  /// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
  /// holds the RHS of the new loop test.
  static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
-                           SCEVExpander &Rewriter, ScalarEvolution *SE,
-                           Type *IntPtrTy) {
+                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
    assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
    const SCEV *IVInit = AR->getStart();
@@ -1441,8 +1613,14 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
    if (IndVar->getType()->isPointerTy()
        && !IVCount->getType()->isPointerTy()) {
  
+    // IVOffset will be the new GEP offset that is interpreted by GEP as a
+    // signed value. IVCount on the other hand represents the loop trip count,
+    // which is an unsigned value. FindLoopCounter only allows induction
+    // variables that have a positive unit stride of one. This means we don't
+    // have to handle the case of negative offsets (yet) and just need to zero
+    // extend IVCount.
      Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
-    const SCEV *IVOffset = SE->getTruncateOrSignExtend(IVCount, OfsTy);
+    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
  
      // Expand the code for the iteration count.
      assert(SE->isLoopInvariant(IVOffset, L) &&
@@ -1454,13 +1632,12 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
      assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
      // We could handle pointer IVs other than i8*, but we need to compensate for
      // gep index scaling. See canExpandBackedgeTakenCount comments.
-    assert(SE->getSizeOfExpr(
-             cast<PointerType>(GEPBase->getType())->getElementType(),
-             IntPtrTy)->isOne()
+    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
+             cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
             && "unit stride pointer IV must be i8*");
  
      IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
-    return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit");
+    return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
    }
    else {
      // In any other case, convert both IVInit and IVCount to integers before
@@ -1469,11 +1646,12 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
      // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
      //
      // Valid Cases: (1) both integers is most common; (2) both may be pointers
-    // for simple memset-style loops; (3) IVInit is an integer and IVCount is a
-    // pointer may occur when enable-iv-rewrite generates a canonical IV on top
-    // of case #2.
+    // for simple memset-style loops.
+    //
+    // IVInit integer and IVCount pointer would only occur if a canonical IV
+    // were generated on top of case #2, which is not expected.
  
-    const SCEV *IVLimit = 0;
+    const SCEV *IVLimit = nullptr;
      // For unit stride, IVCount = Start + BECount with 2's complement overflow.
      // For non-zero Start, compute IVCount here.
      if (AR->getStart()->isZero())
@@ -1515,49 +1693,26 @@ LinearFunctionTestReplace(Loop *L,
                            SCEVExpander &Rewriter) {
    assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
  
-  // LFTR can ignore IV overflow and truncate to the width of
-  // BECount. This avoids materializing the add(zext(add)) expression.
-  Type *CntTy = BackedgeTakenCount->getType();
-
+  // Initialize CmpIndVar and IVCount to their preincremented values.
+  Value *CmpIndVar = IndVar;
    const SCEV *IVCount = BackedgeTakenCount;
  
    // If the exiting block is the same as the backedge block, we prefer to
    // compare against the post-incremented value, otherwise we must compare
    // against the preincremented value.
-  Value *CmpIndVar;
    if (L->getExitingBlock() == L->getLoopLatch()) {
      // Add one to the "backedge-taken" count to get the trip count.
-    // If this addition may overflow, we have to be more pessimistic and
-    // cast the induction variable before doing the add.
-    const SCEV *N =
-      SE->getAddExpr(IVCount, SE->getConstant(IVCount->getType(), 1));
-    if (CntTy == IVCount->getType())
-      IVCount = N;
-    else {
-      const SCEV *Zero = SE->getConstant(IVCount->getType(), 0);
-      if ((isa<SCEVConstant>(N) && !N->isZero()) ||
-          SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
-        // No overflow. Cast the sum.
-        IVCount = SE->getTruncateOrZeroExtend(N, CntTy);
-      } else {
-        // Potential overflow. Cast before doing the add.
-        IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
-        IVCount = SE->getAddExpr(IVCount, SE->getConstant(CntTy, 1));
-      }
-    }
+    // This addition may overflow, which is valid as long as the comparison is
+    // truncated to BackedgeTakenCount->getType().
+    IVCount = SE->getAddExpr(BackedgeTakenCount,
+                             SE->getConstant(BackedgeTakenCount->getType(), 1));
      // The BackedgeTaken expression contains the number of times that the
      // backedge branches to the loop header.  This is one less than the
      // number of times the loop executes, so use the incremented indvar.
      CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
-  } else {
-    // We must use the preincremented value...
-    IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
-    CmpIndVar = IndVar;
    }
  
-  Type *IntPtrTy = TD ? TD->getIntPtrType(IndVar->getType()) :
-    IntegerType::getInt64Ty(IndVar->getContext());
-  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE, IntPtrTy);
+  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
    assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
           && "genLoopLimit missed a cast");
  
@@ -1577,12 +1732,40 @@ LinearFunctionTestReplace(Loop *L,
                 << "  IVCount:\t" << *IVCount << "\n");
  
    IRBuilder<> Builder(BI);
-  if (SE->getTypeSizeInBits(CmpIndVar->getType())
-      > SE->getTypeSizeInBits(ExitCnt->getType())) {
-    CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
-                                    "lftr.wideiv");
-  }
  
+  // LFTR can ignore IV overflow and truncate to the width of
+  // BECount. This avoids materializing the add(zext(add)) expression.
+  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
+  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
+  if (CmpIndVarSize > ExitCntSize) {
+    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
+    const SCEV *ARStart = AR->getStart();
+    const SCEV *ARStep = AR->getStepRecurrence(*SE);
+    // For constant IVCount, avoid truncation.
+    if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
+      const APInt &Start = cast<SCEVConstant>(ARStart)->getValue()->getValue();
+      APInt Count = cast<SCEVConstant>(IVCount)->getValue()->getValue();
+      // Note that the post-inc value of BackedgeTakenCount may have overflowed
+      // above such that IVCount is now zero.
+      if (IVCount != BackedgeTakenCount && Count == 0) {
+        Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
+        ++Count;
+      }
+      else
+        Count = Count.zext(CmpIndVarSize);
+      APInt NewLimit;
+      if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
+        NewLimit = Start - Count;
+      else
+        NewLimit = Start + Count;
+      ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
+
+      DEBUG(dbgs() << "  Widen RHS:\t" << *ExitCnt << "\n");
+    } else {
+      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
+                                      "lftr.wideiv");
+    }
+  }
    Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
    Value *OrigCond = BI->getCondition();
    // It's tempting to use replaceAllUsesWith here to fully replace the old
@@ -1647,13 +1830,12 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
      // Determine if there is a use in or before the loop (direct or
      // otherwise).
      bool UsedInLoop = false;
-    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
-         UI != UE; ++UI) {
-      User *U = *UI;
-      BasicBlock *UseBB = cast<Instruction>(U)->getParent();
-      if (PHINode *P = dyn_cast<PHINode>(U)) {
+    for (Use &U : I->uses()) {
+      Instruction *User = cast<Instruction>(U.getUser());
+      BasicBlock *UseBB = User->getParent();
+      if (PHINode *P = dyn_cast<PHINode>(User)) {
          unsigned i =
-          PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
          UseBB = P->getIncomingBlock(i);
        }
        if (UseBB == Preheader || L->contains(UseBB)) {
@@ -1693,6 +1875,9 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
  //===----------------------------------------------------------------------===//
  
  bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+  if (skipOptnoneFunction(L))
+    return false;
+
    // If LoopSimplify form is not available, stay out of trouble. Some notes:
    //  - LSR currently only supports LoopSimplify-form loops. Indvars'
    //    canonicalization can be a pessimization without LSR to "clean up"
@@ -1704,11 +1889,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
    if (!L->isLoopSimplifyForm())
      return false;
  
-  LI = &getAnalysis<LoopInfo>();
+  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    SE = &getAnalysis<ScalarEvolution>();
-  DT = &getAnalysis<DominatorTree>();
-  TD = getAnalysisIfAvailable<DataLayout>();
-  TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+  TLI = TLIP ? &TLIP->getTLI() : nullptr;
+  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+  TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
+  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
  
    DeadInsts.clear();
    Changed = false;
@@ -1720,7 +1908,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
    const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
  
    // Create a rewriter object which we'll use to transform the code with.
-  SCEVExpander Rewriter(*SE, "indvars");
+  SCEVExpander Rewriter(*SE, DL, "indvars");
  #ifndef NDEBUG
    Rewriter.setDebugType(DEBUG_TYPE);
  #endif
@@ -1749,13 +1937,13 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
    // If we have a trip count expression, rewrite the loop's exit condition
    // using it.  We can currently only handle loops with a single exit.
    if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) {
-    PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
+    PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
      if (IndVar) {
        // Check preconditions for proper SCEVExpander operation. SCEV does not
        // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
        // pass that uses the SCEVExpander must do it. This does not work well for
-      // loop passes because SCEVExpander makes assumptions about all loops, while
-      // LoopPassManager only forces the current loop to be simplified.
+      // loop passes because SCEVExpander makes assumptions about all loops,
+      // while LoopPassManager only forces the current loop to be simplified.
        //
        // FIXME: SCEV expansion has no way to bail out, so the caller must
        // explicitly check any assumptions made by SCEV. Brittle.