Reapply r160340. LSR: Limit CollectSubexprs.

[oota-llvm.git] / lib / Transforms / Scalar / LoopStrengthReduce.cpp
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp

index e0005bad854aca08189a31c2363d2a0eab645c3b..b14a713ce47ac415e61df48113c79c0a586601db 100644 (file)
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -77,8 +77,11 @@
  #include <algorithm>
  using namespace llvm;
  
-static cl::opt<bool> EnableRetry(
-  "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
+/// MaxIVUsers is an arbitrary threshold that provides an early opportunitiy for
+/// bail out. This threshold is far beyond the number of users that LSR can
+/// conceivably solve, so it should not affect generated code, but catches the
+/// worst cases before LSR burns too much compile time and stack space.
+static const unsigned MaxIVUsers = 200;
  
  // Temporary flag to cleanup congruent phis after LSR phi expansion.
  // It's currently disabled until we can determine whether it's truly useful or
@@ -707,8 +710,9 @@ static bool isHighCostExpansion(const SCEV *S,
          Value *UVal = U->getValue();
          for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end();
               UI != UE; ++UI) {
-          Instruction *User = cast<Instruction>(*UI);
-          if (User->getOpcode() == Instruction::Mul
+          // If U is a constant, it may be used by a ConstantExpr.
+          Instruction *User = dyn_cast<Instruction>(*UI);
+          if (User && User->getOpcode() == Instruction::Mul
                && SE.isSCEVable(User->getType())) {
              return SE.getSCEV(User) == Mul;
            }
@@ -1284,10 +1288,19 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
      // If we have low-level target information, ask the target if it can fold an
      // integer immediate on an icmp.
      if (AM.BaseOffs != 0) {
-      if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs);
-      return false;
+      if (!TLI)
+        return false;
+      // We have one of:
+      // ICmpZero     BaseReg + Offset => ICmp BaseReg, -Offset
+      // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
+      // Offs is the ICmp immediate.
+      int64_t Offs = AM.BaseOffs;
+      if (AM.Scale == 0)
+        Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
+      return TLI->isLegalICmpImmediate(Offs);
      }
  
+    // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
      return true;
  
    case LSRUse::Basic:
@@ -1295,8 +1308,8 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
      return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
  
    case LSRUse::Special:
-    // Only handle -1 scales, or no scale.
-    return AM.Scale == 0 || AM.Scale == -1;
+    // Special case Basic to handle -1 scales.
+    return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0;
    }
  
    llvm_unreachable("Invalid LSRUse Kind!");
@@ -1426,7 +1439,41 @@ struct IVInc {
  
  // IVChain - The list of IV increments in program order.
  // We typically add the head of a chain without finding subsequent links.
-typedef SmallVector<IVInc,1> IVChain;
+struct IVChain {
+  SmallVector<IVInc,1> Incs;
+  const SCEV *ExprBase;
+
+  IVChain() : ExprBase(0) {}
+
+  IVChain(const IVInc &Head, const SCEV *Base)
+    : Incs(1, Head), ExprBase(Base) {}
+
+  typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
+
+  // begin - return the first increment in the chain.
+  const_iterator begin() const {
+    assert(!Incs.empty());
+    return llvm::next(Incs.begin());
+  }
+  const_iterator end() const {
+    return Incs.end();
+  }
+
+  // hasIncs - Returns true if this chain contains any increments.
+  bool hasIncs() const { return Incs.size() >= 2; }
+
+  // add - Add an IVInc to the end of this chain.
+  void add(const IVInc &X) { Incs.push_back(X); }
+
+  // tailUserInst - Returns the last UserInst in the chain.
+  Instruction *tailUserInst() const { return Incs.back().UserInst; }
+
+  // isProfitableIncrement - Returns true if IncExpr can be profitably added to
+  // this chain.
+  bool isProfitableIncrement(const SCEV *OperExpr,
+                             const SCEV *IncExpr,
+                             ScalarEvolution&);
+};
  
  /// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
  /// Distinguish between FarUsers that definitely cross IV increments and
@@ -2147,7 +2194,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
              return &LU;
            // This is the formula where all the registers and symbols matched;
            // there aren't going to be any others. Since we declined it, we
-          // can skip the rest of the formulae and procede to the next LSRUse.
+          // can skip the rest of the formulae and proceed to the next LSRUse.
            break;
          }
        }
@@ -2306,41 +2353,23 @@ static const SCEV *getExprBase(const SCEV *S) {
  /// increment will be an offset relative to the same base. We allow such offsets
  /// to potentially be used as chain increment as long as it's not obviously
  /// expensive to expand using real instructions.
-static const SCEV *
-getProfitableChainIncrement(Value *NextIV, Value *PrevIV,
-                            const IVChain &Chain, Loop *L,
-                            ScalarEvolution &SE, const TargetLowering *TLI) {
-  // Prune the solution space aggressively by checking that both IV operands
-  // are expressions that operate on the same unscaled SCEVUnknown. This
-  // "base" will be canceled by the subsequent getMinusSCEV call. Checking first
-  // avoids creating extra SCEV expressions.
-  const SCEV *OperExpr = SE.getSCEV(NextIV);
-  const SCEV *PrevExpr = SE.getSCEV(PrevIV);
-  if (getExprBase(OperExpr) != getExprBase(PrevExpr) && !StressIVChain)
-    return 0;
-
-  const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
-  if (!SE.isLoopInvariant(IncExpr, L))
-    return 0;
-
-  // We are not able to expand an increment unless it is loop invariant,
-  // however, the following checks are purely for profitability.
+bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
+                                    const SCEV *IncExpr,
+                                    ScalarEvolution &SE) {
+  // Aggressively form chains when -stress-ivchain.
    if (StressIVChain)
-    return IncExpr;
+    return true;
  
    // Do not replace a constant offset from IV head with a nonconstant IV
    // increment.
    if (!isa<SCEVConstant>(IncExpr)) {
-    const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Chain[0].IVOperand));
+    const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
      if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
        return 0;
    }
  
    SmallPtrSet<const SCEV*, 8> Processed;
-  if (isHighCostExpansion(IncExpr, Processed, SE))
-    return 0;
-
-  return IncExpr;
+  return !isHighCostExpansion(IncExpr, Processed, SE);
  }
  
  /// Return true if the number of registers needed for the chain is estimated to
@@ -2359,18 +2388,18 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
    if (StressIVChain)
      return true;
  
-  if (Chain.size() <= 2)
+  if (!Chain.hasIncs())
      return false;
  
    if (!Users.empty()) {
-    DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " users:\n";
+    DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
            for (SmallPtrSet<Instruction*, 4>::const_iterator I = Users.begin(),
                   E = Users.end(); I != E; ++I) {
              dbgs() << "  " << **I << "\n";
            });
      return false;
    }
-  assert(!Chain.empty() && "empty IV chains are not allowed");
+  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  
    // The chain itself may require a register, so intialize cost to 1.
    int cost = 1;
@@ -2378,15 +2407,15 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
    // A complete chain likely eliminates the need for keeping the original IV in
    // a register. LSR does not currently know how to form a complete chain unless
    // the header phi already exists.
-  if (isa<PHINode>(Chain.back().UserInst)
-      && SE.getSCEV(Chain.back().UserInst) == Chain[0].IncExpr) {
+  if (isa<PHINode>(Chain.tailUserInst())
+      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
      --cost;
    }
    const SCEV *LastIncExpr = 0;
    unsigned NumConstIncrements = 0;
    unsigned NumVarIncrements = 0;
    unsigned NumReusedIncrements = 0;
-  for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end();
+  for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
         I != E; ++I) {
  
      if (I->IncExpr->isZero())
@@ -2422,7 +2451,8 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
    // the stride.
    cost -= NumReusedIncrements;
  
-  DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " Cost: " << cost << "\n");
+  DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
+               << "\n");
  
    return cost < 0;
  }
@@ -2433,25 +2463,39 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                                     SmallVectorImpl<ChainUsers> &ChainUsersVec) {
    // When IVs are used as types of varying widths, they are generally converted
    // to a wider type with some uses remaining narrow under a (free) trunc.
-  Value *NextIV = getWideOperand(IVOper);
+  Value *const NextIV = getWideOperand(IVOper);
+  const SCEV *const OperExpr = SE.getSCEV(NextIV);
+  const SCEV *const OperExprBase = getExprBase(OperExpr);
  
    // Visit all existing chains. Check if its IVOper can be computed as a
    // profitable loop invariant increment from the last link in the Chain.
    unsigned ChainIdx = 0, NChains = IVChainVec.size();
    const SCEV *LastIncExpr = 0;
    for (; ChainIdx < NChains; ++ChainIdx) {
-    Value *PrevIV = getWideOperand(IVChainVec[ChainIdx].back().IVOperand);
+    IVChain &Chain = IVChainVec[ChainIdx];
+
+    // Prune the solution space aggressively by checking that both IV operands
+    // are expressions that operate on the same unscaled SCEVUnknown. This
+    // "base" will be canceled by the subsequent getMinusSCEV call. Checking
+    // first avoids creating extra SCEV expressions.
+    if (!StressIVChain && Chain.ExprBase != OperExprBase)
+      continue;
+
+    Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
      if (!isCompatibleIVType(PrevIV, NextIV))
        continue;
  
-    // A phi nodes terminates a chain.
-    if (isa<PHINode>(UserInst)
-        && isa<PHINode>(IVChainVec[ChainIdx].back().UserInst))
+    // A phi node terminates a chain.
+    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
+      continue;
+
+    // The increment must be loop-invariant so it can be kept in a register.
+    const SCEV *PrevExpr = SE.getSCEV(PrevIV);
+    const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
+    if (!SE.isLoopInvariant(IncExpr, L))
        continue;
  
-    if (const SCEV *IncExpr =
-        getProfitableChainIncrement(NextIV, PrevIV, IVChainVec[ChainIdx],
-                                    L, SE, TLI)) {
+    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
        LastIncExpr = IncExpr;
        break;
      }
@@ -2465,24 +2509,24 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
        DEBUG(dbgs() << "IV Chain Limit\n");
        return;
      }
-    LastIncExpr = SE.getSCEV(NextIV);
+    LastIncExpr = OperExpr;
      // IVUsers may have skipped over sign/zero extensions. We don't currently
      // attempt to form chains involving extensions unless they can be hoisted
      // into this loop's AddRec.
      if (!isa<SCEVAddRecExpr>(LastIncExpr))
        return;
      ++NChains;
-    IVChainVec.resize(NChains);
+    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
+                                 OperExprBase));
      ChainUsersVec.resize(NChains);
-    DEBUG(dbgs() << "IV Head: (" << *UserInst << ") IV=" << *LastIncExpr
-          << "\n");
+    DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
+                 << ") IV=" << *LastIncExpr << "\n");
+  } else {
+    DEBUG(dbgs() << "IV Chain#" << ChainIdx << "  Inc: (" << *UserInst
+                 << ") IV+" << *LastIncExpr << "\n");
+    // Add this IV user to the end of the chain.
+    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
    }
-  else
-    DEBUG(dbgs() << "IV  Inc: (" << *UserInst << ") IV+" << *LastIncExpr
-          << "\n");
-
-  // Add this IV user to the end of the chain.
-  IVChainVec[ChainIdx].push_back(IVInc(UserInst, IVOper, LastIncExpr));
  
    SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
    // This chain's NearUsers become FarUsers.
@@ -2500,13 +2544,14 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
    for (Value::use_iterator UseIter = IVOper->use_begin(),
           UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
      Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
+    if (!OtherUse || OtherUse == UserInst)
+      continue;
      if (SE.isSCEVable(OtherUse->getType())
          && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
          && IU.isIVUserOrOperand(OtherUse)) {
        continue;
      }
-    if (OtherUse && OtherUse != UserInst)
-      NearUsers.insert(OtherUse);
+    NearUsers.insert(OtherUse);
    }
  
    // Since this user is part of the chain, it's no longer considered a use
@@ -2537,6 +2582,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
  /// loop latch. This will discover chains on side paths, but requires
  /// maintaining multiple copies of the Chains state.
  void LSRInstance::CollectChains() {
+  DEBUG(dbgs() << "Collecting IV Chains.\n");
    SmallVector<ChainUsers, 8> ChainUsersVec;
  
    SmallVector<BasicBlock *,8> LatchPath;
@@ -2608,10 +2654,10 @@ void LSRInstance::CollectChains() {
  }
  
  void LSRInstance::FinalizeChain(IVChain &Chain) {
-  assert(!Chain.empty() && "empty IV chains are not allowed");
-  DEBUG(dbgs() << "Final Chain: " << *Chain[0].UserInst << "\n");
+  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
+  DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
  
-  for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end();
+  for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
         I != E; ++I) {
      DEBUG(dbgs() << "        Inc: " << *I->UserInst << "\n");
      User::op_iterator UseI =
@@ -2645,7 +2691,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
                                    SmallVectorImpl<WeakVH> &DeadInsts) {
    // Find the new IVOperand for the head of the chain. It may have been replaced
    // by LSR.
-  const IVInc &Head = Chain[0];
+  const IVInc &Head = Chain.Incs[0];
    User::op_iterator IVOpEnd = Head.UserInst->op_end();
    User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
                                               IVOpEnd, L, SE);
@@ -2677,7 +2723,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
    Type *IVTy = IVSrc->getType();
    Type *IntTy = SE.getEffectiveSCEVType(IVTy);
    const SCEV *LeftOverExpr = 0;
-  for (IVChain::const_iterator IncI = llvm::next(Chain.begin()),
+  for (IVChain::const_iterator IncI = Chain.begin(),
           IncE = Chain.end(); IncI != IncE; ++IncI) {
  
      Instruction *InsertPt = IncI->UserInst;
@@ -2722,7 +2768,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
    }
    // If LSR created a new, wider phi, we may also replace its postinc. We only
    // do this if we also found a wide value for the head of the chain.
-  if (isa<PHINode>(Chain.back().UserInst)) {
+  if (isa<PHINode>(Chain.tailUserInst())) {
      for (BasicBlock::iterator I = L->getHeader()->begin();
           PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
        if (!isCompatibleIVType(Phi, IVSrc))
@@ -2790,7 +2836,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
  
          // x == y  -->  x - y == 0
          const SCEV *N = SE.getSCEV(NV);
-        if (SE.isLoopInvariant(N, L)) {
+        if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
            // S is normalized, so normalize N before folding it into S
            // to keep the result normalized.
            N = TransformForPostIncUse(Normalize, N, CI, 0,
@@ -2960,42 +3006,64 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  
  /// CollectSubexprs - Split S into subexpressions which can be pulled out into
  /// separate registers. If C is non-null, multiply each subexpression by C.
-static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
-                            SmallVectorImpl<const SCEV *> &Ops,
-                            const Loop *L,
-                            ScalarEvolution &SE) {
+///
+/// Return remainder expression after factoring the subexpressions captured by
+/// Ops. If Ops is complete, return NULL.
+static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
+                                   SmallVectorImpl<const SCEV *> &Ops,
+                                   const Loop *L,
+                                   ScalarEvolution &SE,
+                                   unsigned Depth = 0) {
+  // Arbitrarily cap recursion to protect compile time.
+  if (Depth >= 3)
+    return S;
+
    if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
      // Break out add operands.
      for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
-         I != E; ++I)
-      CollectSubexprs(*I, C, Ops, L, SE);
-    return;
+         I != E; ++I) {
+      const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
+      if (Remainder)
+        Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+    }
+    return NULL;
    } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
      // Split a non-zero base out of an addrec.
-    if (!AR->getStart()->isZero()) {
-      CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
-                                       AR->getStepRecurrence(SE),
-                                       AR->getLoop(),
-                                       //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
-                                       SCEV::FlagAnyWrap),
-                      C, Ops, L, SE);
-      CollectSubexprs(AR->getStart(), C, Ops, L, SE);
-      return;
+    if (AR->getStart()->isZero())
+      return S;
+
+    const SCEV *Remainder = CollectSubexprs(AR->getStart(),
+                                            C, Ops, L, SE, Depth+1);
+    // Split the non-zero AddRec unless it is part of a nested recurrence that
+    // does not pertain to this loop.
+    if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
+      Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+      Remainder = NULL;
+    }
+    if (Remainder != AR->getStart()) {
+      if (!Remainder)
+        Remainder = SE.getConstant(AR->getType(), 0);
+      return SE.getAddRecExpr(Remainder,
+                              AR->getStepRecurrence(SE),
+                              AR->getLoop(),
+                              //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+                              SCEV::FlagAnyWrap);
      }
    } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
      // Break (C * (a + b + c)) into C*a + C*b + C*c.
-    if (Mul->getNumOperands() == 2)
-      if (const SCEVConstant *Op0 =
-            dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
-        CollectSubexprs(Mul->getOperand(1),
-                        C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
-                        Ops, L, SE);
-        return;
-      }
+    if (Mul->getNumOperands() != 2)
+      return S;
+    if (const SCEVConstant *Op0 =
+        dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+      C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
+      const SCEV *Remainder =
+        CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
+      if (Remainder)
+        Ops.push_back(SE.getMulExpr(C, Remainder));
+      return NULL;
+    }
    }
-
-  // Otherwise use the value itself, optionally with a scale applied.
-  Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+  return S;
  }
  
  /// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -3010,7 +3078,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
      const SCEV *BaseReg = Base.BaseRegs[i];
  
      SmallVector<const SCEV *, 8> AddOps;
-    CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+    const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+    if (Remainder)
+      AddOps.push_back(Remainder);
  
      if (AddOps.size() == 1) continue;
  
@@ -3967,24 +4037,29 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
      if (LU.Regs.count(*I))
        ReqRegs.insert(*I);
  
-  bool AnySatisfiedReqRegs = false;
    SmallPtrSet<const SCEV *, 16> NewRegs;
    Cost NewCost;
-retry:
    for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
         E = LU.Formulae.end(); I != E; ++I) {
      const Formula &F = *I;
  
      // Ignore formulae which do not use any of the required registers.
+    bool SatisfiedReqReg = true;
      for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
           JE = ReqRegs.end(); J != JE; ++J) {
        const SCEV *Reg = *J;
        if ((!F.ScaledReg || F.ScaledReg != Reg) &&
            std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
-          F.BaseRegs.end())
-        goto skip;
+          F.BaseRegs.end()) {
+        SatisfiedReqReg = false;
+        break;
+      }
+    }
+    if (!SatisfiedReqReg) {
+      // If none of the formulae satisfied the required registers, then we could
+      // clear ReqRegs and try again. Currently, we simply give up in this case.
+      continue;
      }
-    AnySatisfiedReqRegs = true;
  
      // Evaluate the cost of the current formula. If it's already worse than
      // the current best, prune the search at that point.
@@ -4011,18 +4086,6 @@ retry:
        }
        Workspace.pop_back();
      }
-  skip:;
-  }
-
-  if (!EnableRetry && !AnySatisfiedReqRegs)
-    return;
-
-  // If none of the formulae had all of the required registers, relax the
-  // constraint so that we don't exclude all formulae.
-  if (!AnySatisfiedReqRegs) {
-    assert(!ReqRegs.empty() && "Solver failed even without required registers");
-    ReqRegs.clear();
-    goto retry;
    }
  }
  
@@ -4101,7 +4164,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
        // Attempt to find an insert position in the middle of the block,
        // instead of at the end, so that it can be used for other expansions.
        if (IDom == Inst->getParent() &&
-          (!BetterPos || DT.dominates(BetterPos, Inst)))
+          (!BetterPos || !DT.dominates(Inst, BetterPos)))
          BetterPos = llvm::next(BasicBlock::iterator(Inst));
      }
      if (!AllDominate)
@@ -4229,13 +4292,6 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
      Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
    }
  
-  // Flush the operand list to suppress SCEVExpander hoisting.
-  if (!Ops.empty()) {
-    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
-    Ops.clear();
-    Ops.push_back(SE.getUnknown(FullV));
-  }
-
    // Expand the ScaledReg portion.
    Value *ICmpScaledV = 0;
    if (F.AM.Scale != 0) {
@@ -4257,23 +4313,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
      } else {
        // Otherwise just expand the scaled register and an explicit scale,
        // which is expected to be matched as part of the address.
+
+      // Flush the operand list to suppress SCEVExpander hoisting address modes.
+      if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+        Ops.clear();
+        Ops.push_back(SE.getUnknown(FullV));
+      }
        ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
        ScaledS = SE.getMulExpr(ScaledS,
                                SE.getConstant(ScaledS->getType(), F.AM.Scale));
        Ops.push_back(ScaledS);
-
-      // Flush the operand list to suppress SCEVExpander hoisting.
-      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
-      Ops.clear();
-      Ops.push_back(SE.getUnknown(FullV));
      }
    }
  
    // Expand the GV portion.
    if (F.AM.BaseGV) {
+    // Flush the operand list to suppress SCEVExpander hoisting.
+    if (!Ops.empty()) {
+      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+      Ops.clear();
+      Ops.push_back(SE.getUnknown(FullV));
+    }
      Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+  }
  
-    // Flush the operand list to suppress SCEVExpander hoisting.
+  // Flush the operand list to suppress SCEVExpander hoisting of both folded and
+  // unfolded offsets. LSR assumes they both live next to their uses.
+  if (!Ops.empty()) {
      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
      Ops.clear();
      Ops.push_back(SE.getUnknown(FullV));
@@ -4478,7 +4545,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
    // Mark phi nodes that terminate chains so the expander tries to reuse them.
    for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
           ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
-    if (PHINode *PN = dyn_cast<PHINode>(ChainI->back().UserInst))
+    if (PHINode *PN = dyn_cast<PHINode>(ChainI->tailUserInst()))
        Rewriter.setChainedPhi(PN);
    }
  
@@ -4518,6 +4585,17 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
    // If there's no interesting work to be done, bail early.
    if (IU.empty()) return;
  
+  // If there's too much analysis to be done, bail early. We won't be able to
+  // model the problem anyway.
+  unsigned NumUsers = 0;
+  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+    if (++NumUsers > MaxIVUsers) {
+      DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L
+            << "\n");
+      return;
+    }
+  }
+
  #ifndef NDEBUG
    // All dominating loops must have preheaders, or SCEVExpander may not be able
    // to materialize an AddRecExpr whose Start is an outer AddRecExpr.