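This patch aims to improve compile time performance by increasing the inline capacity of the SmallVector<const SCEV *> containers in LoopStrengthReduce from 2 to 4 elements.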

[oota-llvm.git] / lib / Transforms / Scalar / LoopStrengthReduce.cpp
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp

index 94c229a8e2440da07c098ec18102cd6a1bd81b9e..923707737ad6ac312044759ed1b6d5c7e446eb72 100644 (file)
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -37,8 +37,8 @@
  //
  // TODO: Handle multiple loops at a time.
  //
-// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
-//       instead of a GlobalValue?
+// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
+//       of a GlobalValue?
  //
  // TODO: When truncation is free, truncate ICmp users' operands to make it a
  //       smaller encoding (on x86 at least).
@@ -55,25 +55,25 @@
  
  #define DEBUG_TYPE "loop-reduce"
  #include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Analysis/IVUsers.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
  #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
  #include "llvm/Analysis/LoopPass.h"
  #include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/Assembly/Writer.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
  #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
  #include "llvm/Support/ValueHandle.h"
  #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
  #include <algorithm>
  using namespace llvm;
  
@@ -121,9 +121,11 @@ void RegSortData::print(raw_ostream &OS) const {
    OS << "[NumUses=" << UsedByIndices.count() << ']';
  }
  
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void RegSortData::dump() const {
    print(errs()); errs() << '\n';
  }
+#endif
  
  namespace {
  
@@ -221,16 +223,24 @@ namespace {
  /// computing satisfying a use. It may include broken-out immediates and scaled
  /// registers.
  struct Formula {
-  /// AM - This is used to represent complex addressing, as well as other kinds
-  /// of interesting uses.
-  TargetLowering::AddrMode AM;
+  /// Global base address used for complex addressing.
+  GlobalValue *BaseGV;
+
+  /// Base offset for complex addressing.
+  int64_t BaseOffset;
+
+  /// Whether any complex addressing has a base register.
+  bool HasBaseReg;
+
+  /// The scale of any complex addressing.
+  int64_t Scale;
  
    /// BaseRegs - The list of "base" registers for this use. When this is
-  /// non-empty, AM.HasBaseReg should be set to true.
-  SmallVector<const SCEV *, 2> BaseRegs;
+  /// non-empty, HasBaseReg should be set to true.
+  SmallVector<const SCEV *, 4> BaseRegs;
  
    /// ScaledReg - The 'scaled' register for this use. This should be non-null
-  /// when AM.Scale is not zero.
+  /// when Scale is not zero.
    const SCEV *ScaledReg;
  
    /// UnfoldedOffset - An additional constant offset which added near the
@@ -238,7 +248,9 @@ struct Formula {
    /// live in an add immediate field rather than a register.
    int64_t UnfoldedOffset;
  
-  Formula() : ScaledReg(0), UnfoldedOffset(0) {}
+  Formula()
+      : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
+        UnfoldedOffset(0) {}
  
    void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
  
@@ -324,13 +336,13 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
      const SCEV *Sum = SE.getAddExpr(Good);
      if (!Sum->isZero())
        BaseRegs.push_back(Sum);
-    AM.HasBaseReg = true;
+    HasBaseReg = true;
    }
    if (!Bad.empty()) {
      const SCEV *Sum = SE.getAddExpr(Bad);
      if (!Sum->isZero())
        BaseRegs.push_back(Sum);
-    AM.HasBaseReg = true;
+    HasBaseReg = true;
    }
  }
  
@@ -346,7 +358,7 @@ unsigned Formula::getNumRegs() const {
  Type *Formula::getType() const {
    return !BaseRegs.empty() ? BaseRegs.front()->getType() :
           ScaledReg ? ScaledReg->getType() :
-         AM.BaseGV ? AM.BaseGV->getType() :
+         BaseGV ? BaseGV->getType() :
           0;
  }
  
@@ -379,29 +391,29 @@ bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
  
  void Formula::print(raw_ostream &OS) const {
    bool First = true;
-  if (AM.BaseGV) {
+  if (BaseGV) {
      if (!First) OS << " + "; else First = false;
-    WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
+    WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
    }
-  if (AM.BaseOffs != 0) {
+  if (BaseOffset != 0) {
      if (!First) OS << " + "; else First = false;
-    OS << AM.BaseOffs;
+    OS << BaseOffset;
    }
    for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
         E = BaseRegs.end(); I != E; ++I) {
      if (!First) OS << " + "; else First = false;
      OS << "reg(" << **I << ')';
    }
-  if (AM.HasBaseReg && BaseRegs.empty()) {
+  if (HasBaseReg && BaseRegs.empty()) {
      if (!First) OS << " + "; else First = false;
      OS << "**error: HasBaseReg**";
-  } else if (!AM.HasBaseReg && !BaseRegs.empty()) {
+  } else if (!HasBaseReg && !BaseRegs.empty()) {
      if (!First) OS << " + "; else First = false;
      OS << "**error: !HasBaseReg**";
    }
-  if (AM.Scale != 0) {
+  if (Scale != 0) {
      if (!First) OS << " + "; else First = false;
-    OS << AM.Scale << "*reg(";
+    OS << Scale << "*reg(";
      if (ScaledReg)
        OS << *ScaledReg;
      else
@@ -414,9 +426,11 @@ void Formula::print(raw_ostream &OS) const {
    }
  }
  
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void Formula::dump() const {
    print(errs()); errs() << '\n';
  }
+#endif
  
  /// isAddRecSExtable - Return true if the given addrec can be sign-extended
  /// without changing its value.
@@ -738,7 +752,8 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
    bool Changed = false;
  
    while (!DeadInsts.empty()) {
-    Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
+    Value *V = DeadInsts.pop_back_val();
+    Instruction *I = dyn_cast_or_null<Instruction>(V);
  
      if (I == 0 || !isInstructionTriviallyDead(I))
        continue;
@@ -921,8 +936,8 @@ void Cost::RateFormula(const Formula &F,
    // Tally up the non-zero immediates.
    for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
         E = Offsets.end(); I != E; ++I) {
-    int64_t Offset = (uint64_t)*I + F.AM.BaseOffs;
-    if (F.AM.BaseGV)
+    int64_t Offset = (uint64_t)*I + F.BaseOffset;
+    if (F.BaseGV)
        ImmCost += 64; // Handle symbolic values conservatively.
                       // TODO: This should probably be the pointer size.
      else if (Offset != 0)
@@ -973,9 +988,11 @@ void Cost::print(raw_ostream &OS) const {
      OS << ", plus " << SetupCost << " setup cost";
  }
  
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void Cost::dump() const {
    print(errs()); errs() << '\n';
  }
+#endif
  
  namespace {
  
@@ -1059,28 +1076,30 @@ void LSRFixup::print(raw_ostream &OS) const {
      OS << ", Offset=" << Offset;
  }
  
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void LSRFixup::dump() const {
    print(errs()); errs() << '\n';
  }
+#endif
  
  namespace {
  
  /// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
  /// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
  struct UniquifierDenseMapInfo {
-  static SmallVector<const SCEV *, 2> getEmptyKey() {
-    SmallVector<const SCEV *, 2> V;
+  static SmallVector<const SCEV *, 4> getEmptyKey() {
+    SmallVector<const SCEV *, 4>  V;
      V.push_back(reinterpret_cast<const SCEV *>(-1));
      return V;
    }
  
-  static SmallVector<const SCEV *, 2> getTombstoneKey() {
-    SmallVector<const SCEV *, 2> V;
+  static SmallVector<const SCEV *, 4> getTombstoneKey() {
+    SmallVector<const SCEV *, 4> V;
      V.push_back(reinterpret_cast<const SCEV *>(-2));
      return V;
    }
  
-  static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) {
+  static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
      unsigned Result = 0;
      for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
           E = V.end(); I != E; ++I)
@@ -1088,8 +1107,8 @@ struct UniquifierDenseMapInfo {
      return Result;
    }
  
-  static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
-                      const SmallVector<const SCEV *, 2> &RHS) {
+  static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
+                      const SmallVector<const SCEV *, 4> &RHS) {
      return LHS == RHS;
    }
  };
@@ -1100,7 +1119,7 @@ struct UniquifierDenseMapInfo {
  /// the user itself, and information about how the use may be satisfied.
  /// TODO: Represent multiple users of the same expression in common?
  class LSRUse {
-  DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
+  DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
  
  public:
    /// KindType - An enum for a kind of use, indicating what types of
@@ -1159,7 +1178,7 @@ public:
  /// HasFormula - Test whether this use as a formula which has the same
  /// registers as the given formula.
  bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
-  SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+  SmallVector<const SCEV *, 4> Key = F.BaseRegs;
    if (F.ScaledReg) Key.push_back(F.ScaledReg);
    // Unstable sort by host order ok, because this is only used for uniquifying.
    std::sort(Key.begin(), Key.end());
@@ -1169,7 +1188,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  /// InsertFormula - If the given formula has not yet been inserted, add it to
  /// the list, and return true. Return false otherwise.
  bool LSRUse::InsertFormula(const Formula &F) {
-  SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+  SmallVector<const SCEV *, 4> Key = F.BaseRegs;
    if (F.ScaledReg) Key.push_back(F.ScaledReg);
    // Unstable sort by host order ok, because this is only used for uniquifying.
    std::sort(Key.begin(), Key.end());
@@ -1251,53 +1270,51 @@ void LSRUse::print(raw_ostream &OS) const {
      OS << ", widest fixup type: " << *WidestFixupType;
  }
  
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void LSRUse::dump() const {
    print(errs()); errs() << '\n';
  }
+#endif
  
  /// isLegalUse - Test whether the use described by AM is "legal", meaning it can
  /// be completely folded into the user instruction at isel time. This includes
  /// address-mode folding and special icmp tricks.
-static bool isLegalUse(const TargetLowering::AddrMode &AM,
-                       LSRUse::KindType Kind, Type *AccessTy,
-                       const TargetLowering *TLI) {
+static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
+                       Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
+                       bool HasBaseReg, int64_t Scale) {
    switch (Kind) {
    case LSRUse::Address:
-    // If we have low-level target information, ask the target if it can
-    // completely fold this address.
-    if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
+    return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
  
      // Otherwise, just guess that reg+reg addressing is legal.
-    return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
+    //return ;
  
    case LSRUse::ICmpZero:
      // There's not even a target hook for querying whether it would be legal to
      // fold a GV into an ICmp.
-    if (AM.BaseGV)
+    if (BaseGV)
        return false;
  
      // ICmp only has two operands; don't allow more than two non-trivial parts.
-    if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
+    if (Scale != 0 && HasBaseReg && BaseOffset != 0)
        return false;
  
      // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
      // putting the scaled register in the other operand of the icmp.
-    if (AM.Scale != 0 && AM.Scale != -1)
+    if (Scale != 0 && Scale != -1)
        return false;
  
      // If we have low-level target information, ask the target if it can fold an
      // integer immediate on an icmp.
-    if (AM.BaseOffs != 0) {
-      if (!TLI)
-        return false;
+    if (BaseOffset != 0) {
        // We have one of:
-      // ICmpZero     BaseReg + Offset => ICmp BaseReg, -Offset
-      // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
+      // ICmpZero     BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
+      // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
        // Offs is the ICmp immediate.
-      int64_t Offs = AM.BaseOffs;
-      if (AM.Scale == 0)
-        Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
-      return TLI->isLegalICmpImmediate(Offs);
+      if (Scale == 0)
+        // The cast does the right thing with INT64_MIN.
+        BaseOffset = -(uint64_t)BaseOffset;
+      return TTI.isLegalICmpImmediate(BaseOffset);
      }
  
      // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
@@ -1305,92 +1322,87 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
  
    case LSRUse::Basic:
      // Only handle single-register values.
-    return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
+    return !BaseGV && Scale == 0 && BaseOffset == 0;
  
    case LSRUse::Special:
-    // Only handle -1 scales, or no scale.
-    return AM.Scale == 0 || AM.Scale == -1;
+    // Special case Basic to handle -1 scales.
+    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
    }
  
    llvm_unreachable("Invalid LSRUse Kind!");
  }
  
-static bool isLegalUse(TargetLowering::AddrMode AM,
-                       int64_t MinOffset, int64_t MaxOffset,
-                       LSRUse::KindType Kind, Type *AccessTy,
-                       const TargetLowering *TLI) {
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+                       int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+                       GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+                       int64_t Scale) {
    // Check for overflow.
-  if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
+  if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
        (MinOffset > 0))
      return false;
-  AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
-  if (isLegalUse(AM, Kind, AccessTy, TLI)) {
-    AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
-    // Check for overflow.
-    if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
-        (MaxOffset > 0))
-      return false;
-    AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
-    return isLegalUse(AM, Kind, AccessTy, TLI);
-  }
-  return false;
+  MinOffset = (uint64_t)BaseOffset + MinOffset;
+  if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
+      (MaxOffset > 0))
+    return false;
+  MaxOffset = (uint64_t)BaseOffset + MaxOffset;
+
+  return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
+                    Scale) &&
+         isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
+}
+
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+                       int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+                       const Formula &F) {
+  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
+                    F.BaseOffset, F.HasBaseReg, F.Scale);
  }
  
-static bool isAlwaysFoldable(int64_t BaseOffs,
-                             GlobalValue *BaseGV,
-                             bool HasBaseReg,
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
                               LSRUse::KindType Kind, Type *AccessTy,
-                             const TargetLowering *TLI) {
+                             GlobalValue *BaseGV, int64_t BaseOffset,
+                             bool HasBaseReg) {
    // Fast-path: zero is always foldable.
-  if (BaseOffs == 0 && !BaseGV) return true;
+  if (BaseOffset == 0 && !BaseGV) return true;
  
    // Conservatively, create an address with an immediate and a
    // base and a scale.
-  TargetLowering::AddrMode AM;
-  AM.BaseOffs = BaseOffs;
-  AM.BaseGV = BaseGV;
-  AM.HasBaseReg = HasBaseReg;
-  AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
  
    // Canonicalize a scale of 1 to a base register if the formula doesn't
    // already have a base register.
-  if (!AM.HasBaseReg && AM.Scale == 1) {
-    AM.Scale = 0;
-    AM.HasBaseReg = true;
+  if (!HasBaseReg && Scale == 1) {
+    Scale = 0;
+    HasBaseReg = true;
    }
  
-  return isLegalUse(AM, Kind, AccessTy, TLI);
+  return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
  }
  
-static bool isAlwaysFoldable(const SCEV *S,
-                             int64_t MinOffset, int64_t MaxOffset,
-                             bool HasBaseReg,
-                             LSRUse::KindType Kind, Type *AccessTy,
-                             const TargetLowering *TLI,
-                             ScalarEvolution &SE) {
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
+                             ScalarEvolution &SE, int64_t MinOffset,
+                             int64_t MaxOffset, LSRUse::KindType Kind,
+                             Type *AccessTy, const SCEV *S, bool HasBaseReg) {
    // Fast-path: zero is always foldable.
    if (S->isZero()) return true;
  
    // Conservatively, create an address with an immediate and a
    // base and a scale.
-  int64_t BaseOffs = ExtractImmediate(S, SE);
+  int64_t BaseOffset = ExtractImmediate(S, SE);
    GlobalValue *BaseGV = ExtractSymbol(S, SE);
  
    // If there's anything else involved, it's not foldable.
    if (!S->isZero()) return false;
  
    // Fast-path: zero is always foldable.
-  if (BaseOffs == 0 && !BaseGV) return true;
+  if (BaseOffset == 0 && !BaseGV) return true;
  
    // Conservatively, create an address with an immediate and a
    // base and a scale.
-  TargetLowering::AddrMode AM;
-  AM.BaseOffs = BaseOffs;
-  AM.BaseGV = BaseGV;
-  AM.HasBaseReg = HasBaseReg;
-  AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
  
-  return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
+  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+                    BaseOffset, HasBaseReg, Scale);
  }
  
  namespace {
@@ -1490,7 +1502,7 @@ class LSRInstance {
    ScalarEvolution &SE;
    DominatorTree &DT;
    LoopInfo &LI;
-  const TargetLowering *const TLI;
+  const TargetTransformInfo &TTI;
    Loop *const L;
    bool Changed;
  
@@ -1626,7 +1638,7 @@ class LSRInstance {
                           Pass *P);
  
  public:
-  LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+  LSRInstance(Loop *L, Pass *P);
  
    bool getChanged() const { return Changed; }
  
@@ -1676,12 +1688,9 @@ void LSRInstance::OptimizeShadowIV() {
      }
      if (!DestTy) continue;
  
-    if (TLI) {
-      // If target does not support DestTy natively then do not apply
-      // this transformation.
-      EVT DVT = TLI->getValueType(DestTy);
-      if (!TLI->isTypeLegal(DVT)) continue;
-    }
+    // If target does not support DestTy natively then do not apply
+    // this transformation.
+    if (!TTI.isTypeLegal(DestTy)) continue;
  
      PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
      if (!PH) continue;
@@ -2003,18 +2012,17 @@ LSRInstance::OptimizeLoopTermCond() {
              if (C->getValue().getMinSignedBits() >= 64 ||
                  C->getValue().isMinSignedValue())
                goto decline_post_inc;
-            // Without TLI, assume that any stride might be valid, and so any
-            // use might be shared.
-            if (!TLI)
-              goto decline_post_inc;
              // Check for possible scaled-address reuse.
              Type *AccessTy = getAccessType(UI->getUser());
-            TargetLowering::AddrMode AM;
-            AM.Scale = C->getSExtValue();
-            if (TLI->isLegalAddressingMode(AM, AccessTy))
+            int64_t Scale = C->getSExtValue();
+            if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+                                          /*BaseOffset=*/ 0,
+                                          /*HasBaseReg=*/ false, Scale))
                goto decline_post_inc;
-            AM.Scale = -AM.Scale;
-            if (TLI->isLegalAddressingMode(AM, AccessTy))
+            Scale = -Scale;
+            if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+                                          /*BaseOffset=*/ 0,
+                                          /*HasBaseReg=*/ false, Scale))
                goto decline_post_inc;
            }
          }
@@ -2084,13 +2092,13 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
      return false;
    // Conservatively assume HasBaseReg is true for now.
    if (NewOffset < LU.MinOffset) {
-    if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
-                          Kind, AccessTy, TLI))
+    if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+                          LU.MaxOffset - NewOffset, HasBaseReg))
        return false;
      NewMinOffset = NewOffset;
    } else if (NewOffset > LU.MaxOffset) {
-    if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
-                          Kind, AccessTy, TLI))
+    if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+                          NewOffset - LU.MinOffset, HasBaseReg))
        return false;
      NewMaxOffset = NewOffset;
    }
@@ -2119,7 +2127,8 @@ LSRInstance::getUse(const SCEV *&Expr,
    int64_t Offset = ExtractImmediate(Expr, SE);
  
    // Basic uses can't accept any offset, for example.
-  if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
+  if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+                        Offset, /*HasBaseReg=*/ true)) {
      Expr = Copy;
      Offset = 0;
    }
@@ -2187,10 +2196,10 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
          // as OrigF.
          if (F.BaseRegs == OrigF.BaseRegs &&
              F.ScaledReg == OrigF.ScaledReg &&
-            F.AM.BaseGV == OrigF.AM.BaseGV &&
-            F.AM.Scale == OrigF.AM.Scale &&
+            F.BaseGV == OrigF.BaseGV &&
+            F.Scale == OrigF.Scale &&
              F.UnfoldedOffset == OrigF.UnfoldedOffset) {
-          if (F.AM.BaseOffs == 0)
+          if (F.BaseOffset == 0)
              return &LU;
            // This is the formula where all the registers and symbols matched;
            // there aren't going to be any others. Since we declined it, we
@@ -2384,7 +2393,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
  /// TODO: Consider IVInc free if it's already used in another chains.
  static bool
  isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
-                  ScalarEvolution &SE, const TargetLowering *TLI) {
+                  ScalarEvolution &SE, const TargetTransformInfo &TTI) {
    if (StressIVChain)
      return true;
  
@@ -2642,7 +2651,7 @@ void LSRInstance::CollectChains() {
    for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
         UsersIdx < NChains; ++UsersIdx) {
      if (!isProfitableChain(IVChainVec[UsersIdx],
-                           ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
+                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
        continue;
      // Preserve the chain at UsesIdx.
      if (ChainIdx != UsersIdx)
@@ -2669,7 +2678,7 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
  
  /// Return true if the IVInc can be folded into an addressing mode.
  static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
-                             Value *Operand, const TargetLowering *TLI) {
+                             Value *Operand, const TargetTransformInfo &TTI) {
    const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
    if (!IncConst || !isAddressUse(UserInst, Operand))
      return false;
@@ -2678,8 +2687,9 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
      return false;
  
    int64_t IncOffset = IncConst->getValue()->getSExtValue();
-  if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HaseBaseReg=*/false,
-                       LSRUse::Address, getAccessType(UserInst), TLI))
+  if (!isAlwaysFoldable(TTI, LSRUse::Address,
+                        getAccessType(UserInst), /*BaseGV=*/ 0,
+                        IncOffset, /*HasBaseReg=*/ false))
      return false;
  
    return true;
@@ -2750,7 +2760,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
  
        // If an IV increment can't be folded, use it as the next IV value.
        if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
-                            TLI)) {
+                            TTI)) {
          assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
          IVSrc = IVOper;
          LeftOverExpr = 0;
@@ -2836,7 +2846,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
  
          // x == y  -->  x - y == 0
          const SCEV *N = SE.getSCEV(NV);
-        if (SE.isLoopInvariant(N, L)) {
+        if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
            // S is normalized, so normalize N before folding it into S
            // to keep the result normalized.
            N = TransformForPostIncUse(Normalize, N, CI, 0,
@@ -2892,7 +2902,7 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                         LSRUse &LU, size_t LUIdx) {
    Formula F;
    F.BaseRegs.push_back(S);
-  F.AM.HasBaseReg = true;
+  F.HasBaseReg = true;
    bool Inserted = InsertFormula(LU, LUIdx, F);
    assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
  }
@@ -3006,42 +3016,64 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  
  /// CollectSubexprs - Split S into subexpressions which can be pulled out into
  /// separate registers. If C is non-null, multiply each subexpression by C.
-static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
-                            SmallVectorImpl<const SCEV *> &Ops,
-                            const Loop *L,
-                            ScalarEvolution &SE) {
+///
+/// Return remainder expression after factoring the subexpressions captured by
+/// Ops. If Ops is complete, return NULL.
+static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
+                                   SmallVectorImpl<const SCEV *> &Ops,
+                                   const Loop *L,
+                                   ScalarEvolution &SE,
+                                   unsigned Depth = 0) {
+  // Arbitrarily cap recursion to protect compile time.
+  if (Depth >= 3)
+    return S;
+
    if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
      // Break out add operands.
      for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
-         I != E; ++I)
-      CollectSubexprs(*I, C, Ops, L, SE);
-    return;
+         I != E; ++I) {
+      const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
+      if (Remainder)
+        Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+    }
+    return NULL;
    } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
      // Split a non-zero base out of an addrec.
-    if (!AR->getStart()->isZero()) {
-      CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
-                                       AR->getStepRecurrence(SE),
-                                       AR->getLoop(),
-                                       //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
-                                       SCEV::FlagAnyWrap),
-                      C, Ops, L, SE);
-      CollectSubexprs(AR->getStart(), C, Ops, L, SE);
-      return;
+    if (AR->getStart()->isZero())
+      return S;
+
+    const SCEV *Remainder = CollectSubexprs(AR->getStart(),
+                                            C, Ops, L, SE, Depth+1);
+    // Split the non-zero AddRec unless it is part of a nested recurrence that
+    // does not pertain to this loop.
+    if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
+      Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+      Remainder = NULL;
+    }
+    if (Remainder != AR->getStart()) {
+      if (!Remainder)
+        Remainder = SE.getConstant(AR->getType(), 0);
+      return SE.getAddRecExpr(Remainder,
+                              AR->getStepRecurrence(SE),
+                              AR->getLoop(),
+                              //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+                              SCEV::FlagAnyWrap);
      }
    } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
      // Break (C * (a + b + c)) into C*a + C*b + C*c.
-    if (Mul->getNumOperands() == 2)
-      if (const SCEVConstant *Op0 =
-            dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
-        CollectSubexprs(Mul->getOperand(1),
-                        C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
-                        Ops, L, SE);
-        return;
-      }
+    if (Mul->getNumOperands() != 2)
+      return S;
+    if (const SCEVConstant *Op0 =
+        dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+      C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
+      const SCEV *Remainder =
+        CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
+      if (Remainder)
+        Ops.push_back(SE.getMulExpr(C, Remainder));
+      return NULL;
+    }
    }
-
-  // Otherwise use the value itself, optionally with a scale applied.
-  Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+  return S;
  }
  
  /// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -3056,7 +3088,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
      const SCEV *BaseReg = Base.BaseRegs[i];
  
      SmallVector<const SCEV *, 8> AddOps;
-    CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+    const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+    if (Remainder)
+      AddOps.push_back(Remainder);
  
      if (AddOps.size() == 1) continue;
  
@@ -3070,9 +3104,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  
        // Don't pull a constant into a register if the constant could be folded
        // into an immediate field.
-      if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
-                           Base.getNumRegs() > 1,
-                           LU.Kind, LU.AccessTy, TLI, SE))
+      if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+                           LU.AccessTy, *J, Base.getNumRegs() > 1))
          continue;
  
        // Collect all operands except *J.
@@ -3084,9 +3117,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
        // Don't leave just a constant behind in a register if the constant could
        // be folded into an immediate field.
        if (InnerAddOps.size() == 1 &&
-          isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
-                           Base.getNumRegs() > 1,
-                           LU.Kind, LU.AccessTy, TLI, SE))
+          isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+                           LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
          continue;
  
        const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
@@ -3096,10 +3128,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  
        // Add the remaining pieces of the add back into the new formula.
        const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
-      if (TLI && InnerSumSC &&
+      if (InnerSumSC &&
            SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
-          TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
-                                   InnerSumSC->getValue()->getZExtValue())) {
+          TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+                                  InnerSumSC->getValue()->getZExtValue())) {
          F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
                             InnerSumSC->getValue()->getZExtValue();
          F.BaseRegs.erase(F.BaseRegs.begin() + i);
@@ -3108,9 +3140,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  
        // Add J as its own register, or an unfolded immediate.
        const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
-      if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
-          TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
-                                   SC->getValue()->getZExtValue()))
+      if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+          TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+                                  SC->getValue()->getZExtValue()))
          F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
                             SC->getValue()->getZExtValue();
        else
@@ -3159,7 +3191,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
  void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
                                            Formula Base) {
    // We can't add a symbolic offset if the address already contains one.
-  if (Base.AM.BaseGV) return;
+  if (Base.BaseGV) return;
  
    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      const SCEV *G = Base.BaseRegs[i];
@@ -3167,9 +3199,8 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
      if (G->isZero() || !GV)
        continue;
      Formula F = Base;
-    F.AM.BaseGV = GV;
-    if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
-                    LU.Kind, LU.AccessTy, TLI))
+    F.BaseGV = GV;
+    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
        continue;
      F.BaseRegs[i] = G;
      (void)InsertFormula(LU, LUIdx, F);
@@ -3192,9 +3223,9 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
      for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
           E = Worklist.end(); I != E; ++I) {
        Formula F = Base;
-      F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
-      if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
-                     LU.Kind, LU.AccessTy, TLI)) {
+      F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
+      if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+                     LU.AccessTy, F)) {
          // Add the offset to the base register.
          const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
          // If it cancelled out, drop the base register, otherwise update it.
@@ -3212,9 +3243,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
      if (G->isZero() || Imm == 0)
        continue;
      Formula F = Base;
-    F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
-    if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
-                    LU.Kind, LU.AccessTy, TLI))
+    F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
+    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
        continue;
      F.BaseRegs[i] = G;
      (void)InsertFormula(LU, LUIdx, F);
@@ -3235,7 +3265,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
    // Don't do this if there is more than one offset.
    if (LU.MinOffset != LU.MaxOffset) return;
  
-  assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
+  assert(!Base.BaseGV && "ICmpZero use is not legal!");
  
    // Check each interesting stride.
    for (SmallSetVector<int64_t, 8>::const_iterator
@@ -3243,10 +3273,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
      int64_t Factor = *I;
  
      // Check that the multiplication doesn't overflow.
-    if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
+    if (Base.BaseOffset == INT64_MIN && Factor == -1)
        continue;
-    int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
-    if (NewBaseOffs / Factor != Base.AM.BaseOffs)
+    int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
+    if (NewBaseOffset / Factor != Base.BaseOffset)
        continue;
  
      // Check that multiplying with the use offset doesn't overflow.
@@ -3258,14 +3288,14 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
        continue;
  
      Formula F = Base;
-    F.AM.BaseOffs = NewBaseOffs;
+    F.BaseOffset = NewBaseOffset;
  
      // Check that this scale is legal.
-    if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
+    if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
        continue;
  
      // Compensate for the use having MinOffset built into it.
-    F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset;
+    F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
  
      const SCEV *FactorS = SE.getConstant(IntTy, Factor);
  
@@ -3306,23 +3336,23 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
    if (!IntTy) return;
  
    // If this Formula already has a scaled register, we can't add another one.
-  if (Base.AM.Scale != 0) return;
+  if (Base.Scale != 0) return;
  
    // Check each interesting stride.
    for (SmallSetVector<int64_t, 8>::const_iterator
         I = Factors.begin(), E = Factors.end(); I != E; ++I) {
      int64_t Factor = *I;
  
-    Base.AM.Scale = Factor;
-    Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
+    Base.Scale = Factor;
+    Base.HasBaseReg = Base.BaseRegs.size() > 1;
      // Check whether this scale is going to be legal.
-    if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
-                    LU.Kind, LU.AccessTy, TLI)) {
+    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+                    Base)) {
        // As a special-case, handle special out-of-loop Basic users specially.
        // TODO: Reconsider this special case.
        if (LU.Kind == LSRUse::Basic &&
-          isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
-                     LSRUse::Special, LU.AccessTy, TLI) &&
+          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
+                     LU.AccessTy, Base) &&
            LU.AllFixupsOutsideLoop)
          LU.Kind = LSRUse::Special;
        else
@@ -3331,7 +3361,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
      // For an ICmpZero, negating a solitary base register won't lead to
      // new solutions.
      if (LU.Kind == LSRUse::ICmpZero &&
-        !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV)
+        !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
        continue;
      // For each addrec base reg, apply the scale, if possible.
      for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
@@ -3355,11 +3385,8 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  
  /// GenerateTruncates - Generate reuse formulae from different IV types.
  void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
-  // This requires TargetLowering to tell us which truncates are free.
-  if (!TLI) return;
-
    // Don't bother truncating symbolic values.
-  if (Base.AM.BaseGV) return;
+  if (Base.BaseGV) return;
  
    // Determine the integer type for the base formula.
    Type *DstTy = Base.getType();
@@ -3369,7 +3396,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
    for (SmallSetVector<Type *, 4>::const_iterator
         I = Types.begin(), E = Types.end(); I != E; ++I) {
      Type *SrcTy = *I;
-    if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+    if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
        Formula F = Base;
  
        if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
@@ -3411,9 +3438,11 @@ void WorkItem::print(raw_ostream &OS) const {
       << " , add offset " << Imm;
  }
  
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void WorkItem::dump() const {
    print(errs()); errs() << '\n';
  }
+#endif
  
  /// GenerateCrossUseConstantOffsets - Look for registers which are a constant
  /// distance apart and try to form reuse opportunities between them.
@@ -3514,16 +3543,15 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
        const Formula &F = LU.Formulae[L];
        // Use the immediate in the scaled register.
        if (F.ScaledReg == OrigReg) {
-        int64_t Offs = (uint64_t)F.AM.BaseOffs +
-                       Imm * (uint64_t)F.AM.Scale;
+        int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
          // Don't create 50 + reg(-50).
          if (F.referencesReg(SE.getSCEV(
-                   ConstantInt::get(IntTy, -(uint64_t)Offs))))
+                   ConstantInt::get(IntTy, -(uint64_t)Offset))))
            continue;
          Formula NewF = F;
-        NewF.AM.BaseOffs = Offs;
-        if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
-                        LU.Kind, LU.AccessTy, TLI))
+        NewF.BaseOffset = Offset;
+        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+                        NewF))
            continue;
          NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
  
@@ -3532,9 +3560,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
          // immediate itself, then the formula isn't worthwhile.
          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
            if (C->getValue()->isNegative() !=
-                (NewF.AM.BaseOffs < 0) &&
-              (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
-                .ule(abs64(NewF.AM.BaseOffs)))
+                (NewF.BaseOffset < 0) &&
+              (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
+                .ule(abs64(NewF.BaseOffset)))
              continue;
  
          // OK, looks good.
@@ -3546,11 +3574,10 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
            if (BaseReg != OrigReg)
              continue;
            Formula NewF = F;
-          NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
-          if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
-                          LU.Kind, LU.AccessTy, TLI)) {
-            if (!TLI ||
-                !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+          NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
+          if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
+                          LU.Kind, LU.AccessTy, NewF)) {
+            if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
                continue;
              NewF = F;
              NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
@@ -3564,11 +3591,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
                 J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
                 J != JE; ++J)
              if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
-              if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt(
-                   abs64(NewF.AM.BaseOffs)) &&
+              if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
+                   abs64(NewF.BaseOffset)) &&
                    (C->getValue()->getValue() +
-                   NewF.AM.BaseOffs).countTrailingZeros() >=
-                   CountTrailingZeros_64(NewF.AM.BaseOffs))
+                   NewF.BaseOffset).countTrailingZeros() >=
+                   CountTrailingZeros_64(NewF.BaseOffset))
                  goto skip_formula;
  
            // Ok, looks good.
@@ -3629,7 +3656,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  
    // Collect the best formula for each unique set of shared registers. This
    // is reset for each use.
-  typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
+  typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
      BestFormulaeTy;
    BestFormulaeTy BestFormulae;
  
@@ -3664,7 +3691,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
                dbgs() << "\n");
        }
        else {
-        SmallVector<const SCEV *, 2> Key;
+        SmallVector<const SCEV *, 4> Key;
          for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
                 JE = F.BaseRegs.end(); J != JE; ++J) {
            const SCEV *Reg = *J;
@@ -3766,7 +3793,7 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
               I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
            if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
              Formula NewF = F;
-            NewF.AM.BaseOffs += C->getValue()->getSExtValue();
+            NewF.BaseOffset += C->getValue()->getSExtValue();
              NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                  (I - F.BaseRegs.begin()));
              if (LU.HasFormulaWithSameRegs(NewF)) {
@@ -3779,9 +3806,9 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
              }
            } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
              if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
-              if (!F.AM.BaseGV) {
+              if (!F.BaseGV) {
                  Formula NewF = F;
-                NewF.AM.BaseGV = GV;
+                NewF.BaseGV = GV;
                  NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                      (I - F.BaseRegs.begin()));
                  if (LU.HasFormulaWithSameRegs(NewF)) {
@@ -3824,9 +3851,9 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
        for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
             E = LU.Formulae.end(); I != E; ++I) {
          const Formula &F = *I;
-        if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) {
+        if (F.BaseOffset != 0 && F.Scale == 0) {
            if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) {
-            if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs,
+            if (reconcileNewOffset(*LUThatHas, F.BaseOffset,
                                     /*HasBaseReg=*/false,
                                     LU.Kind, LU.AccessTy)) {
                DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs());
@@ -3840,7 +3867,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
                  LSRFixup &Fixup = *I;
                  if (Fixup.LUIdx == LUIdx) {
                    Fixup.LUIdx = LUThatHas - &Uses.front();
-                  Fixup.Offset += F.AM.BaseOffs;
+                  Fixup.Offset += F.BaseOffset;
                    // Add the new offset to LUThatHas' offset list.
                    if (LUThatHas->Offsets.back() != Fixup.Offset) {
                      LUThatHas->Offsets.push_back(Fixup.Offset);
@@ -3860,9 +3887,8 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
                bool Any = false;
                for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
                  Formula &F = LUThatHas->Formulae[i];
-                if (!isLegalUse(F.AM,
-                                LUThatHas->MinOffset, LUThatHas->MaxOffset,
-                                LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
+                if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
+                                LUThatHas->Kind, LUThatHas->AccessTy, F)) {
                    DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                          dbgs() << '\n');
                    LUThatHas->DeleteFormula(F);
@@ -4268,16 +4294,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
      Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
    }
  
-  // Flush the operand list to suppress SCEVExpander hoisting.
-  if (!Ops.empty()) {
-    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
-    Ops.clear();
-    Ops.push_back(SE.getUnknown(FullV));
-  }
-
    // Expand the ScaledReg portion.
    Value *ICmpScaledV = 0;
-  if (F.AM.Scale != 0) {
+  if (F.Scale != 0) {
      const SCEV *ScaledS = F.ScaledReg;
  
      // If we're expanding for a post-inc user, make the post-inc adjustment.
@@ -4290,36 +4309,47 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
        // An interesting way of "folding" with an icmp is to use a negated
        // scale, which we'll implement by inserting it into the other operand
        // of the icmp.
-      assert(F.AM.Scale == -1 &&
+      assert(F.Scale == -1 &&
               "The only scale supported by ICmpZero uses is -1!");
        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
      } else {
        // Otherwise just expand the scaled register and an explicit scale,
        // which is expected to be matched as part of the address.
+
+      // Flush the operand list to suppress SCEVExpander hoisting address modes.
+      if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+        Ops.clear();
+        Ops.push_back(SE.getUnknown(FullV));
+      }
        ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
        ScaledS = SE.getMulExpr(ScaledS,
-                              SE.getConstant(ScaledS->getType(), F.AM.Scale));
+                              SE.getConstant(ScaledS->getType(), F.Scale));
        Ops.push_back(ScaledS);
+    }
+  }
  
-      // Flush the operand list to suppress SCEVExpander hoisting.
+  // Expand the GV portion.
+  if (F.BaseGV) {
+    // Flush the operand list to suppress SCEVExpander hoisting.
+    if (!Ops.empty()) {
        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
        Ops.clear();
        Ops.push_back(SE.getUnknown(FullV));
      }
+    Ops.push_back(SE.getUnknown(F.BaseGV));
    }
  
-  // Expand the GV portion.
-  if (F.AM.BaseGV) {
-    Ops.push_back(SE.getUnknown(F.AM.BaseGV));
-
-    // Flush the operand list to suppress SCEVExpander hoisting.
+  // Flush the operand list to suppress SCEVExpander hoisting of both folded and
+  // unfolded offsets. LSR assumes they both live next to their uses.
+  if (!Ops.empty()) {
      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
      Ops.clear();
      Ops.push_back(SE.getUnknown(FullV));
    }
  
    // Expand the immediate portion.
-  int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
+  int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
    if (Offset != 0) {
      if (LU.Kind == LSRUse::ICmpZero) {
        // The other interesting way of "folding" with an ICmpZero is to use a
@@ -4360,9 +4390,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
    if (LU.Kind == LSRUse::ICmpZero) {
      ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
      DeadInsts.push_back(CI->getOperand(1));
-    assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
+    assert(!F.BaseGV && "ICmp does not support folding a global value and "
                             "a scale at the same time!");
-    if (F.AM.Scale == -1) {
+    if (F.Scale == -1) {
        if (ICmpScaledV->getType() != OpTy) {
          Instruction *Cast =
            CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
@@ -4372,7 +4402,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
        }
        CI->setOperand(1, ICmpScaledV);
      } else {
-      assert(F.AM.Scale == 0 &&
+      assert(F.Scale == 0 &&
               "ICmp does not support folding a global value and "
               "a scale at the same time!");
        Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
@@ -4423,17 +4453,21 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
              SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);
              NewBB = NewBBs[0];
            }
-
-          // If PN is outside of the loop and BB is in the loop, we want to
-          // move the block to be immediately before the PHI block, not
-          // immediately after BB.
-          if (L->contains(BB) && !L->contains(PN))
-            NewBB->moveBefore(PN->getParent());
-
-          // Splitting the edge can reduce the number of PHI entries we have.
-          e = PN->getNumIncomingValues();
-          BB = NewBB;
-          i = PN->getBasicBlockIndex(BB);
+          // If NewBB==NULL, then SplitCriticalEdge refused to split because all
+          // phi predecessors are identical. The simple thing to do is skip
+          // splitting in this case rather than complicate the API.
+          if (NewBB) {
+            // If PN is outside of the loop and BB is in the loop, we want to
+            // move the block to be immediately before the PHI block, not
+            // immediately after BB.
+            if (L->contains(BB) && !L->contains(PN))
+              NewBB->moveBefore(PN->getParent());
+
+            // Splitting the edge can reduce the number of PHI entries we have.
+            e = PN->getNumIncomingValues();
+            BB = NewBB;
+            i = PN->getBasicBlockIndex(BB);
+          }
          }
        }
  
@@ -4543,13 +4577,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
    Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
  }
  
-LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
-  : IU(P->getAnalysis<IVUsers>()),
-    SE(P->getAnalysis<ScalarEvolution>()),
-    DT(P->getAnalysis<DominatorTree>()),
-    LI(P->getAnalysis<LoopInfo>()),
-    TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
-
+LSRInstance::LSRInstance(Loop *L, Pass *P)
+    : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
+      DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()),
+      TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
+      IVIncInsertPos(0) {
    // If LoopSimplify form is not available, stay out of trouble.
    if (!L->isLoopSimplifyForm())
      return;
@@ -4632,14 +4664,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
  
  #ifndef NDEBUG
    // Formulae should be legal.
-  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
-       E = Uses.end(); I != E; ++I) {
-     const LSRUse &LU = *I;
-     for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
-          JE = LU.Formulae.end(); J != JE; ++J)
-        assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
-                          LU.Kind, LU.AccessTy, TLI) &&
-               "Illegal formula generated!");
+  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
+       I != E; ++I) {
+    const LSRUse &LU = *I;
+    for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+                                                  JE = LU.Formulae.end();
+         J != JE; ++J)
+      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+                        *J) && "Illegal formula generated!");
    };
  #endif
  
@@ -4702,20 +4734,18 @@ void LSRInstance::print(raw_ostream &OS) const {
    print_uses(OS);
  }
  
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void LSRInstance::dump() const {
    print(errs()); errs() << '\n';
  }
+#endif
  
  namespace {
  
  class LoopStrengthReduce : public LoopPass {
-  /// TLI - Keep a pointer of a TargetLowering to consult for determining
-  /// transformation profitability.
-  const TargetLowering *const TLI;
-
  public:
    static char ID; // Pass ID, replacement for typeid
-  explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+  LoopStrengthReduce();
  
  private:
    bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -4727,6 +4757,7 @@ private:
  char LoopStrengthReduce::ID = 0;
  INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                  "Loop Strength Reduction", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
  INITIALIZE_PASS_DEPENDENCY(DominatorTree)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
  INITIALIZE_PASS_DEPENDENCY(IVUsers)
@@ -4736,14 +4767,13 @@ INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                  "Loop Strength Reduction", false, false)
  
  
-Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
-  return new LoopStrengthReduce(TLI);
+Pass *llvm::createLoopStrengthReducePass() {
+  return new LoopStrengthReduce();
  }
  
-LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
-  : LoopPass(ID), TLI(tli) {
-    initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
-  }
+LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
+  initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+}
  
  void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
    // We split critical edges, so we change the CFG.  However, we do update
@@ -4762,24 +4792,27 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequiredID(LoopSimplifyID);
    AU.addRequired<IVUsers>();
    AU.addPreserved<IVUsers>();
+  AU.addRequired<TargetTransformInfo>();
  }
  
  bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
    bool Changed = false;
  
    // Run the main LSR transformation.
-  Changed |= LSRInstance(TLI, L, this).getChanged();
+  Changed |= LSRInstance(L, this).getChanged();
  
    // Remove any extra phis created by processing inner loops.
    Changed |= DeleteDeadPHIs(L->getHeader());
-  if (EnablePhiElim) {
+  if (EnablePhiElim && L->isLoopSimplifyForm()) {
      SmallVector<WeakVH, 16> DeadInsts;
      SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
  #ifndef NDEBUG
      Rewriter.setDebugType(DEBUG_TYPE);
  #endif
-    unsigned numFolded = Rewriter.
-      replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
+    unsigned numFolded =
+        Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(),
+                                     DeadInsts,
+                                     &getAnalysis<TargetTransformInfo>());
      if (numFolded) {
        Changed = true;
        DeleteTriviallyDeadInstructions(DeadInsts);