SROA: Don't crash on a select with two identical operands.

[oota-llvm.git] / lib / Transforms / Scalar / SROA.cpp
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp

index 11dd04951d4c4a9dc7dc1ab532f938c899cc7af9..c383e2f8ed8596e3837d9bd7eabdca17263b3cdc 100644 (file)
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -33,24 +33,22 @@
  #include "llvm/Analysis/Loads.h"
  #include "llvm/Analysis/PtrUseVisitor.h"
  #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Constants.h"
  #include "llvm/DIBuilder.h"
-#include "llvm/DataLayout.h"
  #include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
  #include "llvm/InstVisitor.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
  #include "llvm/Pass.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
  #include "llvm/Support/MathExtras.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Transforms/Utils/Local.h"
@@ -59,11 +57,15 @@
  using namespace llvm;
  
  STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
-STATISTIC(NumNewAllocas,      "Number of new, smaller allocas introduced");
-STATISTIC(NumPromoted,        "Number of allocas promoted to SSA values");
+STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
+STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions");
+STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found");
+STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses");
+STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
+STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
  STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
-STATISTIC(NumDeleted,         "Number of instructions deleted");
-STATISTIC(NumVectorized,      "Number of vectorized aggregates");
+STATISTIC(NumDeleted, "Number of instructions deleted");
+STATISTIC(NumVectorized, "Number of vectorized aggregates");
  
  /// Hidden option to force the pass to not use DomTree and mem2reg, instead
  /// forming SSA values through the SSAUpdater infrastructure.
@@ -71,112 +73,167 @@ static cl::opt<bool>
  ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
  
  namespace {
-/// \brief Alloca partitioning representation.
-///
-/// This class represents a partitioning of an alloca into slices, and
-/// information about the nature of uses of each slice of the alloca. The goal
-/// is that this information is sufficient to decide if and how to split the
-/// alloca apart and replace slices with scalars. It is also intended that this
-/// structure can capture the relevant information needed both to decide about
-/// and to enact these transformations.
-class AllocaPartitioning {
+/// \brief A custom IRBuilder inserter which prefixes all names if they are
+/// preserved.
+template <bool preserveNames = true>
+class IRBuilderPrefixedInserter :
+    public IRBuilderDefaultInserter<preserveNames> {
+  std::string Prefix;
+
  public:
-  /// \brief A common base class for representing a half-open byte range.
-  struct ByteRange {
-    /// \brief The beginning offset of the range.
-    uint64_t BeginOffset;
+  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
  
-    /// \brief The ending offset, not included in the range.
-    uint64_t EndOffset;
+protected:
+  void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+                    BasicBlock::iterator InsertPt) const {
+    IRBuilderDefaultInserter<preserveNames>::InsertHelper(
+        I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt);
+  }
+};
  
-    ByteRange() : BeginOffset(), EndOffset() {}
-    ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
-        : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+// Specialization for not preserving the name is trivial.
+template <>
+class IRBuilderPrefixedInserter<false> :
+    public IRBuilderDefaultInserter<false> {
+public:
+  void SetNamePrefix(const Twine &P) {}
+};
  
-    /// \brief Support for ordering ranges.
-    ///
-    /// This provides an ordering over ranges such that start offsets are
-    /// always increasing, and within equal start offsets, the end offsets are
-    /// decreasing. Thus the spanning range comes first in a cluster with the
-    /// same start position.
-    bool operator<(const ByteRange &RHS) const {
-      if (BeginOffset < RHS.BeginOffset) return true;
-      if (BeginOffset > RHS.BeginOffset) return false;
-      if (EndOffset > RHS.EndOffset) return true;
-      return false;
-    }
+/// \brief Provide a typedef for IRBuilder that drops names in release builds.
+#ifndef NDEBUG
+typedef llvm::IRBuilder<true, ConstantFolder,
+                        IRBuilderPrefixedInserter<true> > IRBuilderTy;
+#else
+typedef llvm::IRBuilder<false, ConstantFolder,
+                        IRBuilderPrefixedInserter<false> > IRBuilderTy;
+#endif
+}
  
-    /// \brief Support comparison with a single offset to allow binary searches.
-    friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
-      return LHS.BeginOffset < RHSOffset;
-    }
+namespace {
+/// \brief A common base class for representing a half-open byte range.
+struct ByteRange {
+  /// \brief The beginning offset of the range.
+  uint64_t BeginOffset;
  
-    friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
-                                                const ByteRange &RHS) {
-      return LHSOffset < RHS.BeginOffset;
-    }
+  /// \brief The ending offset, not included in the range.
+  uint64_t EndOffset;
  
-    bool operator==(const ByteRange &RHS) const {
-      return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
-    }
-    bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
-  };
+  ByteRange() : BeginOffset(), EndOffset() {}
+  ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
+      : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
  
-  /// \brief A partition of an alloca.
+  /// \brief Support for ordering ranges.
    ///
-  /// This structure represents a contiguous partition of the alloca. These are
-  /// formed by examining the uses of the alloca. During formation, they may
-  /// overlap but once an AllocaPartitioning is built, the Partitions within it
-  /// are all disjoint.
-  struct Partition : public ByteRange {
-    /// \brief Whether this partition is splittable into smaller partitions.
-    ///
-    /// We flag partitions as splittable when they are formed entirely due to
-    /// accesses by trivially splittable operations such as memset and memcpy.
-    bool IsSplittable;
+  /// This provides an ordering over ranges such that start offsets are
+  /// always increasing, and within equal start offsets, the end offsets are
+  /// decreasing. Thus the spanning range comes first in a cluster with the
+  /// same start position.
+  bool operator<(const ByteRange &RHS) const {
+    if (BeginOffset < RHS.BeginOffset) return true;
+    if (BeginOffset > RHS.BeginOffset) return false;
+    if (EndOffset > RHS.EndOffset) return true;
+    return false;
+  }
  
-    /// \brief Test whether a partition has been marked as dead.
-    bool isDead() const {
-      if (BeginOffset == UINT64_MAX) {
-        assert(EndOffset == UINT64_MAX);
-        return true;
-      }
-      return false;
-    }
+  /// \brief Support comparison with a single offset to allow binary searches.
+  friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
+    return LHS.BeginOffset < RHSOffset;
+  }
+
+  friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
+                                              const ByteRange &RHS) {
+    return LHSOffset < RHS.BeginOffset;
+  }
+
+  bool operator==(const ByteRange &RHS) const {
+    return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+  }
+  bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+};
  
-    /// \brief Kill a partition.
-    /// This is accomplished by setting both its beginning and end offset to
-    /// the maximum possible value.
-    void kill() {
-      assert(!isDead() && "He's Dead, Jim!");
-      BeginOffset = EndOffset = UINT64_MAX;
+/// \brief A partition of an alloca.
+///
+/// This structure represents a contiguous partition of the alloca. These are
+/// formed by examining the uses of the alloca. During formation, they may
+/// overlap but once an AllocaPartitioning is built, the Partitions within it
+/// are all disjoint.
+struct Partition : public ByteRange {
+  /// \brief Whether this partition is splittable into smaller partitions.
+  ///
+  /// We flag partitions as splittable when they are formed entirely due to
+  /// accesses by trivially splittable operations such as memset and memcpy.
+  bool IsSplittable;
+
+  /// \brief Test whether a partition has been marked as dead.
+  bool isDead() const {
+    if (BeginOffset == UINT64_MAX) {
+      assert(EndOffset == UINT64_MAX);
+      return true;
      }
+    return false;
+  }
  
-    Partition() : ByteRange(), IsSplittable() {}
-    Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
-        : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
-  };
+  /// \brief Kill a partition.
+  /// This is accomplished by setting both its beginning and end offset to
+  /// the maximum possible value.
+  void kill() {
+    assert(!isDead() && "He's Dead, Jim!");
+    BeginOffset = EndOffset = UINT64_MAX;
+  }
+
+  Partition() : ByteRange(), IsSplittable() {}
+  Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
+      : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
+};
  
-  /// \brief A particular use of a partition of the alloca.
+/// \brief A particular use of a partition of the alloca.
+///
+/// This structure is used to associate uses of a partition with it. They
+/// mark the range of bytes which are referenced by a particular instruction,
+/// and includes a handle to the user itself and the pointer value in use.
+/// The bounds of these uses are determined by intersecting the bounds of the
+/// memory use itself with a particular partition. As a consequence there is
+/// intentionally overlap between various uses of the same partition.
+class PartitionUse : public ByteRange {
+  /// \brief Combined storage for both the Use* and split state.
+  PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;
+
+public:
+  PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
+  PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
+               bool IsSplit)
+      : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}
+
+  /// \brief The use in question. Provides access to both user and used value.
    ///
-  /// This structure is used to associate uses of a partition with it. They
-  /// mark the range of bytes which are referenced by a particular instruction,
-  /// and includes a handle to the user itself and the pointer value in use.
-  /// The bounds of these uses are determined by intersecting the bounds of the
-  /// memory use itself with a particular partition. As a consequence there is
-  /// intentionally overlap between various uses of the same partition.
-  struct PartitionUse : public ByteRange {
-    /// \brief The use in question. Provides access to both user and used value.
-    ///
-    /// Note that this may be null if the partition use is *dead*, that is, it
-    /// should be ignored.
-    Use *U;
+  /// Note that this may be null if the partition use is *dead*, that is, it
+  /// should be ignored.
+  Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }
  
-    PartitionUse() : ByteRange(), U() {}
-    PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
-        : ByteRange(BeginOffset, EndOffset), U(U) {}
-  };
+  /// \brief Set the use for this partition use range.
+  void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }
+
+  /// \brief Whether this use is split across multiple partitions.
+  bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
+};
+}
+
+namespace llvm {
+template <> struct isPodLike<Partition> : llvm::true_type {};
+template <> struct isPodLike<PartitionUse> : llvm::true_type {};
+}
  
+namespace {
+/// \brief Alloca partitioning representation.
+///
+/// This class represents a partitioning of an alloca into slices, and
+/// information about the nature of uses of each slice of the alloca. The goal
+/// is that this information is sufficient to decide if and how to split the
+/// alloca apart and replace slices with scalars. It is also intended that this
+/// structure can capture the relevant information needed both to decide about
+/// and to enact these transformations.
+class AllocaPartitioning {
+public:
    /// \brief Construct a partitioning of a particular alloca.
    ///
    /// Construction does most of the work for partitioning the alloca. This
@@ -411,9 +468,9 @@ static Value *foldSelectInst(SelectInst &SI) {
    // early on.
    if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
      return SI.getOperand(1+CI->isZero());
-  if (SI.getOperand(1) == SI.getOperand(2)) {
+  if (SI.getOperand(1) == SI.getOperand(2))
      return SI.getOperand(1);
-  }
+
    return 0;
  }
  
@@ -458,10 +515,10 @@ private:
  
      // Clamp the end offset to the end of the allocation. Note that this is
      // formulated to handle even the case where "BeginOffset + Size" overflows.
-    // NOTE! This may appear superficially to be something we could ignore
-    // entirely, but that is not so! There may be PHI-node uses where some
-    // instructions are dead but not others. We can't completely ignore the
-    // PHI node, and so have to record at least the information here.
+    // This may appear superficially to be something we could ignore entirely,
+    // but that is not so! There may be widened loads or PHI-node uses where
+    // some instructions are dead but not others. We can't completely ignore
+    // them, and so have to record at least the information here.
      assert(AllocSize >= BeginOffset); // Established above.
      if (Size > AllocSize - BeginOffset) {
        DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
@@ -476,33 +533,17 @@ private:
    }
  
    void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
-                         bool IsVolatile) {
-    uint64_t Size = DL.getTypeStoreSize(Ty);
-
-    // If this memory access can be shown to *statically* extend outside the
-    // bounds of of the allocation, it's behavior is undefined, so simply
-    // ignore it. Note that this is more strict than the generic clamping
-    // behavior of insertUse. We also try to handle cases which might run the
-    // risk of overflow.
-    // FIXME: We should instead consider the pointer to have escaped if this
-    // function is being instrumented for addressing bugs or race conditions.
-    if (Offset.isNegative() || Size > AllocSize ||
-        Offset.ugt(AllocSize - Size)) {
-      DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
-                   << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
-                   << " which extends past the end of the " << AllocSize
-                   << " byte alloca:\n"
-                   << "    alloca: " << P.AI << "\n"
-                   << "       use: " << I << "\n");
-      return;
-    }
-
+                         uint64_t Size, bool IsVolatile) {
      // We allow splitting of loads and stores where the type is an integer type
-    // and which cover the entire alloca. Such integer loads and stores
-    // often require decomposition into fine grained loads and stores.
-    bool IsSplittable = false;
-    if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
-      IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+    // and cover the entire alloca. This prevents us from splitting
+    // over-eagerly.
+    // FIXME: In the great blue eventually, we should eagerly split all integer
+    // loads and stores, and then have a separate step that merges adjacent
+    // alloca partitions into a single partition suitable for integer widening.
+    // Or we should skip the merge step and rely on GVN and other passes to
+    // merge adjacent loads and stores that survive mem2reg.
+    bool IsSplittable =
+        Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
  
      insertUse(I, Offset, Size, IsSplittable);
    }
@@ -514,7 +555,8 @@ private:
      if (!IsOffsetKnown)
        return PI.setAborted(&LI);
  
-    return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
+    uint64_t Size = DL.getTypeStoreSize(LI.getType());
+    return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
    }
  
    void visitStoreInst(StoreInst &SI) {
@@ -524,9 +566,28 @@ private:
      if (!IsOffsetKnown)
        return PI.setAborted(&SI);
  
+    uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
+
+    // If this memory access can be shown to *statically* extend outside the
+    // bounds of the allocation, its behavior is undefined, so simply
+    // ignore it. Note that this is more strict than the generic clamping
+    // behavior of insertUse. We also try to handle cases which might run the
+    // risk of overflow.
+    // FIXME: We should instead consider the pointer to have escaped if this
+    // function is being instrumented for addressing bugs or race conditions.
+    if (Offset.isNegative() || Size > AllocSize ||
+        Offset.ugt(AllocSize - Size)) {
+      DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
+                   << " which extends past the end of the " << AllocSize
+                   << " byte alloca:\n"
+                   << "    alloca: " << P.AI << "\n"
+                   << "       use: " << SI << "\n");
+      return;
+    }
+
      assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
             "All simple FCA stores should have been pre-split");
-    handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
+    handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
    }
  
  
@@ -621,7 +682,7 @@ private:
    }
  
    // Disable SRoA for any intrinsics except for lifetime invariants.
-  // FIXME: What about debug instrinsics? This matches old behavior, but
+  // FIXME: What about debug intrinsics? This matches old behavior, but
    // doesn't make sense.
    void visitIntrinsicInst(IntrinsicInst &II) {
      if (!IsOffsetKnown)
@@ -797,13 +858,14 @@ private:
        EndOffset = AllocSize;
  
      // NB: This only works if we have zero overlapping partitions.
-    iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset);
-    if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset)
-      B = llvm::prior(B);
-    for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset;
-         ++I) {
+    iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset);
+    if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset)
+      I = llvm::prior(I);
+    iterator E = P.end();
+    bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset;
+    for (; I != E && I->BeginOffset < EndOffset; ++I) {
        PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
-                         std::min(I->EndOffset, EndOffset), U);
+                         std::min(I->EndOffset, EndOffset), U, IsSplit);
        P.use_push_back(I, NewPU);
        if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
          P.PHIOrSelectOpMap[U]
@@ -811,20 +873,6 @@ private:
      }
    }
  
-  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) {
-    uint64_t Size = DL.getTypeStoreSize(Ty);
-
-    // If this memory access can be shown to *statically* extend outside the
-    // bounds of of the allocation, it's behavior is undefined, so simply
-    // ignore it. Note that this is more strict than the generic clamping
-    // behavior of insertUse.
-    if (Offset.isNegative() || Size > AllocSize ||
-        Offset.ugt(AllocSize - Size))
-      return markAsDead(I);
-
-    insertUse(I, Offset, Size);
-  }
-
    void visitBitCastInst(BitCastInst &BC) {
      if (BC.use_empty())
        return markAsDead(BC);
@@ -841,12 +889,23 @@ private:
  
    void visitLoadInst(LoadInst &LI) {
      assert(IsOffsetKnown);
-    handleLoadOrStore(LI.getType(), LI, Offset);
+    uint64_t Size = DL.getTypeStoreSize(LI.getType());
+    insertUse(LI, Offset, Size);
    }
  
    void visitStoreInst(StoreInst &SI) {
      assert(IsOffsetKnown);
-    handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset);
+    uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
+
+    // If this memory access can be shown to *statically* extend outside the
+    // bounds of the allocation, its behavior is undefined, so simply
+    // ignore it. Note that this is more strict than the generic clamping
+    // behavior of insertUse.
+    if (Offset.isNegative() || Size > AllocSize ||
+        Offset.ugt(AllocSize - Size))
+      return markAsDead(SI);
+
+    insertUse(SI, Offset, Size);
    }
  
    void visitMemSetInst(MemSetInst &II) {
@@ -870,7 +929,7 @@ private:
      uint64_t Size = Length ? Length->getLimitedValue()
                             : AllocSize - Offset.getLimitedValue();
  
-    MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
+    const MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
      if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd &&
          Offsets.DestBegin == Offsets.SourceBegin)
        return markAsDead(II); // Skip identity transfers without side-effects.
@@ -1079,6 +1138,10 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
      splitAndMergePartitions();
    }
  
+  // Record how many partitions we end up with.
+  NumAllocaPartitions += Partitions.size();
+  MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(), MaxPartitionsPerAlloca);
+
    // Now build up the user lists for each of these disjoint partitions by
    // re-walking the recursive users of the alloca.
    Uses.resize(Partitions.size());
@@ -1086,26 +1149,34 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
    PtrI = UB.visitPtr(AI);
    assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!");
    assert(!PtrI.isAborted() && "Early aborted the visit of the pointer.");
+
+  unsigned NumUses = 0;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+  for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx)
+    NumUses += Uses[Idx].size();
+#endif
+  NumAllocaPartitionUses += NumUses;
+  MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses, MaxPartitionUsesPerAlloca);
  }
  
  Type *AllocaPartitioning::getCommonType(iterator I) const {
    Type *Ty = 0;
    for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
-    if (!UI->U)
+    Use *U = UI->getUse();
+    if (!U)
        continue; // Skip dead uses.
-    if (isa<IntrinsicInst>(*UI->U->getUser()))
+    if (isa<IntrinsicInst>(*U->getUser()))
        continue;
      if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
        continue;
  
      Type *UserTy = 0;
-    if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
        UserTy = LI->getType();
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) {
+    else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
        UserTy = SI->getValueOperand()->getType();
-    } else {
+    else
        return 0; // Bail if we have weird uses.
-    }
  
      if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
        // If the type is larger than the partition, skip it. We only encounter
@@ -1141,13 +1212,13 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
  
  void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
                                      StringRef Indent) const {
-  for (const_use_iterator UI = use_begin(I), UE = use_end(I);
-       UI != UE; ++UI) {
-    if (!UI->U)
+  for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
+    if (!UI->getUse())
        continue; // Skip dead uses.
      OS << Indent << "  [" << UI->BeginOffset << "," << UI->EndOffset << ") "
-       << "used by: " << *UI->U->getUser() << "\n";
-    if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) {
+       << "used by: " << *UI->getUse()->getUser() << "\n";
+    if (MemTransferInst *II =
+            dyn_cast<MemTransferInst>(UI->getUse()->getUser())) {
        const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
        bool IsDest;
        if (!MTO.IsSplittable)
@@ -1170,8 +1241,7 @@ void AllocaPartitioning::print(raw_ostream &OS) const {
    }
  
    OS << "Partitioning of alloca: " << AI << "\n";
-  unsigned Num = 0;
-  for (const_iterator I = begin(), E = end(); I != E; ++I, ++Num) {
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
      print(OS, I);
      printUsers(OS, I);
    }
@@ -1242,18 +1312,18 @@ public:
      for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
             E = DVIs.end(); I != E; ++I) {
        DbgValueInst *DVI = *I;
-      Value *Arg = NULL;
+      Value *Arg = 0;
        if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
          // If an argument is zero extended then use argument directly. The ZExt
          // may be zapped by an optimization pass in future.
          if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
            Arg = dyn_cast<Argument>(ZExt->getOperand(0));
-        if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+        else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
            Arg = dyn_cast<Argument>(SExt->getOperand(0));
          if (!Arg)
-          Arg = SI->getOperand(0);
+          Arg = SI->getValueOperand();
        } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-        Arg = LI->getOperand(0);
+        Arg = LI->getPointerOperand();
        } else {
          continue;
        }
@@ -1277,7 +1347,7 @@ namespace {
  /// 1) It takes allocations of aggregates and analyzes the ways in which they
  ///    are used to try to split them into smaller allocations, ideally of
  ///    a single scalar data type. It will split up memcpy and memset accesses
-///    as necessary and try to isolate invidual scalar accesses.
+///    as necessary and try to isolate individual scalar accesses.
  /// 2) It will transform accesses into forms which are suitable for SSA value
  ///    promotion. This can be replacing a memset with a scalar store of an
  ///    integer value, or it can involve speculating operations on a PHI or
@@ -1379,11 +1449,11 @@ public:
      // may be grown during speculation. However, we never need to re-visit the
      // new uses, and so we can use the initial size bound.
      for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
-      const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
-      if (!PU.U)
+      const PartitionUse &PU = P.getUse(PI, Idx);
+      if (!PU.getUse())
          continue; // Skip dead use.
  
-      visit(cast<Instruction>(PU.U->getUser()));
+      visit(cast<Instruction>(PU.getUse()->getUser()));
      }
    }
  
@@ -1439,8 +1509,7 @@ private:
      // We can only transform this if it is safe to push the loads into the
      // predecessor blocks. The only thing to watch out for is that we can't put
      // a possibly trapping load in the predecessor if it is a critical edge.
-    for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
-         ++Idx) {
+    for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
        TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
        Value *InVal = PN.getIncomingValue(Idx);
  
@@ -1478,12 +1547,12 @@ private:
      assert(!Loads.empty());
  
      Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
-    IRBuilder<> PHIBuilder(&PN);
+    IRBuilderTy PHIBuilder(&PN);
      PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
                                            PN.getName() + ".sroa.speculated");
  
      // Get the TBAA tag and alignment to use from one of the loads.  It doesn't
-    // matter which one we get and if any differ, it doesn't matter.
+    // matter which one we get, even if some of them differ.
      LoadInst *SomeLoad = cast<LoadInst>(Loads.back());
      MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
      unsigned Align = SomeLoad->getAlignment();
@@ -1501,7 +1570,7 @@ private:
        TerminatorInst *TI = Pred->getTerminator();
        Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
        Value *InVal = PN.getIncomingValue(Idx);
-      IRBuilder<> PredBuilder(TI);
+      IRBuilderTy PredBuilder(TI);
  
        LoadInst *Load
          = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +
@@ -1528,8 +1597,8 @@ private:
        // inside the load.
        AllocaPartitioning::use_iterator UI
          = P.findPartitionUseForPHIOrSelectOperand(InUse);
-      assert(isa<PHINode>(*UI->U->getUser()));
-      UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
+      assert(isa<PHINode>(*UI->getUse()->getUser()));
+      UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex()));
      }
      DEBUG(dbgs() << "          speculated to: " << *NewPN << "\n");
    }
@@ -1576,16 +1645,16 @@ private:
  
    void visitSelectInst(SelectInst &SI) {
      DEBUG(dbgs() << "    original: " << SI << "\n");
-    IRBuilder<> IRB(&SI);
  
      // If the select isn't safe to speculate, just use simple logic to emit it.
      SmallVector<LoadInst *, 4> Loads;
      if (!isSafeSelectToSpeculate(SI, Loads))
        return;
  
+    IRBuilderTy IRB(&SI);
      Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
      AllocaPartitioning::iterator PIs[2];
-    AllocaPartitioning::PartitionUse PUs[2];
+    PartitionUse PUs[2];
      for (unsigned i = 0, e = 2; i != e; ++i) {
        PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
        if (PIs[i] != P.end()) {
@@ -1596,7 +1665,7 @@ private:
          PUs[i] = *UI;
          // Clear out the use here so that the offsets into the use list remain
          // stable but this use is ignored when rewriting.
-        UI->U = 0;
+        UI->setUse(0);
        }
      }
  
@@ -1628,8 +1697,8 @@ private:
        for (unsigned i = 0, e = 2; i != e; ++i) {
          if (PIs[i] != P.end()) {
            Use *LoadUse = &Loads[i]->getOperandUse(0);
-          assert(PUs[i].U->get() == LoadUse->get());
-          PUs[i].U = LoadUse;
+          assert(PUs[i].getUse()->get() == LoadUse->get());
+          PUs[i].setUse(LoadUse);
            P.use_push_back(PIs[i], PUs[i]);
          }
        }
@@ -1642,51 +1711,12 @@ private:
  };
  }
  
-/// \brief Accumulate the constant offsets in a GEP into a single APInt offset.
-///
-/// If the provided GEP is all-constant, the total byte offset formed by the
-/// GEP is computed and Offset is set to it. If the GEP has any non-constant
-/// operands, the function returns false and the value of Offset is unmodified.
-static bool accumulateGEPOffsets(const DataLayout &TD, GEPOperator &GEP,
-                                 APInt &Offset) {
-  APInt GEPOffset(Offset.getBitWidth(), 0);
-  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
-       GTI != GTE; ++GTI) {
-    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
-    if (!OpC)
-      return false;
-    if (OpC->isZero()) continue;
-
-    // Handle a struct index, which adds its field offset to the pointer.
-    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
-      unsigned ElementIdx = OpC->getZExtValue();
-      const StructLayout *SL = TD.getStructLayout(STy);
-      GEPOffset += APInt(Offset.getBitWidth(),
-                         SL->getElementOffset(ElementIdx));
-      continue;
-    }
-
-    APInt TypeSize(Offset.getBitWidth(),
-                   TD.getTypeAllocSize(GTI.getIndexedType()));
-    if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) {
-      assert((TD.getTypeSizeInBits(VTy->getScalarType()) % 8) == 0 &&
-             "vector element size is not a multiple of 8, cannot GEP over it");
-      TypeSize = TD.getTypeSizeInBits(VTy->getScalarType()) / 8;
-    }
-
-    GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) * TypeSize;
-  }
-  Offset = GEPOffset;
-  return true;
-}
-
  /// \brief Build a GEP out of a base pointer and indices.
  ///
  /// This will return the BasePtr if that is valid, or build a new GEP
  /// instruction using the IRBuilder if GEP-ing is needed.
-static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
-                       SmallVectorImpl<Value *> &Indices,
-                       const Twine &Prefix) {
+static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
+                       SmallVectorImpl<Value *> &Indices) {
    if (Indices.empty())
      return BasePtr;
  
@@ -1695,7 +1725,7 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
    if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
      return BasePtr;
  
-  return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");
+  return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx");
  }
  
  /// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1707,12 +1737,11 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
  /// TargetTy. If we can't find one with the same type, we at least try to use
  /// one with the same size. If none of that works, we just produce the GEP as
  /// indicated by Indices to have the correct offset.
-static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD,
                                      Value *BasePtr, Type *Ty, Type *TargetTy,
-                                    SmallVectorImpl<Value *> &Indices,
-                                    const Twine &Prefix) {
+                                    SmallVectorImpl<Value *> &Indices) {
    if (Ty == TargetTy)
-    return buildGEP(IRB, BasePtr, Indices, Prefix);
+    return buildGEP(IRB, BasePtr, Indices);
  
    // See if we can descend into a struct and locate a field with the correct
    // type.
@@ -1739,20 +1768,19 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
    if (ElementTy != TargetTy)
      Indices.erase(Indices.end() - NumLayers, Indices.end());
  
-  return buildGEP(IRB, BasePtr, Indices, Prefix);
+  return buildGEP(IRB, BasePtr, Indices);
  }
  
  /// \brief Recursively compute indices for a natural GEP.
  ///
  /// This is the recursive step for getNaturalGEPWithOffset that walks down the
  /// element types adding appropriate indices for the GEP.
-static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD,
                                         Value *Ptr, Type *Ty, APInt &Offset,
                                         Type *TargetTy,
-                                       SmallVectorImpl<Value *> &Indices,
-                                       const Twine &Prefix) {
+                                       SmallVectorImpl<Value *> &Indices) {
    if (Offset == 0)
-    return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix);
+    return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices);
  
    // We can't recurse through pointer types.
    if (Ty->isPointerTy())
@@ -1772,7 +1800,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
      Offset -= NumSkippedElements * ElementSize;
      Indices.push_back(IRB.getInt(NumSkippedElements));
      return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),
-                                    Offset, TargetTy, Indices, Prefix);
+                                    Offset, TargetTy, Indices);
    }
  
    if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
@@ -1785,7 +1813,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
      Offset -= NumSkippedElements * ElementSize;
      Indices.push_back(IRB.getInt(NumSkippedElements));
      return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
-                                    Indices, Prefix);
+                                    Indices);
    }
  
    StructType *STy = dyn_cast<StructType>(Ty);
@@ -1804,7 +1832,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
  
    Indices.push_back(IRB.getInt32(Index));
    return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
-                                  Indices, Prefix);
+                                  Indices);
  }
  
  /// \brief Get a natural GEP from a base pointer to a particular offset and
@@ -1817,10 +1845,9 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
  /// Indices, and setting Ty to the result subtype.
  ///
  /// If no natural GEP can be constructed, this function returns null.
-static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD,
                                        Value *Ptr, APInt Offset, Type *TargetTy,
-                                      SmallVectorImpl<Value *> &Indices,
-                                      const Twine &Prefix) {
+                                      SmallVectorImpl<Value *> &Indices) {
    PointerType *Ty = cast<PointerType>(Ptr->getType());
  
    // Don't consider any GEPs through an i8* as natural unless the TargetTy is
@@ -1839,7 +1866,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
    Offset -= NumSkippedElements * ElementSize;
    Indices.push_back(IRB.getInt(NumSkippedElements));
    return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
-                                  Indices, Prefix);
+                                  Indices);
  }
  
  /// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
@@ -1854,12 +1881,11 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
  /// The strategy for finding the more natural GEPs is to peel off layers of the
  /// pointer, walking back through bit casts and GEPs, searching for a base
  /// pointer from which we can compute a natural GEP with the desired
-/// properities. The algorithm tries to fold as many constant indices into
+/// properties. The algorithm tries to fold as many constant indices into
  /// a single GEP as possible, thus making each GEP more independent of the
  /// surrounding code.
-static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
-                             Value *Ptr, APInt Offset, Type *PointerTy,
-                             const Twine &Prefix) {
+static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD,
+                             Value *Ptr, APInt Offset, Type *PointerTy) {
    // Even though we don't look through PHI nodes, we could be called on an
    // instruction in an unreachable block, which may be on a cycle.
    SmallPtrSet<Value *, 4> Visited;
@@ -1882,7 +1908,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
      // First fold any existing GEPs into the offset.
      while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
        APInt GEPOffset(Offset.getBitWidth(), 0);
-      if (!accumulateGEPOffsets(TD, *GEP, GEPOffset))
+      if (!GEP->accumulateConstantOffset(TD, GEPOffset))
          break;
        Offset += GEPOffset;
        Ptr = GEP->getPointerOperand();
@@ -1893,7 +1919,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
      // See if we can perform a natural GEP here.
      Indices.clear();
      if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,
-                                           Indices, Prefix)) {
+                                           Indices)) {
        if (P->getType() == PointerTy) {
          // Zap any offset pointer that we ended up computing in previous rounds.
          if (OffsetPtr && OffsetPtr->use_empty())
@@ -1928,19 +1954,19 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
    if (!OffsetPtr) {
      if (!Int8Ptr) {
        Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
-                                  Prefix + ".raw_cast");
+                                  "raw_cast");
        Int8PtrOffset = Offset;
      }
  
      OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
        IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
-                            Prefix + ".raw_idx");
+                            "raw_idx");
    }
    Ptr = OffsetPtr;
  
    // On the off chance we were targeting i8*, guard the bitcast here.
    if (Ptr->getType() != PointerTy)
-    Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");
+    Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast");
  
    return Ptr;
  }
@@ -1954,6 +1980,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
  static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
    if (OldTy == NewTy)
      return true;
+  if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+    if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
+      if (NewITy->getBitWidth() >= OldITy->getBitWidth())
+        return true;
    if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
      return false;
    if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@@ -1976,12 +2006,16 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
  /// This will try various different casting techniques, such as bitcasts,
  /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
  /// two types for viability with this routine.
-static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
                             Type *Ty) {
    assert(canConvertValue(DL, V->getType(), Ty) &&
           "Value not convertable to type");
    if (V->getType() == Ty)
      return V;
+  if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
+    if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+      if (NewITy->getBitWidth() > OldITy->getBitWidth())
+        return IRB.CreateZExt(V, NewITy);
    if (V->getType()->isIntegerTy() && Ty->isPointerTy())
      return IRB.CreateIntToPtr(V, Ty);
    if (V->getType()->isPointerTy() && Ty->isIntegerTy())
@@ -2009,19 +2043,19 @@ static bool isVectorPromotionViable(const DataLayout &TD,
    if (!Ty)
      return false;
  
-  uint64_t VecSize = TD.getTypeSizeInBits(Ty);
    uint64_t ElementSize = TD.getTypeSizeInBits(Ty->getScalarType());
  
    // While the definition of LLVM vectors is bitpacked, we don't support sizes
    // that aren't byte sized.
    if (ElementSize % 8)
      return false;
-  assert((VecSize % 8) == 0 && "vector size not a multiple of element size?");
-  VecSize /= 8;
+  assert((TD.getTypeSizeInBits(Ty) % 8) == 0 &&
+         "vector size not a multiple of element size?");
    ElementSize /= 8;
  
    for (; I != E; ++I) {
-    if (!I->U)
+    Use *U = I->getUse();
+    if (!U)
        continue; // Skip dead use.
  
      uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
@@ -2041,24 +2075,24 @@ static bool isVectorPromotionViable(const DataLayout &TD,
        = (NumElements == 1) ? Ty->getElementType()
                             : VectorType::get(Ty->getElementType(), NumElements);
  
-    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
        if (MI->isVolatile())
          return false;
-      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
          const AllocaPartitioning::MemTransferOffsets &MTO
            = P.getMemTransferOffsets(*MTI);
          if (!MTO.IsSplittable)
            return false;
        }
-    } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
+    } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
        // Disable vector promotion when there are loads or stores of an FCA.
        return false;
-    } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+    } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
        if (LI->isVolatile())
          return false;
        if (!canConvertValue(TD, PartitionTy, LI->getType()))
          return false;
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
        if (SI->isVolatile())
          return false;
        if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
@@ -2101,13 +2135,14 @@ static bool isIntegerWideningViable(const DataLayout &TD,
  
    uint64_t Size = TD.getTypeStoreSize(AllocaTy);
  
-  // Check the uses to ensure the uses are (likely) promoteable integer uses.
+  // Check the uses to ensure the uses are (likely) promotable integer uses.
    // Also ensure that the alloca has a covering load or store. We don't want
-  // to widen the integer operotains only to fail to promote due to some other
+  // to widen the integer operations only to fail to promote due to some other
    // unsplittable entry (which we may make splittable later).
    bool WholeAllocaOp = false;
    for (; I != E; ++I) {
-    if (!I->U)
+    Use *U = I->getUse();
+    if (!U)
        continue; // Skip dead use.
  
      uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
@@ -2118,7 +2153,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
      if (RelEnd > Size)
        return false;
  
-    if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
        if (LI->isVolatile())
          return false;
        if (RelBegin == 0 && RelEnd == Size)
@@ -2133,7 +2168,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
        if (RelBegin != 0 || RelEnd != Size ||
            !canConvertValue(TD, AllocaTy, LI->getType()))
          return false;
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
        Type *ValueTy = SI->getValueOperand()->getType();
        if (SI->isVolatile())
          return false;
@@ -2149,16 +2184,16 @@ static bool isIntegerWideningViable(const DataLayout &TD,
        if (RelBegin != 0 || RelEnd != Size ||
            !canConvertValue(TD, ValueTy, AllocaTy))
          return false;
-    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
        if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
          return false;
-      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
          const AllocaPartitioning::MemTransferOffsets &MTO
            = P.getMemTransferOffsets(*MTI);
          if (!MTO.IsSplittable)
            return false;
        }
-    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
        if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
            II->getIntrinsicID() != Intrinsic::lifetime_end)
          return false;
@@ -2169,7 +2204,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
    return WholeAllocaOp;
  }
  
-static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
                               IntegerType *Ty, uint64_t Offset,
                               const Twine &Name) {
    DEBUG(dbgs() << "       start: " << *V << "\n");
@@ -2192,7 +2227,7 @@ static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
    return V;
  }
  
-static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
+static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
                              Value *V, uint64_t Offset, const Twine &Name) {
    IntegerType *IntTy = cast<IntegerType>(Old->getType());
    IntegerType *Ty = cast<IntegerType>(V->getType());
@@ -2223,7 +2258,7 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
    return V;
  }
  
-static Value *extractVector(IRBuilder<> &IRB, Value *V,
+static Value *extractVector(IRBuilderTy &IRB, Value *V,
                              unsigned BeginIndex, unsigned EndIndex,
                              const Twine &Name) {
    VectorType *VecTy = cast<VectorType>(V->getType());
@@ -2251,7 +2286,7 @@ static Value *extractVector(IRBuilder<> &IRB, Value *V,
    return V;
  }
  
-static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V,
+static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
                             unsigned BeginIndex, const Twine &Name) {
    VectorType *VecTy = cast<VectorType>(Old->getType());
    assert(VecTy && "Can only insert a vector into a vector");
@@ -2322,7 +2357,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
  
    // If we are rewriting an alloca partition which can be written as pure
    // vector operations, we stash extra information here. When VecTy is
-  // non-null, we have some strict guarantees about the rewriten alloca:
+  // non-null, we have some strict guarantees about the rewritten alloca:
    //   - The new alloca is exactly the size of the vector type here.
    //   - The accesses all either map to the entire vector or to a single
    //     element.
@@ -2341,11 +2376,13 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
  
    // The offset of the partition user currently being rewritten.
    uint64_t BeginOffset, EndOffset;
+  bool IsSplit;
    Use *OldUse;
    Instruction *OldPtr;
  
-  // The name prefix to use when rewriting instructions for this alloca.
-  std::string NamePrefix;
+  // Utility IR builder, whose name prefix is set up for each visited use, and
+  // the insertion point is set to point to the user.
+  IRBuilderTy IRB;
  
  public:
    AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P,
@@ -2358,7 +2395,8 @@ public:
        NewAllocaEndOffset(NewEndOffset),
        NewAllocaTy(NewAI.getAllocatedType()),
        VecTy(), ElementTy(), ElementSize(), IntTy(),
-      BeginOffset(), EndOffset() {
+      BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(),
+      IRB(NewAI.getContext(), ConstantFolder()) {
    }
  
    /// \brief Visit the users of the alloca partition and rewrite them.
@@ -2380,14 +2418,21 @@ public:
      }
      bool CanSROA = true;
      for (; I != E; ++I) {
-      if (!I->U)
+      if (!I->getUse())
          continue; // Skip dead uses.
        BeginOffset = I->BeginOffset;
        EndOffset = I->EndOffset;
-      OldUse = I->U;
-      OldPtr = cast<Instruction>(I->U->get());
-      NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
-      CanSROA &= visit(cast<Instruction>(I->U->getUser()));
+      IsSplit = I->isSplit();
+      OldUse = I->getUse();
+      OldPtr = cast<Instruction>(OldUse->get());
+
+      Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
+      IRB.SetInsertPoint(OldUserI);
+      IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
+      IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+                        ".");
+
+      CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
      }
      if (VecTy) {
        assert(CanSROA);
@@ -2409,14 +2454,10 @@ private:
      llvm_unreachable("No rewrite rule for this instruction!");
    }
  
-  Twine getName(const Twine &Suffix) {
-    return NamePrefix + Suffix;
-  }
-
-  Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
+  Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) {
      assert(BeginOffset >= NewAllocaBeginOffset);
      APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
-    return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
+    return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy);
    }
  
    /// \brief Compute suitable alignment to access an offset into the new alloca.
@@ -2466,27 +2507,27 @@ private:
        Pass.DeadInsts.insert(I);
    }
  
-  Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) {
+  Value *rewriteVectorizedLoadInst() {
      unsigned BeginIndex = getIndex(BeginOffset);
      unsigned EndIndex = getIndex(EndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
  
      Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                     getName(".load"));
-    return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec"));
+                                     "load");
+    return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
    }
  
-  Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+  Value *rewriteIntegerLoad(LoadInst &LI) {
      assert(IntTy && "We cannot insert an integer to the alloca");
      assert(!LI.isVolatile());
      Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                     getName(".load"));
+                                     "load");
      V = convertValue(TD, IRB, V, IntTy);
      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
      uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
      if (Offset > 0 || EndOffset < NewAllocaEndOffset)
        V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
-                         getName(".extract"));
+                         "extract");
      return V;
    }
  
@@ -2494,58 +2535,39 @@ private:
      DEBUG(dbgs() << "    original: " << LI << "\n");
      Value *OldOp = LI.getOperand(0);
      assert(OldOp == OldPtr);
-    IRBuilder<> IRB(&LI);
  
      uint64_t Size = EndOffset - BeginOffset;
-    bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
-
-    // If this memory access can be shown to *statically* extend outside the
-    // bounds of the original allocation it's behavior is undefined. Rather
-    // than trying to transform it, just replace it with undef.
-    // FIXME: We should do something more clever for functions being
-    // instrumented by asan.
-    // FIXME: Eventually, once ASan and friends can flush out bugs here, this
-    // should be transformed to a load of null making it unreachable.
-    uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
-    if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
-      LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
-      Pass.DeadInsts.insert(&LI);
-      deleteIfTriviallyDead(OldOp);
-      DEBUG(dbgs() << "          to: undef!!\n");
-      return true;
-    }
  
-    Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
-                                    : LI.getType();
+    Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+                             : LI.getType();
      bool IsPtrAdjusted = false;
      Value *V;
      if (VecTy) {
-      V = rewriteVectorizedLoadInst(IRB);
+      V = rewriteVectorizedLoadInst();
      } else if (IntTy && LI.getType()->isIntegerTy()) {
-      V = rewriteIntegerLoad(IRB, LI);
+      V = rewriteIntegerLoad(LI);
      } else if (BeginOffset == NewAllocaBeginOffset &&
                 canConvertValue(TD, NewAllocaTy, LI.getType())) {
        V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                LI.isVolatile(), getName(".load"));
+                                LI.isVolatile(), "load");
      } else {
        Type *LTy = TargetTy->getPointerTo();
        V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
                                  getPartitionTypeAlign(TargetTy),
-                                LI.isVolatile(), getName(".load"));
+                                LI.isVolatile(), "load");
        IsPtrAdjusted = true;
      }
      V = convertValue(TD, IRB, V, TargetTy);
  
-    if (IsSplitIntLoad) {
+    if (IsSplit) {
        assert(!LI.isVolatile());
        assert(LI.getType()->isIntegerTy() &&
               "Only integer type loads and stores are split");
+      assert(Size < TD.getTypeStoreSize(LI.getType()) &&
+             "Split load isn't smaller than original load");
        assert(LI.getType()->getIntegerBitWidth() ==
               TD.getTypeStoreSizeInBits(LI.getType()) &&
               "Non-byte-multiple bit width");
-      assert(LI.getType()->getIntegerBitWidth() ==
-             TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
-             "Only alloca-wide loads can be split and recomposed");
        // Move the insertion point just past the load so that we can refer to it.
        IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
        // Create a placeholder value with the same type as LI to use as the
@@ -2555,7 +2577,7 @@ private:
        Value *Placeholder
          = new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
        V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
-                        getName(".insert"));
+                        "insert");
        LI.replaceAllUsesWith(V);
        Placeholder->replaceAllUsesWith(&LI);
        delete Placeholder;
@@ -2569,7 +2591,7 @@ private:
      return !LI.isVolatile() && !IsPtrAdjusted;
    }
  
-  bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V,
+  bool rewriteVectorizedStoreInst(Value *V,
                                    StoreInst &SI, Value *OldOp) {
      unsigned BeginIndex = getIndex(BeginOffset);
      unsigned EndIndex = getIndex(EndOffset);
@@ -2584,8 +2606,8 @@ private:
  
      // Mix in the existing elements.
      Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                       getName(".load"));
-    V = insertVector(IRB, Old, V, BeginIndex, getName(".vec"));
+                                       "load");
+    V = insertVector(IRB, Old, V, BeginIndex, "vec");
  
      StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
      Pass.DeadInsts.insert(&SI);
@@ -2595,17 +2617,17 @@ private:
      return true;
    }
  
-  bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) {
+  bool rewriteIntegerStore(Value *V, StoreInst &SI) {
      assert(IntTy && "We cannot extract an integer from the alloca");
      assert(!SI.isVolatile());
      if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
        Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                         getName(".oldload"));
+                                         "oldload");
        Old = convertValue(TD, IRB, Old, IntTy);
        assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
        uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
        V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset,
-                        getName(".insert"));
+                        "insert");
      }
      V = convertValue(TD, IRB, V, NewAllocaTy);
      StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
@@ -2619,7 +2641,6 @@ private:
      DEBUG(dbgs() << "    original: " << SI << "\n");
      Value *OldOp = SI.getOperand(1);
      assert(OldOp == OldPtr);
-    IRBuilder<> IRB(&SI);
  
      Value *V = SI.getValueOperand();
  
@@ -2632,26 +2653,25 @@ private:
      uint64_t Size = EndOffset - BeginOffset;
      if (Size < TD.getTypeStoreSize(V->getType())) {
        assert(!SI.isVolatile());
+      assert(IsSplit && "A seemingly split store isn't splittable");
        assert(V->getType()->isIntegerTy() &&
               "Only integer type loads and stores are split");
        assert(V->getType()->getIntegerBitWidth() ==
               TD.getTypeStoreSizeInBits(V->getType()) &&
               "Non-byte-multiple bit width");
-      assert(V->getType()->getIntegerBitWidth() ==
-             TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
-             "Only alloca-wide stores can be split and recomposed");
        IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
        V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
-                         getName(".extract"));
+                         "extract");
      }
  
      if (VecTy)
-      return rewriteVectorizedStoreInst(IRB, V, SI, OldOp);
+      return rewriteVectorizedStoreInst(V, SI, OldOp);
      if (IntTy && V->getType()->isIntegerTy())
-      return rewriteIntegerStore(IRB, V, SI);
+      return rewriteIntegerStore(V, SI);
  
      StoreInst *NewSI;
      if (BeginOffset == NewAllocaBeginOffset &&
+        EndOffset == NewAllocaEndOffset &&
          canConvertValue(TD, V->getType(), NewAllocaTy)) {
        V = convertValue(TD, IRB, V, NewAllocaTy);
        NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
@@ -2675,11 +2695,11 @@ private:
    ///
    /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
    /// call this routine.
-  /// FIXME: Heed the abvice above.
+  /// FIXME: Heed the advice above.
    ///
    /// \param V The i8 value to splat.
    /// \param Size The number of bytes in the output (assuming i8 is one byte)
-  Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) {
+  Value *getIntegerSplat(Value *V, unsigned Size) {
      assert(Size > 0 && "Expected a positive number of bytes.");
      IntegerType *VTy = cast<IntegerType>(V->getType());
      assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
@@ -2687,37 +2707,25 @@ private:
        return V;
  
      Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
-    V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
+    V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"),
                        ConstantExpr::getUDiv(
                          Constant::getAllOnesValue(SplatIntTy),
                          ConstantExpr::getZExt(
                            Constant::getAllOnesValue(V->getType()),
                            SplatIntTy)),
-                      getName(".isplat"));
+                      "isplat");
      return V;
    }
  
    /// \brief Compute a vector splat for a given element value.
-  Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) {
-    assert(NumElements > 0 && "Cannot splat to an empty vector.");
-
-    // First insert it into a one-element vector so we can shuffle it. It is
-    // really silly that LLVM's IR requires this in order to form a splat.
-    Value *Undef = UndefValue::get(VectorType::get(V->getType(), 1));
-    V = IRB.CreateInsertElement(Undef, V, IRB.getInt32(0),
-                                getName(".splatinsert"));
-
-    // Shuffle the value across the desired number of elements.
-    SmallVector<Constant*, 8> Mask(NumElements, IRB.getInt32(0));
-    V = IRB.CreateShuffleVector(V, Undef, ConstantVector::get(Mask),
-                                getName(".splat"));
+  Value *getVectorSplat(Value *V, unsigned NumElements) {
+    V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
      DEBUG(dbgs() << "       splat: " << *V << "\n");
      return V;
    }
  
    bool visitMemSetInst(MemSetInst &II) {
      DEBUG(dbgs() << "    original: " << II << "\n");
-    IRBuilder<> IRB(&II);
      assert(II.getRawDest() == OldPtr);
  
      // If the memset has a variable size, it cannot be split, just adjust the
@@ -2762,8 +2770,7 @@ private:
      // a sensible representation for the alloca type. This is essentially
      // splatting the byte to a sufficiently wide integer, splatting it across
      // any desired vector width, and bitcasting to the final type.
-    uint64_t Size = EndOffset - BeginOffset;
-    Value *V = getIntegerSplat(IRB, II.getValue(), Size);
+    Value *V;
  
      if (VecTy) {
        // If this is a memset of a vectorized alloca, insert it.
@@ -2775,30 +2782,31 @@ private:
        unsigned NumElements = EndIndex - BeginIndex;
        assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
  
-      Value *Splat = getIntegerSplat(IRB, II.getValue(),
-                                     TD.getTypeSizeInBits(ElementTy)/8);
+      Value *Splat =
+          getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8);
        Splat = convertValue(TD, IRB, Splat, ElementTy);
        if (NumElements > 1)
-        Splat = getVectorSplat(IRB, Splat, NumElements);
+        Splat = getVectorSplat(Splat, NumElements);
  
        Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                         getName(".oldload"));
-      V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec"));
+                                         "oldload");
+      V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
      } else if (IntTy) {
        // If this is a memset on an alloca where we can widen stores, insert the
        // set integer.
        assert(!II.isVolatile());
  
-      V = getIntegerSplat(IRB, II.getValue(), Size);
+      uint64_t Size = EndOffset - BeginOffset;
+      V = getIntegerSplat(II.getValue(), Size);
  
        if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                      EndOffset != NewAllocaBeginOffset)) {
          Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                           getName(".oldload"));
+                                           "oldload");
          Old = convertValue(TD, IRB, Old, IntTy);
          assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
          uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
-        V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
+        V = insertInteger(TD, IRB, Old, V, Offset, "insert");
        } else {
          assert(V->getType() == IntTy &&
                 "Wrong type for an alloca wide integer!");
@@ -2809,10 +2817,9 @@ private:
        assert(BeginOffset == NewAllocaBeginOffset);
        assert(EndOffset == NewAllocaEndOffset);
  
-      V = getIntegerSplat(IRB, II.getValue(),
-                          TD.getTypeSizeInBits(ScalarTy)/8);
+      V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8);
        if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
-        V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements());
+        V = getVectorSplat(V, AllocaVecTy->getNumElements());
  
        V = convertValue(TD, IRB, V, AllocaTy);
      }
@@ -2829,7 +2836,6 @@ private:
      // them into two categories: split intrinsics and unsplit intrinsics.
  
      DEBUG(dbgs() << "    original: " << II << "\n");
-    IRBuilder<> IRB(&II);
  
      assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
      bool IsDest = II.getRawDest() == OldPtr;
@@ -2913,8 +2919,7 @@ private:
  
        // Compute the other pointer, folding as much as possible to produce
        // a single, simple GEP in most cases.
-      OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
-                                getName("." + OtherPtr->getName()));
+      OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
  
        Value *OurPtr
          = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
@@ -2957,8 +2962,7 @@ private:
        OtherPtrTy = SubIntTy->getPointerTo();
      }
  
-    Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
-                                   getName("." + OtherPtr->getName()));
+    Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
      Value *DstPtr = &NewAI;
      if (!IsDest)
        std::swap(SrcPtr, DstPtr);
@@ -2966,31 +2970,31 @@ private:
      Value *Src;
      if (VecTy && !IsWholeAlloca && !IsDest) {
        Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                  getName(".load"));
-      Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec"));
+                                  "load");
+      Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
      } else if (IntTy && !IsWholeAlloca && !IsDest) {
        Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                  getName(".load"));
+                                  "load");
        Src = convertValue(TD, IRB, Src, IntTy);
        assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
        uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
-      Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract"));
+      Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract");
      } else {
        Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
-                                  getName(".copyload"));
+                                  "copyload");
      }
  
      if (VecTy && !IsWholeAlloca && IsDest) {
        Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                         getName(".oldload"));
-      Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec"));
+                                         "oldload");
+      Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
      } else if (IntTy && !IsWholeAlloca && IsDest) {
        Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                         getName(".oldload"));
+                                         "oldload");
        Old = convertValue(TD, IRB, Old, IntTy);
        assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
        uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
-      Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert"));
+      Src = insertInteger(TD, IRB, Old, Src, Offset, "insert");
        Src = convertValue(TD, IRB, Src, NewAllocaTy);
      }
  
@@ -3005,7 +3009,6 @@ private:
      assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
             II.getIntrinsicID() == Intrinsic::lifetime_end);
      DEBUG(dbgs() << "    original: " << II << "\n");
-    IRBuilder<> IRB(&II);
      assert(II.getArgOperand(1) == OldPtr);
  
      // Record this instruction for deletion.
@@ -3021,6 +3024,7 @@ private:
      else
        New = IRB.CreateLifetimeEnd(Ptr, Size);
  
+    (void)New;
      DEBUG(dbgs() << "          to: " << *New << "\n");
      return true;
    }
@@ -3032,7 +3036,9 @@ private:
      // as local as possible to the PHI. To do that, we re-use the location of
      // the old pointer, which necessarily must be in the right position to
      // dominate the PHI.
-    IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr));
+    IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr));
+    PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+                             ".");
  
      Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
      // Replace the operands which were using the old pointer.
@@ -3045,17 +3051,16 @@ private:
  
    bool visitSelectInst(SelectInst &SI) {
      DEBUG(dbgs() << "    original: " << SI << "\n");
-    IRBuilder<> IRB(&SI);
-
-    // Find the operand we need to rewrite here.
-    bool IsTrueVal = SI.getTrueValue() == OldPtr;
-    if (IsTrueVal)
-      assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!");
-    else
-      assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!");
+    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
+           "Pointer isn't an operand!");
  
      Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType());
-    SI.setOperand(IsTrueVal ? 1 : 2, NewPtr);
+    // Replace the operands which were using the old pointer.
+    if (SI.getOperand(1) == OldPtr)
+      SI.setOperand(1, NewPtr);
+    if (SI.getOperand(2) == OldPtr)
+      SI.setOperand(2, NewPtr);
+
      DEBUG(dbgs() << "          to: " << SI << "\n");
      deleteIfTriviallyDead(OldPtr);
      return false;
@@ -3120,7 +3125,7 @@ private:
    class OpSplitter {
    protected:
      /// The builder used to form new instructions.
-    IRBuilder<> IRB;
+    IRBuilderTy IRB;
      /// The indices which to be used with insert- or extractvalue to select the
      /// appropriate value within the aggregate.
      SmallVector<unsigned, 4> Indices;
@@ -3197,9 +3202,8 @@ private:
      void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
        assert(Ty->isSingleValueType());
        // Load the single value and insert it using the indices.
-      Value *Load = IRB.CreateLoad(IRB.CreateInBoundsGEP(Ptr, GEPIndices,
-                                                         Name + ".gep"),
-                                   Name + ".load");
+      Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep");
+      Value *Load = IRB.CreateLoad(GEP, Name + ".load");
        Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
        DEBUG(dbgs() << "          to: " << *Load << "\n");
      }
@@ -3333,12 +3337,13 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
      Type *ElementTy = SeqTy->getElementType();
      uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
      uint64_t NumSkippedElements = Offset / ElementSize;
-    if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy))
+    if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
        if (NumSkippedElements >= ArrTy->getNumElements())
          return 0;
-    if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy))
+    } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
        if (NumSkippedElements >= VecTy->getNumElements())
          return 0;
+    }
      Offset -= NumSkippedElements * ElementSize;
  
      // First check if we need to recurse.
@@ -3436,7 +3441,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
    for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
                                          UE = P.use_end(PI);
         UI != UE && !IsLive; ++UI)
-    if (UI->U)
+    if (UI->getUse())
        IsLive = true;
    if (!IsLive)
      return false; // No live uses left of this partition.
@@ -3472,7 +3477,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
    // Check for the case where we're going to rewrite to a new alloca of the
    // exact same type as the original, and with the same access offsets. In that
    // case, re-use the existing alloca, but still run through the rewriter to
-  // performe phi and select speculation.
+  // perform phi and select speculation.
    AllocaInst *NewAI;
    if (AllocaTy == AI.getAllocatedType()) {
      assert(PI->BeginOffset == 0 &&
@@ -3639,7 +3644,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
  /// If there is a domtree available, we attempt to promote using the full power
  /// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is
  /// based on the SSAUpdater utilities. This function returns whether any
-/// promotion occured.
+/// promotion occurred.
  bool SROA::promoteAllocas(Function &F) {
    if (PromotableAllocas.empty())
      return false;