[SROA] Fix PR24463, a crash I introduced in SROA by allowing it to

[oota-llvm.git] / lib / Transforms / Scalar / SROA.cpp
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp

index f38b2b1dbf96e92d4c669d4f45feb2795f07da64..cd9f8bfaa683e957fb4266f69b952e20596ce4f8 100644 (file)
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -55,7 +55,6 @@
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Transforms/Utils/Local.h"
  #include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
  
  #if __cplusplus >= 201103L && !defined(NDEBUG)
  // We only use this for a debug check in C++11
@@ -77,11 +76,6 @@ STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
  STATISTIC(NumDeleted, "Number of instructions deleted");
  STATISTIC(NumVectorized, "Number of vectorized aggregates");
  
-/// Hidden option to force the pass to not use DomTree and mem2reg, instead
-/// forming SSA values through the SSAUpdater infrastructure.
-static cl::opt<bool> ForceSSAUpdater("force-ssa-updater", cl::init(false),
-                                     cl::Hidden);
-
  /// Hidden option to enable randomly shuffling the slices to help uncover
  /// instability in their order.
  static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices",
@@ -127,7 +121,7 @@ typedef llvm::IRBuilder<true, ConstantFolder, IRBuilderPrefixedInserter<true>>
  typedef llvm::IRBuilder<false, ConstantFolder, IRBuilderPrefixedInserter<false>>
      IRBuilderTy;
  #endif
-} // namespace
+}
  
  namespace {
  /// \brief A used slice of an alloca.
@@ -270,7 +264,8 @@ public:
      friend class AllocaSlices;
      friend class AllocaSlices::partition_iterator;
  
-    /// \brief The begining and ending offsets of the alloca for this partition.
+    /// \brief The beginning and ending offsets of the alloca for this
+    /// partition.
      uint64_t BeginOffset, EndOffset;
  
      /// \brief The start end end iterators of this partition.
@@ -439,7 +434,7 @@ public:
  
        // OK, we need to consume new slices. Set the end offset based on the
        // current slice, and step SJ past it. The beginning offset of the
-      // parttion is the beginning offset of the next slice unless we have
+      // partition is the beginning offset of the next slice unless we have
        // pre-existing split slices that are continuing, in which case we begin
        // at the prior end offset.
        P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
@@ -493,7 +488,7 @@ public:
               "End iterators don't match between compared partition iterators!");
  
        // The observed positions of partitions is marked by the P.SI iterator and
-      // the emptyness of the split slices. The latter is only relevant when
+      // the emptiness of the split slices. The latter is only relevant when
        // P.SI == SE, as the end iterator will additionally have an empty split
        // slices list, but the prior may have the same P.SI and a tail of split
        // slices.
@@ -595,7 +590,7 @@ private:
    /// the alloca.
    SmallVector<Use *, 8> DeadOperands;
  };
-} // namespace
+}
  
  static Value *foldSelectInst(SelectInst &SI) {
    // If the condition being selected on is a constant or the same value is
@@ -1072,109 +1067,6 @@ LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
  
  #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  
-namespace {
-/// \brief Implementation of LoadAndStorePromoter for promoting allocas.
-///
-/// This subclass of LoadAndStorePromoter adds overrides to handle promoting
-/// the loads and stores of an alloca instruction, as well as updating its
-/// debug information. This is used when a domtree is unavailable and thus
-/// mem2reg in its full form can't be used to handle promotion of allocas to
-/// scalar values.
-class AllocaPromoter : public LoadAndStorePromoter {
-  AllocaInst &AI;
-  DIBuilder &DIB;
-
-  SmallVector<DbgDeclareInst *, 4> DDIs;
-  SmallVector<DbgValueInst *, 4> DVIs;
-
-public:
-  AllocaPromoter(ArrayRef<const Instruction *> Insts,
-                 SSAUpdater &S,
-                 AllocaInst &AI, DIBuilder &DIB)
-      : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
-
-  void run(const SmallVectorImpl<Instruction *> &Insts) {
-    // Retain the debug information attached to the alloca for use when
-    // rewriting loads and stores.
-    if (auto *L = LocalAsMetadata::getIfExists(&AI)) {
-      if (auto *DINode = MetadataAsValue::getIfExists(AI.getContext(), L)) {
-        for (User *U : DINode->users())
-          if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
-            DDIs.push_back(DDI);
-          else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
-            DVIs.push_back(DVI);
-      }
-    }
-
-    LoadAndStorePromoter::run(Insts);
-
-    // While we have the debug information, clear it off of the alloca. The
-    // caller takes care of deleting the alloca.
-    while (!DDIs.empty())
-      DDIs.pop_back_val()->eraseFromParent();
-    while (!DVIs.empty())
-      DVIs.pop_back_val()->eraseFromParent();
-  }
-
-  bool
-  isInstInList(Instruction *I,
-               const SmallVectorImpl<Instruction *> &Insts) const override {
-    Value *Ptr;
-    if (LoadInst *LI = dyn_cast<LoadInst>(I))
-      Ptr = LI->getOperand(0);
-    else
-      Ptr = cast<StoreInst>(I)->getPointerOperand();
-
-    // Only used to detect cycles, which will be rare and quickly found as
-    // we're walking up a chain of defs rather than down through uses.
-    SmallPtrSet<Value *, 4> Visited;
-
-    do {
-      if (Ptr == &AI)
-        return true;
-
-      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
-        Ptr = BCI->getOperand(0);
-      else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
-        Ptr = GEPI->getPointerOperand();
-      else
-        return false;
-
-    } while (Visited.insert(Ptr).second);
-
-    return false;
-  }
-
-  void updateDebugInfo(Instruction *Inst) const override {
-    for (DbgDeclareInst *DDI : DDIs)
-      if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
-        ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
-      else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
-        ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
-    for (DbgValueInst *DVI : DVIs) {
-      Value *Arg = nullptr;
-      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-        // If an argument is zero extended then use argument directly. The ZExt
-        // may be zapped by an optimization pass in future.
-        if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
-          Arg = dyn_cast<Argument>(ZExt->getOperand(0));
-        else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
-          Arg = dyn_cast<Argument>(SExt->getOperand(0));
-        if (!Arg)
-          Arg = SI->getValueOperand();
-      } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-        Arg = LI->getPointerOperand();
-      } else {
-        continue;
-      }
-      DIB.insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
-                                  DVI->getExpression(), DVI->getDebugLoc(),
-                                  Inst);
-    }
-  }
-};
-} // namespace
-
  namespace {
  /// \brief An optimization pass providing Scalar Replacement of Aggregates.
  ///
@@ -1195,8 +1087,6 @@ namespace {
  ///    this form. By doing so, it will enable promotion of vector aggregates to
  ///    SSA vector values.
  class SROA : public FunctionPass {
-  const bool RequiresDomTree;
-
    LLVMContext *C;
    DominatorTree *DT;
    AssumptionCache *AC;
@@ -1244,9 +1134,7 @@ class SROA : public FunctionPass {
    SetVector<SelectInst *, SmallVector<SelectInst *, 2>> SpeculatableSelects;
  
  public:
-  SROA(bool RequiresDomTree = true)
-      : FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr),
-        DT(nullptr) {
+  SROA() : FunctionPass(ID), C(nullptr), DT(nullptr) {
      initializeSROAPass(*PassRegistry::getPassRegistry());
    }
    bool runOnFunction(Function &F) override;
@@ -1268,12 +1156,12 @@ private:
    void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
    bool promoteAllocas(Function &F);
  };
-} // namespace
+}
  
  char SROA::ID = 0;
  
-FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
-  return new SROA(RequiresDomTree);
+FunctionPass *llvm::createSROAPass() {
+  return new SROA();
  }
  
  INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
@@ -1847,10 +1735,17 @@ static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset,
  static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
    if (OldTy == NewTy)
      return true;
-  if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
-    if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
-      if (NewITy->getBitWidth() >= OldITy->getBitWidth())
-        return true;
+
+  // For integer types, we can't handle any bit-width differences. This would
+  // break both vector conversions with extension and introduce endianness
+  // issues when in conjunction with loads and stores.
+  if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
+    assert(cast<IntegerType>(OldTy)->getBitWidth() !=
+               cast<IntegerType>(NewTy)->getBitWidth() &&
+           "We can't have the same bitwidth for different int types");
+    return false;
+  }
+
    if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
      return false;
    if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@@ -1885,10 +1780,8 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
    if (OldTy == NewTy)
      return V;
  
-  if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
-    if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
-      if (NewITy->getBitWidth() > OldITy->getBitWidth())
-        return IRB.CreateZExt(V, NewITy);
+  assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
+         "Integer types must be the exact same to convert.");
  
    // See if we need inttoptr for this type pair. A cast involving both scalars
    // and vectors requires and additional bitcast.
@@ -1929,7 +1822,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
  
  /// \brief Test whether the given slice use can be promoted to a vector.
  ///
-/// This function is called to test each entry in a partioning which is slated
+/// This function is called to test each entry in a partition which is slated
  /// for a single slice.
  static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P,
                                              const Slice &S, VectorType *Ty,
@@ -2125,7 +2018,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
    uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
  
    // We can't reasonably handle cases where the load or store extends past
-  // the end of the aloca's type and into its padding.
+  // the end of the alloca's type and into its padding.
    if (RelEnd > Size)
      return false;
  
@@ -2134,6 +2027,9 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
      if (LI->isVolatile())
        return false;
+    // We can't handle loads that extend past the allocated memory.
+    if (DL.getTypeStoreSize(LI->getType()) > Size)
+      return false;
      // Note that we don't count vector loads or stores as whole-alloca
      // operations which enable integer widening because we would prefer to use
      // vector widening instead.
@@ -2152,6 +2048,9 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
      Type *ValueTy = SI->getValueOperand()->getType();
      if (SI->isVolatile())
        return false;
+    // We can't handle stores that extend past the allocated memory.
+    if (DL.getTypeStoreSize(ValueTy) > Size)
+      return false;
      // Note that we don't count vector loads or stores as whole-alloca
      // operations which enable integer widening because we would prefer to use
      // vector widening instead.
@@ -2572,9 +2471,19 @@ private:
      V = convertValue(DL, IRB, V, IntTy);
      assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
-    if (Offset > 0 || NewEndOffset < NewAllocaEndOffset)
-      V = extractInteger(DL, IRB, V, cast<IntegerType>(LI.getType()), Offset,
-                         "extract");
+    if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
+      IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
+      V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
+    }
+    // It is possible that the extracted type is not the load type. This
+    // happens if there is a load past the end of the alloca, and as
+    // a consequence the slice is narrower but still a candidate for integer
+    // lowering. To handle this case, we just zero extend the extracted
+    // integer.
+    assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
+           "Can only handle an extract for an overly wide load");
+    if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
+      V = IRB.CreateZExt(V, LI.getType());
      return V;
    }
  
@@ -2585,6 +2494,7 @@ private:
  
      Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
                               : LI.getType();
+    const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize;
      bool IsPtrAdjusted = false;
      Value *V;
      if (VecTy) {
@@ -2592,14 +2502,36 @@ private:
      } else if (IntTy && LI.getType()->isIntegerTy()) {
        V = rewriteIntegerLoad(LI);
      } else if (NewBeginOffset == NewAllocaBeginOffset &&
-               canConvertValue(DL, NewAllocaTy, LI.getType())) {
-      V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), LI.isVolatile(),
-                                LI.getName());
+               NewEndOffset == NewAllocaEndOffset &&
+               (canConvertValue(DL, NewAllocaTy, TargetTy) ||
+                (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
+                 TargetTy->isIntegerTy()))) {
+      LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                              LI.isVolatile(), LI.getName());
+      if (LI.isVolatile())
+        NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+      V = NewLI;
+
+      // If this is an integer load past the end of the slice (which means the
+      // bytes outside the slice are undef or this load is dead) just forcibly
+      // fix the integer size with correct handling of endianness.
+      if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
+        if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
+          if (AITy->getBitWidth() < TITy->getBitWidth()) {
+            V = IRB.CreateZExt(V, TITy, "load.ext");
+            if (DL.isBigEndian())
+              V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
+                                "endian_shift");
+          }
      } else {
        Type *LTy = TargetTy->getPointerTo();
-      V = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
-                                getSliceAlign(TargetTy), LI.isVolatile(),
-                                LI.getName());
+      LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
+                                              getSliceAlign(TargetTy),
+                                              LI.isVolatile(), LI.getName());
+      if (LI.isVolatile())
+        NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
+      V = NewLI;
        IsPtrAdjusted = true;
      }
      V = convertValue(DL, IRB, V, TargetTy);
@@ -2710,10 +2642,25 @@ private:
      if (IntTy && V->getType()->isIntegerTy())
        return rewriteIntegerStore(V, SI);
  
+    const bool IsStorePastEnd = DL.getTypeStoreSize(V->getType()) > SliceSize;
      StoreInst *NewSI;
      if (NewBeginOffset == NewAllocaBeginOffset &&
          NewEndOffset == NewAllocaEndOffset &&
-        canConvertValue(DL, V->getType(), NewAllocaTy)) {
+        (canConvertValue(DL, V->getType(), NewAllocaTy) ||
+         (IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
+          V->getType()->isIntegerTy()))) {
+      // If this is an integer store past the end of slice (and thus the bytes
+      // past that point are irrelevant or this is unreachable), truncate the
+      // value prior to storing.
+      if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
+        if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
+          if (VITy->getBitWidth() > AITy->getBitWidth()) {
+            if (DL.isBigEndian())
+              V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
+                                 "endian_shift");
+            V = IRB.CreateTrunc(V, AITy, "load.trunc");
+          }
+
        V = convertValue(DL, IRB, V, NewAllocaTy);
        NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
                                       SI.isVolatile());
@@ -2722,7 +2669,8 @@ private:
        NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
                                       SI.isVolatile());
      }
-    (void)NewSI;
+    if (SI.isVolatile())
+      NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope());
      Pass.DeadInsts.insert(&SI);
      deleteIfTriviallyDead(OldOp);
  
@@ -3119,7 +3067,7 @@ private:
      return true;
    }
  };
-} // namespace
+}
  
  namespace {
  /// \brief Visitor to rewrite aggregate loads and stores as scalar.
@@ -3327,7 +3275,7 @@ private:
      return false;
    }
  };
-} // namespace
+}
  
  /// \brief Strip aggregate type wrapping.
  ///
@@ -3661,7 +3609,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
                         return true;
                       }),
        Stores.end());
-  // Now we have to go *back* through all te stores, because a later store may
+  // Now we have to go *back* through all the stores, because a later store may
    // have caused an earlier store's load to become unsplittable and if it is
    // unsplittable for the later store, then we can't rely on it being split in
    // the earlier store either.
@@ -3922,7 +3870,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
  
      // Mark the original store as dead now that we've split it up and kill its
      // slice. Note that we leave the original load in place unless this store
-    // was its ownly use. It may in turn be split up if it is an alloca load
+    // was its only use. It may in turn be split up if it is an alloca load
      // for some other alloca, but it may be a normal load. This may introduce
      // redundant loads, but where those can be merged the rest of the optimizer
      // should handle the merging, and this uncovers SSA splits which is more
@@ -4180,7 +4128,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
        std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
  
    // Migrate debug information from the old alloca to the new alloca(s)
-  // and the individial partitions.
+  // and the individual partitions.
    if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) {
      auto *Var = DbgDecl->getVariable();
      auto *Expr = DbgDecl->getExpression();
@@ -4338,93 +4286,19 @@ void SROA::deleteDeadInstructions(
    }
  }
  
-static void enqueueUsersInWorklist(Instruction &I,
-                                   SmallVectorImpl<Instruction *> &Worklist,
-                                   SmallPtrSetImpl<Instruction *> &Visited) {
-  for (User *U : I.users())
-    if (Visited.insert(cast<Instruction>(U)).second)
-      Worklist.push_back(cast<Instruction>(U));
-}
-
  /// \brief Promote the allocas, using the best available technique.
  ///
  /// This attempts to promote whatever allocas have been identified as viable in
  /// the PromotableAllocas list. If that list is empty, there is nothing to do.
-/// If there is a domtree available, we attempt to promote using the full power
-/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is
-/// based on the SSAUpdater utilities. This function returns whether any
-/// promotion occurred.
+/// This function returns whether any promotion occurred.
  bool SROA::promoteAllocas(Function &F) {
    if (PromotableAllocas.empty())
      return false;
  
    NumPromoted += PromotableAllocas.size();
  
-  if (DT && !ForceSSAUpdater) {
-    DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
-    PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
-    PromotableAllocas.clear();
-    return true;
-  }
-
-  DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
-  SSAUpdater SSA;
-  DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
-  SmallVector<Instruction *, 64> Insts;
-
-  // We need a worklist to walk the uses of each alloca.
-  SmallVector<Instruction *, 8> Worklist;
-  SmallPtrSet<Instruction *, 8> Visited;
-  SmallVector<Instruction *, 32> DeadInsts;
-
-  for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
-    AllocaInst *AI = PromotableAllocas[Idx];
-    Insts.clear();
-    Worklist.clear();
-    Visited.clear();
-
-    enqueueUsersInWorklist(*AI, Worklist, Visited);
-
-    while (!Worklist.empty()) {
-      Instruction *I = Worklist.pop_back_val();
-
-      // FIXME: Currently the SSAUpdater infrastructure doesn't reason about
-      // lifetime intrinsics and so we strip them (and the bitcasts+GEPs
-      // leading to them) here. Eventually it should use them to optimize the
-      // scalar values produced.
-      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-        assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
-               II->getIntrinsicID() == Intrinsic::lifetime_end);
-        II->eraseFromParent();
-        continue;
-      }
-
-      // Push the loads and stores we find onto the list. SROA will already
-      // have validated that all loads and stores are viable candidates for
-      // promotion.
-      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-        assert(LI->getType() == AI->getAllocatedType());
-        Insts.push_back(LI);
-        continue;
-      }
-      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-        assert(SI->getValueOperand()->getType() == AI->getAllocatedType());
-        Insts.push_back(SI);
-        continue;
-      }
-
-      // For everything else, we know that only no-op bitcasts and GEPs will
-      // make it this far, just recurse through them and recall them for later
-      // removal.
-      DeadInsts.push_back(I);
-      enqueueUsersInWorklist(*I, Worklist, Visited);
-    }
-    AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
-    while (!DeadInsts.empty())
-      DeadInsts.pop_back_val()->eraseFromParent();
-    AI->eraseFromParent();
-  }
-
+  DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
+  PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
    PromotableAllocas.clear();
    return true;
  }
@@ -4435,9 +4309,7 @@ bool SROA::runOnFunction(Function &F) {
  
    DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
    C = &F.getContext();
-  DominatorTreeWrapperPass *DTWP =
-      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  DT = DTWP ? &DTWP->getDomTree() : nullptr;
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  
    BasicBlock &EntryBB = F.getEntryBlock();
@@ -4482,7 +4354,6 @@ bool SROA::runOnFunction(Function &F) {
  
  void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<AssumptionCacheTracker>();
-  if (RequiresDomTree)
-    AU.addRequired<DominatorTreeWrapperPass>();
+  AU.addRequired<DominatorTreeWrapperPass>();
    AU.setPreservesCFG();
  }