[FunctionAttrs] Inline the prototype attribute inference to an existing

[oota-llvm.git] / lib / Transforms / IPO / ArgumentPromotion.cpp
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp

index df08091ac5bf72965d30c0fbd55602ffa2df7f9b..bdd1b61f9b2b1ec8161a8bf70417d0e91b290ea1 100644 (file)
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -29,26 +29,33 @@
  //
  //===----------------------------------------------------------------------===//
  
-#define DEBUG_TYPE "argpromotion"
  #include "llvm/Transforms/IPO.h"
  #include "llvm/ADT/DepthFirstIterator.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/ADT/StringExtras.h"
  #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
  #include "llvm/Analysis/CallGraph.h"
  #include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
  #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CallSite.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/raw_ostream.h"
  #include <set>
  using namespace llvm;
  
+#define DEBUG_TYPE "argpromotion"
+
  STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
  STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
  STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
@@ -58,12 +65,13 @@ namespace {
    /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
    ///
    struct ArgPromotion : public CallGraphSCCPass {
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<AliasAnalysis>();
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<AssumptionCacheTracker>();
+      AU.addRequired<TargetLibraryInfoWrapperPass>();
        CallGraphSCCPass::getAnalysisUsage(AU);
      }
  
-    virtual bool runOnSCC(CallGraphSCC &SCC);
+    bool runOnSCC(CallGraphSCC &SCC) override;
      static char ID; // Pass identification, replacement for typeid
      explicit ArgPromotion(unsigned maxElements = 3)
          : CallGraphSCCPass(ID), maxElements(maxElements) {
@@ -74,21 +82,29 @@ namespace {
      typedef std::vector<uint64_t> IndicesVector;
  
    private:
+    bool isDenselyPacked(Type *type, const DataLayout &DL);
+    bool canPaddingBeAccessed(Argument *Arg);
      CallGraphNode *PromoteArguments(CallGraphNode *CGN);
-    bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
+    bool isSafeToPromoteArgument(Argument *Arg, bool isByVal,
+                                 AAResults &AAR) const;
      CallGraphNode *DoPromotion(Function *F,
-                               SmallPtrSet<Argument*, 8> &ArgsToPromote,
-                               SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
+                              SmallPtrSetImpl<Argument*> &ArgsToPromote,
+                              SmallPtrSetImpl<Argument*> &ByValArgsToTransform);
+    
+    using llvm::Pass::doInitialization;
+    bool doInitialization(CallGraph &CG) override;
      /// The maximum number of elements to expand, or 0 for unlimited.
      unsigned maxElements;
+    DenseMap<const Function *, DISubprogram *> FunctionDIs;
    };
  }
  
  char ArgPromotion::ID = 0;
  INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
                  "Promote 'by reference' arguments to scalars", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
  INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
                  "Promote 'by reference' arguments to scalars", false, false)
  
@@ -114,6 +130,79 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
    return Changed;
  }
  
+/// \brief Returns true if \p type is sized and no padding bytes are found in it.
+bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) {
+
+  // There is no size information, so be conservative.
+  if (!type->isSized())
+    return false;
+
+  // If the alloc size is not equal to the storage size, then there are padding
+  // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
+  if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
+    return false;
+
+  if (!isa<CompositeType>(type))
+    return true; // Non-composite: there are no members to inspect.
+
+  // For homogeneous sequential types, check for padding within members.
+  if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
+    return isa<PointerType>(seqTy) ||
+           isDenselyPacked(seqTy->getElementType(), DL);
+
+  // Check for padding within and between elements of a struct.
+  StructType *StructTy = cast<StructType>(type);
+  const StructLayout *Layout = DL.getStructLayout(StructTy);
+  uint64_t StartPos = 0; // Bit offset at which the next element must start.
+  for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
+    Type *ElTy = StructTy->getElementType(i);
+    if (!isDenselyPacked(ElTy, DL))
+      return false;
+    if (StartPos != Layout->getElementOffsetInBits(i))
+      return false; // Gap between the previous element and this one.
+    StartPos += DL.getTypeAllocSizeInBits(ElTy);
+  }
+
+  return true;
+}
+
+/// \brief Returns true if padding bytes of byval argument \p arg may be accessed.
+bool ArgPromotion::canPaddingBeAccessed(Argument *arg) {
+
+  assert(arg->hasByValAttr());
+
+  // Track all the pointers to the argument to make sure they are not captured.
+  SmallPtrSet<Value *, 16> PtrValues;
+  PtrValues.insert(arg);
+
+  // Track all of the stores.
+  SmallVector<StoreInst *, 16> Stores;
+
+  // Scan through the uses recursively to make sure the pointer is only ever
+  // used by GEPs, PHIs, loads and stores.
+  SmallVector<Value *, 16> WorkList;
+  WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
+  while (!WorkList.empty()) {
+    Value *V = WorkList.back();
+    WorkList.pop_back();
+    if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
+      if (PtrValues.insert(V).second)
+        WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
+    } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
+      Stores.push_back(Store);
+    } else if (!isa<LoadInst>(V)) {
+      return true; // Any other kind of user: conservatively report access.
+    }
+  }
+
+  // Check to make sure the pointers aren't captured.
+  for (StoreInst *Store : Stores)
+    if (PtrValues.count(Store->getValueOperand()))
+      return true; // A tracked pointer is itself the value being stored.
+
+  return false;
+}
+
  /// PromoteArguments - This method checks the specified function to see if there
  /// are any promotable arguments and if it is safe to promote the function (for
  /// example, all callers are direct).  If safe to promote some arguments, it
@@ -123,29 +212,45 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
    Function *F = CGN->getFunction();
  
    // Make sure that it is local to this module.
-  if (!F || !F->hasLocalLinkage()) return 0;
+  if (!F || !F->hasLocalLinkage()) return nullptr;
+
+  // Don't promote arguments for variadic functions. Adding, removing, or
+  // changing non-pack parameters can change the classification of pack
+  // parameters. Frontends encode that classification at the call site in the
+  // IR, while in the callee the classification is determined dynamically based
+  // on the number of registers consumed so far.
+  if (F->isVarArg()) return nullptr;
  
    // First check: see if there are any pointer arguments!  If not, quick exit.
    SmallVector<Argument*, 16> PointerArgs;
-  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
-    if (I->getType()->isPointerTy())
-      PointerArgs.push_back(I);
-  if (PointerArgs.empty()) return 0;
+  for (Argument &I : F->args())
+    if (I.getType()->isPointerTy())
+      PointerArgs.push_back(&I);
+  if (PointerArgs.empty()) return nullptr;
  
    // Second check: make sure that all callers are direct callers.  We can't
    // transform functions that have indirect callers.  Also see if the function
    // is self-recursive.
    bool isSelfRecursive = false;
-  for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
-       UI != E; ++UI) {
-    CallSite CS(*UI);
+  for (Use &U : F->uses()) {
+    CallSite CS(U.getUser());
      // Must be a direct call.
-    if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0;
+    if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
      
      if (CS.getInstruction()->getParent()->getParent() == F)
        isSelfRecursive = true;
    }
    
+  const DataLayout &DL = F->getParent()->getDataLayout();
+
+  // We need to manually construct BasicAA directly in order to disable its use
+  // of other function analyses.
+  BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F));
+
+  // Construct our own AA results for this function. We do this manually to
+  // work around the limitations of the legacy pass manager.
+  AAResults AAR(createLegacyPMAAResults(*this, *F, BAR));
+
    // Check to see which arguments are promotable.  If an argument is promotable,
    // add it to ArgsToPromote.
    SmallPtrSet<Argument*, 8> ArgsToPromote;
@@ -154,9 +259,32 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
      Argument *PtrArg = PointerArgs[i];
      Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
  
+    // Replace sret attribute with noalias. This reduces register pressure by
+    // avoiding a register copy.
+    if (PtrArg->hasStructRetAttr()) {
+      unsigned ArgNo = PtrArg->getArgNo();
+      F->setAttributes(
+          F->getAttributes()
+              .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
+              .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+      for (Use &U : F->uses()) {
+        CallSite CS(U.getUser());
+        CS.setAttributes(
+            CS.getAttributes()
+                .removeAttribute(F->getContext(), ArgNo + 1,
+                                 Attribute::StructRet)
+                .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+      }
+    }
+
      // If this is a byval argument, and if the aggregate type is small, just
-    // pass the elements, which is always safe.
-    if (PtrArg->hasByValAttr()) {
+    // pass the elements, which is always safe, if the passed value is densely
+    // packed or if we can prove the padding bytes are never accessed. This does
+    // not apply to inalloca.
+    bool isSafeToPromote =
+        PtrArg->hasByValAttr() &&
+        (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
+    if (isSafeToPromote) {
        if (StructType *STy = dyn_cast<StructType>(AgTy)) {
          if (maxElements > 0 && STy->getNumElements() > maxElements) {
            DEBUG(dbgs() << "argpromotion disable promoting argument '"
@@ -167,8 +295,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
          
          // If all the elements are single-value types, we can promote it.
          bool AllSimple = true;
-        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-          if (!STy->getElementType(i)->isSingleValueType()) {
+        for (const auto *EltTy : STy->elements()) {
+          if (!EltTy->isSingleValueType()) {
              AllSimple = false;
              break;
            }
@@ -189,8 +317,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
      if (isSelfRecursive) {
        if (StructType *STy = dyn_cast<StructType>(AgTy)) {
          bool RecursiveType = false;
-        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-          if (STy->getElementType(i) == PtrArg->getType()) {
+        for (const auto *EltTy : STy->elements()) {
+          if (EltTy == PtrArg->getType()) {
              RecursiveType = true;
              break;
            }
@@ -201,13 +329,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
      }
      
      // Otherwise, see if we can promote the pointer to its value.
-    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr()))
+    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR))
        ArgsToPromote.insert(PtrArg);
    }
  
    // No promotable pointer arguments.
    if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) 
-    return 0;
+    return nullptr;
  
    return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
  }
@@ -216,17 +344,17 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
  /// all callees pass in a valid pointer for the specified function argument.
  static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
    Function *Callee = Arg->getParent();
+  const DataLayout &DL = Callee->getParent()->getDataLayout();
  
    unsigned ArgNo = Arg->getArgNo();
  
    // Look at all call sites of the function.  At this pointer we know we only
    // have direct callees.
-  for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end();
-       UI != E; ++UI) {
-    CallSite CS(*UI);
+  for (User *U : Callee->users()) {
+    CallSite CS(U);
      assert(CS && "Should only have direct calls!");
  
-    if (!CS.getArgument(ArgNo)->isDereferenceablePointer())
+    if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
        return false;
    }
    return true;
@@ -301,7 +429,9 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
  /// This method limits promotion of aggregates to only promote up to three
  /// elements of the aggregate in order to avoid exploding the number of
  /// arguments passed in.
-bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
+bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
+                                           bool isByValOrInAlloca,
+                                           AAResults &AAR) const {
    typedef std::set<IndicesVector> GEPIndicesSet;
  
    // Quick exit for unused arguments
@@ -323,6 +453,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
    //
    // This set will contain all sets of indices that are loaded in the entry
    // block, and thus are safe to unconditionally load in the caller.
+  //
+  // This optimization is also safe for InAlloca parameters, because it verifies
+  // that the address isn't captured.
    GEPIndicesSet SafeToUnconditionallyLoad;
  
    // This set contains all the sets of indices that we are planning to promote.
@@ -330,17 +463,16 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
    GEPIndicesSet ToPromote;
  
    // If the pointer is always valid, any load with first index 0 is valid.
-  if (isByVal || AllCallersPassInValidPointerForArgument(Arg))
+  if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
      SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
  
    // First, iterate the entry block and mark loads of (geps of) arguments as
    // safe.
-  BasicBlock *EntryBlock = Arg->getParent()->begin();
+  BasicBlock &EntryBlock = Arg->getParent()->front();
    // Declare this here so we can reuse it
    IndicesVector Indices;
-  for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
-       I != E; ++I)
-    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+  for (Instruction &I : EntryBlock)
+    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
        Value *V = LI->getPointerOperand();
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
          V = GEP->getPointerOperand();
@@ -370,26 +502,24 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
    // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
    SmallVector<LoadInst*, 16> Loads;
    IndicesVector Operands;
-  for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
-       UI != E; ++UI) {
-    User *U = *UI;
+  for (Use &U : Arg->uses()) {
+    User *UR = U.getUser();
      Operands.clear();
-    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(UR)) {
        // Don't hack volatile/atomic loads
        if (!LI->isSimple()) return false;
        Loads.push_back(LI);
        // Direct loads are equivalent to a GEP with a zero index and then a load.
        Operands.push_back(0);
-    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) {
        if (GEP->use_empty()) {
          // Dead GEP's cause trouble later.  Just remove them if we run into
          // them.
-        getAnalysis<AliasAnalysis>().deleteValue(GEP);
          GEP->eraseFromParent();
          // TODO: This runs the above loop over and over again for dead GEPs
          // Couldn't we just do increment the UI iterator earlier and erase the
          // use?
-        return isSafeToPromoteArgument(Arg, isByVal);
+        return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR);
        }
  
        // Ensure that all of the indices are constants.
@@ -401,9 +531,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
            return false;  // Not a constant operand GEP!
  
        // Ensure that the only users of the GEP are load instructions.
-      for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
-           UI != E; ++UI)
-        if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+      for (User *GEPU : GEP->users())
+        if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) {
            // Don't hack volatile/atomic loads
            if (!LI->isSimple()) return false;
            Loads.push_back(LI);
@@ -432,7 +561,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
          // of elements of the aggregate.
          return false;
        }
-      ToPromote.insert(Operands);
+      ToPromote.insert(std::move(Operands));
      }
    }
  
@@ -447,27 +576,22 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
    // blocks we know to be transparent to the load.
    SmallPtrSet<BasicBlock*, 16> TranspBlocks;
  
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
    for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
      // Check to see if the load is invalidated from the start of the block to
      // the load itself.
      LoadInst *Load = Loads[i];
      BasicBlock *BB = Load->getParent();
  
-    AliasAnalysis::Location Loc = AA.getLocation(Load);
-    if (AA.canInstructionRangeModify(BB->front(), *Load, Loc))
+    MemoryLocation Loc = MemoryLocation::get(Load);
+    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod))
        return false;  // Pointer is invalidated!
  
      // Now check every path from the entry block to the load for transparency.
      // To do this, we perform a depth first search on the inverse CFG from the
      // loading block.
-    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-      BasicBlock *P = *PI;
-      for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
-             I = idf_ext_begin(P, TranspBlocks),
-             E = idf_ext_end(P, TranspBlocks); I != E; ++I)
-        if (AA.canBasicBlockModify(**I, Loc))
+    for (BasicBlock *P : predecessors(BB)) {
+      for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
+        if (AAR.canBasicBlockModify(*TranspBB, Loc))
            return false;
      }
    }
@@ -482,15 +606,15 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
  /// arguments, and returns the new function.  At this point, we know that it's
  /// safe to do so.
  CallGraphNode *ArgPromotion::DoPromotion(Function *F,
-                               SmallPtrSet<Argument*, 8> &ArgsToPromote,
-                              SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {
+                             SmallPtrSetImpl<Argument*> &ArgsToPromote,
+                             SmallPtrSetImpl<Argument*> &ByValArgsToTransform) {
  
    // Start by computing a new prototype for the function, which is the same as
    // the old function, but has modified arguments.
    FunctionType *FTy = F->getFunctionType();
    std::vector<Type*> Params;
  
-  typedef std::set<IndicesVector> ScalarizeTable;
+  typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
  
    // ScalarizedElements - If we are promoting a pointer that has elements
    // accessed out of it, keep track of which elements are accessed so that we
@@ -523,14 +647,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
    unsigned ArgIndex = 1;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++ArgIndex) {
-    if (ByValArgsToTransform.count(I)) {
+    if (ByValArgsToTransform.count(&*I)) {
        // Simple byval argument? Just add all the struct element types.
        Type *AgTy = cast<PointerType>(I->getType())->getElementType();
        StructType *STy = cast<StructType>(AgTy);
-      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
-        Params.push_back(STy->getElementType(i));
+      Params.insert(Params.end(), STy->element_begin(), STy->element_end());
        ++NumByValArgsPromoted;
-    } else if (!ArgsToPromote.count(I)) {
+    } else if (!ArgsToPromote.count(&*I)) {
        // Unchanged argument
        Params.push_back(I->getType());
        AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
@@ -548,41 +671,46 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
  
        // In this table, we will track which indices are loaded from the argument
        // (where direct loads are tracked as no indices).
-      ScalarizeTable &ArgIndices = ScalarizedElements[I];
-      for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
-           ++UI) {
-        Instruction *User = cast<Instruction>(*UI);
-        assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User));
+      ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+      for (User *U : I->users()) {
+        Instruction *UI = cast<Instruction>(U);
+        Type *SrcTy;
+        if (LoadInst *L = dyn_cast<LoadInst>(UI))
+          SrcTy = L->getType();
+        else
+          SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
          IndicesVector Indices;
-        Indices.reserve(User->getNumOperands() - 1);
+        Indices.reserve(UI->getNumOperands() - 1);
          // Since loads will only have a single operand, and GEPs only a single
          // non-index operand, this will record direct loads without any indices,
          // and gep+loads with the GEP indices.
-        for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end();
+        for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
               II != IE; ++II)
            Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
          // GEPs with a single 0 index can be merged with direct loads
          if (Indices.size() == 1 && Indices.front() == 0)
            Indices.clear();
-        ArgIndices.insert(Indices);
+        ArgIndices.insert(std::make_pair(SrcTy, Indices));
          LoadInst *OrigLoad;
-        if (LoadInst *L = dyn_cast<LoadInst>(User))
+        if (LoadInst *L = dyn_cast<LoadInst>(UI))
            OrigLoad = L;
          else
            // Take any load, we will use it only to update Alias Analysis
-          OrigLoad = cast<LoadInst>(User->use_back());
-        OriginalLoads[std::make_pair(I, Indices)] = OrigLoad;
+          OrigLoad = cast<LoadInst>(UI->user_back());
+        OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
        }
  
        // Add a parameter to the function for each element passed in.
        for (ScalarizeTable::iterator SI = ArgIndices.begin(),
               E = ArgIndices.end(); SI != E; ++SI) {
          // not allowed to dereference ->begin() if size() is 0
-        Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI));
+        Params.push_back(GetElementPtrInst::getIndexedType(
+            cast<PointerType>(I->getType()->getScalarType())->getElementType(),
+            SI->second));
          assert(Params.back());
        }
  
-      if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
+      if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
          ++NumArgumentsPromoted;
        else
          ++NumAggregatesPromoted;
@@ -603,7 +731,17 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
    Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
    NF->copyAttributesFrom(F);
  
-  
+  // Patch the pointer to LLVM function in debug info descriptor.
+  auto DI = FunctionDIs.find(F);
+  if (DI != FunctionDIs.end()) {
+    DISubprogram *SP = DI->second;
+    SP->replaceFunction(NF);
+    // Ensure the map is updated so it can be reused on subsequent argument
+    // promotions of the same function.
+    FunctionDIs.erase(DI);
+    FunctionDIs[NF] = SP;
+  }
+
    DEBUG(dbgs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n"
          << "From: " << *F);
    
@@ -612,17 +750,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
    NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
    AttributesVec.clear();
  
-  F->getParent()->getFunctionList().insert(F, NF);
+  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
    NF->takeName(F);
  
-  // Get the alias analysis information that we need to update to reflect our
-  // changes.
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
    // Get the callgraph information that we need to update to reflect our
    // changes.
-  CallGraph &CG = getAnalysis<CallGraph>();
-  
+  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+
    // Get a new callgraph node for NF.
    CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
  
@@ -631,7 +765,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
    //
    SmallVector<Value*, 16> Args;
    while (!F->use_empty()) {
-    CallSite CS(F->use_back());
+    CallSite CS(F->user_back());
      assert(CS.getCalledFunction() == F);
      Instruction *Call = CS.getInstruction();
      const AttributeSet &CallPAL = CS.getAttributes();
@@ -647,7 +781,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
      ArgIndex = 1;
      for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
           I != E; ++I, ++AI, ++ArgIndex)
-      if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+      if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
          Args.push_back(*AI);          // Unmodified argument
  
          if (CallPAL.hasAttributes(ArgIndex)) {
@@ -655,35 +789,35 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
            AttributesVec.
              push_back(AttributeSet::get(F->getContext(), Args.size(), B));
          }
-      } else if (ByValArgsToTransform.count(I)) {
+      } else if (ByValArgsToTransform.count(&*I)) {
          // Emit a GEP and load for each element of the struct.
          Type *AgTy = cast<PointerType>(I->getType())->getElementType();
          StructType *STy = cast<StructType>(AgTy);
          Value *Idxs[2] = {
-              ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+              ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
            Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
-          Value *Idx = GetElementPtrInst::Create(*AI, Idxs,
-                                                 (*AI)->getName()+"."+utostr(i),
-                                                 Call);
+          Value *Idx = GetElementPtrInst::Create(
+              STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
            // TODO: Tell AA about the new values?
            Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
          }
        } else if (!I->use_empty()) {
          // Non-dead argument: insert GEPs and loads as appropriate.
-        ScalarizeTable &ArgIndices = ScalarizedElements[I];
+        ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
          // Store the Value* version of the indices in here, but declare it now
          // for reuse.
          std::vector<Value*> Ops;
          for (ScalarizeTable::iterator SI = ArgIndices.begin(),
                 E = ArgIndices.end(); SI != E; ++SI) {
            Value *V = *AI;
-          LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, *SI)];
-          if (!SI->empty()) {
-            Ops.reserve(SI->size());
+          LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, SI->second)];
+          if (!SI->second.empty()) {
+            Ops.reserve(SI->second.size());
              Type *ElTy = V->getType();
-            for (IndicesVector::const_iterator II = SI->begin(),
-                 IE = SI->end(); II != IE; ++II) {
+            for (IndicesVector::const_iterator II = SI->second.begin(),
+                                               IE = SI->second.end();
+                 II != IE; ++II) {
                // Use i32 to index structs, and i64 for others (pointers/arrays).
                // This satisfies GEP constraints.
                Type *IdxTy = (ElTy->isStructTy() ?
@@ -694,19 +828,20 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
                ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
              }
              // And create a GEP to extract those indices.
-            V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call);
+            V = GetElementPtrInst::Create(SI->first, V, Ops,
+                                          V->getName() + ".idx", Call);
              Ops.clear();
-            AA.copyValue(OrigLoad->getOperand(0), V);
            }
            // Since we're replacing a load make sure we take the alignment
            // of the previous load.
            LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
            newLoad->setAlignment(OrigLoad->getAlignment());
-          // Transfer the TBAA info too.
-          newLoad->setMetadata(LLVMContext::MD_tbaa,
-                               OrigLoad->getMetadata(LLVMContext::MD_tbaa));
+          // Transfer the AA info too.
+          AAMDNodes AAInfo;
+          OrigLoad->getAAMetadata(AAInfo);
+          newLoad->setAAMetadata(AAInfo);
+
            Args.push_back(newLoad);
-          AA.copyValue(OrigLoad, Args.back());
          }
        }
  
@@ -740,16 +875,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
        if (cast<CallInst>(Call)->isTailCall())
          cast<CallInst>(New)->setTailCall();
      }
+    New->setDebugLoc(Call->getDebugLoc());
      Args.clear();
      AttributesVec.clear();
  
-    // Update the alias analysis implementation to know that we are replacing
-    // the old call with a new one.
-    AA.replaceWithNewValue(Call, New);
-
      // Update the callgraph to know that the callsite has been transformed.
      CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
-    CalleeNode->replaceCallEdge(Call, New, NF_CGN);
+    CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
  
      if (!Call->use_empty()) {
        Call->replaceAllUsesWith(New);
@@ -771,67 +903,70 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
    //
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
         I2 = NF->arg_begin(); I != E; ++I) {
-    if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+    if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
        // If this is an unmodified argument, move the name and users over to the
        // new version.
-      I->replaceAllUsesWith(I2);
-      I2->takeName(I);
-      AA.replaceWithNewValue(I, I2);
+      I->replaceAllUsesWith(&*I2);
+      I2->takeName(&*I);
        ++I2;
        continue;
      }
  
-    if (ByValArgsToTransform.count(I)) {
+    if (ByValArgsToTransform.count(&*I)) {
        // In the callee, we create an alloca, and store each of the new incoming
        // arguments into the alloca.
-      Instruction *InsertPt = NF->begin()->begin();
+      Instruction *InsertPt = &NF->begin()->front();
  
        // Just add all the struct element types.
        Type *AgTy = cast<PointerType>(I->getType())->getElementType();
-      Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
+      Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt);
        StructType *STy = cast<StructType>(AgTy);
        Value *Idxs[2] = {
-            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
  
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
-        Value *Idx = 
-          GetElementPtrInst::Create(TheAlloca, Idxs,
-                                    TheAlloca->getName()+"."+Twine(i), 
-                                    InsertPt);
+        Value *Idx = GetElementPtrInst::Create(
+            AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
+            InsertPt);
          I2->setName(I->getName()+"."+Twine(i));
-        new StoreInst(I2++, Idx, InsertPt);
+        new StoreInst(&*I2++, Idx, InsertPt);
        }
  
        // Anything that used the arg should now use the alloca.
        I->replaceAllUsesWith(TheAlloca);
-      TheAlloca->takeName(I);
-      AA.replaceWithNewValue(I, TheAlloca);
+      TheAlloca->takeName(&*I);
+
+      // If the alloca is used in a call, we must clear the tail flag since
+      // the callee now uses an alloca from the caller.
+      for (User *U : TheAlloca->users()) {
+        CallInst *Call = dyn_cast<CallInst>(U);
+        if (!Call)
+          continue;
+        Call->setTailCall(false);
+      }
        continue;
      }
  
-    if (I->use_empty()) {
-      AA.deleteValue(I);
+    if (I->use_empty())
        continue;
-    }
  
      // Otherwise, if we promoted this argument, then all users are load
      // instructions (or GEPs with only load users), and all loads should be
      // using the new argument that we added.
-    ScalarizeTable &ArgIndices = ScalarizedElements[I];
+    ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
  
      while (!I->use_empty()) {
-      if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) {
-        assert(ArgIndices.begin()->empty() &&
+      if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
+        assert(ArgIndices.begin()->second.empty() &&
                 "Load element should sort to front!");
          I2->setName(I->getName()+".val");
-        LI->replaceAllUsesWith(I2);
-        AA.replaceWithNewValue(LI, I2);
+        LI->replaceAllUsesWith(&*I2);
          LI->eraseFromParent();
          DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
                << "' in function '" << F->getName() << "'\n");
        } else {
-        GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
+        GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
          IndicesVector Operands;
          Operands.reserve(GEP->getNumIndices());
          for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
@@ -844,7 +979,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
  
          Function::arg_iterator TheArg = I2;
          for (ScalarizeTable::iterator It = ArgIndices.begin();
-             *It != Operands; ++It, ++TheArg) {
+             It->second != Operands; ++It, ++TheArg) {
            assert(It != ArgIndices.end() && "GEP not handled??");
          }
  
@@ -861,12 +996,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
          // All of the uses must be load instructions.  Replace them all with
          // the argument specified by ArgNo.
          while (!GEP->use_empty()) {
-          LoadInst *L = cast<LoadInst>(GEP->use_back());
-          L->replaceAllUsesWith(TheArg);
-          AA.replaceWithNewValue(L, TheArg);
+          LoadInst *L = cast<LoadInst>(GEP->user_back());
+          L->replaceAllUsesWith(&*TheArg);
            L->eraseFromParent();
          }
-        AA.deleteValue(GEP);
          GEP->eraseFromParent();
        }
      }
@@ -875,10 +1008,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
      std::advance(I2, ArgIndices.size());
    }
  
-  // Tell the alias analysis that the old function is about to disappear.
-  AA.replaceWithNewValue(F, NF);
-
-  
    NF_CGN->stealCalledFunctionsFrom(CG[F]);
    
    // Now that the old function is dead, delete it.  If there is a dangling
@@ -892,3 +1021,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
    
    return NF_CGN;
  }
+
+/// Initialization hook for the pass: fills in FunctionDIs from the module
+/// behind the call graph \p CG, then forwards to the base-class
+/// CallGraphSCCPass::doInitialization and returns whatever that returns.
+bool ArgPromotion::doInitialization(CallGraph &CG) {
+  // NOTE(review): makeSubprogramMap is presumably the helper from
+  // llvm/IR/DebugInfo.h that maps each function to its debug-info subprogram
+  // entry -- confirm against that header.
+  FunctionDIs = makeSubprogramMap(CG.getModule());
+  // Delegate so the base class still performs its own initialization.
+  return CallGraphSCCPass::doInitialization(CG);
+}