Revert "TRE: make TRE a bit more aggressive"

[oota-llvm.git] / lib / Transforms / Scalar / TailRecursionElimination.cpp
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp

index 6d02777d091fcc788b0911fa1302a497c192c151..b7580255150c48fae28ff2dbb02a62bb6ffafcca 100644 (file)
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -50,12 +50,12 @@
  //
  //===----------------------------------------------------------------------===//
  
-#define DEBUG_TYPE "tailcallelim"
  #include "llvm/Transforms/Scalar.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CFG.h"
  #include "llvm/Analysis/InlineCost.h"
  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/Analysis/Loads.h"
@@ -64,6 +64,7 @@
  #include "llvm/IR/CallSite.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/IntrinsicInst.h"
@@ -76,6 +77,8 @@
  #include "llvm/Transforms/Utils/Local.h"
  using namespace llvm;
  
+#define DEBUG_TYPE "tailcallelim"
+
  STATISTIC(NumEliminated, "Number of tail calls removed");
  STATISTIC(NumRetDuped,   "Number of return duplicated");
  STATISTIC(NumAccumAdded, "Number of accumulators introduced");
@@ -94,6 +97,9 @@ namespace {
      bool runOnFunction(Function &F) override;
  
    private:
+    bool runTRE(Function &F);
+    bool markTails(Function &F, bool &AllCallsAreTailCalls);
+
      CallInst *FindTRECandidate(Instruction *I,
                                 bool CannotTailCallElimCallsMarkedTail);
      bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
@@ -131,55 +137,253 @@ void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<TargetTransformInfo>();
  }
  
-/// CanTRE - Scan the specified basic block for alloca instructions.
-/// If it contains any that are variable-sized or not in the entry block,
-/// returns false.
-static bool CanTRE(AllocaInst *AI) {
-  // Because of PR962, we don't TRE allocas outside the entry block.
-
-  // If this alloca is in the body of the function, or if it is a variable
-  // sized allocation, we cannot tail call eliminate calls marked 'tail'
-  // with this mechanism.
-  BasicBlock *BB = AI->getParent();
-  return BB == &BB->getParent()->getEntryBlock() &&
-         isa<ConstantInt>(AI->getArraySize());
+/// \brief Check that the function contains no dynamic allocas.
+/// Returns true only when every alloca instruction found in the function is a
+/// static alloca. Because of PR962, we don't TRE dynamic allocas.
+static bool CanTRE(Function &F) {
+  for (BasicBlock &Block : F) {
+    for (Instruction &Inst : Block) {
+      AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst);
+      if (!Alloca)
+        continue;
+      if (!Alloca->isStaticAlloca())
+        return false;
+    }
+  }
+  return true;
  }
  
-namespace {
-struct AllocaCaptureTracker : public CaptureTracker {
-  AllocaCaptureTracker() : Captured(false) {}
+bool TailCallElim::runOnFunction(Function &F) {
+  if (skipOptnoneFunction(F))
+    return false;
  
-  void tooManyUses() override { Captured = true; }
+  bool AllCallsAreTailCalls = false;
+  bool Modified = markTails(F, AllCallsAreTailCalls);
+  if (AllCallsAreTailCalls)
+    Modified |= runTRE(F);
+  return Modified;
+}
  
-  bool shouldExplore(const Use *U) override {
-    Value *V = U->getUser();
-    if (isa<CallInst>(V) || isa<InvokeInst>(V))
-      UsesAlloca.insert(V);
-    return true;
+namespace {
+struct AllocaDerivedValueTracker {
+  // Start at a root value and walk its def-use chain to mark calls that use the
+  // value or a derived value in AllocaUsers, and places where it may escape in
+  // EscapePoints.
+  void walk(Value *Root) {
+    SmallVector<Use *, 32> Worklist;
+    SmallPtrSet<Use *, 32> Visited;
+
+    auto AddUsesToWorklist = [&](Value *V) {
+      for (auto &U : V->uses()) {
+        if (!Visited.insert(&U))
+          continue;
+        Worklist.push_back(&U);
+      }
+    };
+
+    AddUsesToWorklist(Root);
+
+    while (!Worklist.empty()) {
+      Use *U = Worklist.pop_back_val();
+      Instruction *I = cast<Instruction>(U->getUser());
+
+      switch (I->getOpcode()) {
+      case Instruction::Call:
+      case Instruction::Invoke: {
+        CallSite CS(I);
+        bool IsNocapture = !CS.isCallee(U) &&
+                           CS.doesNotCapture(CS.getArgumentNo(U));
+        callUsesLocalStack(CS, IsNocapture);
+        if (IsNocapture) {
+          // If the alloca-derived argument is passed in as nocapture, then it
+          // can't propagate to the call's return. That would be capturing.
+          continue;
+        }
+        break;
+      }
+      case Instruction::Load: {
+        // The result of a load is not alloca-derived (unless an alloca has
+        // otherwise escaped, but this is a local analysis).
+        continue;
+      }
+      case Instruction::Store: {
+        if (U->getOperandNo() == 0)
+          EscapePoints.insert(I);
+        continue;  // Stores have no users to analyze.
+      }
+      case Instruction::BitCast:
+      case Instruction::GetElementPtr:
+      case Instruction::PHI:
+      case Instruction::Select:
+      case Instruction::AddrSpaceCast:
+        break;
+      default:
+        EscapePoints.insert(I);
+        break;
+      }
+
+      AddUsesToWorklist(I);  // I is treated as derived; follow its uses too.
+    }
    }
  
-  bool captured(const Use *U) override {
-    if (isa<ReturnInst>(U->getUser()))
-      return false;
-    Captured = true;
-    return true;
+  void callUsesLocalStack(CallSite CS, bool IsNocapture) {
+    Instruction *Call = CS.getInstruction();
+
+    // Record the call in the list of alloca users.
+    AllocaUsers.insert(Call);
+
+    // The call is an escape point only when it may capture the value and may
+    // also write to memory, since that is how it could leak the alloca value.
+    // A nocapture use or a callee that only reads memory rules that out.
+    if (!IsNocapture && !CS.onlyReadsMemory())
+      EscapePoints.insert(Call);
    }
  
-  bool Captured;
-  SmallPtrSet<const Value *, 16> UsesAlloca;
+  SmallPtrSet<Instruction *, 32> AllocaUsers;
+  SmallPtrSet<Instruction *, 32> EscapePoints;
  };
-} // end anonymous namespace
+}
  
-bool TailCallElim::runOnFunction(Function &F) {
-  if (skipOptnoneFunction(F))
+/// \brief Mark calls that cannot reach this function's local stack (allocas
+/// and byval arguments) as 'tail'. Returns true if any call was marked. Unless
+/// the function calls a returns-twice function, AllCallsAreTailCalls is set:
+/// true when every visited call is, or was just marked, a tail call.
+bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
+  if (F.callsFunctionThatReturnsTwice())
      return false;
+  AllCallsAreTailCalls = true;
+
+  // The local stack holds all alloca instructions and all byval arguments.
+  AllocaDerivedValueTracker Tracker;
+  for (Argument &Arg : F.args()) {
+    if (Arg.hasByValAttr())
+      Tracker.walk(&Arg);
+  }
+  for (auto &BB : F) {
+    for (auto &I : BB)
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
+        Tracker.walk(AI);
+  }
  
+  bool Modified = false;
+
+  // Track whether a block is reachable after an alloca has escaped. Blocks that
+  // contain the escaping instruction will be marked as being visited without an
+  // escaped alloca, since that is how the block began.
+  enum VisitType { UNVISITED, UNESCAPED, ESCAPED };
+  DenseMap<BasicBlock *, VisitType> Visited;
+
+  // We propagate the fact that an alloca has escaped from block to successor.
+  // Visit the blocks that are propagating the escapedness first. To do this, we
+  // maintain two worklists.
+  SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;
+
+  // We may enter a block and visit it thinking that no alloca has escaped yet,
+  // then see an escape point and go back around a loop edge and come back to
+  // the same block twice. Because of this, we defer setting tail on calls when
+  // we first encounter them in a block. Every entry in this list does not
+  // statically use an alloca via def-use chain analysis, but may find an alloca
+  // through other means if the block turns out to be reachable after an escape
+  // point.
+  SmallVector<CallInst *, 32> DeferredTails;
+
+  BasicBlock *BB = &F.getEntryBlock();
+  VisitType Escaped = UNESCAPED;
+  do {
+    for (auto &I : *BB) {
+      if (Tracker.EscapePoints.count(&I))
+        Escaped = ESCAPED;
+
+      CallInst *CI = dyn_cast<CallInst>(&I);
+      if (!CI || CI->isTailCall())
+        continue;
+
+      if (CI->doesNotAccessMemory()) {
+        // A call to a readnone function whose arguments are all things computed
+        // outside this function can be marked tail. Even if you stored the
+        // alloca address into a global, a readnone function can't load the
+        // global anyhow. Inspect the operand value itself (Use::get()); the
+        // user of every argument Use is CI, never a Constant or an Argument.
+        // Note that this runs whether we know an alloca has escaped or not. If
+        // it has, then we can't trust Tracker.AllocaUsers to be accurate.
+        bool SafeToTail = true;
+        for (auto &Arg : CI->arg_operands()) {
+          if (isa<Constant>(Arg.get()))
+            continue;
+          if (Argument *A = dyn_cast<Argument>(Arg.get()))
+            if (!A->hasByValAttr())
+              continue;
+          SafeToTail = false;
+          break;
+        }
+        if (SafeToTail) {
+          emitOptimizationRemark(
+              F.getContext(), "tailcallelim", F, CI->getDebugLoc(),
+              "marked this readnone call a tail call candidate");
+          CI->setTailCall();
+          Modified = true;
+          continue;
+        }
+      }
+
+      if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
+        DeferredTails.push_back(CI);
+      } else {
+        AllCallsAreTailCalls = false;
+      }
+    }
+
+    for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
+      auto &State = Visited[SuccBB];
+      if (State < Escaped) {
+        State = Escaped;
+        if (State == ESCAPED)
+          WorklistEscaped.push_back(SuccBB);
+        else
+          WorklistUnescaped.push_back(SuccBB);
+      }
+    }
+
+    if (!WorklistEscaped.empty()) {
+      BB = WorklistEscaped.pop_back_val();
+      Escaped = ESCAPED;
+    } else {
+      BB = nullptr;
+      while (!WorklistUnescaped.empty()) {
+        auto *NextBB = WorklistUnescaped.pop_back_val();
+        if (Visited[NextBB] == UNESCAPED) {
+          BB = NextBB;
+          Escaped = UNESCAPED;
+          break;
+        }
+      }
+    }
+  } while (BB);
+
+  for (CallInst *CI : DeferredTails) {
+    if (Visited[CI->getParent()] != ESCAPED) {
+      // If the escape point was part way through the block, calls after the
+      // escape point wouldn't have been put into DeferredTails.
+      emitOptimizationRemark(F.getContext(), "tailcallelim", F,
+                             CI->getDebugLoc(),
+                             "marked this call a tail call candidate");
+      CI->setTailCall();
+      Modified = true;
+    } else {
+      AllCallsAreTailCalls = false;
+    }
+  }
+
+  return Modified;
+}
+
+bool TailCallElim::runTRE(Function &F) {
    // If this function is a varargs function, we won't be able to PHI the args
    // right, so don't even try to convert it...
    if (F.getFunctionType()->isVarArg()) return false;
  
    TTI = &getAnalysis<TargetTransformInfo>();
-  BasicBlock *OldEntry = 0;
+  BasicBlock *OldEntry = nullptr;
    bool TailCallsAreMarkedTail = false;
    SmallVector<PHINode*, 8> ArgumentPHIs;
    bool MadeChange = false;
@@ -188,39 +392,23 @@ bool TailCallElim::runOnFunction(Function &F) {
    // marked with the 'tail' attribute, because doing so would cause the stack
    // size to increase (real TRE would deallocate variable sized allocas, TRE
    // doesn't).
-  bool CanTRETailMarkedCall = true;
-
-  // Find calls that can be marked tail.
-  AllocaCaptureTracker ACT;
-  for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) {
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
-      if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
-        CanTRETailMarkedCall &= CanTRE(AI);
-        PointerMayBeCaptured(AI, &ACT);
-        // If any allocas are captured, exit.
-        if (ACT.Captured)
-          return false;
-      }
-    }
-  }
+  bool CanTRETailMarkedCall = CanTRE(F);
  
-  // Second pass, change any tail recursive calls to loops.
+  // Change any tail recursive calls to loops.
    //
    // FIXME: The code generator produces really bad code when an 'escaping
    // alloca' is changed from being a static alloca to being a dynamic alloca.
    // Until this is resolved, disable this transformation if that would ever
    // happen.  This bug is PR962.
-  if (ACT.UsesAlloca.empty()) {
-    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
-      if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
-        bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
-                                            ArgumentPHIs, !CanTRETailMarkedCall);
-        if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
-          Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
-                                            TailCallsAreMarkedTail, ArgumentPHIs,
-                                            !CanTRETailMarkedCall);
-        MadeChange |= Change;
-      }
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+      bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+                                          ArgumentPHIs, !CanTRETailMarkedCall);
+      if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+        Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
+                                          TailCallsAreMarkedTail, ArgumentPHIs,
+                                          !CanTRETailMarkedCall);
+      MadeChange |= Change;
      }
    }
  
@@ -229,34 +417,13 @@ bool TailCallElim::runOnFunction(Function &F) {
    // with themselves.  Check to see if we did and clean up our mess if so.  This
    // occurs when a function passes an argument straight through to its tail
    // call.
-  if (!ArgumentPHIs.empty()) {
-    for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
-      PHINode *PN = ArgumentPHIs[i];
-
-      // If the PHI Node is a dynamic constant, replace it with the value it is.
-      if (Value *PNV = SimplifyInstruction(PN)) {
-        PN->replaceAllUsesWith(PNV);
-        PN->eraseFromParent();
-      }
-    }
-  }
+  for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
+    PHINode *PN = ArgumentPHIs[i];
  
-  // At this point, we know that the function does not have any captured
-  // allocas. If additionally the function does not call setjmp, mark all calls
-  // in the function that do not access stack memory with the tail keyword. This
-  // implies ensuring that there does not exist any path from a call that takes
-  // in an alloca but does not capture it and the call which we wish to mark
-  // with "tail".
-  if (!F.callsFunctionThatReturnsTwice()) {
-    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
-      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
-        if (CallInst *CI = dyn_cast<CallInst>(I)) {
-          if (!ACT.UsesAlloca.count(CI)) {
-            CI->setTailCall();
-            MadeChange = true;
-          }
-        }
-      }
+    // If the PHI Node is a dynamic constant, replace it with the value it is.
+    if (Value *PNV = SimplifyInstruction(PN)) {
+      PN->replaceAllUsesWith(PNV);
+      PN->eraseFromParent();
      }
    }
  
@@ -343,11 +510,11 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
  //
  static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
    Function *F = CI->getParent()->getParent();
-  Value *ReturnedValue = 0;
+  Value *ReturnedValue = nullptr;
  
    for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
      ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
-    if (RI == 0 || RI == IgnoreRI) continue;
+    if (RI == nullptr || RI == IgnoreRI) continue;
  
      // We can only perform this transformation if the value returned is
      // evaluatable at the start of the initial invocation of the function,
@@ -355,10 +522,10 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
      //
      Value *RetOp = RI->getOperand(0);
      if (!isDynamicConstant(RetOp, CI, RI))
-      return 0;
+      return nullptr;
  
      if (ReturnedValue && RetOp != ReturnedValue)
-      return 0;     // Cannot transform if differing values are returned.
+      return nullptr;     // Cannot transform if differing values are returned.
      ReturnedValue = RetOp;
    }
    return ReturnedValue;
@@ -370,18 +537,18 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
  ///
  Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
                                                        CallInst *CI) {
-  if (!I->isAssociative() || !I->isCommutative()) return 0;
+  if (!I->isAssociative() || !I->isCommutative()) return nullptr;
    assert(I->getNumOperands() == 2 &&
           "Associative/commutative operations should have 2 args!");
  
    // Exactly one operand should be the result of the call instruction.
    if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
        (I->getOperand(0) != CI && I->getOperand(1) != CI))
-    return 0;
+    return nullptr;
  
    // The only user of this instruction we allow is a single return instruction.
    if (!I->hasOneUse() || !isa<ReturnInst>(I->user_back()))
-    return 0;
+    return nullptr;
  
    // Ok, now we have to check all of the other return instructions in this
    // function.  If they return non-constants or differing values, then we cannot
@@ -402,11 +569,11 @@ TailCallElim::FindTRECandidate(Instruction *TI,
    Function *F = BB->getParent();
  
    if (&BB->front() == TI) // Make sure there is something before the terminator.
-    return 0;
+    return nullptr;
  
    // Scan backwards from the return, checking to see if there is a tail call in
    // this block.  If so, set CI to it.
-  CallInst *CI = 0;
+  CallInst *CI = nullptr;
    BasicBlock::iterator BBI = TI;
    while (true) {
      CI = dyn_cast<CallInst>(BBI);
@@ -414,14 +581,14 @@ TailCallElim::FindTRECandidate(Instruction *TI,
        break;
  
      if (BBI == BB->begin())
-      return 0;          // Didn't find a potential tail call.
+      return nullptr;          // Didn't find a potential tail call.
      --BBI;
    }
  
    // If this call is marked as a tail call, and if there are dynamic allocas in
    // the function, we cannot perform this optimization.
    if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
-    return 0;
+    return nullptr;
  
    // As a special case, detect code like this:
    //   double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
@@ -441,7 +608,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
      for (; I != E && FI != FE; ++I, ++FI)
        if (*I != &*FI) break;
      if (I == E && FI == FE)
-      return 0;
+      return nullptr;
    }
  
    return CI;
@@ -462,8 +629,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
    // which is different to the constant returned by other return instructions
    // (which is recorded in AccumulatorRecursionEliminationInitVal).  This is a
    // special case of accumulator recursion, the operation being "return C".
-  Value *AccumulatorRecursionEliminationInitVal = 0;
-  Instruction *AccumulatorRecursionInstr = 0;
+  Value *AccumulatorRecursionEliminationInitVal = nullptr;
+  Instruction *AccumulatorRecursionInstr = nullptr;
  
    // Ok, we found a potential tail call.  We can currently only transform the
    // tail call if all of the instructions between the call and the return are
@@ -493,8 +660,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
    // accumulator recursion variable eliminated.
    if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
        !isa<UndefValue>(Ret->getReturnValue()) &&
-      AccumulatorRecursionEliminationInitVal == 0 &&
-      !getCommonReturnValue(0, CI)) {
+      AccumulatorRecursionEliminationInitVal == nullptr &&
+      !getCommonReturnValue(nullptr, CI)) {
      // One case remains that we are able to handle: the current return
      // instruction returns a constant, and all other return instructions
      // return a different constant.
@@ -510,9 +677,12 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
    BasicBlock *BB = Ret->getParent();
    Function *F = BB->getParent();
  
+  emitOptimizationRemark(F->getContext(), "tailcallelim", *F, CI->getDebugLoc(),
+                         "transforming tail recursion to loop");
+
    // OK! We can transform this tail call.  If this is the first one found,
    // create the new entry block, allowing us to branch back to the old entry.
-  if (OldEntry == 0) {
+  if (!OldEntry) {
      OldEntry = &F->getEntryBlock();
      BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
      NewEntry->takeName(OldEntry);