From 18b1f4e7692f6a42d8e64d4b7c50a73b2aeacd90 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 22 Oct 2012 03:03:52 +0000 Subject: [PATCH] Reapply r166405, teaching tailcallelim to be smarter about nocapture, with a very small but very important bugfix: bool shouldExplore(Use *U) { Value *V = U->get(); if (isa(V) || isa(V)) [...] should have read: bool shouldExplore(Use *U) { Value *V = U->getUser(); if (isa(V) || isa(V)) Fixes PR14143! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166407 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Scalar/TailRecursionElimination.cpp | 123 ++++++++++-------- test/Transforms/TailCallElim/nocapture.ll | 23 +++- 2 files changed, 86 insertions(+), 60 deletions(-) diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 6557d630a94..28c278f0362 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -69,6 +69,7 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -117,34 +118,45 @@ FunctionPass *llvm::createTailCallEliminationPass() { return new TailCallElim(); } -/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by -/// callees of this function. We only do very simple analysis right now, this -/// could be expanded in the future to use mod/ref information for particular -/// call sites if desired. -static bool AllocaMightEscapeToCalls(AllocaInst *AI) { - // FIXME: do simple 'address taken' analysis. - return true; +/// CanTRE - Scan the specified basic block for alloca instructions. +/// If it contains any that are variable-sized or not in the entry block, +/// returns false. +static bool CanTRE(AllocaInst *AI) { + // Because of PR962, we don't TRE allocas outside the entry block. + + // If this alloca is in the body of the function, or if it is a variable + // sized allocation, we cannot tail call eliminate calls marked 'tail' + // with this mechanism. + BasicBlock *BB = AI->getParent(); + return BB == &BB->getParent()->getEntryBlock() && + isa(AI->getArraySize()); } -/// CheckForEscapingAllocas - Scan the specified basic block for alloca -/// instructions. If it contains any that might be accessed by calls, return -/// true. -static bool CheckForEscapingAllocas(BasicBlock *BB, - bool &CannotTCETailMarkedCall) { - bool RetVal = false; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (AllocaInst *AI = dyn_cast(I)) { - RetVal |= AllocaMightEscapeToCalls(AI); - - // If this alloca is in the body of the function, or if it is a variable - // sized allocation, we cannot tail call eliminate calls marked 'tail' - // with this mechanism. - if (BB != &BB->getParent()->getEntryBlock() || - !isa(AI->getArraySize())) - CannotTCETailMarkedCall = true; - } - return RetVal; -} +struct AllocaCaptureTracker : public CaptureTracker { + AllocaCaptureTracker() : Captured(false) {} + + void tooManyUses() { Captured = true; } + + bool shouldExplore(Use *U) { + Value *V = U->getUser(); + if (isa(V) || isa(V)) + UsesAlloca.push_back(V); + + return true; + } + + bool captured(Use *U) { + if (isa(U->getUser())) + return false; + + Captured = true; + return true; + } + + SmallVector UsesAlloca; + + bool Captured; +}; bool TailCallElim::runOnFunction(Function &F) { // If this function is a varargs function, we won't be able to PHI the args @@ -157,38 +169,34 @@ bool TailCallElim::runOnFunction(Function &F) { bool MadeChange = false; bool FunctionContainsEscapingAllocas = false; - // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls + // CanTRETailMarkedCall - If false, we cannot perform TRE on tail calls // marked with the 'tail' attribute, because doing so would cause the stack - // size to increase (real TCE would deallocate variable sized allocas, TCE + // size to increase (real TRE would deallocate variable sized allocas, TRE // doesn't). - bool CannotTCETailMarkedCall = false; - - // Loop over the function, looking for any returning blocks, and keeping track - // of whether this function has any non-trivially used allocas. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall) - break; - - FunctionContainsEscapingAllocas |= - CheckForEscapingAllocas(BB, CannotTCETailMarkedCall); + bool CanTRETailMarkedCall = true; + + // Find calls that can be marked tail. + AllocaCaptureTracker ACT; + for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (AllocaInst *AI = dyn_cast(I)) { + CanTRETailMarkedCall &= CanTRE(AI); + PointerMayBeCaptured(AI, &ACT); + if (ACT.Captured) + return false; + } + } } - /// FIXME: The code generator produces really bad code when an 'escaping - /// alloca' is changed from being a static alloca to being a dynamic alloca. - /// Until this is resolved, disable this transformation if that would ever - /// happen. This bug is PR962. - if (FunctionContainsEscapingAllocas) - return false; - - // Second pass, change any tail calls to loops. + // Second pass, change any tail recursive calls to loops. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs,CannotTCETailMarkedCall); + ArgumentPHIs, !CanTRETailMarkedCall); if (!Change && BB->getFirstNonPHIOrDbg() == Ret) Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, - CannotTCETailMarkedCall); + !CanTRETailMarkedCall); MadeChange |= Change; } } @@ -210,21 +218,24 @@ bool TailCallElim::runOnFunction(Function &F) { } } - // Finally, if this function contains no non-escaping allocas, or calls - // setjmp, mark all calls in the function as eligible for tail calls - //(there is no stack memory for them to access). + // Finally, if this function contains no non-escaping allocas and doesn't + // call setjmp, mark all calls in the function as eligible for tail calls + // (there is no stack memory for them to access). + std::sort(ACT.UsesAlloca.begin(), ACT.UsesAlloca.end()); + if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice()) for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (CallInst *CI = dyn_cast(I)) { - CI->setTailCall(); - MadeChange = true; - } + if (CallInst *CI = dyn_cast(I)) + if (!std::binary_search(ACT.UsesAlloca.begin(), ACT.UsesAlloca.end(), + CI)) { + CI->setTailCall(); + MadeChange = true; + } return MadeChange; } - /// CanMoveAboveCall - Return true if it is safe to move the specified /// instruction from after the call to before the call, assuming that all /// instructions between the call and this instruction are movable. diff --git a/test/Transforms/TailCallElim/nocapture.ll b/test/Transforms/TailCallElim/nocapture.ll index 87cb9dd427b..5a1a9a6e7ce 100644 --- a/test/Transforms/TailCallElim/nocapture.ll +++ b/test/Transforms/TailCallElim/nocapture.ll @@ -1,9 +1,9 @@ ; RUN: opt %s -tailcallelim -S | FileCheck %s -; XFAIL: * declare void @use(i8* nocapture, i8* nocapture) +declare void @boring() -define i8* @foo(i8* nocapture %A, i1 %cond) { +define i8* @test1(i8* nocapture %A, i1 %cond) { ; CHECK: tailrecurse: ; CHECK: %A.tr = phi i8* [ %A, %0 ], [ %B, %cond_true ] ; CHECK: %cond.tr = phi i1 [ %cond, %0 ], [ false, %cond_true ] @@ -14,12 +14,27 @@ define i8* @foo(i8* nocapture %A, i1 %cond) { cond_true: ; CHECK: cond_true: ; CHECK: br label %tailrecurse - call i8* @foo(i8* %B, i1 false) + call i8* @test1(i8* %B, i1 false) ret i8* null cond_false: ; CHECK: cond_false call void @use(i8* %A, i8* %B) -; CHECK: tail call void @use(i8* %A.tr, i8* %B) +; CHECK: call void @use(i8* %A.tr, i8* %B) + call void @boring() +; CHECK: tail call void @boring() ret i8* null ; CHECK: ret i8* null } + +; PR14143 +define void @test2(i8* %a, i8* %b) { +; CHECK: @test2 +; CHECK-NOT: tail call +; CHECK: ret void + %c = alloca [100 x i8], align 16 + %tmp = bitcast [100 x i8]* %c to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %tmp, i64 100, i32 1, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) -- 2.34.1