From: Evan Cheng Date: Sat, 29 Jan 2011 01:29:26 +0000 (+0000) Subject: Re-commit r124462 with fixes. Tail recursion elim will now dup ret into unconditional... X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=5e6940788fb2f8cf3ce4219d3ac0f78317f54696;p=oota-llvm.git Re-commit r124462 with fixes. Tail recursion elim will now dup ret into unconditional predecessor to enable TCE on demand. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124518 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h index 6de8b850704..53358602870 100644 --- a/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -22,9 +22,10 @@ namespace llvm { +class AliasAnalysis; class Instruction; class Pass; -class AliasAnalysis; +class ReturnInst; /// DeleteDeadBlock - Delete the specified block, which must have no /// predecessors. @@ -171,7 +172,15 @@ BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P); BasicBlock *SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, unsigned NumPreds, const char *Suffix, Pass *P = 0); - + +/// FoldReturnIntoUncondBranch - This method duplicates the specified return +/// instruction into a predecessor which ends in an unconditional branch. If +/// the return instruction returns a value defined by a PHI, propagate the +/// right value into the return. It returns the new return instruction in the +/// predecessor. +ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, + BasicBlock *Pred); + } // End llvm namespace #endif diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index ce4b1be8541..15aed3436c7 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -465,9 +465,12 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, MaxDuplicateCount = TailDuplicateSize; if (PreRegAlloc) { - // Pre-regalloc tail duplication hurts compile time and doesn't help - // much except for indirect branches. - if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch()) + if (TailBB->empty()) + return false; + const TargetInstrDesc &TID = TailBB->back().getDesc(); + // Pre-regalloc tail duplication hurts compile time and doesn't help + // much except for indirect branches and returns. + if (!TID.isIndirectBranch() && !TID.isReturn()) return false; // If the target has hardware branch prediction that can handle indirect // branches, duplicating them can often make them predictable when there @@ -502,7 +505,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, } // Heuristically, don't tail-duplicate calls if it would expand code size, // as it's less likely to be worth the extra cost. 
- if (InstrCount > 1 && HasCall) + if (InstrCount > 1 && (PreRegAlloc && HasCall)) return false; DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 23514fd51be..8b9340926c5 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -52,11 +52,13 @@ #define DEBUG_TYPE "tailcallelim" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InlineCost.h" @@ -64,7 +66,9 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" using namespace llvm; STATISTIC(NumEliminated, "Number of tail calls removed"); @@ -80,6 +84,18 @@ namespace { virtual bool runOnFunction(Function &F); private: + CallInst *FindTRECandidate(Instruction *I, + bool CannotTailCallElimCallsMarkedTail); + bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, + BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail); + bool FoldReturnAndProcessPred(BasicBlock *BB, + ReturnInst *Ret, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail); bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, SmallVector &ArgumentPHIs, @@ -136,7 +152,6 @@ bool TailCallElim::runOnFunction(Function &F) { bool TailCallsAreMarkedTail = false; SmallVector ArgumentPHIs; bool MadeChange = false; - bool FunctionContainsEscapingAllocas = false; // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls @@ -163,10 +178,17 @@ bool TailCallElim::runOnFunction(Function &F) { return false; // Second pass, change any tail calls to loops. 
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) - MadeChange |= ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { + bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,CannotTCETailMarkedCall); + if (!Change && BB->getFirstNonPHIOrDbg() == Ret) + Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, + TailCallsAreMarkedTail, ArgumentPHIs, + CannotTCETailMarkedCall); + MadeChange |= Change; + } + } // If we eliminated any tail recursions, it's possible that we inserted some // silly PHI nodes which just merge an initial value (the incoming operand) @@ -325,41 +347,47 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, return getCommonReturnValue(cast(I->use_back()), CI); } -bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, - bool &TailCallsAreMarkedTail, - SmallVector &ArgumentPHIs, - bool CannotTailCallElimCallsMarkedTail) { - BasicBlock *BB = Ret->getParent(); +static Instruction *FirstNonDbg(BasicBlock::iterator I) { + while (isa(I)) + ++I; + return &*I; +} + +CallInst* +TailCallElim::FindTRECandidate(Instruction *TI, + bool CannotTailCallElimCallsMarkedTail) { + BasicBlock *BB = TI->getParent(); Function *F = BB->getParent(); - if (&BB->front() == Ret) // Make sure there is something before the ret... - return false; + if (&BB->front() == TI) // Make sure there is something before the terminator. + return 0; // Scan backwards from the return, checking to see if there is a tail call in // this block. If so, set CI to it. - CallInst *CI; - BasicBlock::iterator BBI = Ret; - while (1) { + CallInst *CI = 0; + BasicBlock::iterator BBI = TI; + while (true) { CI = dyn_cast(BBI); if (CI && CI->getCalledFunction() == F) break; if (BBI == BB->begin()) - return false; // Didn't find a potential tail call. + return 0; // Didn't find a potential tail call. --BBI; } // If this call is marked as a tail call, and if there are dynamic allocas in // the function, we cannot perform this optimization. if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) - return false; + return 0; // As a special case, detect code like this: // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call // and disable this xform in this case, because the code generator will // lower the call to fabs into inline code. if (BB == &F->getEntryBlock() && - &BB->front() == CI && &*++BB->begin() == Ret && + FirstNonDbg(BB->front()) == CI && + FirstNonDbg(llvm::next(BB->begin())) == TI && callIsSmall(F)) { // A single-block function with just a call and a return. Check that // the arguments match. @@ -370,9 +398,17 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, for (; I != E && FI != FE; ++I, ++FI) if (*I != &*FI) break; if (I == E && FI == FE) - return false; + return 0; } + return CI; +} + +bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, + BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail) { // If we are introducing accumulator recursion to eliminate operations after // the call instruction that are both associative and commutative, the initial // value for the accumulator is placed in this variable. 
If this value is set @@ -390,7 +426,8 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // tail call if all of the instructions between the call and the return are // movable to above the call itself, leaving the call next to the return. // Check that this is the case now. - for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) { + BasicBlock::iterator BBI = CI; + for (++BBI; &*BBI != Ret; ++BBI) { if (CanMoveAboveCall(BBI, CI)) continue; // If we can't move the instruction above the call, it might be because it @@ -427,6 +464,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, return false; } + BasicBlock *BB = Ret->getParent(); + Function *F = BB->getParent(); + // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. if (OldEntry == 0) { @@ -536,3 +576,49 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, ++NumEliminated; return true; } + +bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB, + ReturnInst *Ret, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail) { + bool Change = false; + + // If the return block contains nothing but the return and PHI's, + // there might be an opportunity to duplicate the return in its + // predecessors and perform TRC there. Look for predecessors that end + // in unconditional branch and recursive call(s). + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); + PI != E; ++PI) { + BasicBlock *Pred = *PI; + TerminatorInst *PTI = Pred->getTerminator(); + if (BranchInst *BI = dyn_cast(PTI)) { + CallInst *CI = 0; + if (BI->isUnconditional() && + (CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail))) { + DEBUG(dbgs() << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred); + EliminateRecursiveTailCall(CI, + FoldReturnIntoUncondBranch(Ret, BB, Pred), + OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, + CannotTailCallElimCallsMarkedTail); + Change = true; + } + } + } + + return Change; +} + +bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail) { + CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail); + if (!CI) + return false; + + return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail, + ArgumentPHIs, + CannotTailCallElimCallsMarkedTail); +} diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 5d3af3759a0..acaea195e71 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -509,7 +509,32 @@ void llvm::FindFunctionBackedges(const Function &F, // Go up one level. InStack.erase(VisitStack.pop_back_val().first); } - } while (!VisitStack.empty()); - - + } while (!VisitStack.empty()); +} + +/// FoldReturnIntoUncondBranch - This method duplicates the specified return +/// instruction into a predecessor which ends in an unconditional branch. If +/// the return instruction returns a value defined by a PHI, propagate the +/// right value into the return. It returns the new return instruction in the +/// predecessor. 
+ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, + BasicBlock *Pred) { + Instruction *UncondBranch = Pred->getTerminator(); + // Clone the return and add it to the end of the predecessor. + Instruction *NewRet = RI->clone(); + Pred->getInstList().push_back(NewRet); + + // If the return instruction returns a value, and if the value was a + // PHI node in "BB", propagate the right value into the return. + for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); + i != e; ++i) + if (PHINode *PN = dyn_cast(*i)) + if (PN->getParent() == BB) + *i = PN->getIncomingValueForBlock(Pred); + + // Update any PHI nodes in the returning block to realize that we no + // longer branch to them. + BB->removePredecessor(Pred); + UncondBranch->eraseFromParent(); + return cast(NewRet); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index f6d7d76dbf6..b9432c2e6e1 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -36,6 +37,10 @@ #include using namespace llvm; +static cl::opt +DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), + cl::desc("Duplicate return instructions into unconditional branches")); + STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { @@ -2027,28 +2032,12 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI) { } // If we found some, do the transformation! - if (!UncondBranchPreds.empty()) { + if (!UncondBranchPreds.empty() && DupRet) { while (!UncondBranchPreds.empty()) { BasicBlock *Pred = UncondBranchPreds.pop_back_val(); DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); - Instruction *UncondBranch = Pred->getTerminator(); - // Clone the return and add it to the end of the predecessor. - Instruction *NewRet = RI->clone(); - Pred->getInstList().push_back(NewRet); - - // If the return instruction returns a value, and if the value was a - // PHI node in "BB", propagate the right value into the return. - for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); - i != e; ++i) - if (PHINode *PN = dyn_cast(*i)) - if (PN->getParent() == BB) - *i = PN->getIncomingValueForBlock(Pred); - - // Update any PHI nodes in the returning block to realize that we no - // longer branch to them. - BB->removePredecessor(Pred); - UncondBranch->eraseFromParent(); + (void)FoldReturnIntoUncondBranch(RI, BB, Pred); } // If we eliminated all predecessors of the block, delete the block now. diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll index 354d0820697..faba6300712 100644 --- a/test/CodeGen/X86/loop-blocks.ll +++ b/test/CodeGen/X86/loop-blocks.ll @@ -70,6 +70,7 @@ exit: ; Same as slightly_more_involved, but block_a is now a CFG diamond with ; fallthrough edges which should be preserved. +; "callq block_a_merge_func" is tail duped. 
; CHECK: yet_more_involved: ; CHECK: jmp .LBB2_1 @@ -78,12 +79,12 @@ exit: ; CHECK-NEXT: callq bar99 ; CHECK-NEXT: callq get ; CHECK-NEXT: cmpl $2999, %eax -; CHECK-NEXT: jg .LBB2_6 -; CHECK-NEXT: callq block_a_true_func -; CHECK-NEXT: jmp .LBB2_7 -; CHECK-NEXT: .LBB2_6: +; CHECK-NEXT: jle .LBB2_5 ; CHECK-NEXT: callq block_a_false_func -; CHECK-NEXT: .LBB2_7: +; CHECK-NEXT: callq block_a_merge_func +; CHECK-NEXT: jmp .LBB2_1 +; CHECK-NEXT: .LBB2_5: +; CHECK-NEXT: callq block_a_true_func ; CHECK-NEXT: callq block_a_merge_func ; CHECK-NEXT: .LBB2_1: ; CHECK-NEXT: callq body diff --git a/test/Transforms/JumpThreading/and-and-cond.ll b/test/Transforms/JumpThreading/and-and-cond.ll index e6db9ee5a32..765d940cc7c 100644 --- a/test/Transforms/JumpThreading/and-and-cond.ll +++ b/test/Transforms/JumpThreading/and-and-cond.ll @@ -1,14 +1,14 @@ -; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | grep {ret i32 %v1} -; There should be no uncond branches left. -; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | not grep {br label} +; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | FileCheck %s declare i32 @f1() declare i32 @f2() declare void @f3() define i32 @test(i1 %cond, i1 %cond2, i1 %cond3) { +; CHECK: test br i1 %cond, label %T1, label %F1 +; CHECK-NOT: T1: T1: %v1 = call i32 @f1() br label %Merge @@ -18,6 +18,10 @@ F1: br label %Merge Merge: +; CHECK: Merge: +; CHECK: %v1 = call i32 @f1() +; CHECK-NEXT: %D = and i1 %cond2, %cond3 +; CHECK-NEXT: br i1 %D %A = phi i1 [true, %T1], [false, %F1] %B = phi i32 [%v1, %T1], [%v2, %F1] %C = and i1 %A, %cond2 diff --git a/test/Transforms/JumpThreading/and-cond.ll b/test/Transforms/JumpThreading/and-cond.ll index 58dbec72a76..0159bb3bb76 100644 --- a/test/Transforms/JumpThreading/and-cond.ll +++ b/test/Transforms/JumpThreading/and-cond.ll @@ -1,14 +1,14 @@ -; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | grep {ret i32 %v1} -; There should be no uncond branches left. -; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | not grep {br label} +; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | FileCheck %s declare i32 @f1() declare i32 @f2() declare void @f3() define i32 @test(i1 %cond, i1 %cond2) { +; CHECK: test br i1 %cond, label %T1, label %F1 +; CHECK-NOT: T1 T1: %v1 = call i32 @f1() br label %Merge @@ -18,6 +18,9 @@ F1: br label %Merge Merge: +; CHECK: Merge: +; CHECK: %v1 = call i32 @f1() +; CHECK-NEXT: br i1 %cond2 %A = phi i1 [true, %T1], [false, %F1] %B = phi i32 [%v1, %T1], [%v2, %F1] %C = and i1 %A, %cond2 diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll index 96ba701046d..cce23ea319c 100644 --- a/test/Transforms/JumpThreading/thread-loads.ll +++ b/test/Transforms/JumpThreading/thread-loads.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -jump-threading -simplifycfg -S | grep {ret i32 1} +; RUN: opt < %s -jump-threading -S | FileCheck %s ; rdar://6402033 ; Test that we can thread through the block with the partially redundant load (%2). 
@@ -6,12 +6,16 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 target triple = "i386-apple-darwin7" define i32 @foo(i32* %P) nounwind { +; CHECK: foo entry: %0 = tail call i32 (...)* @f1() nounwind ; [#uses=1] %1 = icmp eq i32 %0, 0 ; [#uses=1] br i1 %1, label %bb1, label %bb bb: ; preds = %entry +; CHECK: bb1.thread: +; CHECK: store +; CHECK: br label %bb3 store i32 42, i32* %P, align 4 br label %bb1 @@ -26,6 +30,9 @@ bb2: ; preds = %bb1 ret i32 %res.0 bb3: ; preds = %bb1 +; CHECK: bb3: +; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ] +; CHECK: ret i32 %res.01 ret i32 %res.0 } diff --git a/test/Transforms/SimplifyCFG/MagicPointer.ll b/test/Transforms/SimplifyCFG/MagicPointer.ll index 54e5b14880c..93b9a276eac 100644 --- a/test/Transforms/SimplifyCFG/MagicPointer.ll +++ b/test/Transforms/SimplifyCFG/MagicPointer.ll @@ -8,7 +8,6 @@ ; CHECK: i64 2, label ; CHECK: i64 3, label ; CHECK: i64 4, label -; CHECK-NOT: br ; CHECK: } target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll index b485f6ab05f..052e10667da 100644 --- a/test/Transforms/SimplifyCFG/basictest.ll +++ b/test/Transforms/SimplifyCFG/basictest.ll @@ -25,16 +25,6 @@ define void @test3(i1 %T) { } -define void @test4() { - br label %return -return: - ret void -; CHECK: @test4 -; CHECK-NEXT: ret void -} -@test4g = global i8* blockaddress(@test4, %return) - - ; PR5795 define void @test5(i32 %A) { switch i32 %A, label %return [ diff --git a/test/Transforms/SimplifyCFG/switch_create.ll b/test/Transforms/SimplifyCFG/switch_create.ll index da7f65a6ca3..4e199bc8596 100644 --- a/test/Transforms/SimplifyCFG/switch_create.ll +++ b/test/Transforms/SimplifyCFG/switch_create.ll @@ -147,7 +147,7 @@ UnifiedReturnBlock: ; preds = %shortcirc_done.4, %shortcirc_next.4 ; CHECK: i32 16, label %UnifiedReturnBlock ; CHECK: i32 17, label %UnifiedReturnBlock ; CHECK: i32 18, label %UnifiedReturnBlock -; CHECK: i32 19, label %switch.edge +; CHECK: i32 19, label %UnifiedReturnBlock ; CHECK: ] } @@ -441,3 +441,29 @@ if.end: ; CHECK-NOT: switch ; CHECK: ret void } + +; PR8675 +; rdar://5134905 +define zeroext i1 @test16(i32 %x) nounwind { +entry: +; CHECK: @test16 +; CHECK: switch i32 %x, label %lor.rhs [ +; CHECK: i32 1, label %lor.end +; CHECK: i32 2, label %lor.end +; CHECK: i32 3, label %lor.end +; CHECK: ] + %cmp.i = icmp eq i32 %x, 1 + br i1 %cmp.i, label %lor.end, label %lor.lhs.false + +lor.lhs.false: + %cmp.i2 = icmp eq i32 %x, 2 + br i1 %cmp.i2, label %lor.end, label %lor.rhs + +lor.rhs: + %cmp.i1 = icmp eq i32 %x, 3 + br label %lor.end + +lor.end: + %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp.i1, %lor.rhs ] + ret i1 %0 +} diff --git a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll index f1c820ec43b..357ffb60e1e 100644 --- a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll +++ b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll @@ -1,5 +1,4 @@ -; RUN: opt < %s -simplifycfg -S | not grep br - +; RUN: opt < %s -simplifycfg -S | FileCheck %s %llvm.dbg.anchor.type = type { i32, i32 } %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* } @@ -13,7 +12,16 @@ declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind -define i1 @_ZN4llvm11SetCondInst7classofEPKNS_11InstructionE({ i32, i32 }* %I) { 
+define i1 @t({ i32, i32 }* %I) { +; CHECK: t +; CHECK: switch i32 %tmp.2.i, label %shortcirc_next.4 [ +; CHECK: i32 14, label %UnifiedReturnBlock +; CHECK: i32 15, label %UnifiedReturnBlock +; CHECK: i32 16, label %UnifiedReturnBlock +; CHECK: i32 17, label %UnifiedReturnBlock +; CHECK: i32 18, label %UnifiedReturnBlock +; CHECK: i32 19, label %UnifiedReturnBlock +; CHECK: ] entry: %tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1 ; [#uses=1] %tmp.2.i = load i32* %tmp.1.i ; [#uses=6]
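
For context (not part of the patch): below is a minimal sketch of the kind of input this change lets -tailcallelim handle on demand. The function and block names are invented for illustration and are not taken from this commit's tests. The recursive call sits in a block that ends in an unconditional branch to a separate return block containing only a PHI and the ret, which is the shape FoldReturnAndProcessPred looks for.

; Hypothetical example (names invented): accumulator-style factorial whose
; recursive call is followed by "br label %return" rather than by a ret.
define i32 @fact(i32 %n, i32 %acc) {
entry:
  %cmp = icmp sle i32 %n, 1
  br i1 %cmp, label %return, label %recurse

recurse:
  %sub = sub i32 %n, 1
  %mul = mul i32 %acc, %n
  %call = call i32 @fact(i32 %sub, i32 %mul)
  ; Unconditional branch to the block holding the ret; before this patch the
  ; ret not being adjacent to the call blocked tail recursion elimination.
  br label %return

return:
  %retval = phi i32 [ %acc, %entry ], [ %call, %recurse ]
  ret i32 %retval
}

With this patch, FoldReturnIntoUncondBranch clones "ret i32 %retval" into %recurse, substituting %call for the PHI operand on that edge, so the ret lands directly after the recursive call and EliminateRecursiveTailCall can rewrite the recursion as a loop. SimplifyCFG retains the same folding, but it is now gated behind the hidden, default-off -simplifycfg-dup-ret option, so duplicating the ret on demand in the tail recursion eliminator is what lets TCE fire without enabling that transform globally.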