From: Andrew Trick Date: Wed, 10 Aug 2011 00:28:10 +0000 (+0000) Subject: Fix the LoopUnroller to handle nontrivial loops and partial unrolling. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=b1eede12818d91a32adac928c6fffcf6d2800dc0;p=oota-llvm.git Fix the LoopUnroller to handle nontrivial loops and partial unrolling. These are not individual bug fixes. I had to rewrite a good chunk of the unroller to make it sane. I think it was getting lucky on trivial completely unrolled loops with no early exits. I included some fairly simple unit tests for partial unrolling. I didn't do much stress testing, so it may not be perfect, but should be usable now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137190 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 28a15ed18cb..48c59ac4e9a 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -134,6 +134,11 @@ public: block_iterator block_begin() const { return Blocks.begin(); } block_iterator block_end() const { return Blocks.end(); } + /// getNumBlocks - Get the number of blocks in this loop. + unsigned getNumBlocks() const { + return std::distance(block_begin(), block_end()); + } + /// isLoopExiting - True if terminator in the block can branch to another /// block that is outside of the current loop. /// diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index fa508d48261..f17098fd39d 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -21,6 +21,7 @@ #include "llvm/BasicBlock.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Support/Debug.h" @@ -225,11 +226,25 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, Headers.push_back(Header); Latches.push_back(LatchBlock); + // The current on-the-fly SSA update requires blocks to be processed in + // reverse postorder so that LastValueMap contains the correct value at each + // exit. + LoopBlocksDFS DFS(L); + { + // Traverse the loop blocks using depth-first search to record RPO numbers + // for each block in the DFS result. + LoopBlocksTraversal Traversal(DFS, LI); + for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(), + POE = Traversal.end(); POI != POE; ++POI); + } + // Stash the DFS iterators before adding blocks to the loop. + LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); + for (unsigned It = 1; It != Count; ++It) { std::vector NewBlocks; - for (std::vector::iterator BB = LoopBlocks.begin(), - E = LoopBlocks.end(); BB != E; ++BB) { + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { ValueToValueMapTy VMap; BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); @@ -255,35 +270,27 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, L->addBasicBlockToLoop(New, LI->getBase()); - // Add phi entries for newly created values to all exit blocks except - // the successor of the latch block. The successor of the exit block will - // be updated specially after unrolling all the way. - if (*BB != LatchBlock) - for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE; - ++SI) - if (!L->contains(*SI)) - for (BasicBlock::iterator BBI = (*SI)->begin(); - PHINode *phi = dyn_cast(BBI); ++BBI) { - Value *Incoming = phi->getIncomingValueForBlock(*BB); - phi->addIncoming(Incoming, New); - } - + // Add phi entries for newly created values to all exit blocks. + for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); + SI != SE; ++SI) { + if (L->contains(*SI)) + continue; + for (BasicBlock::iterator BBI = (*SI)->begin(); + PHINode *phi = dyn_cast(BBI); ++BBI) { + Value *Incoming = phi->getIncomingValueForBlock(*BB); + ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); + if (It != LastValueMap.end()) + Incoming = It->second; + phi->addIncoming(Incoming, New); + } + } // Keep track of new headers and latches as we create them, so that // we can insert the proper branches later. if (*BB == Header) Headers.push_back(New); - if (*BB == LatchBlock) { + if (*BB == LatchBlock) Latches.push_back(New); - // Also, clear out the new latch's back edge so that it doesn't look - // like a new loop, so that it's amenable to being merged with adjacent - // blocks later on. - TerminatorInst *Term = New->getTerminator(); - assert(L->contains(Term->getSuccessor(!ContinueOnTrue))); - assert(Term->getSuccessor(ContinueOnTrue) == LoopExit); - Term->setSuccessor(!ContinueOnTrue, NULL); - } - NewBlocks.push_back(New); } @@ -294,36 +301,24 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, ::RemapInstruction(I, LastValueMap); } - // The latch block exits the loop. If there are any PHI nodes in the - // successor blocks, update them to use the appropriate values computed as the - // last iteration of the loop. - if (Count != 1) { - BasicBlock *LastIterationBB = cast(LastValueMap[LatchBlock]); - for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock); - SI != SE; ++SI) { - for (BasicBlock::iterator BBI = (*SI)->begin(); - PHINode *PN = dyn_cast(BBI); ++BBI) { - Value *InVal = PN->removeIncomingValue(LatchBlock, false); - // If this value was defined in the loop, take the value defined by the - // last iteration of the loop. - if (Instruction *InValI = dyn_cast(InVal)) { - if (L->contains(InValI)) - InVal = LastValueMap[InVal]; - } - PN->addIncoming(InVal, LastIterationBB); - } - } - } - - // Now, if we're doing complete unrolling, loop over the PHI nodes in the - // original block, setting them to their incoming values. - if (CompletelyUnroll) { - BasicBlock *Preheader = L->getLoopPreheader(); - for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { - PHINode *PN = OrigPHINode[i]; + // Loop over the PHI nodes in the original block, setting incoming values. + for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { + PHINode *PN = OrigPHINode[i]; + if (CompletelyUnroll) { PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); } + else if (Count > 1) { + Value *InVal = PN->removeIncomingValue(LatchBlock, false); + // If this value was defined in the loop, take the value defined by the + // last iteration of the loop. + if (Instruction *InValI = dyn_cast(InVal)) { + if (L->contains(InValI)) + InVal = LastValueMap[InVal]; + } + assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); + PN->addIncoming(InVal, Latches.back()); + } } // Now that all the basic blocks for the unrolled iterations are in place, @@ -355,6 +350,19 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { + // Remove phi operands at this loop exit + if (Dest != LoopExit) { + BasicBlock *BB = Latches[i]; + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + if (*SI == Headers[i]) + continue; + for (BasicBlock::iterator BBI = (*SI)->begin(); + PHINode *Phi = dyn_cast(BBI); ++BBI) { + Phi->removeIncomingValue(BB, false); + } + } + } // Replace the conditional branch with an unconditional one. BranchInst::Create(Dest, Term); Term->eraseFromParent(); diff --git a/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll b/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll new file mode 100644 index 00000000000..3353e516629 --- /dev/null +++ b/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll @@ -0,0 +1,100 @@ +; RUN: opt < %s -loop-unroll -S -unroll-count=4 | FileCheck %s +; Test phi update after partial unroll. + +declare i1 @check() nounwind + +; CHECK: @test +; CHECK: if.else: +; CHECK: if.then.loopexit +; CHECK: %sub5.lcssa = phi i32 [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ] +; CHECK: if.else.3 +define void @test1(i32 %i, i32 %j) nounwind uwtable ssp { +entry: + %cond1 = call zeroext i1 @check() + br i1 %cond1, label %if.then, label %if.else.lr.ph + +if.else.lr.ph: ; preds = %entry + br label %if.else + +if.else: ; preds = %if.else, %if.else.lr.ph + %sub = phi i32 [ %i, %if.else.lr.ph ], [ %sub5, %if.else ] + %sub5 = sub i32 %sub, %j + %cond2 = call zeroext i1 @check() + br i1 %cond2, label %if.then, label %if.else + +if.then: ; preds = %if.else, %entry + %i.tr = phi i32 [ %i, %entry ], [ %sub5, %if.else ] + ret void + +} + +; PR7318: assertion failure after doing a simple loop unroll +; +; CHECK: @test2 +; CHECK: bb1.bb2_crit_edge: +; CHECK: %.lcssa = phi i32 [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ] +; CHECK: bb1.3: +define i32 @test2(i32* nocapture %p, i32 %n) nounwind readonly { +entry: + %0 = icmp sgt i32 %n, 0 ; [#uses=1] + br i1 %0, label %bb.nph, label %bb2 + +bb.nph: ; preds = %entry + %tmp = zext i32 %n to i64 ; [#uses=1] + br label %bb + +bb: ; preds = %bb.nph, %bb1 + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb1 ] ; [#uses=2] + %s.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb1 ] ; [#uses=1] + %scevgep = getelementptr i32* %p, i64 %indvar ; [#uses=1] + %1 = load i32* %scevgep, align 1 ; [#uses=1] + %2 = add nsw i32 %1, %s.01 ; [#uses=2] + br label %bb1 + +bb1: ; preds = %bb + %indvar.next = add i64 %indvar, 1 ; [#uses=2] + %exitcond = icmp ne i64 %indvar.next, %tmp ; [#uses=1] + br i1 %exitcond, label %bb, label %bb1.bb2_crit_edge + +bb1.bb2_crit_edge: ; preds = %bb1 + %.lcssa = phi i32 [ %2, %bb1 ] ; [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1.bb2_crit_edge, %entry + %s.0.lcssa = phi i32 [ %.lcssa, %bb1.bb2_crit_edge ], [ 0, %entry ] ; [#uses=1] + ret i32 %s.0.lcssa +} + +; Check phi update for loop with an early-exit. +; +; CHECK: @test3 +; CHECK: return.loopexit: +; CHECK: %tmp7.i.lcssa = phi i32 [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ] +; CHECK: exit.3: +define i32 @test3() nounwind uwtable ssp align 2 { +entry: + br i1 undef, label %return, label %if.end + +if.end: ; preds = %entry + br label %do.body + +do.body: ; preds = %do.cond, %if.end + br i1 undef, label %exit, label %do.cond + +exit: ; preds = %do.body + %tmp7.i = load i32* undef, align 8 + br i1 undef, label %do.cond, label %land.lhs.true + +land.lhs.true: ; preds = %exit + br i1 undef, label %return, label %do.cond + +do.cond: ; preds = %land.lhs.true, %exit, %do.body + br i1 undef, label %do.end, label %do.body + +do.end: ; preds = %do.cond + br label %return + +return: ; preds = %do.end, %land.lhs.true, %entry + %retval.0 = phi i32 [ 0, %do.end ], [ 0, %entry ], [ %tmp7.i, %land.lhs.true ] + ret i32 %retval.0 +} diff --git a/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll b/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll new file mode 100644 index 00000000000..c1221f595ac --- /dev/null +++ b/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll @@ -0,0 +1,62 @@ +; RUN: opt -S < %s -instcombine -inline -jump-threading -loop-unroll -unroll-count=4 | FileCheck %s +; +; This is a test case that required a number of setup passes because +; it depends on block order. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.6.8" + +declare i1 @check() nounwind +declare i32 @getval() nounwind + +; Check that the loop exit merges values from all the iterations. This +; could be a tad fragile, but it's a good test. +; +; CHECK: @foo +; CHECK: return: +; CHECK: %retval.0 = phi i32 [ %tmp7.i, %land.lhs.true ], [ 0, %do.cond ], [ %tmp7.i.1, %land.lhs.true.1 ], [ 0, %do.cond.1 ], [ %tmp7.i.2, %land.lhs.true.2 ], [ 0, %do.cond.2 ], [ %tmp7.i.3, %land.lhs.true.3 ], [ 0, %do.cond.3 ] +; CHECK-NOT: @bar +; CHECK: bar.exit.3 +define i32 @foo() uwtable ssp align 2 { +entry: + br i1 undef, label %return, label %if.end + +if.end: ; preds = %entry + %call2 = call i32 @getval() + br label %do.body + +do.body: ; preds = %do.cond, %if.end + %call6 = call i32 @bar() + %cmp = icmp ne i32 %call6, 0 + br i1 %cmp, label %land.lhs.true, label %do.cond + +land.lhs.true: ; preds = %do.body + %call10 = call i32 @getval() + %cmp11 = icmp eq i32 0, %call10 + br i1 %cmp11, label %return, label %do.cond + +do.cond: ; preds = %land.lhs.true, %do.body + %cmp18 = icmp sle i32 0, %call2 + br i1 %cmp18, label %do.body, label %return + +return: ; preds = %do.cond, %land.lhs.true, %entry + %retval.0 = phi i32 [ 0, %entry ], [ %call6, %land.lhs.true ], [ 0, %do.cond ] + ret i32 %retval.0 +} + +define linkonce_odr i32 @bar() nounwind uwtable ssp align 2 { +entry: + br i1 undef, label %land.lhs.true, label %cond.end + +land.lhs.true: ; preds = %entry + %cmp4 = call zeroext i1 @check() + br i1 %cmp4, label %cond.true, label %cond.end + +cond.true: ; preds = %land.lhs.true + %tmp7 = call i32 @getval() + br label %cond.end + +cond.end: ; preds = %cond.true, %land.lhs.true, %entry + %cond = phi i32 [ %tmp7, %cond.true ], [ 0, %land.lhs.true ], [ 0, %entry ] + ret i32 %cond +}