Teach LoopSimplify how to merge multiple loop exits into a single exit,
authorDan Gohman <gohman@apple.com>
Sat, 27 Jun 2009 21:30:38 +0000 (21:30 +0000)
committerDan Gohman <gohman@apple.com>
Sat, 27 Jun 2009 21:30:38 +0000 (21:30 +0000)
when one of them can be converted to a trivial icmp and conditional
branch.

This addresses what is essentially a phase ordering problem.
SimplifyCFG knows how to do this transformation, but it doesn't do so
if the primary block has any instructions in it other than an icmp and
a branch. In the given testcase, the block contains other instructions,
however they are loop-invariant and can be hoisted. SimplifyCFG doesn't
have LoopInfo though, so it can't hoist them. And, it's important that
the blocks be merged before LoopRotation, as it doesn't support
multiple-exit loops.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@74396 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Transforms/Utils/Local.h
lib/Transforms/Utils/LoopSimplify.cpp
lib/Transforms/Utils/SimplifyCFG.cpp
test/Transforms/LoopSimplify/merge-exits.ll [new file with mode: 0644]

index 98a68f6461bb40bdd0f6fb8e512bc407a12cc72c..dd423fa3b1734cd98f052a2ee1cd8c9c32ccb17f 100644 (file)
@@ -19,6 +19,7 @@ namespace llvm {
 
 class User;
 class BasicBlock;
+class BranchInst;
 class Instruction;
 class Value;
 class Pass;
@@ -94,6 +95,12 @@ void MergeBasicBlockIntoOnlyPred(BasicBlock *BB);
 ///
 bool SimplifyCFG(BasicBlock *BB);
 
+/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
+/// and if a predecessor branches to us and one of our successors, fold the
+/// setcc into the predecessor and use logical operations to pick the right
+/// destination.
+bool FoldBranchToCommonDest(BranchInst *BI);
+
 /// DemoteRegToStack - This function takes a virtual register computed by an
 /// Instruction and replaces it with a slot in the stack frame, allocated via
 /// alloca.  This allows the CFG to be changed around without fear of
index 03d273d25d791591bcab11f608e41c81bd6cc647..d03591081fb4e77d4deb681d60c7f61966625a75 100644 (file)
@@ -42,6 +42,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/ADT/SetOperations.h"
@@ -258,6 +259,80 @@ ReprocessLoop:
       PN->eraseFromParent();
     }
 
+  // If this loop has muliple exits and the exits all go to the same
+  // block, attempt to merge the exits. This helps several passes, such
+  // as LoopRotation, which do not support loops with multiple exits.
+  // SimplifyCFG also does this (and this code uses the same utility
+  // function), however this code is loop-aware, where SimplifyCFG is
+  // not. That gives it the advantage of being able to hoist
+  // loop-invariant instructions out of the way to open up more
+  // opportunities, and the disadvantage of having the responsibility
+  // to preserve dominator information.
+  if (ExitBlocks.size() > 1 && L->getUniqueExitBlock()) {
+    SmallVector<BasicBlock*, 8> ExitingBlocks;
+    L->getExitingBlocks(ExitingBlocks);
+    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+      BasicBlock *ExitingBlock = ExitingBlocks[i];
+      if (!ExitingBlock->getSinglePredecessor()) continue;
+      BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+      if (!BI || !BI->isConditional()) continue;
+      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+      if (!CI || CI->getParent() != ExitingBlock) continue;
+
+      // Attempt to hoist out all instructions except for the
+      // comparison and the branch.
+      bool AllInvariant = true;
+      for (BasicBlock::iterator I = ExitingBlock->begin(),
+           E = ExitingBlock->end(); I != E; ) {
+        Instruction *Inst = I++;
+        if (Inst == BI || Inst == CI)
+          continue;
+        if (Inst->isTrapping()) {
+          AllInvariant = false;
+          break;
+        }
+        for (unsigned j = 0, f = Inst->getNumOperands(); j != f; ++j)
+          if (!L->isLoopInvariant(Inst->getOperand(j))) {
+            AllInvariant = false;
+            break;
+          }
+        if (!AllInvariant)
+          break;
+        // Hoist.
+        Inst->moveBefore(L->getLoopPreheader()->getTerminator());
+      }
+      if (!AllInvariant) continue;
+
+      // The block has now been cleared of all instructions except for
+      // a comparison and a conditional branch. SimplifyCFG may be able
+      // to fold it now.
+      if (!FoldBranchToCommonDest(BI)) continue;
+
+      // Success. The block is now dead, so remove it from the loop,
+      // update the dominator tree and dominance frontier, and delete it.
+      assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+      Changed = true;
+      L->removeBlockFromLoop(ExitingBlock);
+
+      DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
+      DomTreeNode *Node = DT->getNode(ExitingBlock);
+      const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+        Node->getChildren();
+      for (unsigned k = 0, g = Children.size(); k != g; ++k) {
+        DT->changeImmediateDominator(Children[k], Node->getIDom());
+        if (DF) DF->changeImmediateDominator(Children[k]->getBlock(),
+                                             Node->getIDom()->getBlock(),
+                                             DT);
+      }
+      DT->eraseNode(ExitingBlock);
+      if (DF) DF->removeBlock(ExitingBlock);
+
+      BI->getSuccessor(0)->removePredecessor(ExitingBlock);
+      BI->getSuccessor(1)->removePredecessor(ExitingBlock);
+      ExitingBlock->eraseFromParent();
+    }
+  }
+
   return Changed;
 }
 
index ee0f6a65de4eb8ecda215165e81ef39f38985c1d..58d4d5a344c1c99f9189e73470b37c5991ff86ca 100644 (file)
@@ -1492,7 +1492,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
 /// and if a predecessor branches to us and one of our successors, fold the
 /// setcc into the predecessor and use logical operations to pick the right
 /// destination.
-static bool FoldBranchToCommonDest(BranchInst *BI) {
+bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
   BasicBlock *BB = BI->getParent();
   Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
   if (Cond == 0) return false;
diff --git a/test/Transforms/LoopSimplify/merge-exits.ll b/test/Transforms/LoopSimplify/merge-exits.ll
new file mode 100644 (file)
index 0000000..c5bf7fd
--- /dev/null
@@ -0,0 +1,45 @@
+; RUN: llvm-as < %s | opt -loopsimplify -loop-rotate -instcombine -indvars \
+; RUN:  | llvm-dis > %t
+; RUN: not grep sext %t
+; RUN: grep {phi i64} %t | count 1
+
+; Loopsimplify should be able to merge the two loop exits
+; into one, so that loop rotate can rotate the loop, so
+; that indvars can promote the induction variable to i64
+; without needing casts.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define float @t(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
+entry:
+       %t0 = load float* %peakWeight, align 4          ; <float> [#uses=1]
+       br label %bb1
+
+bb:            ; preds = %bb2
+       %t1 = sext i32 %hiPart.0 to i64         ; <i64> [#uses=1]
+       %t2 = getelementptr float* %pTmp1, i64 %t1              ; <float*> [#uses=1]
+       %t3 = load float* %t2, align 4          ; <float> [#uses=1]
+       %t4 = fadd float %t3, %distERBhi.0              ; <float> [#uses=1]
+       %t5 = add i32 %hiPart.0, 1              ; <i32> [#uses=2]
+       %t6 = sext i32 %t5 to i64               ; <i64> [#uses=1]
+       %t7 = getelementptr float* %peakWeight, i64 %t6         ; <float*> [#uses=1]
+       %t8 = load float* %t7, align 4          ; <float> [#uses=1]
+       %t9 = fadd float %t8, %peakCount.0              ; <float> [#uses=1]
+       br label %bb1
+
+bb1:           ; preds = %bb, %entry
+       %peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]          ; <float> [#uses=2]
+       %hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]         ; <i32> [#uses=3]
+       %distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]         ; <float> [#uses=3]
+       %t10 = fcmp uge float %distERBhi.0, 2.500000e+00                ; <i1> [#uses=1]
+       br i1 %t10, label %bb3, label %bb2
+
+bb2:           ; preds = %bb1
+       %t11 = add i32 %bandEdgeIndex, -1               ; <i32> [#uses=1]
+       %t12 = icmp sgt i32 %t11, %hiPart.0             ; <i1> [#uses=1]
+       br i1 %t12, label %bb, label %bb3
+
+bb3:           ; preds = %bb2, %bb1
+       %t13 = fdiv float %peakCount.0, %distERBhi.0            ; <float> [#uses=1]
+       ret float %t13
+}