Handle some non-exit blocks in tail merging.
authorDale Johannesen <dalej@apple.com>
Mon, 7 May 2007 20:57:21 +0000 (20:57 +0000)
committerDale Johannesen <dalej@apple.com>
Mon, 7 May 2007 20:57:21 +0000 (20:57 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@36907 91177308-0d34-0410-b5e6-96231b3b80d8

lib/CodeGen/BranchFolding.cpp
test/CodeGen/Generic/2007-05-07-tailmerge-1.c [new file with mode: 0644]

index 2abeb2d31b2d0cac6aca985b4edfe7663dafd04a..9faed69b1423abc8828147eee3ec63ab3739c280 100644 (file)
@@ -50,11 +50,14 @@ namespace {
   private:
     // Tail Merging.
     bool TailMergeBlocks(MachineFunction &MF);
+    bool TryMergeBlocks(MachineBasicBlock* SuccBB,
+                        MachineBasicBlock* PredBB);
     void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
                                  MachineBasicBlock *NewDest);
     MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
                                   MachineBasicBlock::iterator BBI1);
 
+    std::vector<std::pair<unsigned,MachineBasicBlock*> > MergePotentials;
     const MRegisterInfo *RegInfo;
     RegScavenger *RS;
     // Branch optzn.
@@ -346,18 +349,18 @@ static bool ShouldSplitFirstBlock(MachineBasicBlock *MBB1,
   return MBB1Time < MBB2Time;
 }
 
-bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+// See if any of the blocks in MergePotentials (which all have a common single
+// successor, or all have no successor) can be tail-merged.  If there is a
+// successor, any blocks in MergePotentials that are not tail-merged and
+// are not immediately before Succ must have an unconditional branch to
+// Succ added (but the predecessor/successor lists need no adjustment).  
+// The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+
+bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
+                                  MachineBasicBlock* PredBB) {
   MadeChange = false;
   
-  if (!EnableTailMerge) return false;
-  
-  // Find blocks with no successors.
-  std::vector<std::pair<unsigned,MachineBasicBlock*> > MergePotentials;
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
-    if (I->succ_empty())
-      MergePotentials.push_back(std::make_pair(HashEndOfMBB(I), I));
-  }
-  
   // Sort by hash value so that blocks with identical end sequences sort
   // together.
   std::stable_sort(MergePotentials.begin(), MergePotentials.end());
@@ -371,6 +374,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
     // If there is nothing that matches the hash of the current basic block,
     // give up.
     if (CurHash != PrevHash) {
+      if (SuccBB && CurMBB != PredBB)
+        TII->InsertBranch(*CurMBB, SuccBB, NULL, std::vector<MachineOperand>());
       MergePotentials.pop_back();
       continue;
     }
@@ -401,6 +406,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
       // If we didn't find anything that has at least two instructions matching
       // this one, bail out.
       if (FoundMatch == ~0U) {
+        // Put the unconditional branch back, if we need one.
+        if (SuccBB && CurMBB != PredBB)
+          TII->InsertBranch(*CurMBB, SuccBB, NULL, std::vector<MachineOperand>());
         MergePotentials.pop_back();
         continue;
       }
@@ -445,10 +453,72 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
     }
     MadeChange = true;
   }
-  
   return MadeChange;
 }
 
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+  MadeChange = false;
+  
+  if (!EnableTailMerge) return false;
+  
+  // First find blocks with no successors.
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    if (I->succ_empty())
+      MergePotentials.push_back(std::make_pair(HashEndOfMBB(I), I));
+  }
+  // See if we can do any crossjumping on those.
+  MadeChange |= TryMergeBlocks(NULL, NULL);
+
+  MergePotentials.clear();
+  // Look at blocks with two predecessors, where each predecessor has either:
+  // - a single successor, or
+  // - two successors, where successor I is reached either by ubr or fallthrough.
+  // The two-successor case where successor I is reached by cbr
+  // from both blocks is handled by the preceding case (when we consider the
+  // other, fallthough block).
+  // FIXME:  The two-successor case where I is reached by cbr
+  // from one block, and by fallthrough/ubr from the other, is not handled yet.
+  // Beware that sometimes blocks are in the predecessor list, but can't really 
+  // jump to the "successor" we're looking at.  Tolerate this.
+
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    if (!I->succ_empty() && I->pred_size() >= 2) {
+      MachineBasicBlock *IBB = I;
+      MachineBasicBlock *PredBB = prior(I);
+      for (MachineBasicBlock::pred_iterator P = I->pred_begin(), E2 = I->pred_end(); 
+           P != E2; ++P) {
+        MachineBasicBlock* PBB = *P;
+        MachineBasicBlock *TBB = 0, *FBB = 0;
+        std::vector<MachineOperand> Cond;
+        // Remove the unconditional branch at the end, if any.
+        if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond) &&
+            ((!FBB && Cond.size()==0) ||    // single successor
+             (!FBB && TBB!=IBB) ||          // cbr elsewhere, fallthrough to I
+             (FBB && FBB==IBB))) {          // cbr then branch to I
+          if (TBB) {
+            TII->RemoveBranch(*PBB);
+            if (TBB!=IBB)
+              // reinsert conditional branch only, for now
+              TII->InsertBranch(*PBB, TBB, 0, Cond);
+          }
+          MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB), *P));
+        }
+      }
+    if (MergePotentials.size() >= 2)
+      MadeChange |= TryMergeBlocks(I, PredBB);
+    // Reinsert an unconditional branch if needed.
+    // The 1 below can be either an original single predecessor, or a result
+    // of removing blocks in TryMergeBlocks.
+    if (MergePotentials.size()==1 && 
+        (MergePotentials.begin())->second != PredBB)
+      TII->InsertBranch(*((MergePotentials.begin())->second), I, NULL, 
+                        std::vector<MachineOperand>());
+    MergePotentials.clear();
+    }
+  }
+
+  return MadeChange;
+}
 
 //===----------------------------------------------------------------------===//
 //  Branch Optimization
diff --git a/test/CodeGen/Generic/2007-05-07-tailmerge-1.c b/test/CodeGen/Generic/2007-05-07-tailmerge-1.c
new file mode 100644 (file)
index 0000000..15efd60
--- /dev/null
@@ -0,0 +1,65 @@
+; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*baz | wc -l | grep 1
+; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*quux | wc -l | grep 1
+; Check that calls to baz and quux are tail-merged.
+
+; ModuleID = 'tail.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define i32 @f(i32 %i, i32 %q) {
+entry:
+       %i_addr = alloca i32            ; <i32*> [#uses=2]
+       %q_addr = alloca i32            ; <i32*> [#uses=2]
+       %retval = alloca i32, align 4           ; <i32*> [#uses=1]
+       "alloca point" = bitcast i32 0 to i32           ; <i32> [#uses=0]
+       store i32 %i, i32* %i_addr
+       store i32 %q, i32* %q_addr
+       %tmp = load i32* %i_addr                ; <i32> [#uses=1]
+       %tmp1 = icmp ne i32 %tmp, 0             ; <i1> [#uses=1]
+       %tmp12 = zext i1 %tmp1 to i8            ; <i8> [#uses=1]
+       %toBool = icmp ne i8 %tmp12, 0          ; <i1> [#uses=1]
+       br i1 %toBool, label %cond_true, label %cond_false
+
+cond_true:             ; preds = %entry
+       %tmp3 = call i32 (...)* @bar( )         ; <i32> [#uses=0]
+       %tmp4 = call i32 (...)* @baz( i32 5, i32 6 )            ; <i32> [#uses=0]
+       br label %cond_next
+
+cond_false:            ; preds = %entry
+       %tmp5 = call i32 (...)* @foo( )         ; <i32> [#uses=0]
+       %tmp6 = call i32 (...)* @baz( i32 5, i32 6 )            ; <i32> [#uses=0]
+       br label %cond_next
+
+cond_next:             ; preds = %cond_false, %cond_true
+       %tmp7 = load i32* %q_addr               ; <i32> [#uses=1]
+       %tmp8 = icmp ne i32 %tmp7, 0            ; <i1> [#uses=1]
+       %tmp89 = zext i1 %tmp8 to i8            ; <i8> [#uses=1]
+       %toBool10 = icmp ne i8 %tmp89, 0                ; <i1> [#uses=1]
+       br i1 %toBool10, label %cond_true11, label %cond_false15
+
+cond_true11:           ; preds = %cond_next
+       %tmp13 = call i32 (...)* @foo( )                ; <i32> [#uses=0]
+       %tmp14 = call i32 (...)* @quux( i32 3, i32 4 )          ; <i32> [#uses=0]
+       br label %cond_next18
+
+cond_false15:          ; preds = %cond_next
+       %tmp16 = call i32 (...)* @bar( )                ; <i32> [#uses=0]
+       %tmp17 = call i32 (...)* @quux( i32 3, i32 4 )          ; <i32> [#uses=0]
+       br label %cond_next18
+
+cond_next18:           ; preds = %cond_false15, %cond_true11
+       %tmp19 = call i32 (...)* @bar( )                ; <i32> [#uses=0]
+       br label %return
+
+return:                ; preds = %cond_next18
+       %retval20 = load i32* %retval           ; <i32> [#uses=1]
+       ret i32 %retval20
+}
+
+declare i32 @bar(...)
+
+declare i32 @baz(...)
+
+declare i32 @foo(...)
+
+declare i32 @quux(...)