Micro-optimization:
authorBill Wendling <isanbard@gmail.com>
Mon, 12 Apr 2010 22:19:57 +0000 (22:19 +0000)
committerBill Wendling <isanbard@gmail.com>
Mon, 12 Apr 2010 22:19:57 +0000 (22:19 +0000)
If we have this situation:

    jCC  L1
    jmp  L2
L1:
  ...
L2:
  ...

We can get a small performance boost by emitting this instead:

    jnCC L2
L1:
  ...
L2:
  ...

This testcase shows an example of this:

float func(float x, float y) {
    double product = (double)x * y;
    if (product == 0.0)
        return product;
    return product - 1.0;
}

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@101075 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86InstrInfo.cpp
test/CodeGen/X86/brcond.ll

index e67bf3d7ad27ece6d6e325e5b12cc5a1cc12e484..094164ec7cf75706df3388d54be66079aecd5bf5 100644 (file)
@@ -1684,6 +1684,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   // Start from the bottom of the block and work up, examining the
   // terminator instructions.
   MachineBasicBlock::iterator I = MBB.end();
+  MachineBasicBlock::iterator UnCondBrIter = MBB.end();
   while (I != MBB.begin()) {
     --I;
     if (I->isDebugValue())
@@ -1701,6 +1702,8 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 
     // Handle unconditional branches.
     if (I->getOpcode() == X86::JMP_4) {
+      UnCondBrIter = I;
+
       if (!AllowModify) {
         TBB = I->getOperand(0).getMBB();
         continue;
@@ -1718,10 +1721,11 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
         TBB = 0;
         I->eraseFromParent();
         I = MBB.end();
+        UnCondBrIter = MBB.end();
         continue;
       }
 
-      // TBB is used to indicate the unconditinal destination.
+      // TBB is used to indicate the unconditional destination.
       TBB = I->getOperand(0).getMBB();
       continue;
     }
@@ -1733,7 +1737,48 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 
     // Working from the bottom, handle the first conditional branch.
     if (Cond.empty()) {
+      MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+      if (AllowModify && UnCondBrIter != MBB.end() &&
+          MBB.isLayoutSuccessor(TargetBB)) {
+        // If we can modify the code and it ends in something like:
+        //
+        //     jCC L1
+        //     jmp L2
+        //   L1:
+        //     ...
+        //   L2:
+        //
+        // Then we can change this to:
+        //
+        //     jnCC L2
+        //   L1:
+        //     ...
+        //   L2:
+        //
+        // Which is a bit more efficient.
+        // We conditionally jump to the fall-through block.
+        BranchCode = GetOppositeBranchCondition(BranchCode);
+        unsigned JNCC = GetCondBranchFromCond(BranchCode);
+        MachineBasicBlock::iterator OldInst = I;
+        --I;
+
+        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
+          .addMBB(UnCondBrIter->getOperand(0).getMBB());
+        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
+          .addMBB(TargetBB);
+        MBB.addSuccessor(TargetBB);
+
+        OldInst->eraseFromParent();
+        UnCondBrIter->eraseFromParent();
+
+        // Restart the analysis.
+        UnCondBrIter = MBB.end();
+        I = MBB.end();
+        continue;
+      }
+
       FBB = TBB;
+      TBB = TargetBB;
       TBB = I->getOperand(0).getMBB();
       Cond.push_back(MachineOperand::CreateImm(BranchCode));
       continue;
index 130483ad841095e73acbddf704f873ad7bb2a0d3..7e23896a24b110a65eccdcbf95f1bda2938edd7d 100644 (file)
@@ -67,3 +67,41 @@ return:                                           ; preds = %entry
 ; CHECK-NEXT:  orl     8(%esp), %eax
 ; CHECK-NEXT:  je      LBB3_2
 }
+
+; <rdar://problem/7598384>:
+;
+;    jCC  L1
+;    jmp  L2
+; L1:
+;   ...
+; L2:
+;   ...
+;
+; to:
+; 
+;    jnCC L2
+; L1:
+;   ...
+; L2:
+;   ...
+define float @test4(float %x, float %y) nounwind readnone optsize ssp {
+entry:
+  %0 = fpext float %x to double                   ; <double> [#uses=1]
+  %1 = fpext float %y to double                   ; <double> [#uses=1]
+  %2 = fmul double %0, %1                         ; <double> [#uses=3]
+  %3 = fcmp oeq double %2, 0.000000e+00           ; <i1> [#uses=1]
+  br i1 %3, label %bb2, label %bb1
+
+; CHECK:      jne
+; CHECK-NOT:  jmp
+; CHECK-NEXT: jnp
+
+bb1:                                              ; preds = %entry
+  %4 = fadd double %2, -1.000000e+00              ; <double> [#uses=1]
+  br label %bb2
+
+bb2:                                              ; preds = %entry, %bb1
+  %.0.in = phi double [ %4, %bb1 ], [ %2, %entry ] ; <double> [#uses=1]
+  %.0 = fptrunc double %.0.in to float            ; <float> [#uses=1]
+  ret float %.0
+}