Optimize conditional branches in X86FastISel. This replaces
authorDan Gohman <gohman@apple.com>
Thu, 2 Oct 2008 22:15:21 +0000 (22:15 +0000)
committerDan Gohman <gohman@apple.com>
Thu, 2 Oct 2008 22:15:21 +0000 (22:15 +0000)
sequences like this:
       sete    %al
       testb   %al, %al
       jne     LBB11_1
with this:
       je      LBB11_1

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@56969 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/CodeGen/FastISel.h
lib/CodeGen/SelectionDAG/FastISel.cpp
lib/Target/X86/X86FastISel.cpp

index 9d33340de7805fc521a30af796489d655cd1d198..2b112c878c8a81375bc2e23cac1193616bfe0c56 100644 (file)
@@ -238,6 +238,11 @@ protected:
   /// from a specified index of a superregister.
   unsigned FastEmitInst_extractsubreg(unsigned Op0, uint32_t Idx);
 
+  /// FastEmitBranch - Emit an unconditional branch to the given block,
+  /// unless it is the immediate (fall-through) successor, and update
+  /// the CFG.
+  void FastEmitBranch(MachineBasicBlock *MBB);
+
   void UpdateValueMap(Value* I, unsigned Reg);
 
   unsigned createResultReg(const TargetRegisterClass *RC);
index 9f70bc998c905421112524920459d58300b81864..c0e8418c4c5853900436bc1a5b8b8a59c5a85de7 100644 (file)
@@ -461,6 +461,23 @@ FastISel::SelectInstruction(Instruction *I) {
   return SelectOperator(I, I->getOpcode());
 }
 
+/// FastEmitBranch - Emit an unconditional branch to the given block,
+/// unless it is the immediate (fall-through) successor, and update
+/// the CFG.
+void
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
+  MachineFunction::iterator NextMBB =
+     next(MachineFunction::iterator(MBB));
+
+  if (MBB->isLayoutSuccessor(MSucc)) {
+    // The unconditional fall-through case, which needs no instructions.
+  } else {
+    // The unconditional branch case.
+    TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>());
+  }
+  MBB->addSuccessor(MSucc);
+}
+
 bool
 FastISel::SelectOperator(User *I, unsigned Opcode) {
   switch (Opcode) {
@@ -508,18 +525,9 @@ FastISel::SelectOperator(User *I, unsigned Opcode) {
     BranchInst *BI = cast<BranchInst>(I);
 
     if (BI->isUnconditional()) {
-      MachineFunction::iterator NextMBB =
-         next(MachineFunction::iterator(MBB));
       BasicBlock *LLVMSucc = BI->getSuccessor(0);
       MachineBasicBlock *MSucc = MBBMap[LLVMSucc];
-
-      if (NextMBB != MF.end() && MSucc == NextMBB) {
-        // The unconditional fall-through case, which needs no instructions.
-      } else {
-        // The unconditional branch case.
-        TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>());
-      }
-      MBB->addSuccessor(MSucc);
+      FastEmitBranch(MSucc);
       return true;
     }
 
index 3d5759001f07bc6627606971337cb620406bc4e8..68d0d85501eadff1ab1bd57938fd44735d95e5df 100644 (file)
@@ -90,6 +90,8 @@ private:
   bool X86SelectSelect(Instruction *I);
 
   bool X86SelectTrunc(Instruction *I);
+  unsigned X86ChooseCmpOpcode(MVT VT);
 
   bool X86SelectFPExt(Instruction *I);
   bool X86SelectFPTrunc(Instruction *I);
@@ -507,6 +509,19 @@ bool X86FastISel::X86SelectLoad(Instruction *I)  {
   return false;
 }
 
+unsigned X86FastISel::X86ChooseCmpOpcode(MVT VT) {
+  switch (VT.getSimpleVT()) {
+  case MVT::i8: return X86::CMP8rr;
+  case MVT::i16: return X86::CMP16rr;
+  case MVT::i32: return X86::CMP32rr;
+  case MVT::i64: return X86::CMP64rr;
+  case MVT::f32: return X86::UCOMISSrr;
+  case MVT::f64: return X86::UCOMISDrr;
+  default: break;
+  }
+  return 0;
+}
+
 bool X86FastISel::X86SelectCmp(Instruction *I) {
   CmpInst *CI = cast<CmpInst>(I);
 
@@ -519,16 +534,7 @@ bool X86FastISel::X86SelectCmp(Instruction *I) {
   unsigned Op1Reg = getRegForValue(CI->getOperand(1));
   if (Op1Reg == 0) return false;
 
-  unsigned Opc;
-  switch (VT.getSimpleVT()) {
-  case MVT::i8: Opc = X86::CMP8rr; break;
-  case MVT::i16: Opc = X86::CMP16rr; break;
-  case MVT::i32: Opc = X86::CMP32rr; break;
-  case MVT::i64: Opc = X86::CMP64rr; break;
-  case MVT::f32: Opc = X86::UCOMISSrr; break;
-  case MVT::f64: Opc = X86::UCOMISDrr; break;
-  default: return false;
-  }
+  unsigned Opc = X86ChooseCmpOpcode(VT);
 
   unsigned ResultReg = createResultReg(&X86::GR8RegClass);
   switch (CI->getPredicate()) {
@@ -661,19 +667,139 @@ bool X86FastISel::X86SelectZExt(Instruction *I) {
 }
 
 bool X86FastISel::X86SelectBranch(Instruction *I) {
-  BranchInst *BI = cast<BranchInst>(I);
   // Unconditional branches are selected by tablegen-generated code.
-  unsigned OpReg = getRegForValue(BI->getCondition());
-  if (OpReg == 0) return false;
+  // Handle a conditional branch.
+  BranchInst *BI = cast<BranchInst>(I);
   MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
   MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];
 
+  // Fold the common case of a conditional branch with a comparison.
+  if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+    if (CI->hasOneUse()) {
+      MVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+      unsigned Opc = X86ChooseCmpOpcode(VT);
+      if (Opc == 0) return false;
+
+      // Try to take advantage of fallthrough opportunities.
+      CmpInst::Predicate Predicate = CI->getPredicate();
+      if (MBB->isLayoutSuccessor(TrueMBB)) {
+        std::swap(TrueMBB, FalseMBB);
+        Predicate = CmpInst::getInversePredicate(Predicate);
+      }
+
+      unsigned Op0Reg = getRegForValue(CI->getOperand(0));
+      if (Op0Reg == 0) return false;
+      unsigned Op1Reg = getRegForValue(CI->getOperand(1));
+      if (Op1Reg == 0) return false;
+      
+      switch (Predicate) {
+      case CmpInst::FCMP_OGT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JA)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_OGE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JAE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_OLT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
+        BuildMI(MBB, TII.get(X86::JA)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_OLE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
+        BuildMI(MBB, TII.get(X86::JAE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_ONE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JNE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_ORD:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JNP)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_UNO:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JP)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_UEQ:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_UGT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
+        BuildMI(MBB, TII.get(X86::JB)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_UGE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
+        BuildMI(MBB, TII.get(X86::JBE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_ULT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JB)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_ULE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JBE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_EQ:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_NE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JNE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_UGT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JA)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_UGE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JAE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_ULT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JB)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_ULE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JBE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_SGT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JG)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_SGE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JGE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_SLT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JL)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_SLE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JLE)).addMBB(TrueMBB);
+        break;
+      default:
+        return false;
+      }
+      MBB->addSuccessor(TrueMBB);
+      FastEmitBranch(FalseMBB);
+      return true;
+    }
+  }
+
+  // Otherwise do a clumsy setcc and re-test it.
+  unsigned OpReg = getRegForValue(BI->getCondition());
+  if (OpReg == 0) return false;
+
   BuildMI(MBB, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
-  BuildMI(MBB, TII.get(X86::JNE)).addMBB(TrueMBB);
-  BuildMI(MBB, TII.get(X86::JMP)).addMBB(FalseMBB);
 
+  BuildMI(MBB, TII.get(X86::JNE)).addMBB(TrueMBB);
   MBB->addSuccessor(TrueMBB);
-  MBB->addSuccessor(FalseMBB);
+
+  FastEmitBranch(FalseMBB);
 
   return true;
 }