From e8cb2ee1cd8ad7568a81f294ba09e0af2aff9e77 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Tue, 17 Jun 2014 21:55:43 +0000 Subject: [PATCH] [FastISel][X86] Optimize predicates and fold CMP instructions. This optimizes predicates for certain compares, such as fcmp oeq %x, %x to fcmp ord %x, %x. The latter one is more efficient to generate. The same optimization is applied to conditional branches. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211126 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 122 +++++- test/CodeGen/X86/fast-isel-cmp-branch3.ll | 470 ++++++++++++++++++++++ test/CodeGen/X86/fast-isel-cmp.ll | 417 +++++++++++++++++++ 3 files changed, 996 insertions(+), 13 deletions(-) create mode 100644 test/CodeGen/X86/fast-isel-cmp-branch3.ll diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 6a4a467f6a9..37174a14b29 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -155,6 +155,46 @@ private: } // end anonymous namespace. +static CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) { + // If both operands are the same, then try to optimize or fold the cmp. + CmpInst::Predicate Predicate = CI->getPredicate(); + if (CI->getOperand(0) != CI->getOperand(1)) + return Predicate; + + switch (Predicate) { + default: llvm_unreachable("Invalid predicate!"); + case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break; + + case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break; + } + + return Predicate; +} + static std::pair getX86ConditonCode(CmpInst::Predicate Predicate) { X86::CondCode CC = X86::COND_INVALID; @@ -1048,21 +1088,61 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { if (!isTypeLegal(I->getOperand(0)->getType(), VT)) return false; + // Try to optimize or fold the cmp. + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + unsigned ResultReg = 0; + switch (Predicate) { + default: break; + case CmpInst::FCMP_FALSE: { + ResultReg = createResultReg(&X86::GR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0), + ResultReg); + ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true, + X86::sub_8bit); + if (!ResultReg) + return false; + break; + } + case CmpInst::FCMP_TRUE: { + ResultReg = createResultReg(&X86::GR8RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri), + ResultReg).addImm(1); + break; + } + } + + if (ResultReg) { + UpdateValueMap(I, ResultReg); + return true; + } + + const Value *LHS = CI->getOperand(0); + const Value *RHS = CI->getOperand(1); + + // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0. + // We don't have to materialize a zero constant for this case and can just use + // %x again on the RHS. + if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { + const auto *RHSC = dyn_cast(RHS); + if (RHSC && RHSC->isNullValue()) + RHS = LHS; + } + // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. static unsigned SETFOpcTable[2][3] = { { X86::SETEr, X86::SETNPr, X86::AND8rr }, { X86::SETNEr, X86::SETPr, X86::OR8rr } }; unsigned *SETFOpc = nullptr; - switch (CI->getPredicate()) { + switch (Predicate) { default: break; case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break; case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break; } - unsigned ResultReg = createResultReg(&X86::GR8RegClass); + ResultReg = createResultReg(&X86::GR8RegClass); if (SETFOpc) { - if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT)) + if (!X86FastEmitCompare(LHS, RHS, VT)) return false; unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); @@ -1078,17 +1158,15 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { } X86::CondCode CC; - bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. - std::tie(CC, SwapArgs) = getX86ConditonCode(CI->getPredicate()); + bool SwapArgs; + std::tie(CC, SwapArgs) = getX86ConditonCode(Predicate); assert(CC <= X86::LAST_VALID_COND && "Unexpected conditon code."); unsigned Opc = X86::getSETFromCond(CC); - const Value *LHS = CI->getOperand(0); - const Value *RHS = CI->getOperand(1); if (SwapArgs) std::swap(LHS, RHS); - // Emit a compare of Op0/Op1. + // Emit a compare of LHS/RHS. if (!X86FastEmitCompare(LHS, RHS, VT)) return false; @@ -1162,8 +1240,28 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { if (CI->hasOneUse() && CI->getParent() == I->getParent()) { EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); + // Try to optimize or fold the cmp. + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + switch (Predicate) { + default: break; + case CmpInst::FCMP_FALSE: FastEmitBranch(FalseMBB, DbgLoc); return true; + case CmpInst::FCMP_TRUE: FastEmitBranch(TrueMBB, DbgLoc); return true; + } + + const Value *CmpLHS = CI->getOperand(0); + const Value *CmpRHS = CI->getOperand(1); + + // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, + // 0.0. + // We don't have to materialize a zero constant for this case and can just + // use %x again on the RHS. + if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { + const auto *CmpRHSC = dyn_cast(CmpRHS); + if (CmpRHSC && CmpRHSC->isNullValue()) + CmpRHS = CmpLHS; + } + // Try to take advantage of fallthrough opportunities. - CmpInst::Predicate Predicate = CI->getPredicate(); if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::getInversePredicate(Predicate); @@ -1186,14 +1284,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { } X86::CondCode CC; - bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. - unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA" + bool SwapArgs; + unsigned BranchOpc; std::tie(CC, SwapArgs) = getX86ConditonCode(Predicate); assert(CC <= X86::LAST_VALID_COND && "Unexpected conditon code."); BranchOpc = X86::GetCondBranchFromCond(CC); - const Value *CmpLHS = CI->getOperand(0); - const Value *CmpRHS = CI->getOperand(1); if (SwapArgs) std::swap(CmpLHS, CmpRHS); diff --git a/test/CodeGen/X86/fast-isel-cmp-branch3.ll b/test/CodeGen/X86/fast-isel-cmp-branch3.ll new file mode 100644 index 00000000000..a3f6851ca24 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-cmp-branch3.ll @@ -0,0 +1,470 @@ +; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s + +define i32 @fcmp_oeq1(float %x) { +; CHECK-LABEL: fcmp_oeq1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp oeq float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_oeq2(float %x) { +; CHECK-LABEL: fcmp_oeq2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jne {{LBB.+_1}} +; CHECK-NEXT: jnp {{LBB.+_2}} + %1 = fcmp oeq float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ogt1(float %x) { +; CHECK-LABEL: fcmp_ogt1 +; CHECK-NOT: ucomiss +; CHECK: movl $1, %eax + %1 = fcmp ogt float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ogt2(float %x) { +; CHECK-LABEL: fcmp_ogt2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jbe {{LBB.+_1}} + %1 = fcmp ogt float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_oge1(float %x) { +; CHECK-LABEL: fcmp_oge1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp oge float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_oge2(float %x) { +; CHECK-LABEL: fcmp_oge2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jb {{LBB.+_1}} + %1 = fcmp oge float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_olt1(float %x) { +; CHECK-LABEL: fcmp_olt1 +; CHECK-NOT: ucomiss +; CHECK: movl $1, %eax + %1 = fcmp olt float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_olt2(float %x) { +; CHECK-LABEL: fcmp_olt2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jbe {{LBB.+_1}} + %1 = fcmp olt float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ole1(float %x) { +; CHECK-LABEL: fcmp_ole1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp ole float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ole2(float %x) { +; CHECK-LABEL: fcmp_ole2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jb {{LBB.+_1}} + %1 = fcmp ole float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_one1(float %x) { +; CHECK-LABEL: fcmp_one1 +; CHECK-NOT: ucomiss +; CHECK: movl $1, %eax + %1 = fcmp one float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_one2(float %x) { +; CHECK-LABEL: fcmp_one2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: je {{LBB.+_1}} + %1 = fcmp one float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ord1(float %x) { +; CHECK-LABEL: fcmp_ord1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp ord float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ord2(float %x) { +; CHECK-LABEL: fcmp_ord2 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp ord float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uno1(float %x) { +; CHECK-LABEL: fcmp_uno1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_2}} + %1 = fcmp uno float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uno2(float %x) { +; CHECK-LABEL: fcmp_uno2 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_2}} + %1 = fcmp uno float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ueq1(float %x) { +; CHECK-LABEL: fcmp_ueq1 +; CHECK-NOT: ucomiss + %1 = fcmp ueq float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ueq2(float %x) { +; CHECK-LABEL: fcmp_ueq2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: je {{LBB.+_2}} + %1 = fcmp ueq float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ugt1(float %x) { +; CHECK-LABEL: fcmp_ugt1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jnp {{LBB.+_1}} + %1 = fcmp ugt float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ugt2(float %x) { +; CHECK-LABEL: fcmp_ugt2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jae {{LBB.+_1}} + %1 = fcmp ugt float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uge1(float %x) { +; CHECK-LABEL: fcmp_uge1 +; CHECK-NOT: ucomiss + %1 = fcmp uge float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uge2(float %x) { +; CHECK-LABEL: fcmp_uge2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: ja {{LBB.+_1}} + %1 = fcmp uge float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ult1(float %x) { +; CHECK-LABEL: fcmp_ult1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jnp {{LBB.+_1}} + %1 = fcmp ult float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ult2(float %x) { +; CHECK-LABEL: fcmp_ult2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jae {{LBB.+_1}} + %1 = fcmp ult float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ule1(float %x) { +; CHECK-LABEL: fcmp_ule1 +; CHECK-NOT: ucomiss + %1 = fcmp ule float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ule2(float %x) { +; CHECK-LABEL: fcmp_ule2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: ja {{LBB.+_1}} + %1 = fcmp ule float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_une1(float %x) { +; CHECK-LABEL: fcmp_une1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jnp {{LBB.+_1}} + %1 = fcmp une float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_une2(float %x) { +; CHECK-LABEL: fcmp_une2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jne {{LBB.+_2}} +; CHECK-NEXT: jp {{LBB.+_2}} +; CHECK-NEXT: jmp {{LBB.+_1}} + %1 = fcmp une float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq(i32 %x) { +; CHECK-LABEL: icmp_eq +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp eq i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ne(i32 %x) { +; CHECK-LABEL: icmp_ne +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp ne i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ugt(i32 %x) { +; CHECK-LABEL: icmp_ugt +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp ugt i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_uge(i32 %x) { +; CHECK-LABEL: icmp_uge +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp uge i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ult(i32 %x) { +; CHECK-LABEL: icmp_ult +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp ult i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ule(i32 %x) { +; CHECK-LABEL: icmp_ule +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp ule i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_sgt(i32 %x) { +; CHECK-LABEL: icmp_sgt +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp sgt i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_sge(i32 %x) { +; CHECK-LABEL: icmp_sge +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp sge i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_slt(i32 %x) { +; CHECK-LABEL: icmp_slt +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp slt i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_sle(i32 %x) { +; CHECK-LABEL: icmp_sle +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp sle i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + diff --git a/test/CodeGen/X86/fast-isel-cmp.ll b/test/CodeGen/X86/fast-isel-cmp.ll index 61cc67a244e..1b72cfcde65 100644 --- a/test/CodeGen/X86/fast-isel-cmp.ll +++ b/test/CodeGen/X86/fast-isel-cmp.ll @@ -270,3 +270,420 @@ define zeroext i1 @icmp_sle(i32 %x, i32 %y) { ret i1 %1 } +; Test cmp folding and condition optimization. +define zeroext i1 @fcmp_oeq2(float %x) { +; SDAG-LABEL: fcmp_oeq2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_oeq2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp oeq float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_oeq3(float %x) { +; SDAG-LABEL: fcmp_oeq3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: cmpeqss %xmm1, %xmm0 +; SDAG-NEXT: movd %xmm0, %eax +; SDAG-NEXT: andl $1, %eax +; FAST-LABEL: fcmp_oeq3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: sete %al +; FAST-NEXT: setnp %cl +; FAST-NEXT: andb %al, %cl + %1 = fcmp oeq float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ogt2(float %x) { +; SDAG-LABEL: fcmp_ogt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: fcmp_ogt2 +; FAST: xorl %eax, %eax + %1 = fcmp ogt float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ogt3(float %x) { +; SDAG-LABEL: fcmp_ogt3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: seta %al +; FAST-LABEL: fcmp_ogt3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: seta %al + %1 = fcmp ogt float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_oge2(float %x) { +; SDAG-LABEL: fcmp_oge2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_oge2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp oge float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_oge3(float %x) { +; SDAG-LABEL: fcmp_oge3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: setae %al +; FAST-LABEL: fcmp_oge3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setae %al + %1 = fcmp oge float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_olt2(float %x) { +; SDAG-LABEL: fcmp_olt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: fcmp_olt2 +; FAST: xorl %eax, %eax + %1 = fcmp olt float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_olt3(float %x) { +; SDAG-LABEL: fcmp_olt3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm0, %xmm1 +; SDAG-NEXT: seta %al +; FAST-LABEL: fcmp_olt3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm0, %xmm1 +; FAST-NEXT: seta %al + %1 = fcmp olt float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ole2(float %x) { +; SDAG-LABEL: fcmp_ole2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_ole2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp ole float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ole3(float %x) { +; SDAG-LABEL: fcmp_ole3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm0, %xmm1 +; SDAG-NEXT: setae %al +; FAST-LABEL: fcmp_ole3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm0, %xmm1 +; FAST-NEXT: setae %al + %1 = fcmp ole float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_one2(float %x) { +; SDAG-LABEL: fcmp_one2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: fcmp_one2 +; FAST: xorl %eax, %eax + %1 = fcmp one float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_one3(float %x) { +; SDAG-LABEL: fcmp_one3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: setne %al +; FAST-LABEL: fcmp_one3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setne %al + %1 = fcmp one float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ord2(float %x) { +; SDAG-LABEL: fcmp_ord2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_ord2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp ord float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ord3(float %x) { +; SDAG-LABEL: fcmp_ord3 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_ord3 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp ord float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_uno2(float %x) { +; SDAG-LABEL: fcmp_uno2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_uno2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp uno float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_uno3(float %x) { +; SDAG-LABEL: fcmp_uno3 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_uno3 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp uno float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ueq2(float %x) { +; SDAG-LABEL: fcmp_ueq2 +; SDAG: movb $1, %al +; FAST-LABEL: fcmp_ueq2 +; FAST: movb $1, %al + %1 = fcmp ueq float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ueq3(float %x) { +; SDAG-LABEL: fcmp_ueq3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: sete %al +; FAST-LABEL: fcmp_ueq3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: sete %al + %1 = fcmp ueq float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ugt2(float %x) { +; SDAG-LABEL: fcmp_ugt2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_ugt2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp ugt float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ugt3(float %x) { +; SDAG-LABEL: fcmp_ugt3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm0, %xmm1 +; SDAG-NEXT: setb %al +; FAST-LABEL: fcmp_ugt3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm0, %xmm1 +; FAST-NEXT: setb %al + %1 = fcmp ugt float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_uge2(float %x) { +; SDAG-LABEL: fcmp_uge2 +; SDAG: movb $1, %al +; FAST-LABEL: fcmp_uge2 +; FAST: movb $1, %al + %1 = fcmp uge float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_uge3(float %x) { +; SDAG-LABEL: fcmp_uge3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm0, %xmm1 +; SDAG-NEXT: setbe %al +; FAST-LABEL: fcmp_uge3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm0, %xmm1 +; FAST-NEXT: setbe %al + %1 = fcmp uge float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ult2(float %x) { +; SDAG-LABEL: fcmp_ult2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_ult2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp ult float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ult3(float %x) { +; SDAG-LABEL: fcmp_ult3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: setb %al +; FAST-LABEL: fcmp_ult3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setb %al + %1 = fcmp ult float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ule2(float %x) { +; SDAG-LABEL: fcmp_ule2 +; SDAG: movb $1, %al +; FAST-LABEL: fcmp_ule2 +; FAST: movb $1, %al + %1 = fcmp ule float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ule3(float %x) { +; SDAG-LABEL: fcmp_ule3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: setbe %al +; FAST-LABEL: fcmp_ule3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setbe %al + %1 = fcmp ule float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_une2(float %x) { +; SDAG-LABEL: fcmp_une2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_une2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp une float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_une3(float %x) { +; SDAG-LABEL: fcmp_une3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: cmpneqss %xmm1, %xmm0 +; SDAG-NEXT: movd %xmm0, %eax +; SDAG-NEXT: andl $1, %eax +; FAST-LABEL: fcmp_une3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setne %al +; FAST-NEXT: setp %cl +; FAST-NEXT: orb %al, %cl + %1 = fcmp une float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @icmp_eq2(i32 %x) { +; SDAG-LABEL: icmp_eq2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_eq2 +; FAST: movb $1, %al + %1 = icmp eq i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_ne2(i32 %x) { +; SDAG-LABEL: icmp_ne2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_ne2 +; FAST: xorl %eax, %eax + %1 = icmp ne i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_ugt2(i32 %x) { +; SDAG-LABEL: icmp_ugt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_ugt2 +; FAST: xorl %eax, %eax + %1 = icmp ugt i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_uge2(i32 %x) { +; SDAG-LABEL: icmp_uge2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_uge2 +; FAST: movb $1, %al + %1 = icmp uge i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_ult2(i32 %x) { +; SDAG-LABEL: icmp_ult2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_ult2 +; FAST: xorl %eax, %eax + %1 = icmp ult i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_ule2(i32 %x) { +; SDAG-LABEL: icmp_ule2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_ule2 +; FAST: movb $1, %al + %1 = icmp ule i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_sgt2(i32 %x) { +; SDAG-LABEL: icmp_sgt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_sgt2 +; FAST: xorl %eax, %eax + %1 = icmp sgt i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_sge2(i32 %x) { +; SDAG-LABEL: icmp_sge2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_sge2 +; FAST: movb $1, %al + %1 = icmp sge i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_slt2(i32 %x) { +; SDAG-LABEL: icmp_slt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_slt2 +; FAST: xorl %eax, %eax + %1 = icmp slt i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_sle2(i32 %x) { +; SDAG-LABEL: icmp_sle2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_sle2 +; FAST: movb $1, %al + %1 = icmp sle i32 %x, %x + ret i1 %1 +} + -- 2.34.1