From 670e53977bf289009bb460538987542c9c46ac90 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 21 Sep 2009 18:03:22 +0000 Subject: [PATCH] Recognize SSE min and max opportunities in even more cases. And fix a bug with the behavior of min/max instructions formed from fcmp uge comparisons. Also, use FiniteOnlyFPMath() for this code instead of UnsafeFPMath, as it is more specific. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82466 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 156 +++++++-- .../X86/scalar-min-max-fill-operand.ll | 6 +- test/CodeGen/X86/sse-minmax.ll | 321 +++++++++++++++++- 3 files changed, 442 insertions(+), 41 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1ce51377992..fe94418d106 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8256,76 +8256,158 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); - // If we have SSE[12] support, try to form min/max nodes. + // If we have SSE[12] support, try to form min/max nodes. SSE min/max + // instructions have the peculiarity that if either operand is a NaN, + // they choose what we call the RHS operand (and as such are not symmetric). + // It happens that this matches the semantics of the common C idiom + // x<y?x:y and related forms, so we can recognize these cases. if (Subtarget->hasSSE2() && (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) && Cond.getOpcode() == ISD::SETCC) { ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); unsigned Opcode = 0; + // Check for x CC y ? x : y. if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { switch (CC) { default: break; - case ISD::SETOLE: // (X <= Y) ? X : Y -> min + case ISD::SETULT: + // This can be a min if we can prove that at least one of the operands + // is not a nan. 
+ if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(RHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(LHS)) + break; + } + Opcode = X86ISD::FMIN; + break; + case ISD::SETOLE: + // This can be a min if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(LHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(RHS)) + break; + } + Opcode = X86ISD::FMIN; + break; case ISD::SETULE: - case ISD::SETLE: - if (!UnsafeFPMath) break; - // FALL THROUGH. - case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min + // This can be a min, but if either operand is a NaN we need it to + // preserve the original LHS. + std::swap(LHS, RHS); + case ISD::SETOLT: case ISD::SETLT: + case ISD::SETLE: Opcode = X86ISD::FMIN; break; - case ISD::SETOGT: // (X > Y) ? X : Y -> max + case ISD::SETOGE: + // This can be a max if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(LHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(RHS)) + break; + } + Opcode = X86ISD::FMAX; + break; case ISD::SETUGT: + // This can be a max if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(RHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(LHS)) + break; + } + Opcode = X86ISD::FMAX; + break; + case ISD::SETUGE: + // This can be a max, but if either operand is a NaN we need it to + // preserve the original LHS. + std::swap(LHS, RHS); + case ISD::SETOGT: case ISD::SETGT: - if (!UnsafeFPMath) break; - // FALL THROUGH. - case ISD::SETUGE: // (X uge/ge Y) ? 
X : Y -> max case ISD::SETGE: Opcode = X86ISD::FMAX; break; } + // Check for x CC y ? y : x -- a min/max with reversed arms. } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { switch (CC) { default: break; - case ISD::SETOGT: - // This can use a min only if the LHS isn't NaN. - if (DAG.isKnownNeverNaN(LHS)) - Opcode = X86ISD::FMIN; - else if (DAG.isKnownNeverNaN(RHS)) { - Opcode = X86ISD::FMIN; - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); + case ISD::SETOGE: + // This can be a min if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(RHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(LHS)) + break; } + Opcode = X86ISD::FMIN; break; - - case ISD::SETUGT: // (X > Y) ? Y : X -> min + case ISD::SETUGT: + // This can be a min if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(LHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(RHS)) + break; + } + Opcode = X86ISD::FMIN; + break; + case ISD::SETUGE: + // This can be a min, but if either operand is a NaN we need it to + // preserve the original LHS. + std::swap(LHS, RHS); + case ISD::SETOGT: case ISD::SETGT: - if (!UnsafeFPMath) break; - // FALL THROUGH. - case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min case ISD::SETGE: Opcode = X86ISD::FMIN; break; - case ISD::SETULE: - // This can use a max only if the LHS isn't NaN. - if (DAG.isKnownNeverNaN(LHS)) - Opcode = X86ISD::FMAX; - else if (DAG.isKnownNeverNaN(RHS)) { - Opcode = X86ISD::FMAX; - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); + case ISD::SETULT: + // This can be a max if we can prove that at least one of the operands + // is not a nan. 
+ if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(LHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(RHS)) + break; } + Opcode = X86ISD::FMAX; break; - - case ISD::SETOLE: // (X <= Y) ? Y : X -> max - case ISD::SETLE: - if (!UnsafeFPMath) break; - // FALL THROUGH. - case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max + case ISD::SETOLE: + // This can be a max if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(RHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(LHS)) + break; + } + Opcode = X86ISD::FMAX; + break; + case ISD::SETULE: + // This can be a max, but if either operand is a NaN we need it to + // preserve the original LHS. + std::swap(LHS, RHS); + case ISD::SETOLT: case ISD::SETLT: + case ISD::SETLE: Opcode = X86ISD::FMAX; break; } diff --git a/test/CodeGen/X86/scalar-min-max-fill-operand.ll b/test/CodeGen/X86/scalar-min-max-fill-operand.ll index bda50ccd58e..fe40758d8ec 100644 --- a/test/CodeGen/X86/scalar-min-max-fill-operand.ll +++ b/test/CodeGen/X86/scalar-min-max-fill-operand.ll @@ -4,17 +4,17 @@ declare float @bar() -define float @foo(float %a) +define float @foo(float %a) nounwind { %s = call float @bar() %t = fcmp olt float %s, %a %u = select i1 %t, float %s, float %a ret float %u } -define float @hem(float %a) +define float @hem(float %a) nounwind { %s = call float @bar() - %t = fcmp uge float %s, %a + %t = fcmp ogt float %s, %a %u = select i1 %t, float %s, float %a ret float %u } diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index 99528342536..17ffb5e464a 100644 --- a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -1,4 +1,323 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s + +; Some of these 
patterns can be matched as SSE min or max. Some of +; them can be matched provided that the operands are swapped. +; Some of them can't be matched at all and require a comparison +; and a conditional branch. + +; The naming convention is {,x_}{o,u}{gt,lt,ge,le}{,_inverse} +; x_ : use 0.0 instead of %y +; _inverse : swap the arms of the select. + +; CHECK: ogt: +; CHECK-NEXT: maxsd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @ogt(double %x, double %y) nounwind { + %c = fcmp ogt double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: olt: +; CHECK-NEXT: minsd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @olt(double %x, double %y) nounwind { + %c = fcmp olt double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: ogt_inverse: +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @ogt_inverse(double %x, double %y) nounwind { + %c = fcmp ogt double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: olt_inverse: +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @olt_inverse(double %x, double %y) nounwind { + %c = fcmp olt double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: oge: +; CHECK-NEXT: ucomisd %xmm1, %xmm0 +define double @oge(double %x, double %y) nounwind { + %c = fcmp oge double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: ole: +; CHECK-NEXT: ucomisd %xmm0, %xmm1 +define double @ole(double %x, double %y) nounwind { + %c = fcmp ole double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: oge_inverse: +; CHECK-NEXT: ucomisd %xmm1, %xmm0 +define double @oge_inverse(double %x, double %y) nounwind { + %c = fcmp oge double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: ole_inverse: +; CHECK-NEXT: ucomisd %xmm0, %xmm1 +define double 
@ole_inverse(double %x, double %y) nounwind { + %c = fcmp ole double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: x_ogt: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: maxsd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_ogt(double %x) nounwind { + %c = fcmp ogt double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_olt: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: minsd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_olt(double %x) nounwind { + %c = fcmp olt double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_ogt_inverse: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_ogt_inverse(double %x) nounwind { + %c = fcmp ogt double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: x_olt_inverse: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_olt_inverse(double %x) nounwind { + %c = fcmp olt double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: x_oge: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: maxsd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_oge(double %x) nounwind { + %c = fcmp oge double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_ole: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: minsd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_ole(double %x) nounwind { + %c = fcmp ole double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_oge_inverse: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_oge_inverse(double %x) nounwind { 
+ %c = fcmp oge double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: x_ole_inverse: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_ole_inverse(double %x) nounwind { + %c = fcmp ole double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: ugt: +; CHECK-NEXT: ucomisd %xmm0, %xmm1 +define double @ugt(double %x, double %y) nounwind { + %c = fcmp ugt double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: ult: +; CHECK-NEXT: ucomisd %xmm1, %xmm0 +define double @ult(double %x, double %y) nounwind { + %c = fcmp ult double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: ugt_inverse: +; CHECK-NEXT: ucomisd %xmm0, %xmm1 +define double @ugt_inverse(double %x, double %y) nounwind { + %c = fcmp ugt double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: ult_inverse: +; CHECK-NEXT: ucomisd %xmm1, %xmm0 +define double @ult_inverse(double %x, double %y) nounwind { + %c = fcmp ult double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: uge: +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @uge(double %x, double %y) nounwind { + %c = fcmp uge double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: ule: +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @ule(double %x, double %y) nounwind { + %c = fcmp ule double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: uge_inverse: +; CHECK-NEXT: minsd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @uge_inverse(double %x, double %y) nounwind { + %c = fcmp uge double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: ule_inverse: +; 
CHECK-NEXT: maxsd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @ule_inverse(double %x, double %y) nounwind { + %c = fcmp ule double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: x_ugt: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_ugt(double %x) nounwind { + %c = fcmp ugt double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_ult: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_ult(double %x) nounwind { + %c = fcmp ult double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_ugt_inverse: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: minsd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_ugt_inverse(double %x) nounwind { + %c = fcmp ugt double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: x_ult_inverse: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: maxsd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_ult_inverse(double %x) nounwind { + %c = fcmp ult double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: x_uge: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_uge(double %x) nounwind { + %c = fcmp uge double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_ule: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_ule(double %x) nounwind { + %c = fcmp ule double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_uge_inverse: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; 
CHECK-NEXT: minsd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_uge_inverse(double %x) nounwind { + %c = fcmp uge double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: x_ule_inverse: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: maxsd %xmm1, %xmm0 +; CHECK-NEXT: ret +define double @x_ule_inverse(double %x) nounwind { + %c = fcmp ule double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; Test a few more misc. cases. ; CHECK: clampTo3k_a: ; CHECK: minsd -- 2.34.1