From 670e53977bf289009bb460538987542c9c46ac90 Mon Sep 17 00:00:00 2001
From: Dan Gohman <gohman@apple.com>
Date: Mon, 21 Sep 2009 18:03:22 +0000
Subject: [PATCH] Recognize SSE min and max opportunities in even more cases.
 And fix a bug with the behavior of min/max instructions formed from fcmp uge
 comparisons.

Also, use FiniteOnlyFPMath() for this code instead of UnsafeFPMath,
as it is more specific.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82466 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp            | 156 +++++++--
 .../X86/scalar-min-max-fill-operand.ll        |   6 +-
 test/CodeGen/X86/sse-minmax.ll                | 321 +++++++++++++++++-
 3 files changed, 442 insertions(+), 41 deletions(-)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1ce51377992..fe94418d106 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8256,76 +8256,158 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
   SDValue LHS = N->getOperand(1);
   SDValue RHS = N->getOperand(2);
 
-  // If we have SSE[12] support, try to form min/max nodes.
+  // If we have SSE[12] support, try to form min/max nodes. SSE min/max
+  // instructions have the peculiarity that if either operand is a NaN,
+  // they chose what we call the RHS operand (and as such are not symmetric).
+  // It happens that this matches the semantics of the common C idiom
+  // x<y?x:y and related forms, so we can recognize these cases.
   if (Subtarget->hasSSE2() &&
       (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
       Cond.getOpcode() == ISD::SETCC) {
     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
 
     unsigned Opcode = 0;
+    // Check for x CC y ? x : y.
     if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
       switch (CC) {
       default: break;
-      case ISD::SETOLE: // (X <= Y) ? X : Y -> min
+      case ISD::SETULT:
+        // This can be a min if we can prove that at least one of the operands
+        // is not a nan.
+        if (!FiniteOnlyFPMath()) {
+          if (DAG.isKnownNeverNaN(RHS)) {
+            // Put the potential NaN in the RHS so that SSE will preserve it.
+            std::swap(LHS, RHS);
+          } else if (!DAG.isKnownNeverNaN(LHS))
+            break;
+        }
+        Opcode = X86ISD::FMIN;
+        break;
+      case ISD::SETOLE:
+        // This can be a min if we can prove that at least one of the operands
+        // is not a nan.
+        if (!FiniteOnlyFPMath()) {
+          if (DAG.isKnownNeverNaN(LHS)) {
+            // Put the potential NaN in the RHS so that SSE will preserve it.
+            std::swap(LHS, RHS);
+          } else if (!DAG.isKnownNeverNaN(RHS))
+            break;
+        }
+        Opcode = X86ISD::FMIN;
+        break;
       case ISD::SETULE:
-      case ISD::SETLE:
-        if (!UnsafeFPMath) break;
-        // FALL THROUGH.
-      case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
+        // This can be a min, but if either operand is a NaN we need it to
+        // preserve the original LHS.
+        std::swap(LHS, RHS);
+      case ISD::SETOLT:
       case ISD::SETLT:
+      case ISD::SETLE:
         Opcode = X86ISD::FMIN;
         break;
 
-      case ISD::SETOGT: // (X > Y) ? X : Y -> max
+      case ISD::SETOGE:
+        // This can be a max if we can prove that at least one of the operands
+        // is not a nan.
+        if (!FiniteOnlyFPMath()) {
+          if (DAG.isKnownNeverNaN(LHS)) {
+            // Put the potential NaN in the RHS so that SSE will preserve it.
+            std::swap(LHS, RHS);
+          } else if (!DAG.isKnownNeverNaN(RHS))
+            break;
+        }
+        Opcode = X86ISD::FMAX;
+        break;
       case ISD::SETUGT:
+        // This can be a max if we can prove that at least one of the operands
+        // is not a nan.
+        if (!FiniteOnlyFPMath()) {
+          if (DAG.isKnownNeverNaN(RHS)) {
+            // Put the potential NaN in the RHS so that SSE will preserve it.
+            std::swap(LHS, RHS);
+          } else if (!DAG.isKnownNeverNaN(LHS))
+            break;
+        }
+        Opcode = X86ISD::FMAX;
+        break;
+      case ISD::SETUGE:
+        // This can be a max, but if either operand is a NaN we need it to
+        // preserve the original LHS.
+        std::swap(LHS, RHS);
+      case ISD::SETOGT:
       case ISD::SETGT:
-        if (!UnsafeFPMath) break;
-        // FALL THROUGH.
-      case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
       case ISD::SETGE:
         Opcode = X86ISD::FMAX;
         break;
       }
+    // Check for x CC y ? y : x -- a min/max with reversed arms.
     } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
       switch (CC) {
       default: break;
-      case ISD::SETOGT:
-        // This can use a min only if the LHS isn't NaN.
-        if (DAG.isKnownNeverNaN(LHS))
-          Opcode = X86ISD::FMIN;
-        else if (DAG.isKnownNeverNaN(RHS)) {
-          Opcode = X86ISD::FMIN;
-          // Put the potential NaN in the RHS so that SSE will preserve it.
-          std::swap(LHS, RHS);
+      case ISD::SETOGE:
+        // This can be a min if we can prove that at least one of the operands
+        // is not a nan.
+        if (!FiniteOnlyFPMath()) {
+          if (DAG.isKnownNeverNaN(RHS)) {
+            // Put the potential NaN in the RHS so that SSE will preserve it.
+            std::swap(LHS, RHS);
+          } else if (!DAG.isKnownNeverNaN(LHS))
+            break;
         }
+        Opcode = X86ISD::FMIN;
         break;
-
-      case ISD::SETUGT: // (X > Y) ? Y : X -> min
+      case ISD::SETUGT:
+        // This can be a min if we can prove that at least one of the operands
+        // is not a nan.
+        if (!FiniteOnlyFPMath()) {
+          if (DAG.isKnownNeverNaN(LHS)) {
+            // Put the potential NaN in the RHS so that SSE will preserve it.
+            std::swap(LHS, RHS);
+          } else if (!DAG.isKnownNeverNaN(RHS))
+            break;
+        }
+        Opcode = X86ISD::FMIN;
+        break;
+      case ISD::SETUGE:
+        // This can be a min, but if either operand is a NaN we need it to
+        // preserve the original LHS.
+        std::swap(LHS, RHS);
+      case ISD::SETOGT:
       case ISD::SETGT:
-        if (!UnsafeFPMath) break;
-        // FALL THROUGH.
-      case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
       case ISD::SETGE:
         Opcode = X86ISD::FMIN;
         break;
 
-      case ISD::SETULE:
-        // This can use a max only if the LHS isn't NaN.
-        if (DAG.isKnownNeverNaN(LHS))
-          Opcode = X86ISD::FMAX;
-        else if (DAG.isKnownNeverNaN(RHS)) {
-          Opcode = X86ISD::FMAX;
-          // Put the potential NaN in the RHS so that SSE will preserve it.
-          std::swap(LHS, RHS);
+      case ISD::SETULT:
+        // This can be a max if we can prove that at least one of the operands
+        // is not a nan.
+        if (!FiniteOnlyFPMath()) {
+          if (DAG.isKnownNeverNaN(LHS)) {
+            // Put the potential NaN in the RHS so that SSE will preserve it.
+            std::swap(LHS, RHS);
+          } else if (!DAG.isKnownNeverNaN(RHS))
+            break;
         }
+        Opcode = X86ISD::FMAX;
         break;
-
-      case ISD::SETOLE:   // (X <= Y) ? Y : X -> max
-      case ISD::SETLE:
-        if (!UnsafeFPMath) break;
-        // FALL THROUGH.
-      case ISD::SETOLT:   // (X olt/lt Y) ? Y : X -> max
+      case ISD::SETOLE:
+        // This can be a max if we can prove that at least one of the operands
+        // is not a nan.
+        if (!FiniteOnlyFPMath()) {
+          if (DAG.isKnownNeverNaN(RHS)) {
+            // Put the potential NaN in the RHS so that SSE will preserve it.
+            std::swap(LHS, RHS);
+          } else if (!DAG.isKnownNeverNaN(LHS))
+            break;
+        }
+        Opcode = X86ISD::FMAX;
+        break;
+      case ISD::SETULE:
+        // This can be a max, but if either operand is a NaN we need it to
+        // preserve the original LHS.
+        std::swap(LHS, RHS);
+      case ISD::SETOLT:
       case ISD::SETLT:
+      case ISD::SETLE:
         Opcode = X86ISD::FMAX;
         break;
       }
diff --git a/test/CodeGen/X86/scalar-min-max-fill-operand.ll b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
index bda50ccd58e..fe40758d8ec 100644
--- a/test/CodeGen/X86/scalar-min-max-fill-operand.ll
+++ b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
@@ -4,17 +4,17 @@
 
 declare float @bar()
 
-define float @foo(float %a)
+define float @foo(float %a) nounwind
 {
   %s = call float @bar()
   %t = fcmp olt float %s, %a
   %u = select i1 %t, float %s, float %a
   ret float %u
 }
-define float @hem(float %a)
+define float @hem(float %a) nounwind
 {
   %s = call float @bar()
-  %t = fcmp uge float %s, %a
+  %t = fcmp ogt float %s, %a
   %u = select i1 %t, float %s, float %a
   ret float %u
 }
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index 99528342536..17ffb5e464a 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -1,4 +1,323 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
+
+; Some of these patterns can be matched as SSE min or max. Some of
+; then can be matched provided that the operands are swapped.
+; Some of them can't be matched at all and require a comparison
+; and a conditional branch.
+
+; The naming convention is {,x_}{o,u}{gt,lt,ge,le}{,_inverse}
+; x_ : use 0.0 instead of %y
+; _inverse : swap the arms of the select.
+
+; CHECK:      ogt:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ogt(double %x, double %y) nounwind {
+  %c = fcmp ogt double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      olt:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @olt(double %x, double %y) nounwind {
+  %c = fcmp olt double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ogt_inverse:
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ogt_inverse(double %x, double %y) nounwind {
+  %c = fcmp ogt double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      olt_inverse:
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @olt_inverse(double %x, double %y) nounwind {
+  %c = fcmp olt double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      oge:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @oge(double %x, double %y) nounwind {
+  %c = fcmp oge double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ole:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ole(double %x, double %y) nounwind {
+  %c = fcmp ole double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      oge_inverse:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @oge_inverse(double %x, double %y) nounwind {
+  %c = fcmp oge double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      ole_inverse:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ole_inverse(double %x, double %y) nounwind {
+  %c = fcmp ole double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      x_ogt:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ogt(double %x) nounwind {
+  %c = fcmp ogt double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_olt:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_olt(double %x) nounwind {
+  %c = fcmp olt double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ogt_inverse:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ogt_inverse(double %x) nounwind {
+  %c = fcmp ogt double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_olt_inverse:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_olt_inverse(double %x) nounwind {
+  %c = fcmp olt double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_oge:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_oge(double %x) nounwind {
+  %c = fcmp oge double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ole:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ole(double %x) nounwind {
+  %c = fcmp ole double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_oge_inverse:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_oge_inverse(double %x) nounwind {
+  %c = fcmp oge double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_ole_inverse:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ole_inverse(double %x) nounwind {
+  %c = fcmp ole double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      ugt:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ugt(double %x, double %y) nounwind {
+  %c = fcmp ugt double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ult:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @ult(double %x, double %y) nounwind {
+  %c = fcmp ult double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ugt_inverse:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ugt_inverse(double %x, double %y) nounwind {
+  %c = fcmp ugt double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      ult_inverse:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @ult_inverse(double %x, double %y) nounwind {
+  %c = fcmp ult double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      uge:
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @uge(double %x, double %y) nounwind {
+  %c = fcmp uge double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ule:
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ule(double %x, double %y) nounwind {
+  %c = fcmp ule double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      uge_inverse:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @uge_inverse(double %x, double %y) nounwind {
+  %c = fcmp uge double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      ule_inverse:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ule_inverse(double %x, double %y) nounwind {
+  %c = fcmp ule double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      x_ugt:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ugt(double %x) nounwind {
+  %c = fcmp ugt double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ult:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ult(double %x) nounwind {
+  %c = fcmp ult double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ugt_inverse:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ugt_inverse(double %x) nounwind {
+  %c = fcmp ugt double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_ult_inverse:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ult_inverse(double %x) nounwind {
+  %c = fcmp ult double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_uge:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_uge(double %x) nounwind {
+  %c = fcmp uge double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ule:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ule(double %x) nounwind {
+  %c = fcmp ule double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_uge_inverse:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_uge_inverse(double %x) nounwind {
+  %c = fcmp uge double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_ule_inverse:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ule_inverse(double %x) nounwind {
+  %c = fcmp ule double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; Test a few more misc. cases.
 
 ; CHECK: clampTo3k_a:
 ; CHECK: minsd
-- 
2.34.1