From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 6 Jan 2015 23:00:46 +0000 (+0000)
Subject: R600/SI: Add combine for isinfinite pattern
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=6a72b20325d01c27cb6e730cc8f080a8bea2cd4f;p=oota-llvm.git

R600/SI: Add combine for isinfinite pattern

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225310 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 0690792fb72..a211504a781 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -218,6 +218,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setTargetDAGCombine(ISD::FMAXNUM);
   setTargetDAGCombine(ISD::SELECT_CC);
   setTargetDAGCombine(ISD::SETCC);
+  setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::UINT_TO_FP);
 
@@ -1302,6 +1303,59 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
   return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
 }
 
+SDValue SITargetLowering::performAndCombine(SDNode *N,
+                                            DAGCombinerInfo &DCI) const {
+  if (DCI.isBeforeLegalize())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+
+  // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
+  // fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity)
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  if (LHS.getOpcode() == ISD::SETCC &&
+      RHS.getOpcode() == ISD::SETCC) {
+    ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+    ISD::CondCode RCC = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
+
+    SDValue X = LHS.getOperand(0);
+    SDValue Y = RHS.getOperand(0);
+    if (Y.getOpcode() != ISD::FABS || Y.getOperand(0) != X)
+      return SDValue();
+
+    if (LCC == ISD::SETO) {
+      if (X != LHS.getOperand(1))
+        return SDValue();
+
+      if (RCC == ISD::SETUNE) {
+        const ConstantFPSDNode *C1 = dyn_cast<ConstantFPSDNode>(RHS.getOperand(1));
+        if (!C1 || !C1->isInfinity() || C1->isNegative())
+          return SDValue();
+
+        const uint32_t Mask = SIInstrFlags::N_NORMAL |
+                              SIInstrFlags::N_SUBNORMAL |
+                              SIInstrFlags::N_ZERO |
+                              SIInstrFlags::P_ZERO |
+                              SIInstrFlags::P_SUBNORMAL |
+                              SIInstrFlags::P_NORMAL;
+
+        static_assert(((~(SIInstrFlags::S_NAN |
+                          SIInstrFlags::Q_NAN |
+                          SIInstrFlags::N_INFINITY |
+                          SIInstrFlags::P_INFINITY)) & 0x3ff) == Mask,
+                      "mask not equal");
+
+        return DAG.getNode(AMDGPUISD::FP_CLASS, SDLoc(N), MVT::i1,
+                           X, DAG.getConstant(Mask, MVT::i32));
+      }
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue SITargetLowering::performOrCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -1607,6 +1661,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
     }
     break;
   }
+  case ISD::AND:
+    return performAndCombine(N, DCI);
   case ISD::OR:
     return performOrCombine(N, DCI);
   case AMDGPUISD::FP_CLASS:
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 44b25dcd844..8b41245a15c 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -58,6 +58,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
   SDValue performSHLPtrCombine(SDNode *N,
                                unsigned AS,
                                DAGCombinerInfo &DCI) const;
+  SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
diff --git a/test/CodeGen/R600/fp-classify.ll b/test/CodeGen/R600/fp-classify.ll
index e6ca5efd767..a1b2f08edde 100644
--- a/test/CodeGen/R600/fp-classify.ll
+++ b/test/CodeGen/R600/fp-classify.ll
@@ -41,5 +41,90 @@ define void @test_not_isinf_pattern_1(i32 addrspace(1)* nocapture %out, float %x
   ret void
 }
 
+; SI-LABEL: {{^}}test_isfinite_pattern_0:
+; SI-NOT: v_cmp
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1f8{{$}}
+; SI: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]]
+; SI-NOT: v_cmp
+; SI: s_endpgm
+define void @test_isfinite_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Use negative infinity
+; SI-LABEL: {{^}}test_isfinite_not_pattern_0:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %ninf = fcmp une float %x.fabs, 0xFFF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; No fabs
+; SI-LABEL: {{^}}test_isfinite_not_pattern_1:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %ninf = fcmp une float %x, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; fabs of different value
+; SI-LABEL: {{^}}test_isfinite_not_pattern_2:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_2(i32 addrspace(1)* nocapture %out, float %x, float %y) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %y) #1
+  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Wrong ordered compare type
+; SI-LABEL: {{^}}test_isfinite_not_pattern_3:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_3(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp uno float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Wrong unordered compare
+; SI-LABEL: {{^}}test_isfinite_not_pattern_4:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_4(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %ninf = fcmp one float %x.fabs, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }