From: Matt Arsenault Date: Tue, 6 Jan 2015 23:00:46 +0000 (+0000) Subject: R600/SI: Add combine for isinfinite pattern X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=6a72b20325d01c27cb6e730cc8f080a8bea2cd4f;p=oota-llvm.git R600/SI: Add combine for isinfinite pattern git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225310 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 0690792fb72..a211504a781 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -218,6 +218,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setTargetDAGCombine(ISD::FMAXNUM); setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::UINT_TO_FP); @@ -1302,6 +1303,59 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N, return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset); } +SDValue SITargetLowering::performAndCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + if (DCI.isBeforeLegalize()) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + + // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) -> + // fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity) + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + if (LHS.getOpcode() == ISD::SETCC && + RHS.getOpcode() == ISD::SETCC) { + ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); + ISD::CondCode RCC = cast<CondCodeSDNode>(RHS.getOperand(2))->get(); + + SDValue X = LHS.getOperand(0); + SDValue Y = RHS.getOperand(0); + if (Y.getOpcode() != ISD::FABS || Y.getOperand(0) != X) + return SDValue(); + + if (LCC == ISD::SETO) { + if (X != LHS.getOperand(1)) + return SDValue(); + + if (RCC == ISD::SETUNE) { + const ConstantFPSDNode *C1 = dyn_cast<ConstantFPSDNode>(RHS.getOperand(1)); + if (!C1 || !C1->isInfinity() || C1->isNegative()) + return SDValue(); + + const uint32_t Mask = SIInstrFlags::N_NORMAL | 
SIInstrFlags::N_SUBNORMAL | + SIInstrFlags::N_ZERO | + SIInstrFlags::P_ZERO | + SIInstrFlags::P_SUBNORMAL | + SIInstrFlags::P_NORMAL; + + static_assert(((~(SIInstrFlags::S_NAN | + SIInstrFlags::Q_NAN | + SIInstrFlags::N_INFINITY | + SIInstrFlags::P_INFINITY)) & 0x3ff) == Mask, + "mask not equal"); + + return DAG.getNode(AMDGPUISD::FP_CLASS, SDLoc(N), MVT::i1, + X, DAG.getConstant(Mask, MVT::i32)); + } + } + } + + return SDValue(); +} + SDValue SITargetLowering::performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -1607,6 +1661,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, } break; } + case ISD::AND: + return performAndCombine(N, DCI); case ISD::OR: return performOrCombine(N, DCI); case AMDGPUISD::FP_CLASS: diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 44b25dcd844..8b41245a15c 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -58,6 +58,7 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue performSHLPtrCombine(SDNode *N, unsigned AS, DAGCombinerInfo &DCI) const; + SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const; diff --git a/test/CodeGen/R600/fp-classify.ll b/test/CodeGen/R600/fp-classify.ll index e6ca5efd767..a1b2f08edde 100644 --- a/test/CodeGen/R600/fp-classify.ll +++ b/test/CodeGen/R600/fp-classify.ll @@ -41,5 +41,90 @@ define void @test_not_isinf_pattern_1(i32 addrspace(1)* nocapture %out, float %x ret void } +; SI-LABEL: {{^}}test_isfinite_pattern_0: +; SI-NOT: v_cmp +; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1f8{{$}} +; SI: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]] +; SI-NOT: v_cmp +; SI: s_endpgm +define void @test_isfinite_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 { + %ord = fcmp ord float %x, 0.000000e+00 + %x.fabs = tail call float 
@llvm.fabs.f32(float %x) #1 + %ninf = fcmp une float %x.fabs, 0x7FF0000000000000 + %and = and i1 %ord, %ninf + %ext = zext i1 %and to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; Use negative infinity +; SI-LABEL: {{^}}test_isfinite_not_pattern_0: +; SI-NOT: v_cmp_class_f32 +; SI: s_endpgm +define void @test_isfinite_not_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 { + %ord = fcmp ord float %x, 0.000000e+00 + %x.fabs = tail call float @llvm.fabs.f32(float %x) #1 + %ninf = fcmp une float %x.fabs, 0xFFF0000000000000 + %and = and i1 %ord, %ninf + %ext = zext i1 %and to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; No fabs +; SI-LABEL: {{^}}test_isfinite_not_pattern_1: +; SI-NOT: v_cmp_class_f32 +; SI: s_endpgm +define void @test_isfinite_not_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 { + %ord = fcmp ord float %x, 0.000000e+00 + %ninf = fcmp une float %x, 0x7FF0000000000000 + %and = and i1 %ord, %ninf + %ext = zext i1 %and to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; fabs of different value +; SI-LABEL: {{^}}test_isfinite_not_pattern_2: +; SI-NOT: v_cmp_class_f32 +; SI: s_endpgm +define void @test_isfinite_not_pattern_2(i32 addrspace(1)* nocapture %out, float %x, float %y) #0 { + %ord = fcmp ord float %x, 0.000000e+00 + %x.fabs = tail call float @llvm.fabs.f32(float %y) #1 + %ninf = fcmp une float %x.fabs, 0x7FF0000000000000 + %and = and i1 %ord, %ninf + %ext = zext i1 %and to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; Wrong ordered compare type +; SI-LABEL: {{^}}test_isfinite_not_pattern_3: +; SI-NOT: v_cmp_class_f32 +; SI: s_endpgm +define void @test_isfinite_not_pattern_3(i32 addrspace(1)* nocapture %out, float %x) #0 { + %ord = fcmp uno float %x, 0.000000e+00 + %x.fabs = tail call float @llvm.fabs.f32(float %x) #1 + %ninf = fcmp une float %x.fabs, 0x7FF0000000000000 + %and = and i1 %ord, %ninf + %ext = zext i1 %and to i32 + 
store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; Wrong unordered compare +; SI-LABEL: {{^}}test_isfinite_not_pattern_4: +; SI-NOT: v_cmp_class_f32 +; SI: s_endpgm +define void @test_isfinite_not_pattern_4(i32 addrspace(1)* nocapture %out, float %x) #0 { + %ord = fcmp ord float %x, 0.000000e+00 + %x.fabs = tail call float @llvm.fabs.f32(float %x) #1 + %ninf = fcmp one float %x.fabs, 0x7FF0000000000000 + %and = and i1 %ord, %ninf + %ext = zext i1 %and to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + attributes #0 = { nounwind } attributes #1 = { nounwind readnone }