From 78fc2103c975e99e289cce4966d5b46ae5c01a32 Mon Sep 17 00:00:00 2001 From: Artyom Skrobov Date: Wed, 6 May 2015 11:44:10 +0000 Subject: [PATCH] [ARM] generate VMAXNM/VMINNM for a compare followed by a select, in safe math mode too git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236590 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 125 ++++++++++--- test/CodeGen/ARM/vminmaxnm.ll | 285 +++++++++++++++++++++++++++++ 2 files changed, 385 insertions(+), 25 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index bdcf86cfed6..6db9ffe9bf0 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3377,12 +3377,6 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SelectTrue, SelectFalse, ISD::SETNE); } -static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) { - if (CC == ISD::SETNE) - return ISD::SETEQ; - return ISD::getSetCCInverse(CC, true); -} - static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps) { // Start by selecting the GE condition code for opcodes that return true for @@ -3495,7 +3489,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { ARMCC::CondCodes CondCode = IntCCToARMCC(CC); if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || CondCode == ARMCC::VC || CondCode == ARMCC::NE) { - CC = getInverseCCForVSEL(CC); + CC = ISD::getSetCCInverse(CC, true); std::swap(TrueVal, FalseVal); } } @@ -3517,27 +3511,108 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // c = fcmp [?gt, ?ge, ?lt, ?le] a, b // select c, a, b // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'. - // We only do this transformation in UnsafeFPMath and for no-NaNs - // comparisons, because signed zeros and NaNs are handled differently than - // the original code sequence. 
- // FIXME: There are more cases that can be transformed even with NaNs, - // signed zeroes and safe math. E.g. in the following, the result will be - // FalseVal if a is a NaN or -0./0. and that's what vmaxnm will give, too. - // c = fcmp ogt, a, 0. ; select c, a, 0. => vmaxnm a, 0. // FIXME: There is similar code that allows some extensions in // AArch64TargetLowering::LowerSELECT_CC that should be shared with this // code. - if (getTargetMachine().Options.UnsafeFPMath) { - if (LHS == TrueVal && RHS == FalseVal) { - if (CC == ISD::SETGT || CC == ISD::SETGE) - return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETLT || CC == ISD::SETLE) - return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); - } else if (LHS == FalseVal && RHS == TrueVal) { - if (CC == ISD::SETLT || CC == ISD::SETLE) - return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETGT || CC == ISD::SETGE) - return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + bool swapSides = false; + if (!getTargetMachine().Options.NoNaNsFPMath) { + // transformability may depend on which way around we compare + switch (CC) { + default: + break; + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETOLT: + case ISD::SETOLE: + // the non-NaN should be RHS + swapSides = DAG.isKnownNeverNaN(LHS) && !DAG.isKnownNeverNaN(RHS); + break; + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: + // the non-NaN should be LHS + swapSides = DAG.isKnownNeverNaN(RHS) && !DAG.isKnownNeverNaN(LHS); + break; + } + } + swapSides = swapSides || (LHS == FalseVal && RHS == TrueVal); + if (swapSides) { + CC = ISD::getSetCCSwappedOperands(CC); + std::swap(LHS, RHS); + } + if (LHS == TrueVal && RHS == FalseVal) { + bool canTransform = true; + // FIXME: FastMathFlags::noSignedZeros() doesn't appear reachable from here + if (!getTargetMachine().Options.UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { + const 
ConstantFPSDNode *Zero; + switch (CC) { + default: + break; + case ISD::SETOGT: + case ISD::SETUGT: + case ISD::SETGT: + // RHS must not be -0 + canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) && + !Zero->isNegative(); + break; + case ISD::SETOGE: + case ISD::SETUGE: + case ISD::SETGE: + // LHS must not be -0 + canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) && + !Zero->isNegative(); + break; + case ISD::SETOLT: + case ISD::SETULT: + case ISD::SETLT: + // RHS must not be +0 + canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) && + Zero->isNegative(); + break; + case ISD::SETOLE: + case ISD::SETULE: + case ISD::SETLE: + // LHS must not be +0 + canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) && + Zero->isNegative(); + break; + } + } + if (canTransform) { + // Note: If one of the elements in a pair is a number and the other + // element is NaN, the corresponding result element is the number. + // This is consistent with the IEEE 754-2008 standard. + // Therefore, a > b ? a : b <=> vmax(a,b), if b is constant and a is NaN + switch (CC) { + default: + break; + case ISD::SETOGT: + case ISD::SETOGE: + if (!DAG.isKnownNeverNaN(RHS)) + break; + return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS); + case ISD::SETUGT: + case ISD::SETUGE: + if (!DAG.isKnownNeverNaN(LHS)) + break; + case ISD::SETGT: + case ISD::SETGE: + return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS); + case ISD::SETOLT: + case ISD::SETOLE: + if (!DAG.isKnownNeverNaN(RHS)) + break; + return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS); + case ISD::SETULT: + case ISD::SETULE: + if (!DAG.isKnownNeverNaN(LHS)) + break; + case ISD::SETLT: + case ISD::SETLE: + return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS); + } } } diff --git a/test/CodeGen/ARM/vminmaxnm.ll b/test/CodeGen/ARM/vminmaxnm.ll index 2e2648d11f7..3632ffd0021 100644 --- a/test/CodeGen/ARM/vminmaxnm.ll +++ b/test/CodeGen/ARM/vminmaxnm.ll @@ -218,6 +218,291 @@ define double @fp-armv8_vmaxnm_ule_rev(double %a, double %b) { ret double %cond } +; known non-NaNs + +define float 
@fp-armv8_vminnm_NNNo(float %a) { +; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNo": +; CHECK-FAST: vminnm.f32 +; CHECK-FAST: vminnm.f32 +; CHECK-LABEL: "fp-armv8_vminnm_NNNo": +; CHECK: vminnm.f32 +; CHECK-NOT: vminnm.f32 + %cmp1 = fcmp olt float %a, 12. + %cond1 = select i1 %cmp1, float %a, float 12. + %cmp2 = fcmp olt float 34., %cond1 + %cond2 = select i1 %cmp2, float 34., float %cond1 + ret float %cond2 +} + +define double @fp-armv8_vminnm_NNNole(double %a) { +; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNole": +; CHECK-FAST: vminnm.f64 +; CHECK-FAST: vminnm.f64 +; CHECK-LABEL: "fp-armv8_vminnm_NNNole": +; CHECK: vminnm.f64 +; CHECK-NOT: vminnm.f64 + %cmp1 = fcmp ole double %a, 34. + %cond1 = select i1 %cmp1, double %a, double 34. + %cmp2 = fcmp ole double 56., %cond1 + %cond2 = select i1 %cmp2, double 56., double %cond1 + ret double %cond2 +} + +define float @fp-armv8_vminnm_NNNo_rev(float %a) { +; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNo_rev": +; CHECK-FAST: vminnm.f32 +; CHECK-FAST: vminnm.f32 +; CHECK-LABEL: "fp-armv8_vminnm_NNNo_rev": +; CHECK: vminnm.f32 +; CHECK-NOT: vminnm.f32 + %cmp1 = fcmp ogt float %a, 56. + %cond1 = select i1 %cmp1, float 56., float %a + %cmp2 = fcmp ogt float 78., %cond1 + %cond2 = select i1 %cmp2, float %cond1, float 78. + ret float %cond2 +} + +define double @fp-armv8_vminnm_NNNoge_rev(double %a) { +; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNoge_rev": +; CHECK-FAST: vminnm.f64 +; CHECK-FAST: vminnm.f64 +; CHECK-LABEL: "fp-armv8_vminnm_NNNoge_rev": +; CHECK: vminnm.f64 +; CHECK-NOT: vminnm.f64 + %cmp1 = fcmp oge double %a, 78. + %cond1 = select i1 %cmp1, double 78., double %a + %cmp2 = fcmp oge double 90., %cond1 + %cond2 = select i1 %cmp2, double %cond1, double 90. 
+ ret double %cond2 +} + +define float @fp-armv8_vminnm_NNNu(float %b) { +; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNu": +; CHECK-FAST: vminnm.f32 +; CHECK-FAST: vminnm.f32 +; CHECK-LABEL: "fp-armv8_vminnm_NNNu": +; CHECK: vminnm.f32 +; CHECK-NOT: vminnm.f32 + %cmp1 = fcmp ult float 12., %b + %cond1 = select i1 %cmp1, float 12., float %b + %cmp2 = fcmp ult float %cond1, 34. + %cond2 = select i1 %cmp2, float %cond1, float 34. + ret float %cond2 +} + +define float @fp-armv8_vminnm_NNNule(float %b) { +; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNule": +; CHECK-FAST: vminnm.f32 +; CHECK-FAST: vminnm.f32 +; CHECK-LABEL: "fp-armv8_vminnm_NNNule": +; CHECK: vminnm.f32 +; CHECK-NOT: vminnm.f32 + %cmp1 = fcmp ule float 34., %b + %cond1 = select i1 %cmp1, float 34., float %b + %cmp2 = fcmp ule float %cond1, 56. + %cond2 = select i1 %cmp2, float %cond1, float 56. + ret float %cond2 +} + +define float @fp-armv8_vminnm_NNNu_rev(float %b) { +; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNu_rev": +; CHECK-FAST: vminnm.f32 +; CHECK-FAST: vminnm.f32 +; CHECK-LABEL: "fp-armv8_vminnm_NNNu_rev": +; CHECK: vminnm.f32 +; CHECK-NOT: vminnm.f32 + %cmp1 = fcmp ugt float 56., %b + %cond1 = select i1 %cmp1, float %b, float 56. + %cmp2 = fcmp ugt float %cond1, 78. + %cond2 = select i1 %cmp2, float 78., float %cond1 + ret float %cond2 +} + +define double @fp-armv8_vminnm_NNNuge_rev(double %b) { +; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNuge_rev": +; CHECK-FAST: vminnm.f64 +; CHECK-FAST: vminnm.f64 +; CHECK-LABEL: "fp-armv8_vminnm_NNNuge_rev": +; CHECK: vminnm.f64 +; CHECK-NOT: vminnm.f64 + %cmp1 = fcmp uge double 78., %b + %cond1 = select i1 %cmp1, double %b, double 78. + %cmp2 = fcmp uge double %cond1, 90. 
+ %cond2 = select i1 %cmp2, double 90., double %cond1 + ret double %cond2 +} + +define float @fp-armv8_vmaxnm_NNNo(float %a) { +; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNo": +; CHECK-FAST: vmaxnm.f32 +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo": +; CHECK: vmaxnm.f32 +; CHECK-NOT: vmaxnm.f32 + %cmp1 = fcmp ogt float %a, 12. + %cond1 = select i1 %cmp1, float %a, float 12. + %cmp2 = fcmp ogt float 34., %cond1 + %cond2 = select i1 %cmp2, float 34., float %cond1 + ret float %cond2 +} + +define float @fp-armv8_vmaxnm_NNNoge(float %a) { +; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNoge": +; CHECK-FAST: vmaxnm.f32 +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vmaxnm_NNNoge": +; CHECK: vmaxnm.f32 +; CHECK-NOT: vmaxnm.f32 + %cmp1 = fcmp oge float %a, 34. + %cond1 = select i1 %cmp1, float %a, float 34. + %cmp2 = fcmp oge float 56., %cond1 + %cond2 = select i1 %cmp2, float 56., float %cond1 + ret float %cond2 +} + +define float @fp-armv8_vmaxnm_NNNo_rev(float %a) { +; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNo_rev": +; CHECK-FAST: vmaxnm.f32 +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo_rev": +; CHECK: vmaxnm.f32 +; CHECK-NOT: vmaxnm.f32 + %cmp1 = fcmp olt float %a, 56. + %cond1 = select i1 %cmp1, float 56., float %a + %cmp2 = fcmp olt float 78., %cond1 + %cond2 = select i1 %cmp2, float %cond1, float 78. + ret float %cond2 +} + +define float @fp-armv8_vmaxnm_NNNole_rev(float %a) { +; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNole_rev": +; CHECK-FAST: vmaxnm.f32 +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vmaxnm_NNNole_rev": +; CHECK: vmaxnm.f32 +; CHECK-NOT: vmaxnm.f32 + %cmp1 = fcmp ole float %a, 78. + %cond1 = select i1 %cmp1, float 78., float %a + %cmp2 = fcmp ole float 90., %cond1 + %cond2 = select i1 %cmp2, float %cond1, float 90. 
+ ret float %cond2 +} + +define float @fp-armv8_vmaxnm_NNNu(float %b) { +; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNu": +; CHECK-FAST: vmaxnm.f32 +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu": +; CHECK: vmaxnm.f32 +; CHECK-NOT: vmaxnm.f32 + %cmp1 = fcmp ugt float 12., %b + %cond1 = select i1 %cmp1, float 12., float %b + %cmp2 = fcmp ugt float %cond1, 34. + %cond2 = select i1 %cmp2, float %cond1, float 34. + ret float %cond2 +} + +define float @fp-armv8_vmaxnm_NNNuge(float %b) { +; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNuge": +; CHECK-FAST: vmaxnm.f32 +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vmaxnm_NNNuge": +; CHECK: vmaxnm.f32 +; CHECK-NOT: vmaxnm.f32 + %cmp1 = fcmp uge float 34., %b + %cond1 = select i1 %cmp1, float 34., float %b + %cmp2 = fcmp uge float %cond1, 56. + %cond2 = select i1 %cmp2, float %cond1, float 56. + ret float %cond2 +} + +define float @fp-armv8_vmaxnm_NNNu_rev(float %b) { +; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNu_rev": +; CHECK-FAST: vmaxnm.f32 +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu_rev": +; CHECK: vmaxnm.f32 +; CHECK-NOT: vmaxnm.f32 + %cmp1 = fcmp ult float 56., %b + %cond1 = select i1 %cmp1, float %b, float 56. + %cmp2 = fcmp ult float %cond1, 78. + %cond2 = select i1 %cmp2, float 78., float %cond1 + ret float %cond2 +} + +define double @fp-armv8_vmaxnm_NNNule_rev( double %b) { +; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNule_rev": +; CHECK-FAST: vmaxnm.f64 +; CHECK-FAST: vmaxnm.f64 +; CHECK-LABEL: "fp-armv8_vmaxnm_NNNule_rev": +; CHECK: vmaxnm.f64 +; CHECK-NOT: vmaxnm.f64 + %cmp1 = fcmp ule double 78., %b + %cond1 = select i1 %cmp1, double %b, double 78. + %cmp2 = fcmp ule double %cond1, 90. 
+ %cond2 = select i1 %cmp2, double 90., double %cond1 + ret double %cond2 +} + +define float @fp-armv8_vminmaxnm_0(float %a) { +; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_0": +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vminnm.f32 +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vminmaxnm_0": +; CHECK-NOT: vminnm.f32 +; CHECK: vmaxnm.f32 + %cmp1 = fcmp olt float %a, 0. + %cond1 = select i1 %cmp1, float %a, float 0. + %cmp2 = fcmp ogt float %cond1, 0. + %cond2 = select i1 %cmp2, float %cond1, float 0. + ret float %cond2 +} + +define float @fp-armv8_vminmaxnm_neg0(float %a) { +; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_neg0": +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vminnm.f32 +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vminmaxnm_neg0": +; CHECK: vminnm.f32 +; CHECK-NOT: vmaxnm.f32 + %cmp1 = fcmp olt float %a, -0. + %cond1 = select i1 %cmp1, float %a, float -0. + %cmp2 = fcmp ogt float %cond1, -0. + %cond2 = select i1 %cmp2, float %cond1, float -0. + ret float %cond2 +} + +define float @fp-armv8_vminmaxnm_e_0(float %a) { +; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_e_0": +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vminnm.f32 +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vminmaxnm_e_0": +; CHECK-NOT: vminnm.f32 +; CHECK: vmaxnm.f32 + %cmp1 = fcmp ule float 0., %a + %cond1 = select i1 %cmp1, float 0., float %a + %cmp2 = fcmp uge float 0., %cond1 + %cond2 = select i1 %cmp2, float 0., float %cond1 + ret float %cond2 +} + +define float @fp-armv8_vminmaxnm_e_neg0(float %a) { +; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_e_neg0": +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vminnm.f32 +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vminmaxnm_e_neg0": +; CHECK: vminnm.f32 +; CHECK-NOT: vmaxnm.f32 + %cmp1 = fcmp ule float -0., %a + %cond1 = select i1 %cmp1, float -0., float %a + %cmp2 = fcmp uge float -0., %cond1 + %cond2 = select i1 %cmp2, float -0., float %cond1 + ret float %cond2 +} declare <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone declare <2 x 
float> @llvm.arm.neon.vminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone -- 2.34.1