From: Evan Cheng Date: Thu, 1 Mar 2012 23:27:13 +0000 (+0000) Subject: Neuter the optimization I implemented with r107852 and r108258 which turn some X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=fc501a3ec9d97e372ecb1bd9cf32d861da46b2c9;p=oota-llvm.git Neuter the optimization I implemented with r107852 and r108258 which turn some floating point equality comparisons into integer ones with -ffast-math. The issue is the optimization causes +0.0 != -0.0. Now the optimization is only done when one side is known to be 0.0. The other side's sign bit is masked off for the comparison. rdar://10964603 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151861 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 8ad7136879e..0ca97042d29 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2978,12 +2978,11 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(4); DebugLoc dl = Op.getDebugLoc(); - bool SeenZero = false; - if (canChangeToInt(LHS, SeenZero, Subtarget) && - canChangeToInt(RHS, SeenZero, Subtarget) && - // If one of the operand is zero, it's safe to ignore the NaN case since - // we only care about equality comparisons. - (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { + bool LHSSeenZero = false; + bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); + bool RHSSeenZero = false; + bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); + if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { // If unsafe fp math optimization is enabled and there are no other uses of // the CMP operands, and the condition code is EQ or NE, we can optimize it // to an integer comparison. 
@@ -2992,10 +2991,13 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { else if (CC == ISD::SETUNE) CC = ISD::SETNE; + SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32); SDValue ARMcc; if (LHS.getValueType() == MVT::f32) { - LHS = bitcastf32Toi32(LHS, DAG); - RHS = bitcastf32Toi32(RHS, DAG); + LHS = DAG.getNode(ISD::AND, dl, MVT::i32, + bitcastf32Toi32(LHS, DAG), Mask); + RHS = DAG.getNode(ISD::AND, dl, MVT::i32, + bitcastf32Toi32(RHS, DAG), Mask); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, @@ -3006,6 +3008,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue RHS1, RHS2; expandf64Toi32(LHS, DAG, LHS1, LHS2); expandf64Toi32(RHS, DAG, RHS1, RHS2); + LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); + RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); ARMCC::CondCodes CondCode = IntCCToARMCC(CC); ARMcc = DAG.getConstant(CondCode, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll index ad03202f8eb..bbac92d106f 100644 --- a/test/CodeGen/ARM/fpcmp-opt.ll +++ b/test/CodeGen/ARM/fpcmp-opt.ll @@ -1,24 +1,16 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck %s ; rdar://7461510 +; rdar://10964603 +; Disable this optimization unless we know one of them is zero. 
define arm_apcscc i32 @t1(float* %a, float* %b) nounwind { entry: -; FINITE: t1: -; FINITE-NOT: vldr -; FINITE: ldr -; FINITE: ldr -; FINITE: cmp r0, r1 -; FINITE-NOT: vcmpe.f32 -; FINITE-NOT: vmrs -; FINITE: beq - -; NAN: t1: -; NAN: vldr s0, -; NAN: vldr s1, -; NAN: vcmpe.f32 s1, s0 -; NAN: vmrs apsr_nzcv, fpscr -; NAN: beq +; CHECK: t1: +; CHECK: vldr s0, +; CHECK: vldr s1, +; CHECK: vcmpe.f32 s1, s0 +; CHECK: vmrs apsr_nzcv, fpscr +; CHECK: beq %0 = load float* %a %1 = load float* %b %2 = fcmp une float %0, %1 @@ -33,17 +25,21 @@ bb2: ret i32 %4 } +; If one side is zero, the other side's sign bit is masked off to allow +; +0.0 == -0.0 define arm_apcscc i32 @t2(double* %a, double* %b) nounwind { entry: -; FINITE: t2: -; FINITE-NOT: vldr -; FINITE: ldrd r0, r1, [r0] -; FINITE-NOT: b LBB -; FINITE: cmp r0, #0 -; FINITE: cmpeq r1, #0 -; FINITE-NOT: vcmpe.f32 -; FINITE-NOT: vmrs -; FINITE: bne +; CHECK: t2: +; CHECK-NOT: vldr +; CHECK: ldr [[REG1:(r[0-9]+)]], [r0] +; CHECK: ldr [[REG2:(r[0-9]+)]], [r0, #4] +; CHECK-NOT: b LBB +; CHECK: cmp [[REG1]], #0 +; CHECK: bfc [[REG2]], #31, #1 +; CHECK: cmpeq [[REG2]], #0 +; CHECK-NOT: vcmpe.f32 +; CHECK-NOT: vmrs +; CHECK: bne %0 = load double* %a %1 = fcmp oeq double %0, 0.000000e+00 br i1 %1, label %bb1, label %bb2 @@ -59,13 +55,14 @@ bb2: define arm_apcscc i32 @t3(float* %a, float* %b) nounwind { entry: -; FINITE: t3: -; FINITE-NOT: vldr -; FINITE: ldr r0, [r0] -; FINITE: cmp r0, #0 -; FINITE-NOT: vcmpe.f32 -; FINITE-NOT: vmrs -; FINITE: bne +; CHECK: t3: +; CHECK-NOT: vldr +; CHECK: ldr [[REG3:(r[0-9]+)]], [r0] +; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648 +; CHECK: tst [[REG3]], [[REG4]] +; CHECK-NOT: vcmpe.f32 +; CHECK-NOT: vmrs +; CHECK: bne %0 = load float* %a %1 = fcmp oeq float %0, 0.000000e+00 br i1 %1, label %bb1, label %bb2