From: Evan Cheng Date: Fri, 11 Feb 2011 02:28:55 +0000 (+0000) Subject: Fix buggy fcopysign lowering. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=c143dd4f63889ca6b4f656200f43a1fa7bbf1c34;p=oota-llvm.git Fix buggy fcopysign lowering. This define float @foo(float %x, float %y) nounwind readnone { entry: %0 = tail call float @copysignf(float %x, float %y) nounwind readnone ret float %0 } Was compiled to: vmov s0, r1 bic r0, r0, #-2147483648 vmov s1, r0 vcmpe.f32 s0, #0 vmrs apsr_nzcv, fpscr it lt vneglt.f32 s1, s1 vmov r0, s1 bx lr This fails to copy the sign of -0.0f because it's lost during the float to int conversion. Also, it's sub-optimal when the inputs are in GPR registers. Now it uses integer and + or operations when it's profitable. And it's correct! lsrs r1, r1, #31 bfi r0, r1, #31, #1 bx lr rdar://8984306 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@125357 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4afc8c7fac7..fe398602717 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2833,12 +2833,46 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); - SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); - SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32); - SDValue FP0 = DAG.getConstantFP(0.0, SrcVT); - SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl); + bool F2IisFast = Subtarget->isCortexA9() || + Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR; + + // Bitcast operand 1 to i32. + if (SrcVT == MVT::f64) + Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), + &Tmp1, 1).getValue(1); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); + + // If float to int conversion isn't going to be super expensive, then simply + // or in the signbit. + if (F2IisFast) { + SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); + SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); + Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); + if (VT == MVT::f32) { + Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, + DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); + } + + // f64: Or the high part with signbit and then combine two parts. + Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), + &Tmp0, 1); + SDValue Lo = Tmp0.getValue(0); + SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); + Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); + } + + // Remove the signbit of operand 0. + Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0); + + // If operand 1 signbit is one, then negate operand 0. + SDValue ARMcc; + SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32), + ISD::SETLT, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp); + return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp); } SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll index a6d741087a8..1050cd26599 100644 --- a/test/CodeGen/ARM/fcopysign.ll +++ b/test/CodeGen/ARM/fcopysign.ll @@ -1,18 +1,45 @@ -; RUN: llc < %s -march=arm | grep bic | count 2 -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \ -; RUN: grep vneg | count 2 +; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT +; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD -define float @test1(float %x, double %y) { - %tmp = fpext float %x to double - %tmp2 = tail call double @copysign( double %tmp, double %y ) - %tmp3 = fptrunc double %tmp2 to float - ret float %tmp3 +; rdar://8984306 +define float @test1(float %x, float %y) nounwind { +entry: +; SOFT: test1: +; SOFT: lsr r1, r1, #31 +; SOFT: bfi r0, r1, #31, #1 + +; HARD: test1: +; HARD: vabs.f32 d0, d0 +; HARD: cmp r0, #0 +; HARD: vneglt.f32 s0, s0 + %0 = tail call float @copysignf(float %x, float %y) nounwind + ret float %0 +} + +define double @test2(double %x, double %y) nounwind { +entry: +; SOFT: test2: +; SOFT: lsr r2, r3, #31 +; SOFT: bfi r1, r2, #31, #1 + +; HARD: test2: +; HARD: vabs.f64 d0, d0 +; HARD: cmp r1, #0 +; HARD: vneglt.f64 d0, d0 + %0 = tail call double @copysign(double %x, double %y) nounwind + ret double %0 } -define double @test2(double %x, float %y) { - %tmp = fpext float %y to double - %tmp2 = tail call double @copysign( double %x, double %tmp ) - ret double %tmp2 +define double @test3(double %x, double %y, double %z) nounwind { +entry: +; SOFT: test3: +; SOFT: vabs.f64 +; SOFT: cmp {{.*}}, #0 +; SOFT: vneglt.f64 + %0 = fmul double %x, %y + %1 = tail call double @copysign(double %0, double %z) nounwind + ret double %1 } -declare double @copysign(double, double) +declare double @copysign(double, double) nounwind +declare float @copysignf(float, float) nounwind