From d412c608fcaee75a08160bcf4646257af9f82466 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Wed, 7 Jan 2015 17:33:03 +0000 Subject: [PATCH] [X86] Teach FCOPYSIGN lowering to recognize constant magnitudes. For code like: float foo(float x) { return copysign(1.0, x); } We used to generate: andps <-0.000000e+00,0,0,0>, %xmm0 movss <1.000000e+00>, %xmm1 andps <nan>, %xmm1 orps %xmm0, %xmm1 Basically doing an abs(1.0f) in the two middle instructions. We now generate: andps <-0.000000e+00,0,0,0>, %xmm0 orps <1.000000e+00,0,0,0>, %xmm0 Builds on cleanups r223415, r223542. rdar://19049548 Differential Revision: http://reviews.llvm.org/D6555 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225357 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 25 ++++- .../X86/copysign-constant-magnitude.ll | 105 ++++++++++++++++++ test/CodeGen/X86/copysign-zero.ll | 14 --- 3 files changed, 124 insertions(+), 20 deletions(-) create mode 100644 test/CodeGen/X86/copysign-constant-magnitude.ll delete mode 100644 test/CodeGen/X86/copysign-zero.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 98935890d36..57fedb8f387 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14530,14 +14530,27 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1); // Next, clear the sign bit from the first operand (magnitude). - CV[0] = ConstantFP::get( - *Context, APFloat(Sem, APInt::getLowBitsSet(SizeInBits, SizeInBits - 1))); + // If it's a constant, we can clear it here. + if (ConstantFPSDNode *Op0CN = dyn_cast<ConstantFPSDNode>(Op0)) { + APFloat APF = Op0CN->getValueAPF(); + // If the magnitude is a positive zero, the sign bit alone is enough. 
+ if (APF.isPosZero()) + return SignBit; + APF.clearSign(); + CV[0] = ConstantFP::get(*Context, APF); + } else { + CV[0] = ConstantFP::get( + *Context, + APFloat(Sem, APInt::getLowBitsSet(SizeInBits, SizeInBits - 1))); + } C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16); - SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); - SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2); + SDValue Val = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), + false, false, false, 16); + // If the magnitude operand wasn't a constant, we need to AND out the sign. + if (!isa<ConstantFPSDNode>(Op0)) + Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Val); // OR the magnitude value with the sign bit. return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit); diff --git a/test/CodeGen/X86/copysign-constant-magnitude.ll b/test/CodeGen/X86/copysign-constant-magnitude.ll new file mode 100644 index 00000000000..537d6298ddf --- /dev/null +++ b/test/CodeGen/X86/copysign-constant-magnitude.ll @@ -0,0 +1,105 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +define void @test_copysign_const_magnitude_d(double %X) { +; CHECK: [[SIGNMASK:L.+]]: +; CHECK-NEXT: .quad -9223372036854775808 ## double -0.000000e+00 +; CHECK-NEXT: .quad 0 ## double 0.000000e+00 +; CHECK: [[ZERO:L.+]]: +; CHECK-NEXT: .space 16 +; CHECK: [[ONE:L.+]]: +; CHECK-NEXT: .quad 4607182418800017408 ## double 1.000000e+00 +; CHECK-NEXT: .quad 0 ## double 0.000000e+00 +; CHECK-LABEL: test_copysign_const_magnitude_d: + +; CHECK: id + %iX = call double @id_d(double %X) + +; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0 + %d0 = call double @copysign(double 0.000000e+00, double %iX) + +; CHECK-NEXT: id + %id0 = call double @id_d(double %d0) + +; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0 +; CHECK-NEXT: orpd 
[[ZERO]](%rip), %xmm0 + %dn0 = call double @copysign(double -0.000000e+00, double %id0) + +; CHECK-NEXT: id + %idn0 = call double @id_d(double %dn0) + +; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0 +; CHECK-NEXT: orpd [[ONE]](%rip), %xmm0 + %d1 = call double @copysign(double 1.000000e+00, double %idn0) + +; CHECK-NEXT: id + %id1 = call double @id_d(double %d1) + +; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0 +; CHECK-NEXT: orpd [[ONE]](%rip), %xmm0 + %dn1 = call double @copysign(double -1.000000e+00, double %id1) + +; CHECK-NEXT: id + %idn1 = call double @id_d(double %dn1) + +; CHECK: retq + ret void +} + +define void @test_copysign_const_magnitude_f(float %X) { +; CHECK: [[SIGNMASK:L.+]]: +; CHECK-NEXT: .long 2147483648 ## float -0.000000e+00 +; CHECK-NEXT: .long 0 ## float 0.000000e+00 +; CHECK-NEXT: .long 0 ## float 0.000000e+00 +; CHECK-NEXT: .long 0 ## float 0.000000e+00 +; CHECK: [[ZERO:L.+]]: +; CHECK-NEXT: .space 16 +; CHECK: [[ONE:L.+]]: +; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00 +; CHECK-NEXT: .long 0 ## float 0.000000e+00 +; CHECK-NEXT: .long 0 ## float 0.000000e+00 +; CHECK-NEXT: .long 0 ## float 0.000000e+00 +; CHECK-LABEL: test_copysign_const_magnitude_f: + +; CHECK: id + %iX = call float @id_f(float %X) + +; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0 + %d0 = call float @copysignf(float 0.000000e+00, float %iX) + +; CHECK-NEXT: id + %id0 = call float @id_f(float %d0) + +; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0 +; CHECK-NEXT: orps [[ZERO]](%rip), %xmm0 + %dn0 = call float @copysignf(float -0.000000e+00, float %id0) + +; CHECK-NEXT: id + %idn0 = call float @id_f(float %dn0) + +; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0 +; CHECK-NEXT: orps [[ONE]](%rip), %xmm0 + %d1 = call float @copysignf(float 1.000000e+00, float %idn0) + +; CHECK-NEXT: id + %id1 = call float @id_f(float %d1) + +; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0 +; CHECK-NEXT: orps [[ONE]](%rip), %xmm0 + %dn1 = call float @copysignf(float -1.000000e+00, float %id1) + +; 
CHECK-NEXT: id + %idn1 = call float @id_f(float %dn1) + +; CHECK: retq + ret void +} + +declare double @copysign(double, double) nounwind readnone +declare float @copysignf(float, float) nounwind readnone + +; Dummy identity functions, so we always have xmm0, and prevent optimizations. +declare double @id_d(double) +declare float @id_f(float) diff --git a/test/CodeGen/X86/copysign-zero.ll b/test/CodeGen/X86/copysign-zero.ll deleted file mode 100644 index 47522d80805..00000000000 --- a/test/CodeGen/X86/copysign-zero.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: llc < %s | not grep orpd -; RUN: llc < %s | grep andpd | count 1 - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-apple-darwin8" - -define double @test(double %X) nounwind { -entry: - %tmp2 = tail call double @copysign( double 0.000000e+00, double %X ) nounwind readnone ; [#uses=1] - ret double %tmp2 -} - -declare double @copysign(double, double) nounwind readnone - -- 2.34.1