From 9b0157716902f900919d4d1c7f211f9ae36cd5b1 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 27 Oct 2015 01:28:07 +0000 Subject: [PATCH] [x86] replace integer logic ops with packed SSE FP logic ops If we have an operand to a bitwise logic op that's already in an XMM register and the result is going to be sent to an XMM register, then use an SSE logic op to avoid moves between the integer and vector register files. Related commits: http://reviews.llvm.org/rL248395 http://reviews.llvm.org/rL248399 http://reviews.llvm.org/rL248404 http://reviews.llvm.org/rL248409 http://reviews.llvm.org/rL248415 This should solve PR22428: https://llvm.org/bugs/show_bug.cgi?id=22428 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251378 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 28 ++++++++++++++++++++++++++-- test/CodeGen/X86/fp-logic.ll | 30 ++++++++++++------------------ 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6ffc524cdb8..bf702e2788a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -23123,7 +23123,8 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, EltNo); } -static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -23139,6 +23140,29 @@ static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00); } + // Convert a bitcasted integer logic operation that has one bitcasted + // floating-point operand and one constant operand into a floating-point + // logic operation. This may create a load of the constant, but that is + // cheaper than materializing the constant in an integer register and + // transferring it to an SSE register or transferring the SSE operand to + // integer register and back. + unsigned FPOpcode; + switch (N0.getOpcode()) { + case ISD::AND: FPOpcode = X86ISD::FAND; break; + case ISD::OR: FPOpcode = X86ISD::FOR; break; + case ISD::XOR: FPOpcode = X86ISD::FXOR; break; + default: return SDValue(); + } + if (((Subtarget->hasSSE1() && VT == MVT::f32) || + (Subtarget->hasSSE2() && VT == MVT::f64)) && + isa(N0.getOperand(1)) && + N0.getOperand(0).getOpcode() == ISD::BITCAST && + N0.getOperand(0).getOperand(0).getValueType() == VT) { + SDValue N000 = N0.getOperand(0).getOperand(0); + SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1)); + return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst); + } + return SDValue(); } @@ -26635,7 +26659,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SELECT: case X86ISD::SHRUNKBLEND: return PerformSELECTCombine(N, DAG, DCI, Subtarget); - case ISD::BITCAST: return PerformBITCASTCombine(N, DAG); + case ISD::BITCAST: return PerformBITCASTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget); case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget); case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget); diff --git a/test/CodeGen/X86/fp-logic.ll b/test/CodeGen/X86/fp-logic.ll index 6494d4967c3..64c3f6b79a2 100644 --- a/test/CodeGen/X86/fp-logic.ll +++ b/test/CodeGen/X86/fp-logic.ll @@ -110,9 +110,8 @@ define float @f6(float %x, i32 %y) { define float @f7(float %x) { ; CHECK-LABEL: f7: ; CHECK: # BB#0: -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: andl $3, %eax -; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq %bc1 = bitcast float %x to i32 @@ -126,9 +125,8 @@ define float @f7(float %x) { define float @f8(float %x) { ; CHECK-LABEL: f8: ; CHECK: # BB#0: -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: andl $4, %eax -; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq %bc1 = bitcast float %x to i32 @@ -196,9 +194,8 @@ define float @xor(float %x, float %y) { define float @f7_or(float %x) { ; CHECK-LABEL: f7_or: ; CHECK: # BB#0: -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: orl $3, %eax -; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: orps %xmm1, %xmm0 ; CHECK-NEXT: retq %bc1 = bitcast float %x to i32 @@ -210,9 +207,8 @@ define float @f7_or(float %x) { define float @f7_xor(float %x) { ; CHECK-LABEL: f7_xor: ; CHECK: # BB#0: -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: xorl $3, %eax -; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: xorps %xmm1, %xmm0 ; CHECK-NEXT: retq %bc1 = bitcast float %x to i32 @@ -239,9 +235,8 @@ define double @doubles(double %x, double %y) { define double @f7_double(double %x) { ; CHECK-LABEL: f7_double: ; CHECK: # BB#0: -; CHECK-NEXT: movd %xmm0, %rax -; CHECK-NEXT: andl $3, %eax -; CHECK-NEXT: movd %rax, %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: andpd %xmm1, %xmm0 ; CHECK-NEXT: retq %bc1 = bitcast double %x to i64 @@ -257,9 +252,8 @@ define double @f7_double(double %x) { define float @movmsk(float %x) { ; CHECK-LABEL: movmsk: ; CHECK: # BB#0: -; CHECK-NEXT: movmskps %xmm0, %eax -; CHECK-NEXT: shll $31, %eax -; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq %bc1 = bitcast float %x to i32 -- 2.34.1