From 99f78061e05833e815cb7a27e6c17eadcd028ce2 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 25 Dec 2012 13:09:08 +0000 Subject: [PATCH] X86: Shave off one shuffle from the pcmpeqq sequence for SSE2 by making use of AND commutativity. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171064 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 10 ++++------ test/CodeGen/X86/vec_compare.ll | 6 ++---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a173712b5bc..ad86c99fd26 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9173,7 +9173,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { return SDValue(); if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) { // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with - // pcmpeqd + 2 shuffles + pand. + // pcmpeqd + pshufd + pand. assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!"); // First cast everything to the right type, @@ -9184,11 +9184,9 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1); // Make sure the lower and upper halves are both all-ones. 
- const int Mask1[] = { 0, 0, 2, 2 }; - SDValue S1 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask1); - const int Mask2[] = { 1, 1, 3, 3 }; - SDValue S2 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask2); - Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, S1, S2); + const int Mask[] = { 1, 0, 3, 2 }; + SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask); + Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf); if (Invert) Result = DAG.getNOT(dl, Result, MVT::v4i32); diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll index cf86c737c73..b6d91a3f770 100644 --- a/test/CodeGen/X86/vec_compare.ll +++ b/test/CodeGen/X86/vec_compare.ll @@ -45,8 +45,7 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind { define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK: test5: ; CHECK: pcmpeqd -; CHECK: pshufd $-11 -; CHECK: pshufd $-96 +; CHECK: pshufd $-79 ; CHECK: pand ; CHECK: ret %C = icmp eq <2 x i64> %A, %B @@ -57,8 +56,7 @@ define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind { define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK: test6: ; CHECK: pcmpeqd -; CHECK: pshufd $-11 -; CHECK: pshufd $-96 +; CHECK: pshufd $-79 ; CHECK: pand ; CHECK: pcmpeqd ; CHECK: pxor -- 2.34.1