From 11be760757fc0dd7e562afdd194aba2f4ddae760 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Wed, 2 Jul 2014 15:09:44 +0000
Subject: [PATCH] X86: When combining shuffles just remove shuffles that are completely redundant.

CombineTo doesn't allow replacing a node with itself so this would crash
if the combined shuffle is the same as the input shuffle.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212181 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp           |  7 +++++++
 test/CodeGen/X86/vector-shuffle-combining.ll | 10 ++++++++++
 2 files changed, 17 insertions(+)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 40a80329df1..f0d8f222104 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -18442,6 +18442,13 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask,
   V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V.getOperand(0),
                   getV4X86ShuffleImm8ForMask(Mask, DAG));
 
+  // It is possible that one of the combinable shuffles was completely absorbed
+  // by the other, just replace it and revisit all users in that case.
+  if (Old.getNode() == V.getNode()) {
+    DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo=*/true);
+    return true;
+  }
+
   // Replace N with its operand as we're going to combine that shuffle away.
   DAG.ReplaceAllUsesWith(N, N.getOperand(0));
 
diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll
index 1bc2aee6efe..e60ecb70dec 100644
--- a/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -66,6 +66,16 @@ define <4 x i32> @combine_pshufd5(<4 x i32> %a) {
   ret <4 x i32> %d
 }
 
+define <4 x i32> @combine_pshufd6(<4 x i32> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufd6
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd $0
+; CHECK-SSE2-NEXT:    retq
+  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0)
+  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 8)
+  ret <4 x i32> %c
+}
+
 define <8 x i16> @combine_pshuflw1(<8 x i16> %a) {
 ; CHECK-SSE2-LABEL: @combine_pshuflw1
 ; CHECK-SSE2:       # BB#0:
-- 
2.34.1