From 6548096a2e2b34e685680e6e1055b8e407c2c243 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 30 Apr 2013 20:43:52 +0000 Subject: [PATCH] InstCombine: Fold more shuffles of shuffles. Always fold a shuffle-of-shuffle into a single shuffle when there's only one input vector in the first place. Continue to be more conservative when there's multiple inputs. rdar://13402653 PR15866 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180802 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineVectorOps.cpp | 17 ++++++++++++----- test/Transforms/BBVectorize/simple.ll | 9 ++++----- test/Transforms/InstCombine/vec_shuffle.ll | 12 ++++++------ 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index de8a3acdbd8..56243059a61 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -614,11 +614,16 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // we are absolutely afraid of producing a shuffle mask not in the input // program, because the code gen may not be smart enough to turn a merged // shuffle into two specific shuffles: it may produce worse code. As such, - // we only merge two shuffles if the result is either a splat or one of the - // input shuffle masks. In this case, merging the shuffles just removes - // one instruction, which we know is safe. This is good for things like + // we only merge two shuffles if the result is a splat, one of the + // input shuffle masks, or if there's only one input to the shuffle. + // In this case, merging the shuffles just removes one instruction, which + // we know is safe. This is good for things like // turning: (splat(splat)) -> splat, or // merge(V[0..n], V[n+1..2n]) -> V[0..2n] + // + // FIXME: This is almost certainly far, far too conservative. We should + // have a better model. 
Perhaps a TargetTransformInfo hook to ask whether + // a shuffle is considered OK? ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); if (LHSShuffle) @@ -743,8 +748,10 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { } // If the result mask is equal to one of the original shuffle masks, - // or is a splat, do the replacement. - if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { + // or is a splat, do the replacement. Similarly, if there is only one + // input vector, go ahead and do the folding. + if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask || + isa<UndefValue>(RHS)) { SmallVector Elts; Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); for (unsigned i = 0, e = newMask.size(); i != e; ++i) { diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll index a447908d16c..a22ad15940c 100644 --- a/test/Transforms/BBVectorize/simple.ll +++ b/test/Transforms/BBVectorize/simple.ll @@ -139,11 +139,10 @@ define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) { ; CHECK: %Z1 = add <16 x i8> %Y1, %X1.v.i1 %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> -; CHECK: %Q1.v.i1 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <16 x i32> -; CHECK: %Q1 = shufflevector <16 x i8> %Z1, <16 x i8> %Q1.v.i1, <16 x i32> - %R = mul <8 x i8> %Q1, %Q2 -; CHECK: %Q1.v.r1 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> -; CHECK: %Q1.v.r2 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> + %R = mul <8 x i8> %Q1, %Q2 +; CHECK: %Q1.v.i1 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <16 x i32> +; CHECK: %Q1.v.r1 = shufflevector <16 x i8> %Z1, <16 x i8> %Q1.v.i1, <8 x i32> +; CHECK: %Q1.v.r2 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <8 x i32> ; CHECK: %R = mul <8 x i8> %Q1.v.r1, %Q1.v.r2 ret <8 x i8> %R ; CHECK: ret <8 x i8> %R diff --git 
a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll index 37d4d56e913..37c19f1558b 100644 --- a/test/Transforms/InstCombine/vec_shuffle.ll +++ b/test/Transforms/InstCombine/vec_shuffle.ll @@ -86,14 +86,14 @@ define <4 x i8> @test9(<16 x i8> %tmp6) nounwind { } ; Same as test9, but make sure that "undef" mask values are not confused with -; mask values of 2*N, where N is the mask length. These shuffles should not -; be folded (because [8,9,4,8] may not be a mask supported by the target). -define <4 x i8> @test9a(<16 x i8> %tmp6) nounwind { +; mask values of 2*N, where N is the mask length of the result. Make sure when +; folding these shuffles that 'undef' mask values stay that way in the result +; instead of getting mapped to the 2*N'th entry of the source. +define <4 x i8> @test9a(<16 x i8> %in, <16 x i8> %in2) nounwind { ; CHECK: @test9a -; CHECK-NEXT: shufflevector -; CHECK-NEXT: shufflevector +; CHECK-NEXT: shufflevector <16 x i8> %in, <16 x i8> %in2, <4 x i32> <i32 16, i32 9, i32 4, i32 undef> ; CHECK-NEXT: ret - %tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> < i32 undef, i32 9, i32 4, i32 8 > ; <<4 x i8>> [#uses=1] + %tmp7 = shufflevector <16 x i8> %in, <16 x i8> %in2, <4 x i32> < i32 undef, i32 9, i32 4, i32 16 > ; <<4 x i8>> [#uses=1] %tmp9 = shufflevector <4 x i8> %tmp7, <4 x i8> undef, <4 x i32> < i32 3, i32 1, i32 2, i32 0 > ; <<4 x i8>> [#uses=1] ret <4 x i8> %tmp9 } -- 2.34.1