From 1596373671e4df54e53e79dc613545d5cf9d83bb Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky
Date: Tue, 26 Jun 2012 08:04:10 +0000
Subject: [PATCH] Shuffle optimization for AVX/AVX2.

This patch optimizes frequently used shuffle patterns and yields the
following instruction sequence reduction.

Before:
      vshufps      $-35, %xmm1, %xmm0, %xmm2  ## xmm2 = xmm0[1,3],xmm1[1,3]
      vpermilps    $-40, %xmm2, %xmm2         ## xmm2 = xmm2[0,2,1,3]
      vextractf128 $1, %ymm1, %xmm1
      vextractf128 $1, %ymm0, %xmm0
      vshufps      $-35, %xmm1, %xmm0, %xmm0  ## xmm0 = xmm0[1,3],xmm1[1,3]
      vpermilps    $-40, %xmm0, %xmm0         ## xmm0 = xmm0[0,2,1,3]
      vinsertf128  $1, %xmm0, %ymm2, %ymm0

After:
      vshufps      $13, %ymm0, %ymm1, %ymm1   ## ymm1 = ymm1[1,3],ymm0[0,0],ymm1[5,7],ymm0[4,4]
      vshufps      $13, %ymm0, %ymm0, %ymm0   ## ymm0 = ymm0[1,3,0,0,5,7,4,4]
      vunpcklps    %ymm1, %ymm0, %ymm0        ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159188 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 52 ++++++++++++++++++++++++++++++
 test/CodeGen/X86/avx-shuffle.ll    | 21 ++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ee7a6352732..e84ec2959cf 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3496,6 +3496,53 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
   return true;
 }
 
+//
+// Some special combinations that can be optimized.
+//
+static
+SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
+                               SelectionDAG &DAG) {
+  EVT VT = SVOp->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+  DebugLoc dl = SVOp->getDebugLoc();
+
+  if (VT != MVT::v8i32 && VT != MVT::v8f32)
+    return SDValue();
+
+  ArrayRef<int> Mask = SVOp->getMask();
+
+  // These are the special masks that may be optimized.
+  static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14};
+  static const int MaskToOptimizeOdd[]  = {1, 9, 3, 11, 5, 13, 7, 15};
+  bool MatchEvenMask = true;
+  bool MatchOddMask  = true;
+  for (int i=0; i<8; ++i) {
+    if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i]))
+      MatchEvenMask = false;
+    if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
+      MatchOddMask = false;
+  }
+  static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1};
+  static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1};
+
+  const int *CompactionMask;
+  if (MatchEvenMask)
+    CompactionMask = CompactionMaskEven;
+  else if (MatchOddMask)
+    CompactionMask = CompactionMaskOdd;
+  else
+    return SDValue();
+
+  SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
+
+  SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0),
+                                     UndefNode, CompactionMask);
+  SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1),
+                                     UndefNode, CompactionMask);
+  static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13};
+  return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask);
+}
+
 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to UNPCKL.
 static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
@@ -5982,6 +6029,11 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
 /// which could not be matched by any known target speficic shuffle
 static SDValue
 LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+
+  SDValue NewOp = Compact8x32ShuffleNode(SVOp, DAG);
+  if (NewOp.getNode())
+    return NewOp;
+
   EVT VT = SVOp->getValueType(0);
   unsigned NumElems = VT.getVectorNumElements();
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index a88b0b9cadf..9b41709a3b1 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -227,3 +227,24 @@ define <8 x float> @test17(<4 x float> %y) {
   %x = shufflevector <4 x float> %y, <4 x float> undef, <8 x i32>
   ret <8 x float> %x
 }
+
+; CHECK: test18
+; CHECK: vshufps
+; CHECK: vshufps
+; CHECK: vunpcklps
+; CHECK: ret
+define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind {
+  %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x float>%S
+}
+
+; CHECK: test19
+; CHECK: vshufps
+; CHECK: vshufps
+; CHECK: vunpcklps
+; CHECK: ret
+define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
+  %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x float>%S
+}
+
-- 
2.34.1
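
Why the rewrite preserves the original shuffle semantics: the minimal standalone C++ sketch below is not part of the patch; the mask values are copied from Compact8x32ShuffleNode above, while the composition loop is only an illustration. It composes the even compaction mask with the final unpack mask and prints the combined mask, which reproduces the even-lane pattern {0, 8, 2, 10, 4, 12, 6, 14} that the function matches.

  // Standalone sketch (illustration only, not part of the patch): compose the
  // even compaction mask with the unpack mask used by Compact8x32ShuffleNode
  // and check that the result is the even-lane mask {0,8,2,10,4,12,6,14}.
  #include <cstdio>

  int main() {
    // Mask applied to each operand against an undef vector (-1 = undef lane).
    const int CompactionMaskEven[8] = {0, 2, -1, -1, 4, 6, -1, -1};
    // Interleave mask applied to the two compacted operands; on v8 this is
    // the vunpcklps pattern (lanes 0,1 and 4,5 of each 128-bit half).
    const int UnpackMask[8] = {0, 8, 1, 9, 4, 12, 5, 13};

    // Compose the two shuffles: result lane i takes lane UnpackMask[i] of the
    // concatenated compacted operands; indices 0..7 refer to operand 0 and
    // 8..15 to operand 1.  UnpackMask only selects lanes 0, 1, 4 and 5, which
    // are the defined lanes of the compaction, so no -1 reaches the result.
    int Composed[8];
    for (int i = 0; i < 8; ++i) {
      int U = UnpackMask[i];
      if (U < 8)
        Composed[i] = CompactionMaskEven[U];          // lane of operand 0
      else
        Composed[i] = CompactionMaskEven[U - 8] + 8;  // lane of operand 1
    }

    for (int i = 0; i < 8; ++i)
      std::printf("%d ", Composed[i]);  // prints: 0 8 2 10 4 12 6 14
    std::printf("\n");
    return 0;
  }

Substituting CompactionMaskOdd in the same loop yields {1, 9, 3, 11, 5, 13, 7, 15}, the odd-lane mask, so one pair of in-lane vshufps compactions followed by a single vunpcklps covers both patterns, consistent with the "After" sequence in the commit message and the CHECK lines in test18 and test19.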