From 798f2849c39701276caad261ce3a66e21c567878 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Tue, 23 Sep 2014 22:14:14 +0000
Subject: [PATCH] [x86] Teach the rest of the 'target shuffle' machinery about
 blends and add VPBLENDD to the InstPrinter's comment generation so we get
 nice comments everywhere.

Now that we have the nice comments, I can see the bug introduced by a
silly typo in the commit that enabled VPBLENDD, and have fixed it. Yay
tests that are easy to inspect.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218335 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../X86/InstPrinter/X86InstComments.cpp    | 24 +++++++++++++++++++
 lib/Target/X86/X86ISelLowering.cpp         |  7 +++++-
 test/CodeGen/X86/vector-shuffle-256-v16.ll | 12 +++++-----
 test/CodeGen/X86/vector-shuffle-256-v32.ll | 12 +++++-----
 4 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 472dbfa62f8..fc2932b181f 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -103,6 +103,30 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
 
+  case X86::VPBLENDDrri:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VPBLENDDrmi:
+    if(MI->getOperand(MI->getNumOperands()-1).isImm())
+      DecodeBLENDMask(MVT::v4i32,
+                      MI->getOperand(MI->getNumOperands()-1).getImm(),
+                      ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::VPBLENDDYrri:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VPBLENDDYrmi:
+    if(MI->getOperand(MI->getNumOperands()-1).isImm())
+      DecodeBLENDMask(MVT::v8i32,
+                      MI->getOperand(MI->getNumOperands()-1).getImm(),
+                      ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
   case X86::INSERTPSrr:
   case X86::VINSERTPSrr:
     DestName = getRegName(MI->getOperand(0).getReg());
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b7c1c1d8be5..0f0341e8ace 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3539,6 +3539,7 @@ static bool MayFoldIntoStore(SDValue Op) {
 static bool isTargetShuffle(unsigned Opcode) {
   switch(Opcode) {
   default: return false;
+  case X86ISD::BLENDI:
   case X86ISD::PSHUFB:
   case X86ISD::PSHUFD:
   case X86ISD::PSHUFHW:
@@ -5288,6 +5289,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
   IsUnary = false;
   bool IsFakeUnary = false;
   switch(N->getOpcode()) {
+  case X86ISD::BLENDI:
+    ImmN = N->getOperand(N->getNumOperands()-1);
+    DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+    break;
   case X86ISD::SHUFP:
     ImmN = N->getOperand(N->getNumOperands()-1);
     DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
@@ -7270,7 +7275,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
   // If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
   // that instruction.
   if (Subtarget->hasAVX2()) {
-    int Scale = 8 / VT.getVectorNumElements();
+    int Scale = 4 / VT.getVectorNumElements();
     BlendMask = 0;
     for (int i = 0, Size = Mask.size(); i < Size; ++i)
       if (Mask[i] >= Size)
diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll
index ea9289e3849..12e84f08607 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -716,9 +716,9 @@ define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_1
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
 ; AVX2-NEXT:    vpshuflw {{.*}} # xmm3 = xmm3[0,0,0,0,4,5,6,7]
-; AVX2-NEXT:    vpblendd $-16, %xmm2, %xmm3, %xmm2
+; AVX2-NEXT:    vpblendd {{.*}} # xmm2 = xmm3[0,1],xmm2[2,3]
 ; AVX2-NEXT:    vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX2-NEXT:    vpblendd $-16, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
@@ -745,10 +745,10 @@ define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_1
 ; AVX2-NEXT:    vpshufhw {{.*}} # xmm2 = xmm2[0,1,2,3,7,6,5,4]
 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
 ; AVX2-NEXT:    vpshuflw {{.*}} # xmm3 = xmm3[3,2,1,0,4,5,6,7]
-; AVX2-NEXT:    vpblendd $-16, %xmm2, %xmm3, %xmm2
+; AVX2-NEXT:    vpblendd {{.*}} # xmm2 = xmm3[0,1],xmm2[2,3]
 ; AVX2-NEXT:    vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
 ; AVX2-NEXT:    vpshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
-; AVX2-NEXT:    vpblendd $-16, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
@@ -778,11 +778,11 @@ define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
 ; AVX2-NEXT:    vpshufd {{.*}} # xmm3 = xmm3[0,1,0,1]
 ; AVX2-NEXT:    vpshufhw {{.*}} # xmm3 = xmm3[0,1,2,3,7,6,5,4]
-; AVX2-NEXT:    vpblendd $-16, %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vpblendd {{.*}} # xmm2 = xmm2[0,1],xmm3[2,3]
 ; AVX2-NEXT:    vpshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
 ; AVX2-NEXT:    vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
 ; AVX2-NEXT:    vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; AVX2-NEXT:    vpblendd $-16, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll
index 5dec202813f..3a5c89455d5 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -1138,11 +1138,11 @@ define <32 x i8> @shuffle_v32i8_32_32_32_32_32_32_32_32_08_08_10_10_12_12_14_14_
 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm4
 ; AVX2-NEXT:    vpunpcklbw {{.*}} # xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX2-NEXT:    vpshuflw {{.*}} # xmm4 = xmm4[0,0,0,0,4,5,6,7]
-; AVX2-NEXT:    vpblendd $-16, %xmm2, %xmm4, %xmm2
+; AVX2-NEXT:    vpblendd {{.*}} # xmm2 = xmm4[0,1],xmm2[2,3]
 ; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpunpcklbw {{.*}} # xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX2-NEXT:    vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX2-NEXT:    vpblendd $-16, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
@@ -1173,10 +1173,10 @@ define <32 x i8> @shuffle_v32i8_38_38_36_36_34_34_32_32_14_14_12_12_10_10_08_08_
 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm4
 ; AVX2-NEXT:    vmovdqa .LCPI50_1(%rip), %xmm5
 ; AVX2-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
-; AVX2-NEXT:    vpblendd $-16, %xmm2, %xmm4, %xmm2
+; AVX2-NEXT:    vpblendd {{.*}} # xmm2 = xmm4[0,1],xmm2[2,3]
 ; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
-; AVX2-NEXT:    vpblendd $-16, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
@@ -1207,10 +1207,10 @@ define <32 x i8> @shuffle_v32i8_38_38_36_36_34_34_32_32_06_06_04_04_02_02_00_00_
 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm4
 ; AVX2-NEXT:    vmovdqa .LCPI51_1(%rip), %xmm5
 ; AVX2-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
-; AVX2-NEXT:    vpblendd $-16, %xmm2, %xmm4, %xmm2
+; AVX2-NEXT:    vpblendd {{.*}} # xmm2 = xmm4[0,1],xmm2[2,3]
 ; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
-; AVX2-NEXT:    vpblendd $-16, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
-- 
2.34.1
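
For readers decoding the new assembly comments above: DecodeBLENDMask turns a
VPBLENDD immediate into the generic "target shuffle" mask convention, where an
index >= NumElts selects from the second source and a lower index selects from
the first. Below is a minimal stand-alone sketch of that decoding; the helper
name decodeBlendImm is illustrative only and is not the in-tree LLVM API.

#include <cstdio>
#include <vector>

// Bit i of the immediate set -> take element i of the second source
// (index i + NumElts); bit clear -> keep element i of the first source.
static std::vector<int> decodeBlendImm(unsigned NumElts, unsigned Imm) {
  std::vector<int> Mask;
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(((Imm >> i) & 1) ? int(i + NumElts) : int(i));
  return Mask;
}

int main() {
  // vpblendd $12 on v4i32: lanes 0,1 from the first source, lanes 2,3 from
  // the second, i.e. the mask 0,1,6,7 that prints as dst = src1[0,1],src2[2,3].
  for (int Elt : decodeBlendImm(4, 12))
    std::printf("%d ", Elt);
  std::printf("\n");
  return 0;
}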
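The "silly typo" called out in the commit message sits in the Scale computation
of lowerVectorShuffleAsBlend: when a blend of elements wider than 32 bits is
emitted as VPBLENDD, each element must own one immediate bit per 32-bit dword
it covers. The sketch below assumes the scale is derived directly from the
element width; the helper buildVPBLENDDImm and its signature are illustrative,
not the in-tree code, which computes the scale from the vector type as shown in
the hunk above.

#include <cstdio>

// Expand a blend mask over NumElts elements of EltBits bits each into a
// VPBLENDD immediate: a mask entry >= NumElts (taken from the second source)
// sets all Scale dword bits that the element occupies.
static unsigned buildVPBLENDDImm(const int *Mask, int NumElts, int EltBits) {
  int Scale = EltBits / 32;          // dword lanes covered by one element
  unsigned Imm = 0;
  for (int i = 0; i != NumElts; ++i)
    if (Mask[i] >= NumElts)          // element comes from the second source
      for (int j = 0; j != Scale; ++j)
        Imm |= 1u << (i * Scale + j);
  return Imm;
}

int main() {
  // v2i64 blend <0, 3>: element 0 from V1, element 1 from V2 -> imm 0xc,
  // i.e. the two high dwords of the 128-bit result come from the second source.
  int Mask[] = {0, 3};
  std::printf("0x%x\n", buildVPBLENDDImm(Mask, 2, 64));
  return 0;
}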