From 5dff2189414a3050580496026a9c0095634f6933 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 17 Nov 2015 23:29:49 +0000 Subject: [PATCH] [X86][AVX512] Added AVX512 SHUFP*/VPERMILP* shuffle decode comments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253396 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/InstPrinter/X86InstComments.cpp | 43 +++--- test/CodeGen/X86/avx512-intrinsics.ll | 41 ++++-- test/CodeGen/X86/avx512vl-intrinsics.ll | 29 +++- test/CodeGen/X86/vector-shuffle-128-v2.ll | 56 ++------ test/CodeGen/X86/vector-shuffle-256-v4.ll | 136 +++++------------- 5 files changed, 119 insertions(+), 186 deletions(-) diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 4519fe22ef7..e3f59fb3bfd 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -155,6 +155,21 @@ static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) { CASE_AVX_INS_COMMON(Inst, Y, r##src) \ CASE_SSE_INS_COMMON(Inst, r##src) \ +#define CASE_SHUF(Inst, src) \ + CASE_MASK_INS_COMMON(Inst, Z, r##src##i) \ + CASE_MASK_INS_COMMON(Inst, Z256, r##src##i) \ + CASE_MASK_INS_COMMON(Inst, Z128, r##src##i) \ + CASE_AVX_INS_COMMON(Inst, , r##src##i) \ + CASE_AVX_INS_COMMON(Inst, Y, r##src##i) \ + CASE_SSE_INS_COMMON(Inst, r##src##i) \ + +#define CASE_VPERM(Inst, src) \ + CASE_MASK_INS_COMMON(Inst, Z, src##i) \ + CASE_MASK_INS_COMMON(Inst, Z256, src##i) \ + CASE_MASK_INS_COMMON(Inst, Z128, src##i) \ + CASE_AVX_INS_COMMON(Inst, , src##i) \ + CASE_AVX_INS_COMMON(Inst, Y, src##i) \ + #define CASE_VSHUF(Inst, src) \ CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i) \ CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i) \ @@ -515,14 +530,10 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask); break; - case X86::SHUFPDrri: - case X86::VSHUFPDrri: - case X86::VSHUFPDYrri: + CASE_SHUF(SHUFPD, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::SHUFPDrmi: - case X86::VSHUFPDrmi: - case X86::VSHUFPDYrmi: + CASE_SHUF(SHUFPD, m) if (MI->getOperand(MI->getNumOperands() - 1).isImm()) DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f64, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), @@ -531,14 +542,10 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::SHUFPSrri: - case X86::VSHUFPSrri: - case X86::VSHUFPSYrri: + CASE_SHUF(SHUFPS, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::SHUFPSrmi: - case X86::VSHUFPSrmi: - case X86::VSHUFPSYrmi: + CASE_SHUF(SHUFPS, m) if (MI->getOperand(MI->getNumOperands() - 1).isImm()) DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f32, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), @@ -605,12 +612,10 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::VPERMILPSri: - case X86::VPERMILPSYri: + CASE_VPERM(PERMILPS, r) Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. 
- case X86::VPERMILPSmi: - case X86::VPERMILPSYmi: + CASE_VPERM(PERMILPS, m) if (MI->getOperand(MI->getNumOperands() - 1).isImm()) DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f32, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), @@ -618,12 +623,10 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::VPERMILPDri: - case X86::VPERMILPDYri: + CASE_VPERM(PERMILPD, r) Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. - case X86::VPERMILPDmi: - case X86::VPERMILPDYmi: + CASE_VPERM(PERMILPD, m) if (MI->getOperand(MI->getNumOperands() - 1).isImm()) DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f64, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 72d41be12ce..ef5199bbf8d 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -4399,8 +4399,11 @@ define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: ## zmm2 = zmm2[0],k1[1],zmm2[3],k1[2],zmm2[5],k1[4],zmm2[6],k1[6] ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: ## zmm3 = k1[0],zmm0[1],k1[3],zmm0[2],k1[5],zmm0[4],k1[6],zmm0[6] ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0 ; CHECK-NEXT: retq @@ -4420,11 +4423,13 @@ define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: ## zmm2 = zmm2[2,1],k1[1,0],zmm2[6,5],k1[5,4],zmm2[10,9],k1[9,8],zmm2[14,13],k1[13,12] ; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12] ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4) - %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1) + %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1) %res2 = fadd <16 x float> %res, %res1 ret <16 x float> %res2 } @@ -4435,13 +4440,16 @@ define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512: ; CHECK: ## BB#0: ; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm0 -; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1 -; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm1 {%k1} +; CHECK-NEXT: ## zmm1 = zmm1[0,1,3,2,5,4,6,6] +; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm2 {%k1} {z} +; CHECK-NEXT: ## zmm2 = k1[0,1,3,2,5,4,6,6] +; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[0,1,3,2,5,4,6,6] +; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1 +; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq 
%res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3) %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3) %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1) @@ -4455,13 +4463,16 @@ declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpermilps $22, %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vpermilps $22, %zmm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpermilps $22, %zmm0, %zmm0 -; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1 -; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpermilps $22, %zmm0, %zmm1 {%k1} +; CHECK-NEXT: ## zmm1 = zmm1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] +; CHECK-NEXT: vpermilps $22, %zmm0, %zmm2 {%k1} {z} +; CHECK-NEXT: ## zmm2 = k1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] +; CHECK-NEXT: vpermilps $22, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] +; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1 +; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3) %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3) %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1) diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll index 686a06f23fe..839ae2c5eb2 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -4726,9 +4726,12 @@ define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: ## xmm2 = xmm2[0],k1[1] ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z} +; CHECK-NEXT: ## xmm3 = k1[0],xmm0[1] ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm0 -; CHECK: vaddpd %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[1] +; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 %x4) @@ -4747,8 +4750,10 @@ define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = ymm2[0],k1[1],ymm2[3],k1[2] ; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm0 -; CHECK: vaddpd %ymm0, %ymm2, %ymm0 +; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[2] +; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: retq %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4) %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1) @@ -4764,8 +4769,10 @@ define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x fl ; CHECK-NEXT: movzbl 
%dil, %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: ## xmm2 = xmm2[2,1],k1[1,0] ; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0 -; CHECK: vaddps %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: ## xmm0 = xmm0[2,1],xmm1[1,0] +; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4) %res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1) @@ -4781,8 +4788,10 @@ define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x fl ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = ymm2[2,1],k1[1,0],ymm2[6,5],k1[5,4] ; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0 -; CHECK: vaddps %ymm0, %ymm2, %ymm0 +; CHECK-NEXT: ## ymm0 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4] +; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: retq %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4) %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1) @@ -4870,8 +4879,11 @@ define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1} +; CHECK-NEXT: ## ymm1 = ymm1[0,1,3,2] ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z} +; CHECK-NEXT: ## ymm2 = k1[0,1,3,2] ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0 +; CHECK-NEXT: ## ymm0 = ymm0[0,1,3,2] ; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq @@ -4891,8 +4903,11 @@ define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} +; CHECK-NEXT: ## xmm1 = xmm1[1,0] ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: ## xmm2 = k1[1,0] ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0 +; CHECK-NEXT: ## xmm0 = xmm0[1,0] ; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -4912,8 +4927,11 @@ define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} +; CHECK-NEXT: ## ymm1 = ymm1[2,1,1,0,6,5,5,4] ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z} +; CHECK-NEXT: ## ymm2 = k1[2,1,1,0,6,5,5,4] ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 +; CHECK-NEXT: ## ymm0 = ymm0[2,1,1,0,6,5,5,4] ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -4933,8 +4951,11 @@ define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} +; CHECK-NEXT: ## xmm1 = xmm1[2,1,1,0] ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: ## xmm2 = k1[2,1,1,0] ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 +; CHECK-NEXT: ## xmm0 = xmm0[2,1,1,0] ; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll index a235ccad5d6..c81ea51f21e 
100644 --- a/test/CodeGen/X86/vector-shuffle-128-v2.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -142,20 +142,10 @@ define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) { ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] ; SSE-NEXT: retq ; -; AVX1-LABEL: shuffle_v2f64_10: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2f64_10: -; AVX2: # BB#0: -; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2f64_10: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpermilpd $1, %xmm0, %xmm0 -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2f64_10: +; AVX: # BB#0: +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle @@ -219,20 +209,10 @@ define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) { ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: shuffle_v2f64_32: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2f64_32: -; AVX2: # BB#0: -; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2f64_32: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpermilpd $1, %xmm1, %xmm0 -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2f64_32: +; AVX: # BB#0: +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0] +; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle @@ -955,7 +935,7 @@ define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) { ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; AVX512VL-NEXT: retq %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float> @@ -1437,20 +1417,10 @@ define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) { ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] ; SSE-NEXT: retq ; -; AVX1-LABEL: shuffle_mem_v2f64_10: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_mem_v2f64_10: -; AVX2: # BB#0: -; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_mem_v2f64_10: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpermilpd $1, (%rdi), %xmm0 -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_mem_v2f64_10: +; AVX: # BB#0: +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0] +; AVX-NEXT: retq %a = load <2 x double>, <2 x double>* %ptr %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll index b4b5c7085d8..7e2879c7c43 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -170,20 +170,10 @@ define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) { } define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_0023: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_0023: -; AVX2: # BB#0: -; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4f64_0023: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpermilpd 
$8, %ymm0, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_0023: +; ALL: # BB#0: +; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle @@ -199,77 +189,37 @@ define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) { } define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_1032: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_1032: -; AVX2: # BB#0: -; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4f64_1032: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_1032: +; ALL: # BB#0: +; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle } define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_1133: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_1133: -; AVX2: # BB#0: -; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4f64_1133: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpermilpd $15, %ymm0, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_1133: +; ALL: # BB#0: +; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle } define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_1023: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_1023: -; AVX2: # BB#0: -; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4f64_1023: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpermilpd $9, %ymm0, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_1023: +; ALL: # BB#0: +; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle } define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_1022: -; AVX1: # BB#0: -; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_1022: -; AVX2: # BB#0: -; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4f64_1022: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpermilpd $1, %ymm0, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_1022: +; ALL: # BB#0: +; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle } @@ -344,20 +294,10 @@ define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) { } define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_5163: -; AVX1: # BB#0: -; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_5163: -; AVX2: # BB#0: -; AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: 
shuffle_v4f64_5163: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshufpd $11, %ymm0, %ymm1, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_5163: +; ALL: # BB#0: +; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle } @@ -443,7 +383,7 @@ define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) { ; AVX512VL-LABEL: shuffle_v4f64_1054: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0 +; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] ; AVX512VL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle @@ -465,7 +405,7 @@ define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) { ; AVX512VL-LABEL: shuffle_v4f64_3254: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] -; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0 +; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] ; AVX512VL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle @@ -487,30 +427,18 @@ define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) { ; AVX512VL-LABEL: shuffle_v4f64_3276: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] -; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0 +; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] ; AVX512VL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle } define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_1076: -; AVX1: # BB#0: -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] -; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_1076: -; AVX2: # BB#0: -; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] -; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4f64_1076: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] -; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_1076: +; ALL: # BB#0: +; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] +; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle } -- 2.34.1
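
Note for readers of the generated comments: the new CASE_SHUF/CASE_VPERM macros fold the existing SSE/AVX/AVX2 case lists together with the AVX512 Z128/Z256/Z EVEX variants via the pre-existing CASE_MASK_INS_COMMON helper (already used by CASE_VSHUF above), so a single DecodeSHUFPMask/DecodePSHUFMask call now serves every vector width. The sketch below is a minimal standalone illustration of the decode itself, not LLVM's implementation; the per-element bit consumption is inferred from the test expectations in this patch, and the function names (decodeShufPd, decodeVPermilPd) are hypothetical.

// Minimal standalone sketch (assumed semantics, not LLVM's DecodeSHUFPMask /
// DecodePSHUFMask): reconstructs the "##" shuffle comments asserted in the
// tests above from a SHUFPD/VPERMILPD-style immediate.
#include <cstdio>
#include <string>

// SHUFPD: one immediate bit per f64 element (LSB first); within each
// 128-bit lane the even destination element reads src1, the odd one src2.
static std::string decodeShufPd(unsigned NumElts, unsigned Imm,
                                const char *Src1, const char *Src2) {
  std::string Comment;
  for (unsigned I = 0; I != NumElts; ++I) {
    unsigned LaneBase = I & ~1u;              // first element of this lane
    unsigned Bit = (Imm >> I) & 1;            // selector bit for element I
    const char *Src = (I & 1) ? Src2 : Src1;  // alternate sources per lane
    if (I)
      Comment += ",";
    Comment += Src;
    Comment += "[" + std::to_string(LaneBase + Bit) + "]";
  }
  return Comment;
}

// VPERMILPD: same bit consumption, single source, so the comment collapses
// to one bracketed index list.
static std::string decodeVPermilPd(unsigned NumElts, unsigned Imm,
                                   const char *Src) {
  std::string Comment = std::string(Src) + "[";
  for (unsigned I = 0; I != NumElts; ++I) {
    if (I)
      Comment += ",";
    Comment += std::to_string((I & ~1u) + ((Imm >> I) & 1));
  }
  return Comment + "]";
}

int main() {
  // Prints: zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
  std::printf("%s\n", decodeShufPd(8, 22, "zmm0", "zmm1").c_str());
  // Prints: zmm0[0,1,3,2,5,4,6,6]
  std::printf("%s\n", decodeVPermilPd(8, 22, "zmm0").c_str());
  return 0;
}

Compiled and run, decodeShufPd(8, 22, "zmm0", "zmm1") reproduces the exact string asserted in test_int_x86_avx512_mask_shuf_pd_512, and decodeVPermilPd(8, 22, "zmm0") matches the vpermil.pd.512 expectation. SHUFPS differs only in its immediate layout: four 2-bit element selectors, with the same 8-bit immediate reused for every 128-bit lane, which is why the vshufps comments repeat the [2,1]/[1,0] pattern across lanes.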