From: Simon Pilgrim Date: Tue, 17 Nov 2015 22:35:45 +0000 (+0000) Subject: [X86][AVX512] Added support for AVX512 UNPCK shuffle decode comments. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=ae2a51e7ef60651aa00ee98c7dcceea105b7c070;p=oota-llvm.git [X86][AVX512] Added support for AVX512 UNPCK shuffle decode comments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253391 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index f670e3ec43a..4519fe22ef7 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -147,6 +147,14 @@ static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) { CASE_AVX_INS_COMMON(Inst, Y, r##src) \ CASE_SSE_INS_COMMON(Inst, r##src) \ +#define CASE_UNPCK(Inst, src) \ + CASE_MASK_INS_COMMON(Inst, Z, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z256, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z128, r##src) \ + CASE_AVX_INS_COMMON(Inst, , r##src) \ + CASE_AVX_INS_COMMON(Inst, Y, r##src) \ + CASE_SSE_INS_COMMON(Inst, r##src) \ + #define CASE_VSHUF(Inst, src) \ CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i) \ CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i) \ @@ -423,125 +431,85 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodePSWAPMask(MVT::v2i32, ShuffleMask); break; + CASE_UNPCK(PUNPCKHBW, r) case X86::MMX_PUNPCKHBWirr: - case X86::PUNPCKHBWrr: - case X86::VPUNPCKHBWrr: - case X86::VPUNPCKHBWYrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
+ CASE_UNPCK(PUNPCKHBW, m) case X86::MMX_PUNPCKHBWirm: - case X86::PUNPCKHBWrm: - case X86::VPUNPCKHBWrm: - case X86::VPUNPCKHBWYrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask); break; + CASE_UNPCK(PUNPCKHWD, r) case X86::MMX_PUNPCKHWDirr: - case X86::PUNPCKHWDrr: - case X86::VPUNPCKHWDrr: - case X86::VPUNPCKHWDYrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + CASE_UNPCK(PUNPCKHWD, m) case X86::MMX_PUNPCKHWDirm: - case X86::PUNPCKHWDrm: - case X86::VPUNPCKHWDrm: - case X86::VPUNPCKHWDYrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask); break; + CASE_UNPCK(PUNPCKHDQ, r) case X86::MMX_PUNPCKHDQirr: - case X86::PUNPCKHDQrr: - case X86::VPUNPCKHDQrr: - case X86::VPUNPCKHDQYrr: - case X86::VPUNPCKHDQZrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + CASE_UNPCK(PUNPCKHDQ, m) case X86::MMX_PUNPCKHDQirm: - case X86::PUNPCKHDQrm: - case X86::VPUNPCKHDQrm: - case X86::VPUNPCKHDQYrm: - case X86::VPUNPCKHDQZrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask); break; - case X86::PUNPCKHQDQrr: - case X86::VPUNPCKHQDQrr: - case X86::VPUNPCKHQDQYrr: - case X86::VPUNPCKHQDQZrr: + CASE_UNPCK(PUNPCKHQDQ, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
- case X86::PUNPCKHQDQrm: - case X86::VPUNPCKHQDQrm: - case X86::VPUNPCKHQDQYrm: - case X86::VPUNPCKHQDQZrm: + CASE_UNPCK(PUNPCKHQDQ, m) Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask); break; + CASE_UNPCK(PUNPCKLBW, r) case X86::MMX_PUNPCKLBWirr: - case X86::PUNPCKLBWrr: - case X86::VPUNPCKLBWrr: - case X86::VPUNPCKLBWYrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + CASE_UNPCK(PUNPCKLBW, m) case X86::MMX_PUNPCKLBWirm: - case X86::PUNPCKLBWrm: - case X86::VPUNPCKLBWrm: - case X86::VPUNPCKLBWYrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask); break; + CASE_UNPCK(PUNPCKLWD, r) case X86::MMX_PUNPCKLWDirr: - case X86::PUNPCKLWDrr: - case X86::VPUNPCKLWDrr: - case X86::VPUNPCKLWDYrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + CASE_UNPCK(PUNPCKLWD, m) case X86::MMX_PUNPCKLWDirm: - case X86::PUNPCKLWDrm: - case X86::VPUNPCKLWDrm: - case X86::VPUNPCKLWDYrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask); break; + CASE_UNPCK(PUNPCKLDQ, r) case X86::MMX_PUNPCKLDQirr: - case X86::PUNPCKLDQrr: - case X86::VPUNPCKLDQrr: - case X86::VPUNPCKLDQYrr: - case X86::VPUNPCKLDQZrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
+ CASE_UNPCK(PUNPCKLDQ, m) case X86::MMX_PUNPCKLDQirm: - case X86::PUNPCKLDQrm: - case X86::VPUNPCKLDQrm: - case X86::VPUNPCKLDQYrm: - case X86::VPUNPCKLDQZrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask); break; - case X86::PUNPCKLQDQrr: - case X86::VPUNPCKLQDQrr: - case X86::VPUNPCKLQDQYrr: - case X86::VPUNPCKLQDQZrr: + CASE_UNPCK(PUNPCKLQDQ, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::PUNPCKLQDQrm: - case X86::VPUNPCKLQDQrm: - case X86::VPUNPCKLQDQYrm: - case X86::VPUNPCKLQDQZrm: + CASE_UNPCK(PUNPCKLQDQ, m) Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask); @@ -601,61 +569,37 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; } - case X86::UNPCKLPDrr: - case X86::VUNPCKLPDrr: - case X86::VUNPCKLPDYrr: - case X86::VUNPCKLPDZrr: + CASE_UNPCK(UNPCKLPD, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::UNPCKLPDrm: - case X86::VUNPCKLPDrm: - case X86::VUNPCKLPDYrm: - case X86::VUNPCKLPDZrm: + CASE_UNPCK(UNPCKLPD, m) DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::UNPCKLPSrr: - case X86::VUNPCKLPSrr: - case X86::VUNPCKLPSYrr: - case X86::VUNPCKLPSZrr: + CASE_UNPCK(UNPCKLPS, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
- case X86::UNPCKLPSrm: - case X86::VUNPCKLPSrm: - case X86::VUNPCKLPSYrm: - case X86::VUNPCKLPSZrm: + CASE_UNPCK(UNPCKLPS, m) DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::UNPCKHPDrr: - case X86::VUNPCKHPDrr: - case X86::VUNPCKHPDYrr: - case X86::VUNPCKHPDZrr: + CASE_UNPCK(UNPCKHPD, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::UNPCKHPDrm: - case X86::VUNPCKHPDrm: - case X86::VUNPCKHPDYrm: - case X86::VUNPCKHPDZrm: + CASE_UNPCK(UNPCKHPD, m) DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::UNPCKHPSrr: - case X86::VUNPCKHPSrr: - case X86::VUNPCKHPSYrr: - case X86::VUNPCKHPSZrr: + CASE_UNPCK(UNPCKHPS, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
- case X86::UNPCKHPSrm: - case X86::VUNPCKHPSrm: - case X86::VUNPCKHPSYrm: - case X86::VUNPCKHPSZrm: + CASE_UNPCK(UNPCKHPS, m) DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index a16143f4eea..72d41be12ce 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -3296,7 +3296,9 @@ define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: ## zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7] ; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1) %res2 = fadd <8 x double> %res, %res1 @@ -3310,7 +3312,9 @@ define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: ## zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15] ; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> 
%x1, <16 x float> %x2, i16 -1) %res2 = fadd <16 x float> %res, %res1 @@ -3325,7 +3329,9 @@ define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6] ; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1) %res2 = fadd <8 x double> %res, %res1 @@ -3339,7 +3345,9 @@ define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13] ; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) %res2 = fadd <16 x float> %res, %res1 @@ -3354,9 +3362,12 @@ define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i6 ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 {%k1} {z} -; CHECK-NEXT: vpunpcklqdq {{.*#+}} -; CHECK: vpaddq %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: ## zmm2 = 
zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6] +; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: ## zmm3 = k1[0],zmm0[0],k1[2],zmm0[2],k1[4],zmm0[4],k1[6],zmm0[6] +; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 ; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) @@ -3375,8 +3386,10 @@ define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i6 ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vpunpckhqdq {{.*#+}} -; CHECK: vpaddq %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: ## zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7] +; CHECK-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) @@ -3391,8 +3404,10 @@ define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vpunpckhdq {{.*#+}} -; CHECK: vpaddd %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: ## zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15] +; CHECK-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; CHECK-NEXT: vpaddd %zmm0, 
%zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) @@ -3407,8 +3422,10 @@ define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vpunpckldq {{.*#+}} -; CHECK: vpaddd %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13] +; CHECK-NEXT: vpunpckldq %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index 6376657cf16..9e702be1ef4 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1125,7 +1125,9 @@ define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovq %rdi, %k1 ; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: ## zmm2 = 
zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63] ; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] ; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) @@ -1141,7 +1143,9 @@ define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovq %rdi, %k1 ; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: ## zmm2 = 
zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55] ; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] ; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) @@ -1157,7 +1161,9 @@ define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: ## zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31] ; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = 
zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] ; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) @@ -1173,7 +1179,9 @@ define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27] ; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27] ; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 733cb01e7b0..713f1c9782c 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -4104,7 +4104,9 @@ declare <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8>, <16 x i8>, <16 define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_128: ; CHECK: vpunpckhbw %xmm1, %xmm0, %xmm2 
{%k1} +; CHECK-NEXT: ## xmm2 = xmm2[8],k1[8],xmm2[9],k1[9],xmm2[10],k1[10],xmm2[11],k1[11],xmm2[12],k1[12],xmm2[13],k1[13],xmm2[14],k1[14],xmm2[15],k1[15] ; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x68,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) %res2 = add <16 x i8> %res, %res1 @@ -4116,7 +4118,9 @@ declare <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8>, <16 x i8>, <16 define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_128: ; CHECK: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1],xmm2[2],k1[2],xmm2[3],k1[3],xmm2[4],k1[4],xmm2[5],k1[5],xmm2[6],k1[6],xmm2[7],k1[7] ; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x60,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) %res2 = add <16 x i8> %res, %res1 @@ -4128,7 +4132,9 @@ declare <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8>, <32 x i8>, <32 define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_256: ; CHECK: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = 
ymm2[8],k1[8],ymm2[9],k1[9],ymm2[10],k1[10],ymm2[11],k1[11],ymm2[12],k1[12],ymm2[13],k1[13],ymm2[14],k1[14],ymm2[15],k1[15],ymm2[24],k1[24],ymm2[25],k1[25],ymm2[26],k1[26],ymm2[27],k1[27],ymm2[28],k1[28],ymm2[29],k1[29],ymm2[30],k1[30],ymm2[31],k1[31] ; CHECK-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x68,0xc1] +; CHECK-NEXT: ## ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) %res2 = add <32 x i8> %res, %res1 @@ -4140,7 +4146,9 @@ declare <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8>, <32 x i8>, <32 define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_256: ; CHECK: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[2],k1[2],ymm2[3],k1[3],ymm2[4],k1[4],ymm2[5],k1[5],ymm2[6],k1[6],ymm2[7],k1[7],ymm2[16],k1[16],ymm2[17],k1[17],ymm2[18],k1[18],ymm2[19],k1[19],ymm2[20],k1[20],ymm2[21],k1[21],ymm2[22],k1[22],ymm2[23],k1[23] ; CHECK-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x60,0xc1] +; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 
= call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) %res2 = add <32 x i8> %res, %res1 @@ -4152,7 +4160,9 @@ declare <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16>, <8 x i16>, <8 define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_128: ; CHECK: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1],xmm2[2],k1[2],xmm2[3],k1[3] ; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x61,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) %res2 = add <8 x i16> %res, %res1 @@ -4164,7 +4174,9 @@ declare <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16>, <8 x i16>, <8 define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_128: ; CHECK: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: ## xmm2 = xmm2[4],k1[4],xmm2[5],k1[5],xmm2[6],k1[6],xmm2[7],k1[7] ; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x69,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) %res2 = add <8 x i16> %res, %res1 @@ -4176,7 +4188,9 @@ declare <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16>, <16 x i16>, define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> 
%x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_256: ; CHECK: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[2],k1[2],ymm2[3],k1[3],ymm2[8],k1[8],ymm2[9],k1[9],ymm2[10],k1[10],ymm2[11],k1[11] ; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x61,0xc1] +; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) %res2 = add <16 x i16> %res, %res1 @@ -4188,7 +4202,9 @@ declare <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16>, <16 x i16>, define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_256: ; CHECK: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = ymm2[4],k1[4],ymm2[5],k1[5],ymm2[6],k1[6],ymm2[7],k1[7],ymm2[12],k1[12],ymm2[13],k1[13],ymm2[14],k1[14],ymm2[15],k1[15] ; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x69,0xc1] +; CHECK-NEXT: ## ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) %res2 = add <16 x i16> %res, %res1 diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll index bc51af03e4e..686a06f23fe 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics.ll +++ 
b/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -3010,7 +3010,9 @@ declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x doub define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128: ; CHECK: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: ## xmm2 = xmm2[1],k1[1] ; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[1],xmm1[1] %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) %res2 = fadd <2 x double> %res, %res1 @@ -3022,7 +3024,9 @@ declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x doub define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_256: ; CHECK: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = ymm2[1],k1[1],ymm2[3],k1[3] ; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x15,0xc1] +; CHECK-NEXT: ## ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) %res2 = fadd <4 x double> %res, %res1 @@ -3034,7 +3038,9 @@ declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float> define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_128: ; CHECK: vunpckhps %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: ## xmm2 = 
xmm2[2],k1[2],xmm2[3],k1[3] ; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x15,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) %res2 = fadd <4 x float> %res, %res1 @@ -3047,7 +3053,9 @@ define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256: ; CHECK: ## BB#0: ; CHECK: vunpckhps %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = ymm2[2],k1[2],ymm2[3],k1[3],ymm2[6],k1[6],ymm2[7],k1[7] ; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xc1] +; CHECK-NEXT: ## ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) %res2 = fadd <8 x float> %res, %res1 @@ -3059,7 +3067,9 @@ declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x doub define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_128: ; CHECK: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0] ; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x14,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0] %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) %res2 = fadd <2 x double> %res, %res1 @@ -3071,7 
+3081,9 @@ declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x doub define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_256: ; CHECK: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[2],k1[2] ; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x14,0xc1] +; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) %res2 = fadd <4 x double> %res, %res1 @@ -3083,7 +3095,9 @@ declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float> define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_128: ; CHECK: vunpcklps %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1] ; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x14,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) %res2 = fadd <4 x float> %res, %res1 @@ -3095,7 +3109,9 @@ declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float> define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_256: ; CHECK: vunpcklps %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[4],k1[4],ymm2[5],k1[5] ; 
CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x14,0xc1] +; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) %res2 = fadd <8 x float> %res, %res1 @@ -3107,7 +3123,9 @@ declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_128: ; CHECK: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: ## xmm2 = xmm2[2],k1[2],xmm2[3],k1[3] ; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6a,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) %res2 = add <4 x i32> %res, %res1 @@ -3119,7 +3137,9 @@ declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_128: ; CHECK: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1] ; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x62,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) %res2 = add <4 x i32> %res, %res1 
@@ -3132,7 +3152,9 @@ define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_256: ; CHECK: ## BB#0: ; CHECK: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = ymm2[2],k1[2],ymm2[3],k1[3],ymm2[6],k1[6],ymm2[7],k1[7] ; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6a,0xc1] +; CHECK-NEXT: ## ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) %res2 = add <8 x i32> %res, %res1 @@ -3144,7 +3166,9 @@ declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_256: ; CHECK: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[4],k1[4],ymm2[5],k1[5] ; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x62,0xc1] +; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) %res2 = add <8 x i32> %res, %res1 @@ -3156,7 +3180,9 @@ declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128: ; CHECK: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: ## xmm2 = xmm2[1],k1[1] ; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm0 ## encoding: 
[0x62,0xf1,0xfd,0x08,0x6d,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[1],xmm1[1] %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) %res2 = add <2 x i64> %res, %res1 @@ -3168,7 +3194,9 @@ declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128: ; CHECK: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0] ; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6c,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0] %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) %res2 = add <2 x i64> %res, %res1 @@ -3180,7 +3208,9 @@ declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256: ; CHECK: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[2],k1[2] ; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6c,0xc1] +; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) %res2 = add <4 x i64> %res, %res1 @@ -3192,7 +3222,9 @@ declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 define <4 x 
i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256: ; CHECK: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: ## ymm2 = ymm2[1],k1[1],ymm2[3],k1[3] ; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6d,0xc1] +; CHECK-NEXT: ## ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) %res2 = add <4 x i64> %res, %res1 diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll index ad33ef7645b..a235ccad5d6 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v2.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -328,20 +328,10 @@ define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) { ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; -; AVX1-LABEL: shuffle_v2i64_02: -; AVX1: # BB#0: -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2i64_02: -; AVX2: # BB#0: -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2i64_02: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2i64_02: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -352,20 +342,10 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64 ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: shuffle_v2i64_02_copy: -; AVX1: # BB#0: -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2i64_02_copy: -; AVX2: # 
BB#0: -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2i64_02_copy: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm0 -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2i64_02_copy: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -520,20 +500,10 @@ define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) { ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: retq ; -; AVX1-LABEL: shuffle_v2i64_13: -; AVX1: # BB#0: -; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2i64_13: -; AVX2: # BB#0: -; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2i64_13: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2i64_13: +; AVX: # BB#0: +; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -544,20 +514,10 @@ define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64 ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: shuffle_v2i64_13_copy: -; AVX1: # BB#0: -; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2i64_13_copy: -; AVX2: # BB#0: -; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2i64_13_copy: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpunpckhqdq %xmm2, %xmm1, %xmm0 -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2i64_13_copy: +; AVX: # BB#0: +; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -568,20 +528,10 @@ 
define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) { ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: shuffle_v2i64_20: -; AVX1: # BB#0: -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2i64_20: -; AVX2: # BB#0: -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2i64_20: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2i64_20: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -592,20 +542,10 @@ define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64 ; SSE-NEXT: movdqa %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: shuffle_v2i64_20_copy: -; AVX1: # BB#0: -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2i64_20_copy: -; AVX2: # BB#0: -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2i64_20_copy: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm2, %xmm0 -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2i64_20_copy: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -758,20 +698,10 @@ define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) { ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: shuffle_v2i64_31: -; AVX1: # BB#0: -; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2i64_31: -; AVX2: # BB#0: -; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2i64_31: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpunpckhqdq %xmm0, %xmm1, %xmm0 
-; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2i64_31: +; AVX: # BB#0: +; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -782,20 +712,10 @@ define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64 ; SSE-NEXT: movdqa %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: shuffle_v2i64_31_copy: -; AVX1: # BB#0: -; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2i64_31_copy: -; AVX2: # BB#0: -; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2i64_31_copy: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpunpckhqdq %xmm1, %xmm2, %xmm0 -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2i64_31_copy: +; AVX: # BB#0: +; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -944,7 +864,7 @@ define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) { ; AVX512VL-LABEL: shuffle_v2f64_1z: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; AVX512VL-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> ret <2 x double> %shuffle @@ -973,7 +893,7 @@ define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) { ; AVX512VL-LABEL: shuffle_v2f64_z0: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vunpcklpd %xmm0, %xmm1, %xmm0 +; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> ret <2 x double> %shuffle @@ -1295,23 +1215,11 @@ define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) { ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; 
-; AVX1-LABEL: insert_reg_hi_v2i64: -; AVX1: # BB#0: -; AVX1-NEXT: vmovq %rdi, %xmm1 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX1-NEXT: retq -; -; AVX2-LABEL: insert_reg_hi_v2i64: -; AVX2: # BB#0: -; AVX2-NEXT: vmovq %rdi, %xmm1 -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: insert_reg_hi_v2i64: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovq %rdi, %xmm1 -; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: retq +; AVX-LABEL: insert_reg_hi_v2i64: +; AVX: # BB#0: +; AVX-NEXT: vmovq %rdi, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: retq %v = insertelement <2 x i64> undef, i64 %a, i32 0 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle @@ -1339,7 +1247,7 @@ define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) { ; AVX512VL-LABEL: insert_mem_hi_v2i64: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vmovq (%rdi), %xmm1 -; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: retq %a = load i64, i64* %ptr %v = insertelement <2 x i64> undef, i64 %a, i32 0 @@ -1396,20 +1304,10 @@ define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) { ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: insert_reg_hi_v2f64: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: retq -; -; AVX2-LABEL: insert_reg_hi_v2f64: -; AVX2: # BB#0: -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: insert_reg_hi_v2f64: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vunpcklpd %xmm0, %xmm1, %xmm0 -; AVX512VL-NEXT: retq +; AVX-LABEL: insert_reg_hi_v2f64: +; AVX: # BB#0: +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: retq %v = insertelement <2 x double> undef, double %a, i32 0 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle 
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll index 3d9661084e9..b4b5c7085d8 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -308,77 +308,37 @@ define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) { } define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_0426: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_0426: -; AVX2: # BB#0: -; AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4f64_0426: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vunpcklpd %ymm1, %ymm0, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_0426: +; ALL: # BB#0: +; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle } define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_1537: -; AVX1: # BB#0: -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_1537: -; AVX2: # BB#0: -; AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4f64_1537: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vunpckhpd %ymm1, %ymm0, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_1537: +; ALL: # BB#0: +; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle } define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_4062: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = 
ymm1[0],ymm0[0],ymm1[2],ymm0[2] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_4062: -; AVX2: # BB#0: -; AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4f64_4062: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vunpcklpd %ymm0, %ymm1, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_4062: +; ALL: # BB#0: +; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle } define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_5173: -; AVX1: # BB#0: -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_5173: -; AVX2: # BB#0: -; AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4f64_5173: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vunpckhpd %ymm0, %ymm1, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_5173: +; ALL: # BB#0: +; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle } @@ -581,20 +541,10 @@ define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) { } define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_u062: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_u062: -; AVX2: # BB#0: -; AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4f64_u062: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vunpcklpd %ymm0, %ymm1, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_u062: +; ALL: # BB#0: +; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = 
ymm1[0],ymm0[0],ymm1[2],ymm0[2] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle } @@ -1189,7 +1139,7 @@ define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) { ; ; AVX512VL-LABEL: shuffle_v4i64_40u2: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpunpcklqdq %ymm0, %ymm1, %ymm0 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] ; AVX512VL-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %shuffle @@ -1446,7 +1396,7 @@ define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) { ; ; AVX512VL-LABEL: bitcast_v4f64_0426: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; AVX512VL-NEXT: retq %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>