X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=test%2FCodeGen%2FX86%2Favx512bw-intrinsics.ll;h=182f3cd5e65a36555b02040d8bbf82a3b1997e22;hb=6c7a788883c58037d3a63de37a5da45952e8298b;hp=0eba131a67c4b1667ab1403b1ef381cc9b673cef;hpb=e407ee0520400e5fa7316aed15caf788990de2af;p=oota-llvm.git

diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll
index 0eba131a67c..182f3cd5e65 100644
--- a/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -2736,3 +2736,353 @@ define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8>
   %res2 = add <8 x i64> %res, %res1
   ret <8 x i64> %res2
 }
+
+declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32) ; intrinsic under test: (i32, i32) -> i32
+
+define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) { ; forwards both arguments to kunpck.wd; both expectation blocks below want a single kunpckwd on mask registers
+; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k0
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: kunpckwd %k1, %k0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckwd %k1, %k0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: retl
+  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
+  ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64) ; intrinsic under test: (i64, i64) -> i64
+
+define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) { ; NOTE(review): name says "qd" but the intrinsic is kunpck.dq and the expected instruction is kunpckdq -- looks like a transposition; confirm before renaming
+; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k0
+; AVX512BW-NEXT: kmovq %rsi, %k1
+; AVX512BW-NEXT: kunpckdq %k1, %k0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $12, %esp
+; AVX512F-32-NEXT: .Ltmp8:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $12, %esp
+; AVX512F-32-NEXT: retl
+  %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1) ; i64 result: the 32-bit expectations store the mask to the stack and reload it into %eax/%edx
+  ret i64 %res
+}
+
+declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>) ; intrinsic under test: <64 x i8> -> i64
+
+define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) { ; forwards %x0 to cvtb2mask.512; both expectation blocks want vpmovb2m into a mask register
+; AVX512BW-LABEL: test_int_x86_avx512_cvtb2mask_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $12, %esp
+; AVX512F-32-NEXT: .Ltmp9:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $12, %esp
+; AVX512F-32-NEXT: retl
+  %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0) ; i64 result: the 32-bit expectations again go through a stack slot to reach %eax/%edx
+  ret i64 %res
+}
+
+declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>) ; intrinsic under test: <32 x i16> -> i32
+
+define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) { ; forwards %x0 to cvtw2mask.512; both expectation blocks want vpmovw2m followed by kmovd into %eax
+; AVX512BW-LABEL: test_int_x86_avx512_cvtw2mask_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: retl
+  %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0)
+  ret i32 %res
+}
+
+declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64) ; intrinsic under test: i64 -> <64 x i8>
+
+define <64 x i8>@test_int_x86_avx512_cvtmask2b_512(i64 %x0) { ; forwards %x0 to cvtmask2b.512; both expectation blocks end in vpmovm2b
+; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k0
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
+; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
+; AVX512F-32-NEXT: retl
+  %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0) ; 32-bit expectations: two 32-bit stack loads joined by kunpckdq, presumably assembling the i64 argument
+  ret <64 x i8> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32) ; intrinsic under test: i32 -> <32 x i16>
+
+define <32 x i16>@test_int_x86_avx512_cvtmask2w_512(i32 %x0) { ; forwards %x0 to cvtmask2w.512; both expectation blocks want kmovd followed by vpmovm2w
+; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k0
+; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: vpmovm2w %k0, %zmm0
+; AVX512F-32-NEXT: retl
+  %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0)
+  ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) ; intrinsic under test; from here on the tests carry no 32-bit expectation block
+
+define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) { ; calls mask.psrl.w.512 three times with different third/fourth operands and returns the sum of the results
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psrl_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) ; %x2 as third operand under mask %x3; presumably the {%k1} line in the expected asm
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) ; same, but with an all-ones mask (i32 -1); presumably the line with no mask annotation
+  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) ; zeroinitializer as third operand under mask %x3; presumably the {%k1} {z} line
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res3, %res2 ; all three results feed the return value
+  ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i8, <32 x i16>, i32) ; intrinsic under test: second operand is an i8
+
+define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) { ; %x1 is never read: every call passes the constant i8 3, and the expected asm takes the mask from %esi
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm2 {%k1} {z}
+; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) ; %x2 as third operand under mask %x3; presumably the {%k1} line in the expected asm
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) ; same, but with an all-ones mask (i32 -1); presumably the line with no mask annotation
+  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) ; zeroinitializer as third operand under mask %x3; presumably the {%k1} {z} line
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res3, %res2 ; all three results feed the return value
+  ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) ; intrinsic under test
+
+define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { ; NOTE(review): expectations here use the generic FileCheck prefix, not the per-target ones used by earlier tests; RUN lines are outside this hunk -- confirm that prefix is enabled
+; CHECK-LABEL: test_int_x86_avx512_mask_psrlv32hi:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddw %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) ; %x2 as third operand under mask %x3; presumably the {%k1} line in the expected asm
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) ; zeroinitializer as third operand under mask %x3; presumably the {%k1} {z} line
+  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) ; all-ones mask (i32 -1); presumably the line with no mask annotation
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res3, %res2 ; all three results feed the return value
+  ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) ; intrinsic under test: second operand is <8 x i16>
+
+define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) { ; three calls to mask.psra.w.512, results summed; NOTE(review): generic FileCheck prefix again -- confirm the RUN lines enable it
+; CHECK-LABEL: test_int_x86_avx512_mask_psra_w_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddw %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) ; %x2 as third operand under mask %x3; presumably the {%k1} line in the expected asm
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) ; zeroinitializer as third operand under mask %x3; presumably the {%k1} {z} line
+  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) ; all-ones mask (i32 -1); presumably the line with no mask annotation
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res3, %res2 ; all three results feed the return value
+  ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i8, <32 x i16>, i32) ; intrinsic under test: second operand is an i8
+
+define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) { ; %x1 is never read: every call passes the constant i8 3, and the expected asm takes the mask from %esi; NOTE(review): generic FileCheck prefix -- confirm the RUN lines enable it
+; CHECK-LABEL: test_int_x86_avx512_mask_psra_wi_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpsraw $3, %zmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpsraw $3, %zmm0, %zmm0
+; CHECK-NEXT: vpaddw %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) ; %x2 as third operand under mask %x3; presumably the {%k1} line in the expected asm
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) ; zeroinitializer as third operand under mask %x3; presumably the {%k1} {z} line
+  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) ; all-ones mask (i32 -1); presumably the line with no mask annotation
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res3, %res2 ; all three results feed the return value
+  ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16>, i8, <32 x i16>, i32) ; intrinsic under test: second operand is an i8
+
+define <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) { ; %x1 is never read: every call passes the constant i8 3, and the expected asm takes the mask from %esi; NOTE(review): generic FileCheck prefix -- confirm the RUN lines enable it
+; CHECK-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpshufhw $3, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpshufhw $3, %zmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpshufhw $3, %zmm0, %zmm0
+; CHECK-NEXT: vpaddw %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) ; %x2 as third operand under mask %x3; presumably the {%k1} line in the expected asm
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) ; zeroinitializer as third operand under mask %x3; presumably the {%k1} {z} line
+  %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) ; all-ones mask (i32 -1); presumably the line with no mask annotation
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res3, %res2 ; all three results feed the return value
+  ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i8, <32 x i16>, i32) ; intrinsic under test: second operand is an i8
+
+define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) { ; %x1 is never read: every call passes the constant i8 3, and the expected asm takes the mask from %esi; NOTE(review): generic FileCheck prefix -- confirm the RUN lines enable it
+; CHECK-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpshuflw $3, %zmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpshuflw $3, %zmm0, %zmm0
+; CHECK-NEXT: vpaddw %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) ; %x2 as third operand under mask %x3; presumably the {%k1} line in the expected asm
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) ; zeroinitializer as third operand under mask %x3; presumably the {%k1} {z} line
+  %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) ; all-ones mask (i32 -1); presumably the line with no mask annotation
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res3, %res2 ; all three results feed the return value
+  ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psrav32_hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) ; intrinsic under test
+
+define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { ; three calls to mask.psrav32_hi, results summed; NOTE(review): generic FileCheck prefix -- confirm the RUN lines enable it
+; CHECK-LABEL: test_int_x86_avx512_mask_psrav32_hi:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddw %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) ; %x2 as third operand under mask %x3; presumably the {%k1} line in the expected asm
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) ; zeroinitializer as third operand under mask %x3; presumably the {%k1} {z} line
+  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) ; all-ones mask (i32 -1); presumably the line with no mask annotation
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res3, %res2 ; all three results feed the return value
+  ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) ; intrinsic under test: second operand is <8 x i16>
+
+define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) { ; calls mask.psll.w.512 three times with different third/fourth operands and returns the sum of the results
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) ; %x2 as third operand under mask %x3; presumably the {%k1} line in the expected asm
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) ; zeroinitializer as third operand under mask %x3; presumably the {%k1} {z} line
+  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) ; all-ones mask (i32 -1); presumably the line with no mask annotation
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res3, %res2 ; all three results feed the return value
+  ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i8, <32 x i16>, i32) ; intrinsic under test: second operand is an i8
+
+define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) { ; %x1 is never read: every call passes the constant i8 3, and the expected asm takes the mask from %esi
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_wi_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm2 {%k1} {z}
+; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) ; %x2 as third operand under mask %x3; presumably the {%k1} line in the expected asm
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) ; zeroinitializer as third operand under mask %x3; presumably the {%k1} {z} line
+  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) ; all-ones mask (i32 -1); presumably the line with no mask annotation
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res3, %res2 ; all three results feed the return value
+  ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) ; intrinsic under test
+
+define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { ; calls mask.psllv32hi three times with different third/fourth operands and returns the sum of the results
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psllv32hi:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) ; %x2 as third operand under mask %x3; presumably the {%k1} line in the expected asm
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) ; zeroinitializer as third operand under mask %x3; presumably the {%k1} {z} line
+  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) ; all-ones mask (i32 -1); presumably the line with no mask annotation
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res3, %res2 ; all three results feed the return value
+  ret <32 x i16> %res4
+}