X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=test%2FCodeGen%2FX86%2Favx512bwvl-intrinsics.ll;h=2ccaf80bce99e63d1dca3a0bee7eeb204a0c4897;hb=6c7a788883c58037d3a63de37a5da45952e8298b;hp=1db6756c23a84612251a356eae29ccf3f81bf505;hpb=48d1be172b10bc72020330f4c9fa98c01e05f55e;p=oota-llvm.git

diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index 1db6756c23a..2ccaf80bce9 100644
--- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -4591,3 +4591,421 @@ define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i8 %x1, <
   %res4 = add <16 x i16> %res3, %res2
   ret <16 x i16> %res4
 }
+
+declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; three calls: passthru %x2, zero passthru, all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddw %ymm3, %ymm2, %ymm1
+; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) ; mask %x3, passthru %x2
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) ; mask -1 (all bits set)
+  %res3 = add <16 x i16> %res, %res1
+  %res4 = add <16 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; three calls: passthru %x2, zero passthru, all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddw %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) ; mask %x3, passthru %x2
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) ; mask -1 (all bits set)
+  %res3 = add <8 x i16> %res, %res1
+  %res4 = add <8 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <8 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; three calls: passthru %x2, zero passthru, all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_mask_psra_w_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpsraw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vpsraw %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddw %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) ; mask %x3, passthru %x2
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) ; mask -1 (all bits set)
+  %res3 = add <8 x i16> %res, %res1
+  %res4 = add <8 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <8 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16>, i8, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i8 %x1, <8 x i16> %x2, i8 %x3) { ; %x1 is unused: each call passes the constant 3 instead
+; CHECK-LABEL: test_int_x86_avx512_mask_psra_wi_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %sil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpsraw $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vpsraw $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT:    vpsraw $3, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 %x3) ; mask %x3, passthru %x2
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i8 3, <8 x i16> zeroinitializer, i8 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 -1) ; mask -1 (all bits set)
+  %res3 = add <8 x i16> %res, %res1
+  %res4 = add <8 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; shift operand %x1 is <8 x i16> while the data is <16 x i16>
+; CHECK-LABEL: test_int_x86_avx512_mask_psra_w_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsraw %xmm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vpsraw %xmm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT:    vpsraw %xmm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddw %ymm3, %ymm2, %ymm1
+; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) ; mask %x3, passthru %x2
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) ; mask -1 (all bits set)
+  %res3 = add <16 x i16> %res, %res1
+  %res4 = add <16 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <16 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16>, i8, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i8 %x1, <16 x i16> %x2, i16 %x3) { ; %x1 is unused: each call passes the constant 3 instead
+; CHECK-LABEL: test_int_x86_avx512_mask_psra_wi_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpsraw $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vpsraw $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT:    vpsraw $3, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
+; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 %x3) ; mask %x3, passthru %x2
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i8 3, <16 x i16> zeroinitializer, i16 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 -1) ; mask -1 (all bits set)
+  %res3 = add <16 x i16> %res, %res1
+  %res4 = add <16 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <16 x i16> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32>, i16, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pshuf_d_128(<4 x i32> %x0, i16 %x1, <4 x i32> %x2, i8 %x3) { ; %x1 is unused: each call passes the constant 3 instead
+; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %sil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpshufd $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vpshufd $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT:    vpshufd $3, %xmm0, %xmm0
+; CHECK-NEXT:    ## xmm0 = xmm0[3,0,0,0]
+; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i16 3, <4 x i32> %x2, i8 %x3) ; mask %x3, passthru %x2
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i16 3, <4 x i32> zeroinitializer, i8 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i16 3, <4 x i32> %x2, i8 -1) ; mask -1 (all bits set)
+  %res3 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res3, %res2 ; all three results feed the returned value
+  ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32>, i16, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i16 %x1, <8 x i32> %x2, i8 %x3) { ; %x1 is unused: each call passes the constant 3 instead
+; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %sil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpshufd $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vpshufd $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT:    vpshufd $3, %ymm0, %ymm0
+; CHECK-NEXT:    ## ymm0 = ymm0[3,0,0,0,7,4,4,4]
+; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i16 3, <8 x i32> %x2, i8 %x3) ; mask %x3, passthru %x2
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i16 3, <8 x i32> zeroinitializer, i8 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i16 3, <8 x i32> %x2, i8 -1) ; mask -1 (all bits set)
+  %res3 = add <8 x i32> %res, %res1
+  %res4 = add <8 x i32> %res3, %res2 ; all three results feed the returned value
+  ret <8 x i32> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16>, i8, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pshufh_w_128(<8 x i16> %x0, i8 %x1, <8 x i16> %x2, i8 %x3) { ; %x1 is unused: each call passes the constant 3 instead
+; CHECK-LABEL: test_int_x86_avx512_mask_pshufh_w_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %sil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpshufhw $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vpshufhw $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT:    vpshufhw $3, %xmm0, %xmm0
+; CHECK-NEXT:    ## xmm0 = xmm0[0,1,2,3,7,4,4,4]
+; CHECK-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 %x3) ; mask %x3, passthru %x2
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i8 3, <8 x i16> zeroinitializer, i8 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 -1) ; mask -1 (all bits set)
+  %res3 = add <8 x i16> %res, %res1
+  %res4 = add <8 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16>, i8, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pshufh_w_256(<16 x i16> %x0, i8 %x1, <16 x i16> %x2, i16 %x3) { ; %x1 is unused: each call passes the constant 3 instead
+; CHECK-LABEL: test_int_x86_avx512_mask_pshufh_w_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpshufhw $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vpshufhw $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT:    vpshufhw $3, %ymm0, %ymm0
+; CHECK-NEXT:    ## ymm0 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
+; CHECK-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
+; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 %x3) ; mask %x3, passthru %x2
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i8 3, <16 x i16> zeroinitializer, i16 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 -1) ; mask -1 (all bits set)
+  %res3 = add <16 x i16> %res, %res1
+  %res4 = add <16 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16>, i8, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pshufl_w_128(<8 x i16> %x0, i8 %x1, <8 x i16> %x2, i8 %x3) { ; %x1 is unused: each call passes the constant 3 instead
+; CHECK-LABEL: test_int_x86_avx512_mask_pshufl_w_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %sil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpshuflw $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vpshuflw $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT:    vpshuflw $3, %xmm0, %xmm0
+; CHECK-NEXT:    ## xmm0 = xmm0[3,0,0,0,4,5,6,7]
+; CHECK-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 %x3) ; mask %x3, passthru %x2
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i8 3, <8 x i16> zeroinitializer, i8 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 -1) ; mask -1 (all bits set)
+  %res3 = add <8 x i16> %res, %res1
+  %res4 = add <8 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16>, i8, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i8 %x1, <16 x i16> %x2, i16 %x3) { ; %x1 is unused: each call passes the constant 3 instead
+; CHECK-LABEL: test_int_x86_avx512_mask_pshufl_w_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpshuflw $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vpshuflw $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT:    vpshuflw $3, %ymm0, %ymm0
+; CHECK-NEXT:    ## ymm0 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
+; CHECK-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
+; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 %x3) ; mask %x3, passthru %x2
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i8 3, <16 x i16> zeroinitializer, i16 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 -1) ; mask -1 (all bits set)
+  %res3 = add <16 x i16> %res, %res1
+  %res4 = add <16 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <16 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; three calls: passthru %x2, zero passthru, all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_mask_psrav16_hi:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsravw %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vpsravw %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT:    vpsravw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddw %ymm3, %ymm2, %ymm1
+; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) ; mask %x3, passthru %x2
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) ; mask -1 (all bits set)
+  %res3 = add <16 x i16> %res, %res1
+  %res4 = add <16 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; three calls: passthru %x2, zero passthru, all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_hi:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpsravw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vpsravw %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT:    vpsravw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddw %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) ; mask %x3, passthru %x2
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) ; mask -1 (all bits set)
+  %res3 = add <8 x i16> %res, %res1
+  %res4 = add <8 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <8 x i16> %res4
+}
+
+
+declare <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; three calls: passthru %x2, zero passthru, all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_mask_psll_w_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpsllw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vpsllw %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddw %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) ; mask %x3, passthru %x2
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) ; mask -1 (all bits set)
+  %res3 = add <8 x i16> %res, %res1
+  %res4 = add <8 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; shift operand %x1 is <8 x i16> while the data is <16 x i16>
+; CHECK-LABEL: test_int_x86_avx512_mask_psll_w_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsllw %xmm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vpsllw %xmm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddw %ymm3, %ymm2, %ymm1
+; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) ; mask %x3, passthru %x2
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) ; mask -1 (all bits set)
+  %res3 = add <16 x i16> %res, %res1
+  %res4 = add <16 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16>, i8, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i8 %x1, <8 x i16> %x2, i8 %x3) { ; %x1 is unused: each call passes the constant 3 instead
+; CHECK-LABEL: test_int_x86_avx512_mask_psll_wi_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %sil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpsllw $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vpsllw $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT:    vpsllw $3, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 %x3) ; mask %x3, passthru %x2
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i8 3, <8 x i16> zeroinitializer, i8 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 -1) ; mask -1 (all bits set)
+  %res3 = add <8 x i16> %res, %res1
+  %res4 = add <8 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16>, i8, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i8 %x1, <16 x i16> %x2, i16 %x3) { ; %x1 is unused: each call passes the constant 3 instead
+; CHECK-LABEL: test_int_x86_avx512_mask_psll_wi_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpsllw $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vpsllw $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT:    vpsllw $3, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
+; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 %x3) ; mask %x3, passthru %x2
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i8 3, <16 x i16> zeroinitializer, i16 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 -1) ; mask -1 (all bits set)
+  %res3 = add <16 x i16> %res, %res1
+  %res4 = add <16 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <16 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; three calls: passthru %x2, zero passthru, all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_mask_psllv16_hi:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsllvw %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vpsllvw %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddw %ymm3, %ymm2, %ymm1
+; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) ; mask %x3, passthru %x2
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) ; mask -1 (all bits set)
+  %res3 = add <16 x i16> %res, %res1
+  %res4 = add <16 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; three calls: passthru %x2, zero passthru, all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_hi:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpsllvw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vpsllvw %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddw %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) ; mask %x3, passthru %x2
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) ; mask %x3, passthru zeroinitializer
+  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) ; mask -1 (all bits set)
+  %res3 = add <8 x i16> %res, %res1
+  %res4 = add <8 x i16> %res3, %res2 ; all three results feed the returned value
+  ret <8 x i16> %res4
+}
\ No newline at end of file