From: Igor Breger Date: Thu, 24 Dec 2015 07:11:53 +0000 (+0000) Subject: AVX512: VPMOVM2B/W/D/Q intrinsic implementation. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=d7d8cb8af1e5dd8e52ef0bb425e74b0ec2f5550e;p=oota-llvm.git AVX512: VPMOVM2B/W/D/Q intrinsic implementation. Differential Revision: http://reviews.llvm.org//D15747 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256364 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index b26f276895d..8f8c9ac3b69 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -4087,6 +4087,35 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtmask2b_128 : GCCBuiltin<"__builtin_ia32_cvtmask2b128">, + Intrinsic<[llvm_v16i8_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2b_256 : GCCBuiltin<"__builtin_ia32_cvtmask2b256">, + Intrinsic<[llvm_v32i8_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2b_512 : GCCBuiltin<"__builtin_ia32_cvtmask2b512">, + Intrinsic<[llvm_v64i8_ty], [llvm_i64_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtmask2w_128 : GCCBuiltin<"__builtin_ia32_cvtmask2w128">, + Intrinsic<[llvm_v8i16_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2w_256 : GCCBuiltin<"__builtin_ia32_cvtmask2w256">, + Intrinsic<[llvm_v16i16_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2w_512 : GCCBuiltin<"__builtin_ia32_cvtmask2w512">, + Intrinsic<[llvm_v32i16_ty], [llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtmask2d_128 : GCCBuiltin<"__builtin_ia32_cvtmask2d128">, + Intrinsic<[llvm_v4i32_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2d_256 : GCCBuiltin<"__builtin_ia32_cvtmask2d256">, + Intrinsic<[llvm_v8i32_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2d_512 : GCCBuiltin<"__builtin_ia32_cvtmask2d512">, + Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtmask2q_128 : GCCBuiltin<"__builtin_ia32_cvtmask2q128">, + Intrinsic<[llvm_v2i64_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2q_256 : GCCBuiltin<"__builtin_ia32_cvtmask2q256">, + Intrinsic<[llvm_v4i64_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2q_512 : GCCBuiltin<"__builtin_ia32_cvtmask2q512">, + Intrinsic<[llvm_v8i64_ty], [llvm_i8_ty], [IntrNoMem]>; + } // Pack ops. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 542dd97996b..9dfc5be0be7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16848,6 +16848,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Src2, Src1); return DAG.getBitcast(VT, Res); } + case CONVERT_MASK_TO_VEC: { + SDValue Mask = Op.getOperand(1); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + return DAG.getNode(IntrData->Opc0, dl, VT, VMask); + } default: break; } diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index caf9497ee06..c06b1ce38bf 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -30,7 +30,7 @@ enum IntrinsicType { COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC, - TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK + TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, CONVERT_MASK_TO_VEC }; struct IntrinsicData { @@ -143,7 +143,7 @@ static const IntrinsicData IntrinsicsWithChain[] = { EXPAND_FROM_MEM, X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512, EXPAND_FROM_MEM, X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8, + X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8, X86ISD::VTRUNC, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8, X86ISD::VTRUNC, 0), @@ -324,6 +324,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_broadcastmw_128, BROADCASTM, X86ISD::VBROADCASTM, 0), X86_INTRINSIC_DATA(avx512_broadcastmw_256, BROADCASTM, X86ISD::VBROADCASTM, 0), X86_INTRINSIC_DATA(avx512_broadcastmw_512, BROADCASTM, X86ISD::VBROADCASTM, 0), + X86_INTRINSIC_DATA(avx512_cvtmask2b_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_cvtmask2b_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_cvtmask2b_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_cvtmask2d_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_cvtmask2d_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_cvtmask2d_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_cvtmask2q_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_cvtmask2q_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_cvtmask2q_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_cvtmask2w_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_cvtmask2w_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_cvtmask2w_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0), X86_INTRINSIC_DATA(avx512_cvtsi2sd32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), @@ -1624,23 +1636,23 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0), X86_INTRINSIC_DATA(avx512_psll_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSHLDQ, 0), X86_INTRINSIC_DATA(avx512_psrl_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSRLDQ, 0), - X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), + X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), + X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), + X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), + X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), + X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), + X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0), X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0), X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0), - X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), + X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0), diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index c6ba0dd6eb4..769387e80a2 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -2786,3 +2786,41 @@ define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) { %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1) ret i64 %res } + +declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64) + +define <64 x i8>@test_int_x86_avx512_cvtmask2b_512(i64 %x0) { +; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2b_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovq %rdi, %k0 +; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: retl + %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0) + ret <64 x i8> %res +} + +declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32) + +define <32 x i16>@test_int_x86_avx512_cvtmask2w_512(i32 %x0) { +; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2w_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovd %edi, %k0 +; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: vpmovm2w %k0, %zmm0 +; AVX512F-32-NEXT: retl + %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0) + ret <32 x i16> %res +} diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 713f1c9782c..56d24a3c800 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -4413,3 +4413,51 @@ define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> ret <32 x i16> %res4 } +declare <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16) + +define <16 x i8>@test_int_x86_avx512_cvtmask2b_128(i16 %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtmask2b_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k0 +; CHECK-NEXT: vpmovm2b %k0, %xmm0 +; CHECK-NEXT: retq + %res = call <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16 %x0) + ret <16 x i8> %res +} + +declare <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32) + +define <32 x i8>@test_int_x86_avx512_cvtmask2b_256(i32 %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtmask2b_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovd %edi, %k0 +; CHECK-NEXT: vpmovm2b %k0, %ymm0 +; CHECK-NEXT: retq + %res = call <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32 %x0) + ret <32 x i8> %res +} + +declare <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8) + +define <8 x i16>@test_int_x86_avx512_cvtmask2w_128(i8 %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtmask2w_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k0 +; CHECK-NEXT: vpmovm2w %k0, %xmm0 +; CHECK-NEXT: retq + %res = call <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8 %x0) + ret <8 x i16> %res +} + +declare <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16) + +define <16 x i16>@test_int_x86_avx512_cvtmask2w_256(i16 %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtmask2w_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k0 +; CHECK-NEXT: vpmovm2w %k0, %ymm0 +; CHECK-NEXT: retq + %res = call <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16 %x0) + ret <16 x i16> %res +} diff --git a/test/CodeGen/X86/avx512dq-intrinsics.ll b/test/CodeGen/X86/avx512dq-intrinsics.ll index 137b0e49fb1..e2d2ddcbec8 100644 --- a/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -209,8 +209,8 @@ define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32) ; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_512 -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vreduceps ; CHECK: {sae} ; CKECK: {%k1} @@ -224,8 +224,8 @@ define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32) ; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_512 -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vrangepd ; CKECK: {%k1} ; CHECK: vrangepd @@ -240,8 +240,8 @@ define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32) ; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_512 -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vrangeps ; CKECK: {%k1} ; CHECK: vrangeps @@ -256,8 +256,8 @@ define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32) ; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ss -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vreducess ; CKECK: {%k1} ; CHECK: vreducess @@ -271,13 +271,13 @@ define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x floa declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32) ; CHECK-LABEL: @test_int_x86_avx512_mask_range_ss -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vrangess ; CHECK: {sae} ; CKECK: {%k1} ; CHECK: vrangess -; CHECK: {sae} +; CHECK: {sae} define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8) %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8) @@ -288,12 +288,12 @@ define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32) ; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_sd -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vreducesd ; CKECK: {%k1} ; CHECK: vreducesd -; CHECK: {sae} +; CHECK: {sae} define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8) @@ -303,12 +303,12 @@ define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x do declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32) ; CHECK-LABEL: @test_int_x86_avx512_mask_range_sd -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vrangesd ; CKECK: {%k1} ; CHECK: vrangesd -; CHECK: {sae} +; CHECK: {sae} define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8) @@ -440,39 +440,39 @@ define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i6 declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8) ; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_pd_512 -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vfpclasspd -; CHECK: {%k1} +; CHECK: {%k1} ; CHECK: vfpclasspd ; CHECK: kmovb %k0 define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) { - %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1) - %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1) - %res2 = add i8 %res, %res1 - ret i8 %res2 + %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1) + %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1) + %res2 = add i8 %res, %res1 + ret i8 %res2 } declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16) ; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ps_512 -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vfpclassps ; CHECK: vfpclassps -; CHECK: {%k1} +; CHECK: {%k1} ; CHECK: kmov define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) { - %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 %x1) - %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1) - %res2 = add i16 %res, %res1 - ret i16 %res2 + %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 %x1) + %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1) + %res2 = add i16 %res, %res1 + ret i16 %res2 } declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8) ; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_sd -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vfpclasssd ; CHECK: %k0 {%k1} ; CHECK: vfpclasssd @@ -487,8 +487,8 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) { declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8) ; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ss -; CHECK-NOT: call -; CHECK: kmovw +; CHECK-NOT: call +; CHECK: kmovw ; CHECK: vfpclassss ; CHECK: %k0 ; CHECK: {%k1} @@ -541,3 +541,28 @@ define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 %res4 = add <16 x i32> %res3, %res2 ret <16 x i32> %res4 } + +declare <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16) + +define <16 x i32>@test_int_x86_avx512_cvtmask2d_512(i16 %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k0 +; CHECK-NEXT: vpmovm2d %k0, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16 %x0) + ret <16 x i32> %res +} + +declare <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8) + +define <8 x i64>@test_int_x86_avx512_cvtmask2q_512(i8 %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k0 +; CHECK-NEXT: vpmovm2q %k0, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8 %x0) + ret <8 x i64> %res +} + diff --git a/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/test/CodeGen/X86/avx512dqvl-intrinsics.ll index b587e93f80a..1612ea40d5c 100644 --- a/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -1833,3 +1833,50 @@ define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x ret <4 x i32> %res4 } +declare <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8) + +define <4 x i32>@test_int_x86_avx512_cvtmask2d_128(i8 %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k0 +; CHECK-NEXT: vpmovm2d %k0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8 %x0) + ret <4 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8) + +define <8 x i32>@test_int_x86_avx512_cvtmask2d_256(i8 %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k0 +; CHECK-NEXT: vpmovm2d %k0, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8 %x0) + ret <8 x i32> %res +} + +declare <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8) + +define <2 x i64>@test_int_x86_avx512_cvtmask2q_128(i8 %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k0 +; CHECK-NEXT: vpmovm2q %k0, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8 %x0) + ret <2 x i64> %res +} + +declare <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8) + +define <4 x i64>@test_int_x86_avx512_cvtmask2q_256(i8 %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k0 +; CHECK-NEXT: vpmovm2q %k0, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8 %x0) + ret <4 x i64> %res +}