From: Elena Demikhovsky Date: Mon, 29 Jun 2015 12:14:24 +0000 (+0000) Subject: AVX-512: all forms of SCATTER instruction on SKX, X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=546178bfe574fa0dde9a76521c81718ae9d7eae3 AVX-512: all forms of SCATTER instruction on SKX, encoding, intrinsics and tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240936 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 15f5078ccef..80446d35a4b 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -4634,6 +4634,102 @@ let TargetPrefix = "x86" in { llvm_i32_ty], [IntrReadWriteArgMem]>; + def int_x86_avx512_scatterdiv2_df : + GCCBuiltin<"__builtin_ia32_scatterdiv2df">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scatterdiv2_di : + GCCBuiltin<"__builtin_ia32_scatterdiv2di">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scatterdiv4_df : + GCCBuiltin<"__builtin_ia32_scatterdiv4df">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scatterdiv4_di : + GCCBuiltin<"__builtin_ia32_scatterdiv4di">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scatterdiv4_sf : + GCCBuiltin<"__builtin_ia32_scatterdiv4sf">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scatterdiv4_si : + GCCBuiltin<"__builtin_ia32_scatterdiv4si">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scatterdiv8_sf : + 
GCCBuiltin<"__builtin_ia32_scatterdiv8sf">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scatterdiv8_si : + GCCBuiltin<"__builtin_ia32_scatterdiv8si">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scattersiv2_df : + GCCBuiltin<"__builtin_ia32_scattersiv2df">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scattersiv2_di : + GCCBuiltin<"__builtin_ia32_scattersiv2di">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scattersiv4_df : + GCCBuiltin<"__builtin_ia32_scattersiv4df">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scattersiv4_di : + GCCBuiltin<"__builtin_ia32_scattersiv4di">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scattersiv4_sf : + GCCBuiltin<"__builtin_ia32_scattersiv4sf">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scattersiv4_si : + GCCBuiltin<"__builtin_ia32_scattersiv4si">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scattersiv8_sf : + GCCBuiltin<"__builtin_ia32_scattersiv8sf">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + + def int_x86_avx512_scattersiv8_si : + GCCBuiltin<"__builtin_ia32_scattersiv8si">, + Intrinsic<[], + [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + // gather prefetch def 
int_x86_avx512_gatherpf_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfdpd">, Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 76f3fd63ad3..33b01e007db 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15488,7 +15488,12 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Index, SDValue ScaleOp, SDValue Chain) { SDLoc dl(Op); ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp); - assert(C && "Invalid scale type"); + if (!C) + llvm_unreachable("Invalid scale type"); + unsigned ScaleVal = C->getZExtValue(); + if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8) + llvm_unreachable("Valid scale values are 1, 2, 4, 8"); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); @@ -15498,8 +15503,16 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask); if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); - else - MaskInReg = DAG.getBitcast(MaskVT, Mask); + else { + EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + Mask.getValueType().getSizeInBits()); + + // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements + // are extracted by EXTRACT_SUBVECTOR. 
+ MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, + DAG.getBitcast(BitcastVT, Mask), + DAG.getIntPtrConstant(0, dl)); + } SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index db1f432b0e9..2e8c2c59beb 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -5596,40 +5596,58 @@ defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, X86MemOperand memop, PatFrag ScatterNode> { -let mayStore = 1, Constraints = "$mask = $mask_wb" in +let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in def mr : AVX5128I, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; } -let ExeDomain = SSEPackedDouble in { -defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", v8f64_info, vy64xmem, - mscatterv8i32>, EVEX_V512, VEX_W; -defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", v8f64_info, vz64mem, - mscatterv8i64>, EVEX_V512, VEX_W; +multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_scatter, EVEX_V512, VEX_W; + defm NAME##Q##SUFF##Z: avx512_scatter, EVEX_V512, VEX_W; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_scatter, EVEX_V256, VEX_W; + defm NAME##Q##SUFF##Z256: avx512_scatter, EVEX_V256, VEX_W; + defm NAME##D##SUFF##Z128: avx512_scatter, EVEX_V128, VEX_W; + defm NAME##Q##SUFF##Z128: avx512_scatter, EVEX_V128, VEX_W; +} } -let ExeDomain = SSEPackedSingle in { -defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", v16f32_info, vz32mem, - mscatterv16i32>, EVEX_V512; -defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", v8f32x_info, vz64mem, - mscatterv8i64>, EVEX_V512; +multiclass avx512_scatter_d_ps<bits<8> dopc, 
bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_scatter, EVEX_V512; + defm NAME##Q##SUFF##Z: avx512_scatter, EVEX_V512; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_scatter, EVEX_V256; + defm NAME##Q##SUFF##Z256: avx512_scatter, EVEX_V256; + defm NAME##D##SUFF##Z128: avx512_scatter, EVEX_V128; + defm NAME##Q##SUFF##Z128: avx512_scatter, EVEX_V128; +} } -defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", v8i64_info, vy64xmem, - mscatterv8i32>, EVEX_V512, VEX_W; -defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", v16i32_info, vz32mem, - mscatterv16i32>, EVEX_V512; +defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">, + avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">; -defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", v8i64_info, vz64mem, - mscatterv8i64>, EVEX_V512, VEX_W; -defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", v8i32x_info, vz64mem, - mscatterv8i64>, EVEX_V512; +defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">, + avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">; // prefetch multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 072bc050aad..fe245c3a7e3 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -606,6 +606,30 @@ def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), return false; }]>; +def mscatterv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_scatter node:$src1, node:$src2, node:$src3) , [{ + if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N)) + return (Sc->getIndex().getValueType() == MVT::v2i64 || + Sc->getBasePtr().getValueType() == MVT::v2i64); + return false; +}]>; + +def mscatterv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + 
(masked_scatter node:$src1, node:$src2, node:$src3) , [{ + if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N)) + return (Sc->getIndex().getValueType() == MVT::v4i32 || + Sc->getBasePtr().getValueType() == MVT::v4i32); + return false; +}]>; + +def mscatterv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_scatter node:$src1, node:$src2, node:$src3) , [{ + if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N)) + return (Sc->getIndex().getValueType() == MVT::v4i64 || + Sc->getBasePtr().getValueType() == MVT::v4i64); + return false; +}]>; + def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_scatter node:$src1, node:$src2, node:$src3) , [{ if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N)) diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index c9823a24a5e..61a33484b8b 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -146,15 +146,30 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0), - - X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, - X86::VSCATTERPF0DPDm, X86::VSCATTERPF1DPDm), - X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, - X86::VSCATTERPF0DPSm, X86::VSCATTERPF1DPSm), - X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH, - X86::VSCATTERPF0QPDm, X86::VSCATTERPF1QPDm), - X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, - X86::VSCATTERPF0QPSm, X86::VSCATTERPF1QPSm), + X86_INTRINSIC_DATA(avx512_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0), + X86_INTRINSIC_DATA(avx512_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0), + 
X86_INTRINSIC_DATA(avx512_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0), + X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0), + X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, X86::VSCATTERPF0DPDm, + X86::VSCATTERPF1DPDm), + X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, X86::VSCATTERPF0DPSm, + X86::VSCATTERPF1DPSm), + X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH, X86::VSCATTERPF0QPDm, + X86::VSCATTERPF1QPDm), + X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, X86::VSCATTERPF0QPSm, + X86::VSCATTERPF1QPSm), + X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0), + X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0), X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0), X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0), diff --git a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll index ea3563b312a..3fca5a89a6a 100644 --- a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll +++ b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll @@ -548,3 +548,244 @@ define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x %res2 = add <8 x i32> %res, %res1 ret <8 x i32> %res2 } + +declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x 
double>, i32) + +define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: kxnorw %k2, %k2, %k2 +; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,0) {%k2} +; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 0) + call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32) + +define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,0) {%k1} +; CHECK-NEXT: kxnorw %k1, %k1, %k1 +; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 0) + call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32) + +define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,0) {%k1} +; CHECK-NEXT: kxnorw %k1, %k1, %k1 +; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 0) + call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, 
<4 x i64>, i32) + +define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,0) {%k1} +; CHECK-NEXT: kxnorw %k1, %k1, %k1 +; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 0) + call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32) + +define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,0) {%k1} +; CHECK-NEXT: kxnorw %k1, %k1, %k1 +; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 0) + call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32) + +define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: kxnorw %k2, %k2, %k2 +; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,0) {%k2} +; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 0) + call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x 
float>, i32) + +define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,0) {%k1} +; CHECK-NEXT: kxnorw %k1, %k1, %k1 +; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 0) + call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32) + +define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,0) {%k1} +; CHECK-NEXT: kxnorw %k1, %k1, %k1 +; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 0) + call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32) + +define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: kxnorw %k2, %k2, %k2 +; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,0) {%k2} +; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 0) + call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x 
i64>, i32) + +define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: kxnorw %k2, %k2, %k2 +; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,0) {%k2} +; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 0) + call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32) + +define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,0) {%k1} +; CHECK-NEXT: kxnorw %k1, %k1, %k1 +; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 0) + call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32) + +define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: kxnorw %k2, %k2, %k2 +; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,0) {%k2} +; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 0) + call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, 
i32) + +define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,0) {%k1} +; CHECK-NEXT: kxnorw %k1, %k1, %k1 +; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 0) + call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32) + +define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,0) {%k1} +; CHECK-NEXT: kxnorw %k1, %k1, %k1 +; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 0) + call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32) + +define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,0) {%k1} +; CHECK-NEXT: kxnorw %k1, %k1, %k1 +; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 0) + call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4) + ret void +} + +declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32) 
+ +define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) { +; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,0) {%k1} +; CHECK-NEXT: kxnorw %k1, %k1, %k1 +; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 0) + call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4) + ret void +} + diff --git a/test/MC/X86/avx512vl-encoding.s b/test/MC/X86/avx512vl-encoding.s index b4b28a62b85..e1fc32848cc 100644 --- a/test/MC/X86/avx512vl-encoding.s +++ b/test/MC/X86/avx512vl-encoding.s @@ -1643,3 +1643,131 @@ // CHECK: vgatherqps 1024(%rcx,%ymm31,4), %xmm19 {%k1} // CHECK: encoding: [0x62,0xa2,0x7d,0x21,0x93,0x9c,0xb9,0x00,0x04,0x00,0x00] vgatherqps 1024(%rcx,%ymm31,4), %xmm19 {%k1} + +// CHECK: vpscatterdd %xmm20, 123(%r14,%xmm31,8) {%k1} +// CHECK: encoding: [0x62,0x82,0x7d,0x01,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00] + vpscatterdd %xmm20, 123(%r14,%xmm31,8) {%k1} + +// CHECK: vpscatterdd %xmm20, 123(%r14,%xmm31,8) {%k1} +// CHECK: encoding: [0x62,0x82,0x7d,0x01,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00] + vpscatterdd %xmm20, 123(%r14,%xmm31,8) {%k1} + +// CHECK: vpscatterdd %xmm20, 256(%r9,%xmm31) {%k1} +// CHECK: encoding: [0x62,0x82,0x7d,0x01,0xa0,0x64,0x39,0x40] + vpscatterdd %xmm20, 256(%r9,%xmm31) {%k1} + +// CHECK: vpscatterdd %xmm20, 1024(%rcx,%xmm31,4) {%k1} +// CHECK: encoding: [0x62,0xa2,0x7d,0x01,0xa0,0xa4,0xb9,0x00,0x04,0x00,0x00] + vpscatterdd %xmm20, 1024(%rcx,%xmm31,4) {%k1} + +// CHECK: vpscatterdd %ymm28, 123(%r14,%ymm31,8) {%k1} +// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00] + vpscatterdd %ymm28, 123(%r14,%ymm31,8) {%k1} + +// CHECK: vpscatterdd %ymm28, 123(%r14,%ymm31,8) {%k1} +// CHECK: encoding: 
[0x62,0x02,0x7d,0x21,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00] + vpscatterdd %ymm28, 123(%r14,%ymm31,8) {%k1} + +// CHECK: vpscatterdd %ymm28, 256(%r9,%ymm31) {%k1} +// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa0,0x64,0x39,0x40] + vpscatterdd %ymm28, 256(%r9,%ymm31) {%k1} + +// CHECK: vpscatterdd %ymm28, 1024(%rcx,%ymm31,4) {%k1} +// CHECK: encoding: [0x62,0x22,0x7d,0x21,0xa0,0xa4,0xb9,0x00,0x04,0x00,0x00] + vpscatterdd %ymm28, 1024(%rcx,%ymm31,4) {%k1} + +// CHECK: vpscatterdq %xmm21, 123(%r14,%xmm31,8) {%k1} +// CHECK: encoding: [0x62,0x82,0xfd,0x01,0xa0,0xac,0xfe,0x7b,0x00,0x00,0x00] + vpscatterdq %xmm21, 123(%r14,%xmm31,8) {%k1} + +// CHECK: vpscatterdq %xmm21, 123(%r14,%xmm31,8) {%k1} +// CHECK: encoding: [0x62,0x82,0xfd,0x01,0xa0,0xac,0xfe,0x7b,0x00,0x00,0x00] + vpscatterdq %xmm21, 123(%r14,%xmm31,8) {%k1} + +// CHECK: vpscatterdq %xmm21, 256(%r9,%xmm31) {%k1} +// CHECK: encoding: [0x62,0x82,0xfd,0x01,0xa0,0x6c,0x39,0x20] + vpscatterdq %xmm21, 256(%r9,%xmm31) {%k1} + +// CHECK: vpscatterdq %xmm21, 1024(%rcx,%xmm31,4) {%k1} +// CHECK: encoding: [0x62,0xa2,0xfd,0x01,0xa0,0xac,0xb9,0x00,0x04,0x00,0x00] + vpscatterdq %xmm21, 1024(%rcx,%xmm31,4) {%k1} + +// CHECK: vpscatterdq %ymm28, 123(%r14,%xmm31,8) {%k1} +// CHECK: encoding: [0x62,0x02,0xfd,0x21,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00] + vpscatterdq %ymm28, 123(%r14,%xmm31,8) {%k1} + +// CHECK: vpscatterdq %ymm28, 123(%r14,%xmm31,8) {%k1} +// CHECK: encoding: [0x62,0x02,0xfd,0x21,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00] + vpscatterdq %ymm28, 123(%r14,%xmm31,8) {%k1} + +// CHECK: vpscatterdq %ymm28, 256(%r9,%xmm31) {%k1} +// CHECK: encoding: [0x62,0x02,0xfd,0x21,0xa0,0x64,0x39,0x20] + vpscatterdq %ymm28, 256(%r9,%xmm31) {%k1} + +// CHECK: vpscatterdq %ymm28, 1024(%rcx,%xmm31,4) {%k1} +// CHECK: encoding: [0x62,0x22,0xfd,0x21,0xa0,0xa4,0xb9,0x00,0x04,0x00,0x00] + vpscatterdq %ymm28, 1024(%rcx,%xmm31,4) {%k1} + +// CHECK: vpscatterqd %xmm22, 123(%r14,%xmm31,8) {%k1} +// CHECK: encoding: 
[0x62,0x82,0x7d,0x01,0xa1,0xb4,0xfe,0x7b,0x00,0x00,0x00] + vpscatterqd %xmm22, 123(%r14,%xmm31,8) {%k1} + +// CHECK: vpscatterqd %xmm22, 123(%r14,%xmm31,8) {%k1} +// CHECK: encoding: [0x62,0x82,0x7d,0x01,0xa1,0xb4,0xfe,0x7b,0x00,0x00,0x00] + vpscatterqd %xmm22, 123(%r14,%xmm31,8) {%k1} + +// CHECK: vpscatterqd %xmm22, 256(%r9,%xmm31) {%k1} +// CHECK: encoding: [0x62,0x82,0x7d,0x01,0xa1,0x74,0x39,0x40] + vpscatterqd %xmm22, 256(%r9,%xmm31) {%k1} + +// CHECK: vpscatterqd %xmm22, 1024(%rcx,%xmm31,4) {%k1} +// CHECK: encoding: [0x62,0xa2,0x7d,0x01,0xa1,0xb4,0xb9,0x00,0x04,0x00,0x00] + vpscatterqd %xmm22, 1024(%rcx,%xmm31,4) {%k1} + +// CHECK: vpscatterqd %xmm24, 123(%r14,%ymm31,8) {%k1} +// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa1,0x84,0xfe,0x7b,0x00,0x00,0x00] + vpscatterqd %xmm24, 123(%r14,%ymm31,8) {%k1} + +// CHECK: vpscatterqd %xmm24, 123(%r14,%ymm31,8) {%k1} +// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa1,0x84,0xfe,0x7b,0x00,0x00,0x00] + vpscatterqd %xmm24, 123(%r14,%ymm31,8) {%k1} + +// CHECK: vpscatterqd %xmm24, 256(%r9,%ymm31) {%k1} +// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa1,0x44,0x39,0x40] + vpscatterqd %xmm24, 256(%r9,%ymm31) {%k1} + +// CHECK: vpscatterqd %xmm24, 1024(%rcx,%ymm31,4) {%k1} +// CHECK: encoding: [0x62,0x22,0x7d,0x21,0xa1,0x84,0xb9,0x00,0x04,0x00,0x00] + vpscatterqd %xmm24, 1024(%rcx,%ymm31,4) {%k1} + +// CHECK: vpscatterqq %xmm28, 123(%r14,%xmm31,8) {%k1} +// CHECK: encoding: [0x62,0x02,0xfd,0x01,0xa1,0xa4,0xfe,0x7b,0x00,0x00,0x00] + vpscatterqq %xmm28, 123(%r14,%xmm31,8) {%k1} + +// CHECK: vpscatterqq %xmm28, 123(%r14,%xmm31,8) {%k1} +// CHECK: encoding: [0x62,0x02,0xfd,0x01,0xa1,0xa4,0xfe,0x7b,0x00,0x00,0x00] + vpscatterqq %xmm28, 123(%r14,%xmm31,8) {%k1} + +// CHECK: vpscatterqq %xmm28, 256(%r9,%xmm31) {%k1} +// CHECK: encoding: [0x62,0x02,0xfd,0x01,0xa1,0x64,0x39,0x20] + vpscatterqq %xmm28, 256(%r9,%xmm31) {%k1} + +// CHECK: vpscatterqq %xmm28, 1024(%rcx,%xmm31,4) {%k1} +// CHECK: encoding: 
[0x62,0x22,0xfd,0x01,0xa1,0xa4,0xb9,0x00,0x04,0x00,0x00] + vpscatterqq %xmm28, 1024(%rcx,%xmm31,4) {%k1} + +// CHECK: vpscatterqq %ymm19, 123(%r14,%ymm31,8) {%k1} +// CHECK: encoding: [0x62,0x82,0xfd,0x21,0xa1,0x9c,0xfe,0x7b,0x00,0x00,0x00] + vpscatterqq %ymm19, 123(%r14,%ymm31,8) {%k1} + +// CHECK: vpscatterqq %ymm19, 123(%r14,%ymm31,8) {%k1} +// CHECK: encoding: [0x62,0x82,0xfd,0x21,0xa1,0x9c,0xfe,0x7b,0x00,0x00,0x00] + vpscatterqq %ymm19, 123(%r14,%ymm31,8) {%k1} + +// CHECK: vpscatterqq %ymm19, 256(%r9,%ymm31) {%k1} +// CHECK: encoding: [0x62,0x82,0xfd,0x21,0xa1,0x5c,0x39,0x20] + vpscatterqq %ymm19, 256(%r9,%ymm31) {%k1} + +// CHECK: vpscatterqq %ymm19, 1024(%rcx,%ymm31,4) {%k1} +// CHECK: encoding: [0x62,0xa2,0xfd,0x21,0xa1,0x9c,0xb9,0x00,0x04,0x00,0x00] + vpscatterqq %ymm19, 1024(%rcx,%ymm31,4) {%k1}