X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrAVX512.td;h=2e8c2c59bebe4bfed4473e64f9603c1cdc5324f0;hb=546178bfe574fa0dde9a76521c81718ae9d7eae3;hp=85bdfa7d7afdc75a3bb5a93e163cf9d3812afc6b;hpb=4a25fbea03a5ff0376820f3c075d92cf31ff5c86;p=oota-llvm.git diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 85bdfa7d7af..2e8c2c59beb 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -180,21 +180,20 @@ multiclass AVX512_maskable_custom O, Format F, list Pattern, list MaskingPattern, list ZeroMaskingPattern, - string Round = "", string MaskingConstraint = "", InstrItinClass itin = NoItinerary, bit IsCommutable = 0> { let isCommutable = IsCommutable in def NAME: AVX512; // Prefer over VMOV*rrk Pat<> let AddedComplexity = 20 in def NAME#k: AVX512, EVEX_K { // In case of the 3src subclass this is overridden with a let. @@ -202,8 +201,8 @@ multiclass AVX512_maskable_custom O, Format F, } let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<> def NAME#kz: AVX512, EVEX_KZ; @@ -217,7 +216,7 @@ multiclass AVX512_maskable_common O, Format F, X86VectorVTInfo _, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskingRHS, - SDNode Select = vselect, string Round = "", + SDNode Select = vselect, string MaskingConstraint = "", InstrItinClass itin = NoItinerary, bit IsCommutable = 0> : @@ -227,7 +226,7 @@ multiclass AVX512_maskable_common O, Format F, X86VectorVTInfo _, [(set _.RC:$dst, MaskingRHS)], [(set _.RC:$dst, (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))], - Round, MaskingConstraint, NoItinerary, IsCommutable>; + MaskingConstraint, NoItinerary, IsCommutable>; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the vector instruction. In the masking case, the @@ -235,7 +234,7 @@ multiclass AVX512_maskable_common O, Format F, X86VectorVTInfo _, multiclass AVX512_maskable O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, string Round = "", + dag RHS, InstrItinClass itin = NoItinerary, bit IsCommutable = 0> : AVX512_maskable_common O, Format F, X86VectorVTInfo _, !con((ins _.KRCWM:$mask), Ins), OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, (vselect _.KRCWM:$mask, RHS, _.RC:$src0), vselect, - Round, "$src0 = $dst", itin, IsCommutable>; + "$src0 = $dst", itin, IsCommutable>; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the scalar instruction. multiclass AVX512_maskable_scalar O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, string Round = "", + dag RHS, InstrItinClass itin = NoItinerary, bit IsCommutable = 0> : AVX512_maskable_common O, Format F, X86VectorVTInfo _, !con((ins _.KRCWM:$mask), Ins), OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, (X86select _.KRCWM:$mask, RHS, _.RC:$src0), X86select, - Round, "$src0 = $dst", itin, IsCommutable>; + "$src0 = $dst", itin, IsCommutable>; // Similar to AVX512_maskable but in this case one of the source operands // ($src1) is already tied to $dst so we just use that for the preserved @@ -284,7 +283,7 @@ multiclass AVX512_maskable_in_asm O, Format F, X86VectorVTInfo _, AVX512_maskable_custom; @@ -305,8 +304,8 @@ multiclass AVX512_maskable_custom_cmp O, Format F, Pattern, itin>; def NAME#k: AVX512, EVEX_K; } @@ -335,6 +334,14 @@ multiclass AVX512_maskable_cmp O, Format F, X86VectorVTInfo _, (and _.KRCWM:$mask, RHS), Round, itin>; +multiclass AVX512_maskable_cmp_alt O, Format F, X86VectorVTInfo _, + dag Outs, dag Ins, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm> : + AVX512_maskable_custom_cmp; + // Bitcasts between 512-bit vector types. Return the original type since // no instruction is needed for the conversion let Predicates = [HasAVX512] in { @@ -802,12 +809,8 @@ def : Pat <(v8i64 (X86vzext VK8WM:$mask)), def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))), (VPBROADCASTDrZr GR32:$src)>; -def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))), - (VPBROADCASTDrZrkz VK16WM:$mask, GR32:$src)>; def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))), (VPBROADCASTQrZr GR64:$src)>; -def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))), - (VPBROADCASTQrZrkz VK8WM:$mask, GR64:$src)>; def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))), (VPBROADCASTDrZr GR32:$src)>; @@ -829,24 +832,33 @@ multiclass avx512_int_broadcast_rm opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set DstRC:$dst, (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX; - def krr : AVX5128I, EVEX, EVEX_K; + def rrkz : AVX5128I, - EVEX, EVEX_KZ; + []>, EVEX, EVEX_KZ; let mayLoad = 1 in { def rm : AVX5128I, EVEX; - def krm : AVX5128I, EVEX, EVEX_K; + def rmkz : AVX5128I, EVEX, EVEX_KZ; + [(set DstRC:$dst, (OpVT (vselect KRC:$mask, + (X86VBroadcast (ld_frag addr:$src)), + (OpVT (bitconvert (v16i32 immAllZerosV))))))]>, EVEX, EVEX_KZ; } } @@ -857,28 +869,71 @@ defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem, loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -multiclass avx512_int_subvec_broadcast_rm opc, string OpcodeStr, - X86MemOperand x86memop, PatFrag ld_frag, - RegisterClass KRC> { +multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, + X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { let mayLoad = 1 in { - def rm : AVX5128I, EVEX; - def krm : AVX5128I, EVEX; + def rmk : AVX5128I, EVEX, EVEX_K; + def rmkz : AVX5128I, EVEX, EVEX_KZ; } } -defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", - i128mem, loadv2i64, VK16WM>, +defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", + v16i32_info, v4i32x_info>, EVEX_V512, EVEX_CD8<32, CD8VT4>; -defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", - i256mem, loadv4i64, VK16WM>, VEX_W, +defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", + v16f32_info, v4f32x_info>, + EVEX_V512, EVEX_CD8<32, CD8VT4>; +defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", + v8i64_info, v4i64x_info>, VEX_W, + EVEX_V512, EVEX_CD8<64, CD8VT4>; +defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4", + v8f64_info, v4f64x_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; +let Predicates = [HasVLX] in { +defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", + v8i32x_info, v4i32x_info>, + EVEX_V256, EVEX_CD8<32, CD8VT4>; +defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", + v8f32x_info, v4f32x_info>, + EVEX_V256, EVEX_CD8<32, CD8VT4>; +} +let Predicates = [HasVLX, HasDQI] in { +defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2", + v4i64x_info, v2i64x_info>, VEX_W, + EVEX_V256, EVEX_CD8<64, CD8VT2>; +defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2", + v4f64x_info, v2f64x_info>, VEX_W, + EVEX_V256, EVEX_CD8<64, CD8VT2>; +} +let Predicates = [HasDQI] in { +defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2", + v8i64_info, v2i64x_info>, VEX_W, + EVEX_V512, EVEX_CD8<64, CD8VT2>; +defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti32x8", + v16i32_info, v8i32x_info>, + EVEX_V512, EVEX_CD8<32, CD8VT8>; +defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2", + v8f64_info, v2f64x_info>, VEX_W, + EVEX_V512, EVEX_CD8<64, CD8VT2>; +defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8", + v16f32_info, v8f32x_info>, + EVEX_V512, EVEX_CD8<32, CD8VT8>; +} + def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))), (VPBROADCASTDZrr VR128X:$src)>; def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))), @@ -886,13 +941,23 @@ def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))), def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; +def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))), + (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>; + def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))), (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>; +def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))), + (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>; def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))), (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>; +def : Pat<(v16i32 (X86VBroadcast (v8i32 VR256X:$src))), + (VPBROADCASTDZrr (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm))>; + def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))), (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>; +def : Pat<(v8i64 (X86VBroadcast (v4i64 VR256X:$src))), + (VPBROADCASTQZrr (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm))>; def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))), (VBROADCASTSSZr VR128X:$src)>; @@ -907,12 +972,6 @@ def : Pat<(v8f64 (X86VBroadcast FR64X:$src)), (VBROADCASTSDZr (COPY_TO_REGCLASS FR64X:$src, VR128X))>; -let Predicates = [HasAVX512] in { -def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))), - (EXTRACT_SUBREG - (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), - addr:$src)), sub_ymm)>; -} //===----------------------------------------------------------------------===// // AVX-512 BROADCAST MASK TO VECTOR REGISTER //--- @@ -988,12 +1047,6 @@ multiclass avx512_permil OpcImm, bits<8> OpcVar, X86VectorVTInfo _, EVEX_4V; } } - -defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", X86VPermi, v8i64_info>, - EVEX_V512, VEX_W; -defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", X86VPermi, v8f64_info>, - EVEX_V512, VEX_W; - defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>, EVEX_V512; defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>, @@ -1004,150 +1057,88 @@ def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), (VPERMILPDZri VR512:$src1, imm:$imm)>; -// -- VPERM - register form -- -multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, - PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> { - - def rr : AVX5128I, EVEX_4V; - - def rm : AVX5128I, - EVEX_4V; -} - -defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, loadv16i32, i512mem, - v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, loadv8i64, i512mem, - v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -let ExeDomain = SSEPackedSingle in -defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, loadv16f32, f512mem, - v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -let ExeDomain = SSEPackedDouble in -defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, loadv8f64, f512mem, - v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - // -- VPERM2I - 3 source operands form -- -multiclass avx512_perm_3src opc, string OpcodeStr, RegisterClass RC, - PatFrag mem_frag, X86MemOperand x86memop, - SDNode OpNode, ValueType OpVT, RegisterClass KRC> { +multiclass avx512_perm_3src opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _> { let Constraints = "$src1 = $dst" in { - def rr : AVX5128I, - EVEX_4V; - - def rrk : AVX5128I, - EVEX_4V, EVEX_K; - - let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<> - def rrkz : AVX5128I, - EVEX_4V, EVEX_KZ; + defm rr: AVX512_maskable_3src, EVEX_4V, + AVX5128IBase; - def rm : AVX5128I, EVEX_4V; + let mayLoad = 1 in + defm rm: AVX512_maskable_3src, + EVEX_4V, AVX5128IBase; + } +} +multiclass avx512_perm_3src_mb opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _> { + let mayLoad = 1, Constraints = "$src1 = $dst" in + defm rmb: AVX512_maskable_3src, + AVX5128IBase, EVEX_4V, EVEX_B; +} - def rmk : AVX5128I, - EVEX_4V, EVEX_K; - - let AddedComplexity = 10 in // Prefer over the rrkz variant - def rmkz : AVX5128I, - EVEX_4V, EVEX_KZ; +multiclass avx512_perm_3src_sizes opc, string OpcodeStr, + SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { + let Predicates = [HasAVX512] in + defm NAME: avx512_perm_3src, + avx512_perm_3src_mb, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#128: avx512_perm_3src, + avx512_perm_3src_mb, + EVEX_V128; + defm NAME#256: avx512_perm_3src, + avx512_perm_3src_mb, + EVEX_V256; + } +} +multiclass avx512_perm_3src_sizes_w opc, string OpcodeStr, + SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { + let Predicates = [HasBWI] in + defm NAME: avx512_perm_3src, + avx512_perm_3src_mb, + EVEX_V512; + let Predicates = [HasBWI, HasVLX] in { + defm NAME#128: avx512_perm_3src, + avx512_perm_3src_mb, + EVEX_V128; + defm NAME#256: avx512_perm_3src, + avx512_perm_3src_mb, + EVEX_V256; } } -defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, loadv16i32, - i512mem, X86VPermiv3, v16i32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, loadv8i64, - i512mem, X86VPermiv3, v8i64, VK8WM>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, loadv16f32, - i512mem, X86VPermiv3, v16f32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, loadv8f64, - i512mem, X86VPermiv3, v8f64, VK8WM>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -multiclass avx512_perm_table_3src opc, string Suffix, RegisterClass RC, - PatFrag mem_frag, X86MemOperand x86memop, - SDNode OpNode, ValueType OpVT, RegisterClass KRC, - ValueType MaskVT, RegisterClass MRC> : - avx512_perm_3src { - def : Pat<(OpVT (!cast("int_x86_avx512_mask_vpermt_"##Suffix##"_512") - VR512:$idx, VR512:$src1, VR512:$src2, -1)), - (!cast(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>; - - def : Pat<(OpVT (!cast("int_x86_avx512_mask_vpermt_"##Suffix##"_512") - VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)), - (!cast(NAME#rrk) VR512:$src1, - (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>; -} - -defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, loadv16i32, i512mem, - X86VPermv3, v16i32, VK16WM, v16i1, GR16>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, loadv8i64, i512mem, - X86VPermv3, v8i64, VK8WM, v8i1, GR8>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, loadv16f32, i512mem, - X86VPermv3, v16f32, VK16WM, v16i1, GR16>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, loadv8f64, i512mem, - X86VPermv3, v8f64, VK8WM, v8i1, GR8>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2D : avx512_perm_3src_sizes<0x76, "vpermi2d", X86VPermiv3, + avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2Q : avx512_perm_3src_sizes<0x76, "vpermi2q", X86VPermiv3, + avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2PS : avx512_perm_3src_sizes<0x77, "vpermi2ps", X86VPermiv3, + avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2PD : avx512_perm_3src_sizes<0x77, "vpermi2pd", X86VPermiv3, + avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VPERMT2D : avx512_perm_3src_sizes<0x7E, "vpermt2d", X86VPermv3, + avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2Q : avx512_perm_3src_sizes<0x7E, "vpermt2q", X86VPermv3, + avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMT2PS : avx512_perm_3src_sizes<0x7F, "vpermt2ps", X86VPermv3, + avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2PD : avx512_perm_3src_sizes<0x7F, "vpermt2pd", X86VPermv3, + avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VPERMT2W : avx512_perm_3src_sizes_w<0x7D, "vpermt2w", X86VPermv3, + avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +defm VPERMI2W : avx512_perm_3src_sizes_w<0x75, "vpermi2w", X86VPermiv3, + avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask @@ -1591,53 +1582,97 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info, defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info, HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; -// avx512_cmp_packed - compare packed instructions -multiclass avx512_cmp_packed { - def rri : AVX512PIi8<0xC2, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc), - !strconcat("vcmp${cc}", suffix, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>; - let hasSideEffects = 0 in - def rrib: AVX512PIi8<0xC2, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc), - !strconcat("vcmp${cc}", suffix, - "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"), - [], d>, EVEX_B; - def rmi : AVX512PIi8<0xC2, MRMSrcMem, - (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc), - !strconcat("vcmp${cc}", suffix, - "\t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"), - [(set KRC:$dst, - (X86cmpm (vt RC:$src1), (load addr:$src2), imm:$cc))], d>; +multiclass avx512_vcmp_common { + + defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "$src2, $src1", "$src1, $src2", + (X86cmpm (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc)>; + let mayLoad = 1 in { + defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "$src2, $src1", "$src1, $src2", + (X86cmpm (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))), + imm:$cc)>; + + defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr, + (X86cmpm (_.VT _.RC:$src1), + (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), + imm:$cc)>,EVEX_B; + } // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { - def rri_alt : AVX512PIi8<0xC2, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), - !strconcat("vcmp", suffix, - "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; - def rrib_alt: AVX512PIi8<0xC2, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), - !strconcat("vcmp", suffix, - "\t{{sae}, $cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc, {sae}}"), - [], d>, EVEX_B; - let mayLoad = 1 in - def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem, - (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), - !strconcat("vcmp", suffix, - "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; + defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc">; + + let mayLoad = 1 in { + defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc">; + + defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, ${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B; + } + } +} + +multiclass avx512_vcmp_sae { + // comparison code form (VCMP[EQ/LT/LE/...] + defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "{sae}, $src2, $src1", "$src1, $src2,{sae}", + (X86cmpmRnd (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc, + (i32 FROUND_NO_EXC))>, EVEX_B; + + let isAsmParserOnly = 1, hasSideEffects = 0 in { + defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc,{sae}, $src2, $src1", + "$src1, $src2,{sae}, $cc">, EVEX_B; + } +} + +multiclass avx512_vcmp { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcmp_common<_.info512>, + avx512_vcmp_sae<_.info512>, EVEX_V512; + + } + let Predicates = [HasAVX512,HasVLX] in { + defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128; + defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256; } } -defm VCMPPSZ : avx512_cmp_packed, PS, EVEX_4V, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VCMPPDZ : avx512_cmp_packed, PD, EVEX_4V, VEX_W, EVEX_V512, - EVEX_CD8<64, CD8VF>; +defm VCMPPD : avx512_vcmp, + AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VCMPPS : avx512_vcmp, + AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), (COPY_TO_REGCLASS (VCMPPSZrri @@ -1655,30 +1690,7 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), imm:$cc), VK8)>; -def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1), - FROUND_NO_EXC)), - (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2, - (I8Imm imm:$cc)), GR16)>; - -def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1), - FROUND_NO_EXC)), - (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2, - (I8Imm imm:$cc)), GR8)>; - -def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1), - FROUND_CURRENT)), - (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2, - (I8Imm imm:$cc)), GR16)>; - -def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1), - FROUND_CURRENT)), - (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2, - (I8Imm imm:$cc)), GR8)>; - +//----------------------------------------------------------------- // Mask register copy, including // - copy between mask registers // - load/store mask registers @@ -1775,7 +1787,9 @@ let Predicates = [HasAVX512] in { def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst), (KMOVWmk addr:$dst, VK16:$src)>; def : Pat<(i1 (load addr:$src)), - (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>; + (COPY_TO_REGCLASS (AND16ri (i16 (SUBREG_TO_REG (i32 0), + (MOV8rm addr:$src), sub_8bit)), + (i16 1)), VK1)>; def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))), (KMOVWkm addr:$src)>; } @@ -1811,6 +1825,8 @@ let Predicates = [HasAVX512] in { def : Pat<(i32 (zext VK1:$src)), (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>; + def : Pat<(i32 (anyext VK1:$src)), + (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>; def : Pat<(i8 (zext VK1:$src)), (EXTRACT_SUBREG (AND32ri (KMOVWrk @@ -1836,17 +1852,18 @@ let Predicates = [HasBWI] in { // With AVX-512 only, 8-bit mask is promoted to 16-bit mask. -let Predicates = [HasAVX512] in { +let Predicates = [HasAVX512, NoDQI] in { // GR from/to 8-bit mask without native support def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), (COPY_TO_REGCLASS - (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)), - VK8)>; + (KMOVWkr (MOVZX32rr8 GR8 :$src)), VK8)>; def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)), sub_8bit)>; +} +let Predicates = [HasAVX512] in { def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))), (COPY_TO_REGCLASS VK16:$src, VK1)>; def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))), @@ -1996,11 +2013,11 @@ defm : avx512_binop_pat; def : Pat<(xor (xor VK16:$src1, VK16:$src2), (v16i1 immAllOnesV)), (KXNORWrr VK16:$src1, VK16:$src2)>; def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)), - (KXNORBrr VK8:$src1, VK8:$src2)>; + (KXNORBrr VK8:$src1, VK8:$src2)>, Requires<[HasDQI]>; def : Pat<(xor (xor VK32:$src1, VK32:$src2), (v32i1 immAllOnesV)), - (KXNORDrr VK32:$src1, VK32:$src2)>; + (KXNORDrr VK32:$src1, VK32:$src2)>, Requires<[HasBWI]>; def : Pat<(xor (xor VK64:$src1, VK64:$src2), (v64i1 immAllOnesV)), - (KXNORQrr VK64:$src1, VK64:$src2)>; + (KXNORQrr VK64:$src1, VK64:$src2)>, Requires<[HasBWI]>; let Predicates = [NoDQI] in def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)), @@ -2128,8 +2145,8 @@ let Predicates = [HasAVX512] in { def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>; def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>; def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>; - def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>; - def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>; + def : Pat<(i1 1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>; + def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>; } def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))), (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>; @@ -2140,6 +2157,12 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))), def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))), (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>; +def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK64:$src, VK32))>; + +def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))), + (v32i1 (COPY_TO_REGCLASS (KSHIFTRQri VK64:$src, (i8 32)), VK32))>; + let Predicates = [HasVLX] in { def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))), (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>; @@ -2934,7 +2957,7 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2)), - "", itins.rr, IsCommutable>, + itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V; let mayLoad = 1 in @@ -2943,7 +2966,7 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))), - "", itins.rm>, + itins.rm>, AVX512BIBase, EVEX_4V; } @@ -2959,7 +2982,7 @@ multiclass avx512_binop_rmb opc, string OpcodeStr, SDNode OpNode, (_.VT (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), - "", itins.rm>, + itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B; } @@ -3061,7 +3084,7 @@ multiclass avx512_binop_rm2 opc, string OpcodeStr, OpndItins itins, (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (_Src.VT _Src.RC:$src2))), - "",itins.rr, IsCommutable>, + itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V; let mayLoad = 1 in { defm rm : AVX512_maskable opc, string OpcodeStr, OpndItins itins, "$src2, $src1", "$src1, $src2", (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert (_Src.LdFrag addr:$src2)))), - "", itins.rm>, + itins.rm>, AVX512BIBase, EVEX_4V; defm rmb : AVX512_maskable opc, string OpcodeStr, OpndItins itins, (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert (_Dst.VT (X86VBroadcast (_Dst.ScalarLdFrag addr:$src2)))))), - "", itins.rm>, + itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B; } } @@ -3089,13 +3112,22 @@ defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, SSE_INTALU_ITINS_P, 1>; defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub, SSE_INTALU_ITINS_P, 0>; +defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs, + SSE_INTALU_ITINS_P, HasBWI, 0>; +defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, + SSE_INTALU_ITINS_P, HasBWI, 0>; defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; - +defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, + SSE_INTALU_ITINS_P, HasBWI, 1>; multiclass avx512_binop_all opc, string OpcodeStr, OpndItins itins, SDNode OpNode, bit IsCommutable = 0> { @@ -3118,6 +3150,76 @@ defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P, X86pmuludq, 1>; +multiclass avx512_packs_rmb opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _Src, X86VectorVTInfo _Dst> { + let mayLoad = 1 in { + defm rmb : AVX512_maskable, + EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>; + } +} + +multiclass avx512_packs_rm opc, string OpcodeStr, + SDNode OpNode,X86VectorVTInfo _Src, + X86VectorVTInfo _Dst> { + defm rr : AVX512_maskable, + EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V; + let mayLoad = 1 in { + defm rm : AVX512_maskable, + EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>; + } +} + +multiclass avx512_packs_all_i32_i16 opc, string OpcodeStr, + SDNode OpNode> { + defm NAME#Z : avx512_packs_rm, + avx512_packs_rmb, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#Z256 : avx512_packs_rm, + avx512_packs_rmb, EVEX_V256; + defm NAME#Z128 : avx512_packs_rm, + avx512_packs_rmb, EVEX_V128; + } +} +multiclass avx512_packs_all_i16_i8 opc, string OpcodeStr, + SDNode OpNode> { + defm NAME#Z : avx512_packs_rm, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#Z256 : avx512_packs_rm, EVEX_V256; + defm NAME#Z128 : avx512_packs_rm, EVEX_V128; + } +} +let Predicates = [HasBWI] in { + defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, PD; + defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, T8PD; + defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase, VEX_W; + defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W; +} + defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax, @@ -3146,30 +3248,6 @@ defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin, defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), - (VPMAXSDZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v16i32 (int_x86_avx512_mask_pmaxu_d_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), - (VPMAXUDZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v8i64 (int_x86_avx512_mask_pmaxs_q_512 (v8i64 VR512:$src1), - (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMAXSQZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v8i64 (int_x86_avx512_mask_pmaxu_q_512 (v8i64 VR512:$src1), - (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMAXUQZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v16i32 (int_x86_avx512_mask_pmins_d_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), - (VPMINSDZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v16i32 (int_x86_avx512_mask_pminu_d_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), - (VPMINUDZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v8i64 (int_x86_avx512_mask_pmins_q_512 (v8i64 VR512:$src1), - (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMINSQZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v8i64 (int_x86_avx512_mask_pminu_q_512 (v8i64 VR512:$src1), - (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMINUQZrr VR512:$src1, VR512:$src2)>; //===----------------------------------------------------------------------===// // AVX-512 - Unpack Instructions //===----------------------------------------------------------------------===// @@ -3232,32 +3310,6 @@ defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, VR512, loadv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// -// AVX-512 - PSHUFD -// - -multiclass avx512_pshuf_imm opc, string OpcodeStr, RegisterClass RC, - SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT> { - def ri : AVX512Ii8, - EVEX; - def mi : AVX512Ii8, EVEX; -} - -defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, loadv16i32, - i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; - -//===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// @@ -3282,7 +3334,7 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, "$src2, $src1", "$src1, $src2", (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 FROUND_CURRENT)), - "", itins.rr, IsCommutable>; + itins.rr, IsCommutable>; defm rm_Int : AVX512_maskable_scalar opc, string OpcodeStr,X86VectorVTInfo _, (VecNode (_.VT _.RC:$src1), (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))), (i32 FROUND_CURRENT)), - "", itins.rm, IsCommutable>; + itins.rm, IsCommutable>; let isCodeGenOnly = 1, isCommutable = IsCommutable, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), @@ -3313,7 +3365,7 @@ multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, "$rc, $src2, $src1", "$src1, $src2, $rc", (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$rc)), "", itins.rr, IsCommutable>, + (i32 imm:$rc)), itins.rr, IsCommutable>, EVEX_B, EVEX_RC; } multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, @@ -3321,9 +3373,9 @@ multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, defm rrb : AVX512_maskable_scalar, EVEX_B; + (i32 FROUND_NO_EXC))>, EVEX_B; } multiclass avx512_binop_s_round opc, string OpcodeStr, SDNode OpNode, @@ -3384,7 +3436,7 @@ multiclass avx512_fp_packed opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_fp_round_packed opc, string OpcodeStr, SDNode OpNodeRnd, - X86VectorVTInfo _, bit IsCommutable> { + X86VectorVTInfo _> { defm rb: AVX512_maskable opc, string OpcodeStr, SDNode OpNodeRn EVEX_4V, EVEX_B, EVEX_RC; } + +multiclass avx512_fp_sae_packed opc, string OpcodeStr, SDNode OpNodeRnd, + X86VectorVTInfo _> { + defm rb: AVX512_maskable, + EVEX_4V, EVEX_B; +} + multiclass avx512_fp_binop_p opc, string OpcodeStr, SDNode OpNode, bit IsCommutable = 0> { defm PSZ : avx512_fp_packed opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_fp_binop_p_round opc, string OpcodeStr, SDNode OpNodeRnd> { - defm PSZ : avx512_fp_round_packed, + defm PSZ : avx512_fp_round_packed, + EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp_round_packed, + EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; +} + +multiclass avx512_fp_binop_p_sae opc, string OpcodeStr, SDNode OpNodeRnd> { + defm PSZ : avx512_fp_sae_packed, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_round_packed, + defm PDZ : avx512_fp_sae_packed, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } @@ -3433,33 +3502,59 @@ defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>, avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>; defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>, avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>; -defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>; -defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>; +defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>, + avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd>; +defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>, + avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd>; let Predicates = [HasDQI] in { defm VAND : avx512_fp_binop_p<0x54, "vand", X86fand, 1>; defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, 0>; defm VOR : avx512_fp_binop_p<0x56, "vor", X86for, 1>; defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>; } -def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)), - (i16 -1), FROUND_CURRENT)), - (VMAXPSZrr VR512:$src1, VR512:$src2)>; - -def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)), - (i8 -1), FROUND_CURRENT)), - (VMAXPDZrr VR512:$src1, VR512:$src2)>; - -def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)), - (i16 -1), FROUND_CURRENT)), - (VMINPSZrr VR512:$src1, VR512:$src2)>; - -def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)), - (i8 -1), FROUND_CURRENT)), - (VMINPDZrr VR512:$src1, VR512:$src2)>; + +multiclass avx512_fp_scalef_p opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr: AVX512_maskable, EVEX_4V; + let mayLoad = 1 in { + defm rm: AVX512_maskable, EVEX_4V; + defm rmb: AVX512_maskable, + EVEX_4V, EVEX_B; + }//let mayLoad = 1 +} + +multiclass avx512_fp_scalef_all opc, string OpcodeStr, SDNode OpNode> { + defm PSZ : avx512_fp_scalef_p, + avx512_fp_round_packed, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp_scalef_p, + avx512_fp_round_packed, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + // Define only if AVX512VL feature is present. + let Predicates = [HasVLX] in { + defm PSZ128 : avx512_fp_scalef_p, + EVEX_V128, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp_scalef_p, + EVEX_V256, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp_scalef_p, + EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp_scalef_p, + EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; + } +} +defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD; + //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions //===----------------------------------------------------------------------===// @@ -3559,14 +3654,14 @@ multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, (ins _.RC:$src1, u8imm:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))), - " ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V; + SSE_INTSHIFT_ITINS_P.rr>; let mayLoad = 1 in defm mi : AVX512_maskable, AVX512BIi8Base, EVEX_4V; + SSE_INTSHIFT_ITINS_P.rm>; } multiclass avx512_shift_rmbi opc, Format ImmFormM, @@ -3576,7 +3671,7 @@ multiclass avx512_shift_rmbi opc, Format ImmFormM, (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2", (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))), - " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V, EVEX_B; + SSE_INTSHIFT_ITINS_P.rm>, EVEX_B; } multiclass avx512_shift_rrm opc, string OpcodeStr, SDNode OpNode, @@ -3586,12 +3681,12 @@ multiclass avx512_shift_rrm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, VR128X:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))), - " ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V; + SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V; defm rm : AVX512_maskable, AVX512BIBase, + SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase, EVEX_4V; } @@ -3666,16 +3761,16 @@ multiclass avx512_shift_rmi_dq opcd, bits<8> opcq, } defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>, - avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>; + avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>, - avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>; + avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>, AVX512BIi8Base, EVEX_4V; -defm VPSRA : avx512_shift_rmi_dq<0x72, 0x73, MRM4r, MRM4m, "vpsra", X86vsrai>, - avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>; +defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>, + avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V; -defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>; -defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>; +defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>, AVX512BIi8Base, EVEX_4V; +defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>; defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>; @@ -3690,13 +3785,14 @@ multiclass avx512_var_shift opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))), - " ", SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V; + SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V; let mayLoad = 1 in defm rm : AVX512_maskable, AVX5128IBase, EVEX_4V, + (_.VT (OpNode _.RC:$src1, + (_.VT (bitconvert (_.LdFrag addr:$src2))))), + SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; } @@ -3709,7 +3805,7 @@ multiclass avx512_var_shift_mb opc, string OpcodeStr, SDNode OpNode, "$src1, ${src2}"##_.BroadcastStr, (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))))), - " ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B, + SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; } multiclass avx512_var_shift_sizes opc, string OpcodeStr, SDNode OpNode, @@ -3757,6 +3853,78 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>, defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>; defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>; +//===-------------------------------------------------------------------===// +// 1-src variable permutation VPERMW/D/Q +//===-------------------------------------------------------------------===// +multiclass avx512_vperm_dq_sizes opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in + defm Z : avx512_var_shift, + avx512_var_shift_mb, EVEX_V512; + + let Predicates = [HasAVX512, HasVLX] in + defm Z256 : avx512_var_shift, + avx512_var_shift_mb, EVEX_V256; +} + +multiclass avx512_vpermi_dq_sizes opc, Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo> { + let Predicates = [HasAVX512] in + defm Z: avx512_shift_rmi, + avx512_shift_rmbi, EVEX_V512; + let Predicates = [HasAVX512, HasVLX] in + defm Z256: avx512_shift_rmi, + avx512_shift_rmbi, EVEX_V256; +} + + +defm VPERM : avx512_var_shift_w<0x8D, "vpermw", X86VPermv>; + +defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, + avx512vl_i32_info>; +defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, + avx512vl_i64_info>, VEX_W; +defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, + avx512vl_f32_info>; +defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, + avx512vl_f64_info>, VEX_W; + +defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", + X86VPermi, avx512vl_i64_info>, + EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; +defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", + X86VPermi, avx512vl_f64_info>, + EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; + +//===----------------------------------------------------------------------===// +// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW +//===----------------------------------------------------------------------===// + +defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", + X86PShufd, avx512vl_i32_info>, + EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; +defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", + X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W; +defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", + X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W; + +multiclass avx512_pshufb_sizes opc, string OpcodeStr, SDNode OpNode> { + let Predicates = [HasBWI] in + defm Z: avx512_var_shift, EVEX_V512; + + let Predicates = [HasVLX, HasBWI] in { + defm Z256: avx512_var_shift, EVEX_V256; + defm Z128: avx512_var_shift, EVEX_V128; + } +} + +defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>; + //===----------------------------------------------------------------------===// // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// @@ -3837,147 +4005,203 @@ let Predicates = [HasAVX512] in { // let Constraints = "$src1 = $dst" in { -// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching. -multiclass avx512_fma3p_rm opc, string OpcodeStr, X86VectorVTInfo _, - SDPatternOperator OpNode = null_frag> { +multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { defm r: AVX512_maskable_3src, AVX512FMA3Base; - let mayLoad = 1 in - defm m: AVX512_maskable_3src, AVX512FMA3Base; - defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B; - } -} // Constraints = "$src1 = $dst" + } +} -let Constraints = "$src1 = $dst" in { -// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching. -multiclass avx512_fma3_round_rrb opc, string OpcodeStr, - X86VectorVTInfo _, - SDPatternOperator OpNode> { - defm rb: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, EVEX_RC; - } +} } // Constraints = "$src1 = $dst" -multiclass avx512_fma3_round_forms opc213, string OpcodeStr, - X86VectorVTInfo VTI, SDPatternOperator OpNode> { - defm v213r : avx512_fma3_round_rrb, EVEX_CD8; +multiclass avx512_fma3p_213_common opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_213_rm, + avx512_fma3_213_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + } + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_213_rm, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_213_rm, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; + } } -multiclass avx512_fma3p_forms opc213, bits<8> opc231, - string OpcodeStr, X86VectorVTInfo VTI, - SDPatternOperator OpNode> { - defm v213r : avx512_fma3p_rm, EVEX_CD8; - defm v231r : avx512_fma3p_rm, EVEX_CD8; +multiclass avx512_fma3p_213_f opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_213_common; + defm PD : avx512_fma3p_213_common, VEX_W; } -multiclass avx512_fma3p opc213, bits<8> opc231, - string OpcodeStr, - SDPatternOperator OpNode, - SDPatternOperator OpNodeRnd> { -let ExeDomain = SSEPackedSingle in { - defm NAME##PSZ : avx512_fma3p_forms, - avx512_fma3_round_forms, EVEX_V512; - defm NAME##PSZ256 : avx512_fma3p_forms, EVEX_V256; - defm NAME##PSZ128 : avx512_fma3p_forms, EVEX_V128; +defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>; +defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>; + + +let Constraints = "$src1 = $dst" in { +multiclass avx512_fma3p_231_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm r: AVX512_maskable_3src, + AVX512FMA3Base; + + let mayLoad = 1 in { + defm m: AVX512_maskable_3src, + AVX512FMA3Base; + + defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B; } -let ExeDomain = SSEPackedDouble in { - defm NAME##PDZ : avx512_fma3p_forms, - avx512_fma3_round_forms, EVEX_V512, VEX_W; - defm NAME##PDZ256 : avx512_fma3p_forms, - EVEX_V256, VEX_W; - defm NAME##PDZ128 : avx512_fma3p_forms, - EVEX_V128, VEX_W; +} + +multiclass avx512_fma3_231_round opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src, + AVX512FMA3Base, EVEX_B, EVEX_RC; +} +} // Constraints = "$src1 = $dst" + +multiclass avx512_fma3p_231_common opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_231_rm, + avx512_fma3_231_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_231_rm, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_231_rm, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; + } +} + +multiclass avx512_fma3p_231_f opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_231_common; + defm PD : avx512_fma3p_231_common, VEX_W; } -defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>; -defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>; -defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, X86FmaddsubRnd>; -defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>; -defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>; -defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>; +defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>; +defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>; let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3p_m132 opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let mayLoad = 1 in - def m: AVX512FMA3; - def mb: AVX512FMA3, EVEX_B; +multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm r: AVX512_maskable_3src, + AVX512FMA3Base; + + let mayLoad = 1 in { + defm m: AVX512_maskable_3src, + AVX512FMA3Base; + + defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B; + } } -} // Constraints = "$src1 = $dst" -multiclass avx512_fma3p_m132_f opc, string OpcodeStr, SDNode OpNode> { +multiclass avx512_fma3_132_round opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src, + AVX512FMA3Base, EVEX_B, EVEX_RC; +} +} // Constraints = "$src1 = $dst" -let ExeDomain = SSEPackedSingle in { - defm NAME##PSZ : avx512_fma3p_m132, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm NAME##PSZ256 : avx512_fma3p_m132, EVEX_V256, - EVEX_CD8<32, CD8VF>; - defm NAME##PSZ128 : avx512_fma3p_m132, EVEX_V128, - EVEX_CD8<32, CD8VF>; +multiclass avx512_fma3p_132_common opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_132_rm, + avx512_fma3_132_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } -let ExeDomain = SSEPackedDouble in { - defm NAME##PDZ : avx512_fma3p_m132, EVEX_V512, - VEX_W, EVEX_CD8<32, CD8VF>; - defm NAME##PDZ256 : avx512_fma3p_m132, EVEX_V256, - VEX_W, EVEX_CD8<32, CD8VF>; - defm NAME##PDZ128 : avx512_fma3p_m132, EVEX_V128, - VEX_W, EVEX_CD8<32, CD8VF>; + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_132_rm, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_132_rm, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } -defm VFMADD132 : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>; -defm VFMSUB132 : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>; -defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>; -defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>; -defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>; -defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>; +multiclass avx512_fma3p_132_f opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_132_common; + defm PD : avx512_fma3p_132_common, VEX_W; +} + +defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>; +defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>; // Scalar FMA let Constraints = "$src1 = $dst" in { @@ -4024,29 +4248,72 @@ defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X, // AVX-512 Scalar convert from sign integer to float/double //===----------------------------------------------------------------------===// -multiclass avx512_vcvtsi opc, RegisterClass SrcRC, RegisterClass DstRC, - X86MemOperand x86memop, string asm> { -let hasSideEffects = 0 in { - def rr : SI opc, SDNode OpNode, RegisterClass SrcRC, + X86VectorVTInfo DstVT, X86MemOperand x86memop, + PatFrag ld_frag, string asm> { + let hasSideEffects = 0 in { + def rr : SI, EVEX_4V; - let mayLoad = 1 in - def rm : SI, EVEX_4V; -} // hasSideEffects = 0 + } // hasSideEffects = 0 + let isCodeGenOnly = 1 in { + def rr_Int : SI, EVEX_4V; + + def rm_Int : SI, EVEX_4V; + }//isCodeGenOnly = 1 +} + +multiclass avx512_vcvtsi_round opc, SDNode OpNode, RegisterClass SrcRC, + X86VectorVTInfo DstVT, string asm> { + def rrb_Int : SI, EVEX_4V, EVEX_B, EVEX_RC; +} + +multiclass avx512_vcvtsi_common opc, SDNode OpNode, RegisterClass SrcRC, + X86VectorVTInfo DstVT, X86MemOperand x86memop, + PatFrag ld_frag, string asm> { + defm NAME : avx512_vcvtsi_round, + avx512_vcvtsi, + VEX_LIG; } let Predicates = [HasAVX512] in { -defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">, - XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">, - XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; -defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">, - XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">, - XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; +defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, + v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">, + XS, EVEX_CD8<32, CD8VT1>; +defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, + v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">, + XS, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, + v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">, + XD, EVEX_CD8<32, CD8VT1>; +defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, + v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">, + XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; @@ -4066,14 +4333,18 @@ def : Pat<(f64 (sint_to_fp GR32:$src)), def : Pat<(f64 (sint_to_fp GR64:$src)), (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; -defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">, - XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">, - XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; -defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">, +defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR32, + v4f32x_info, i32mem, loadi32, + "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; +defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64, + v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">, + XS, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86SuintToFpRnd, GR32, v2f64x_info, + i32mem, loadi32, "cvtusi2sd{l}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">, - XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; +defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64, + v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">, + XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))), (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; @@ -4154,18 +4425,9 @@ let isCodeGenOnly = 1 in { int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; - defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, - int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}", - SSE_CVT_Scalar, 0>, XS, EVEX_4V; - defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}", - SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W; defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}", SSE_CVT_Scalar, 0>, XD, EVEX_4V; - defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}", - SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; } // isCodeGenOnly = 1 // Convert float/double to signed/unsigned int 32/64 with truncation @@ -4667,9 +4929,9 @@ multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, defm rb : AVX512_maskable_scalar, EVEX_B; + (i32 FROUND_NO_EXC))>, EVEX_B; defm m : AVX512_maskable_scalar opc, string OpcodeStr, X86VectorVTInfo _, (ins _.RC:$src), OpcodeStr, "$src", "$src", (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>; - defm rb : AVX512_maskable, EVEX_B; - defm m : AVX512_maskable opc, string OpcodeStr, X86VectorVTInfo _, (i32 FROUND_CURRENT))>; defm mb : AVX512_maskable, EVEX_B; } +multiclass avx512_fp28_p_round opc, string OpcodeStr, X86VectorVTInfo _, + SDNode OpNode> { + defm rb : AVX512_maskable, EVEX_B; +} multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode> { defm PS : avx512_fp28_p, - EVEX_CD8<32, CD8VF>; + avx512_fp28_p_round, + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PD : avx512_fp28_p, - VEX_W, EVEX_CD8<32, CD8VF>; + avx512_fp28_p_round, + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } +multiclass avx512_fp_unaryop_packed opc, string OpcodeStr, + SDNode OpNode> { + // Define only if AVX512VL feature is present. + let Predicates = [HasVLX] in { + defm PSZ128 : avx512_fp28_p, + EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp28_p, + EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp28_p, + EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp28_p, + EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + } +} let Predicates = [HasERI], hasSideEffects = 0 in { - defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD; - defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD; - defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD; + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX; +} +defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>, + avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX; + +multiclass avx512_sqrt_packed_round opc, string OpcodeStr, + SDNode OpNodeRnd, X86VectorVTInfo _>{ + defm rb: AVX512_maskable, + EVEX, EVEX_B, EVEX_RC; } multiclass avx512_sqrt_packed opc, string OpcodeStr, @@ -4838,20 +5128,22 @@ multiclass avx512_sqrt_packed_all opc, string OpcodeStr, } } -defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>; +multiclass avx512_sqrt_packed_all_round opc, string OpcodeStr, + SDNode OpNodeRnd> { + defm PSZ : avx512_sqrt_packed_round, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_sqrt_packed_round, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; +} + +defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>, + avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>; defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, SSE_SQRTSS, SSE_SQRTSD>; let Predicates = [HasAVX512] in { - def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)), - (VSQRTPSZr VR512:$src1)>; - def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)), - (VSQRTPDZr VR512:$src1)>; - def : Pat<(f32 (fsqrt FR32X:$src)), (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -4943,9 +5235,9 @@ avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { defm rb : AVX512_maskable_scalar, EVEX_B; + (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B; let mayLoad = 1 in defm m : AVX512_maskable_scalar; -multiclass avx512_extend opc, string OpcodeStr, RegisterClass KRC, - RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode, - PatFrag mem_frag, X86MemOperand x86memop, - ValueType OpVT, ValueType InVT> { +multiclass avx512_extend_common opc, string OpcodeStr, + X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, + X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{ - def rr : AVX5128I, EVEX; + defm rr : AVX512_maskable, + EVEX; - def rrk : AVX5128I, EVEX, EVEX_K; + let mayLoad = 1 in { + defm rm : AVX512_maskable, + EVEX; + } +} - def rrkz : AVX5128I, EVEX, EVEX_KZ; +multiclass avx512_extend_BW opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + let Predicates = [HasVLX, HasBWI] in { + defm Z128: avx512_extend_common, + EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128; - let mayLoad = 1 in { - def rm : AVX5128I, - EVEX; - - def rmk : AVX5128I, - EVEX, EVEX_K; - - def rmkz : AVX5128I, - EVEX, EVEX_KZ; + defm Z256: avx512_extend_common, + EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256; + } + let Predicates = [HasBWI] in { + defm Z : avx512_extend_common, + EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512; + } +} + +multiclass avx512_extend_BD opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512; + } +} + +multiclass avx512_extend_BQ opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512; + } +} + +multiclass avx512_extend_WD opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512; + } +} + +multiclass avx512_extend_WQ opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512; + } +} + +multiclass avx512_extend_DQ opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi32")> { + + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512; } } -defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext, - loadv2i64, i128mem, v16i32, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VQ>; -defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext, - loadv2i64, i128mem, v8i64, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VO>; -defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext, - loadv4i64, i256mem, v16i32, v16i16>, EVEX_V512, - EVEX_CD8<16, CD8VH>; -defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext, - loadv2i64, i128mem, v8i64, v8i16>, EVEX_V512, - EVEX_CD8<16, CD8VQ>; -defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext, - loadv4i64, i256mem, v8i64, v8i32>, EVEX_V512, - EVEX_CD8<32, CD8VH>; - -defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext, - loadv2i64, i128mem, v16i32, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VQ>; -defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext, - loadv2i64, i128mem, v8i64, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VO>; -defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext, - loadv4i64, i256mem, v16i32, v16i16>, EVEX_V512, - EVEX_CD8<16, CD8VH>; -defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext, - loadv2i64, i128mem, v8i64, v8i16>, EVEX_V512, - EVEX_CD8<16, CD8VQ>; -defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext, - loadv4i64, i256mem, v8i64, v8i32>, EVEX_V512, - EVEX_CD8<32, CD8VH>; +defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">; +defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">; +defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">; +defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">; +defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">; +defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">; + + +defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">; +defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">; +defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">; +defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">; +defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">; +defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">; //===----------------------------------------------------------------------===// // GATHER - SCATTER Operations multiclass avx512_gather opc, string OpcodeStr, X86VectorVTInfo _, X86MemOperand memop, PatFrag GatherNode> { - let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in + let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb", + ExeDomain = _.ExeDomain in def rm : AVX5128I opc, string OpcodeStr, X86VectorVTInfo _, EVEX_CD8<_.EltSize, CD8VT1>; } -let ExeDomain = SSEPackedDouble in { -defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", v8f64_info, vy64xmem, - mgatherv8i32>, EVEX_V512, VEX_W; -defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", v8f64_info, vz64mem, - mgatherv8i64>, EVEX_V512, VEX_W; +multiclass avx512_gather_q_pd dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_gather, EVEX_V512, VEX_W; + defm NAME##Q##SUFF##Z: avx512_gather, EVEX_V512, VEX_W; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_gather, EVEX_V256, VEX_W; + defm NAME##Q##SUFF##Z256: avx512_gather, EVEX_V256, VEX_W; + defm NAME##D##SUFF##Z128: avx512_gather, EVEX_V128, VEX_W; + defm NAME##Q##SUFF##Z128: avx512_gather, EVEX_V128, VEX_W; +} +} + +multiclass avx512_gather_d_ps dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_gather, EVEX_V512; + defm NAME##Q##SUFF##Z: avx512_gather, EVEX_V512; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_gather, EVEX_V256; + defm NAME##Q##SUFF##Z256: avx512_gather, EVEX_V256; + defm NAME##D##SUFF##Z128: avx512_gather, EVEX_V128; + defm NAME##Q##SUFF##Z128: avx512_gather, EVEX_V128; } - -let ExeDomain = SSEPackedSingle in { -defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", v16f32_info, vz32mem, - mgatherv16i32>, EVEX_V512; -defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", v8f32x_info, vz64mem, - mgatherv8i64>, EVEX_V512; } -defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", v8i64_info, vy64xmem, - mgatherv8i32>, EVEX_V512, VEX_W; -defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", v16i32_info, vz32mem, - mgatherv16i32>, EVEX_V512; -defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", v8i64_info, vz64mem, - mgatherv8i64>, EVEX_V512, VEX_W; -defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", v8i32x_info, vz64mem, - mgatherv8i64>, EVEX_V512; +defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">, + avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">; + +defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">, + avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">; multiclass avx512_scatter opc, string OpcodeStr, X86VectorVTInfo _, X86MemOperand memop, PatFrag ScatterNode> { -let mayStore = 1, Constraints = "$mask = $mask_wb" in +let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in def mr : AVX5128I, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; } -let ExeDomain = SSEPackedDouble in { -defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", v8f64_info, vy64xmem, - mscatterv8i32>, EVEX_V512, VEX_W; -defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", v8f64_info, vz64mem, - mscatterv8i64>, EVEX_V512, VEX_W; +multiclass avx512_scatter_q_pd dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_scatter, EVEX_V512, VEX_W; + defm NAME##Q##SUFF##Z: avx512_scatter, EVEX_V512, VEX_W; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_scatter, EVEX_V256, VEX_W; + defm NAME##Q##SUFF##Z256: avx512_scatter, EVEX_V256, VEX_W; + defm NAME##D##SUFF##Z128: avx512_scatter, EVEX_V128, VEX_W; + defm NAME##Q##SUFF##Z128: avx512_scatter, EVEX_V128, VEX_W; +} +} + +multiclass avx512_scatter_d_ps dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_scatter, EVEX_V512; + defm NAME##Q##SUFF##Z: avx512_scatter, EVEX_V512; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_scatter, EVEX_V256; + defm NAME##Q##SUFF##Z256: avx512_scatter, EVEX_V256; + defm NAME##D##SUFF##Z128: avx512_scatter, EVEX_V128; + defm NAME##Q##SUFF##Z128: avx512_scatter, EVEX_V128; } - -let ExeDomain = SSEPackedSingle in { -defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", v16f32_info, vz32mem, - mscatterv16i32>, EVEX_V512; -defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", v8f32x_info, vz64mem, - mscatterv8i64>, EVEX_V512; } -defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", v8i64_info, vy64xmem, - mscatterv8i32>, EVEX_V512, VEX_W; -defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", v16i32_info, vz32mem, - mscatterv16i32>, EVEX_V512; +defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">, + avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">; -defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", v8i64_info, vz64mem, - mscatterv8i64>, EVEX_V512, VEX_W; -defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", v8i32x_info, vz64mem, - mscatterv8i64>, EVEX_V512; +defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">, + avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">; // prefetch multiclass avx512_gather_scatter_prefetch opc, Format F, string OpcodeStr, @@ -5347,105 +5744,10 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1, (loadv8i64 addr:$src2), (i8 imm:$imm))), (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>; -multiclass avx512_valign { - defm rri : AVX512_maskable<0x03, MRMSrcReg, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), - "valign"##_.Suffix, - "$src3, $src2, $src1", "$src1, $src2, $src3", - (_.VT (X86VAlign _.RC:$src2, _.RC:$src1, - (i8 imm:$src3)))>, - AVX512AIi8Base, EVEX_4V; - - // Also match valign of packed floats. - def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))), - (!cast(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>; - - let mayLoad = 1 in - def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst), - (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), - !strconcat("valign"##_.Suffix, - "\t{$src3, $src2, $src1, $dst|" - "$dst, $src1, $src2, $src3}"), - []>, EVEX_4V; -} -defm VALIGND : avx512_valign, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VALIGNQ : avx512_valign, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; - // Helper fragments to match sext vXi1 to vXiY. def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>; def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>; -multiclass avx512_vpabs opc, string OpcodeStr, ValueType OpVT, - RegisterClass KRC, RegisterClass RC, - X86MemOperand x86memop, X86MemOperand x86scalar_mop, - string BrdcstStr> { - def rr : AVX5128I, EVEX; - def rrk : AVX5128I, EVEX, EVEX_K; - def rrkz : AVX5128I, EVEX, EVEX_KZ; - let mayLoad = 1 in { - def rm : AVX5128I, EVEX; - def rmk : AVX5128I, EVEX, EVEX_K; - def rmkz : AVX5128I, EVEX, EVEX_KZ; - def rmb : AVX5128I, EVEX, EVEX_B; - def rmbk : AVX5128I, EVEX, EVEX_B, EVEX_K; - def rmbkz : AVX5128I, EVEX, EVEX_B, EVEX_KZ; - } -} - -defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512, - i512mem, i32mem, "{1to16}">, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512, - i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - -def : Pat<(xor - (bc_v16i32 (v16i1sextv16i32)), - (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))), - (VPABSDZrr VR512:$src)>; -def : Pat<(xor - (bc_v8i64 (v8i1sextv8i64)), - (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), - (VPABSQZrr VR512:$src)>; - -def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1))), - (VPABSDZrr VR512:$src)>; -def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src), - (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPABSQZrr VR512:$src)>; - multiclass avx512_conflict opc, string OpcodeStr, RegisterClass RC, RegisterClass KRC, X86MemOperand x86memop, @@ -5644,26 +5946,24 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", //===----------------------------------------------------------------------===// // AVX-512 - COMPRESS and EXPAND // + multiclass compress_by_vec_width opc, X86VectorVTInfo _, string OpcodeStr> { - def rrkz : AVX5128I, EVEX_KZ; - - let Constraints = "$src0 = $dst" in - def rrk : AVX5128I, EVEX_K; + defm rr : AVX512_maskable, AVX5128IBase; let mayStore = 1 in { + def mr : AVX5128I, EVEX_CD8<_.EltSize, CD8VT1>; + def mrk : AVX5128I, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; } @@ -5691,38 +5991,16 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info // expand multiclass expand_by_vec_width opc, X86VectorVTInfo _, string OpcodeStr> { - def rrkz : AVX5128I, EVEX_KZ; - - let Constraints = "$src0 = $dst" in - def rrk : AVX5128I, EVEX_K; + defm rr : AVX512_maskable, AVX5128IBase; - let mayLoad = 1, Constraints = "$src0 = $dst" in - def rmk : AVX5128I, - EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; - let mayLoad = 1 in - def rmkz : AVX5128I, - EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>; - + defm rm : AVX512_maskable, + AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>; } multiclass expand_by_elt_width opc, string OpcodeStr, @@ -5743,3 +6021,300 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>, EVEX; defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, EVEX, VEX_W; + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_vec,imm) +// op(reg_vec2,broadcast(eltVt),imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable; + let mayLoad = 1 in { + defm rmi : AVX512_maskable; + defm rmbi : AVX512_maskable, EVEX_B; + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_vec,imm) +// op(reg_vec2,broadcast(eltVt),imm) +multiclass avx512_3Op_imm8 opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable; + let mayLoad = 1 in { + defm rmi : AVX512_maskable; + defm rmbi : AVX512_maskable, EVEX_B; + } +} + +//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_scalar,imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + + defm rri : AVX512_maskable_scalar; + let mayLoad = 1 in { + defm rmi : AVX512_maskable_scalar; + + let isAsmParserOnly = 1 in { + defm rmi_alt :AVX512_maskable_in_asm; + } + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_fp_sae_packed_imm opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _>{ + defm rrib : AVX512_maskable, EVEX_B; +} +//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _> { + defm NAME: avx512_fp_sae_packed_imm; +} + +multiclass avx512_common_fp_sae_packed_imm opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z : avx512_fp_packed_imm, + avx512_fp_sae_packed_imm, + EVEX_V512; + + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_fp_packed_imm, + EVEX_V128; + defm Z256 : avx512_fp_packed_imm, + EVEX_V256; + } +} + +multiclass avx512_common_3Op_imm8 opc, SDNode OpNode>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_3Op_imm8, EVEX_V512; + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_3Op_imm8, EVEX_V128; + defm Z256 : avx512_3Op_imm8, EVEX_V256; + } +} + +multiclass avx512_common_fp_sae_scalar_imm opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z128 : avx512_fp_scalar_imm, + avx512_fp_sae_scalar_imm; + } +} + +defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd", + avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VFIXUPIMMPS : avx512_common_fp_sae_packed_imm<"vfixupimmps", + avx512vl_f32_info, 0x54, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + +defm VFIXUPIMMSD: avx512_common_fp_sae_scalar_imm<"vfixupimmsd", f64x_info, + 0x55, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info, + 0x55, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, + 0x50, X86VRange, HasDQI>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, + 0x50, X86VRange, HasDQI>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + +defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info, + 0x51, X86VRange, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, + 0x51, X86VRange, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + + +multiclass avx512_shuff_packed_128 opc, SDNode OpNode = X86Shuf128>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_3Op_imm8, EVEX_V512; + + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z256 : avx512_3Op_imm8, EVEX_V256; + } +} + +defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; + +multiclass avx512_valign{ + defm NAME: avx512_common_3Op_imm8, + AVX512AIi8Base, EVEX_4V; + let isCodeGenOnly = 1 in { + defm NAME#_FP: avx512_common_3Op_imm8, + AVX512AIi8Base, EVEX_4V; + } +} + +defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>, + EVEX_CD8<32, CD8VF>; +defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>, + EVEX_CD8<64, CD8VF>, VEX_W; + +multiclass avx512_unary_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr : AVX512_maskable, EVEX, AVX5128IBase; + + let mayLoad = 1 in + defm rm : AVX512_maskable, + EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>; +} + +multiclass avx512_unary_rmb opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> : + avx512_unary_rm { + let mayLoad = 1 in + defm rmb : AVX512_maskable, + EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; +} + +multiclass avx512_unary_rm_vl opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_unary_rm, EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_unary_rm, + EVEX_V256; + defm Z128 : avx512_unary_rm, + EVEX_V128; + } +} + +multiclass avx512_unary_rmb_vl opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_unary_rmb, + EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_unary_rmb, + EVEX_V256; + defm Z128 : avx512_unary_rmb, + EVEX_V128; + } +} + +multiclass avx512_unary_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, + SDNode OpNode, Predicate prd> { + defm Q : avx512_unary_rmb_vl, VEX_W; + defm D : avx512_unary_rmb_vl; +} + +multiclass avx512_unary_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, + SDNode OpNode, Predicate prd> { + defm W : avx512_unary_rm_vl; + defm B : avx512_unary_rm_vl; +} + +multiclass avx512_unary_rm_vl_all opc_b, bits<8> opc_w, + bits<8> opc_d, bits<8> opc_q, + string OpcodeStr, SDNode OpNode> { + defm NAME : avx512_unary_rm_vl_dq, + avx512_unary_rm_vl_bw; +} + +defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>; + +def : Pat<(xor + (bc_v16i32 (v16i1sextv16i32)), + (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))), + (VPABSDZrr VR512:$src)>; +def : Pat<(xor + (bc_v8i64 (v8i1sextv8i64)), + (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), + (VPABSQZrr VR512:$src)>;