X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrAVX512.td;h=3dbc3d2abd8fdd62775ee5af007321afb70f48bb;hp=4930db9563dd103e241e91aff758af5c9cfb39f3;hb=4a524934577d85e5095df8ea62ad6a3261076d0c;hpb=9d0ec9212ba8c9bd6662a4591f4c4ba595051f4b diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 4930db9563d..3dbc3d2abd8 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2,9 +2,10 @@ // EltVT). These are things like the register class for the writemask, etc. // The idea is to pass one of these as the template argument rather than the // individual arguments. -class X86VectorVTInfo { RegisterClass RC = rc; + int NumElts = numelts; // Corresponding mask register class. RegisterClass KRC = !cast("VK" # NumElts); @@ -73,6 +74,11 @@ class X86VectorVTInfo("CD8VT" # NumElts), ?); + SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm, !if (!eq (Size, 256), sub_ymm, ?)); @@ -124,17 +130,17 @@ def avx512vl_i64_info : AVX512VLVectorVTInfo O, Format F, - dag Outs, - dag Ins, dag MaskingIns, dag ZeroMaskingIns, - string OpcodeStr, - string AttSrcAsm, string IntelSrcAsm, - list Pattern, - list MaskingPattern, - list ZeroMaskingPattern, - string MaskingConstraint = "", - InstrItinClass itin = NoItinerary, - bit IsCommutable = 0> { +multiclass AVX512_maskable_custom O, Format F, + dag Outs, + dag Ins, dag MaskingIns, dag ZeroMaskingIns, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + list Pattern, + list MaskingPattern, + list ZeroMaskingPattern, + string MaskingConstraint = "", + InstrItinClass itin = NoItinerary, + bit IsCommutable = 0> { let isCommutable = IsCommutable in def NAME: AVX512 O, Format F, } -// Common base class of AVX512_masking and AVX512_masking_3src. -multiclass AVX512_masking_common O, Format F, X86VectorVTInfo _, - dag Outs, - dag Ins, dag MaskingIns, dag ZeroMaskingIns, - string OpcodeStr, - string AttSrcAsm, string IntelSrcAsm, - dag RHS, dag MaskingRHS, - string MaskingConstraint = "", - InstrItinClass itin = NoItinerary, - bit IsCommutable = 0> : - AVX512_masking_custom; +// Common base class of AVX512_maskable and AVX512_maskable_3src. +multiclass AVX512_maskable_common O, Format F, X86VectorVTInfo _, + dag Outs, + dag Ins, dag MaskingIns, dag ZeroMaskingIns, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS, dag MaskingRHS, + string MaskingConstraint = "", + InstrItinClass itin = NoItinerary, + bit IsCommutable = 0> : + AVX512_maskable_custom; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the instruction. In the masking case, the // perserved vector elements come from a new dummy input operand tied to $dst. -multiclass AVX512_masking O, Format F, X86VectorVTInfo _, - dag Outs, dag Ins, string OpcodeStr, - string AttSrcAsm, string IntelSrcAsm, - dag RHS, InstrItinClass itin = NoItinerary, - bit IsCommutable = 0> : - AVX512_masking_common; - -// Similar to AVX512_masking but in this case one of the source operands +multiclass AVX512_maskable O, Format F, X86VectorVTInfo _, + dag Outs, dag Ins, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS, InstrItinClass itin = NoItinerary, + bit IsCommutable = 0> : + AVX512_maskable_common; + +// Similar to AVX512_maskable but in this case one of the source operands // ($src1) is already tied to $dst so we just use that for the preserved // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude // $src1. -multiclass AVX512_masking_3src O, Format F, X86VectorVTInfo _, - dag Outs, dag NonTiedIns, string OpcodeStr, - string AttSrcAsm, string IntelSrcAsm, - dag RHS> : - AVX512_masking_common; +multiclass AVX512_maskable_3src O, Format F, X86VectorVTInfo _, + dag Outs, dag NonTiedIns, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS> : + AVX512_maskable_common; + + +multiclass AVX512_maskable_in_asm O, Format F, X86VectorVTInfo _, + dag Outs, dag Ins, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + list Pattern> : + AVX512_maskable_custom; // Bitcasts between 512-bit vector types. Return the original type since // no instruction is needed for the conversion @@ -329,15 +347,15 @@ def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; // AVX-512 - VECTOR INSERT // -multiclass vinsert_for_size { +multiclass vinsert_for_size_no_alt { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { def rr : AVX512AIi8, EVEX_4V, EVEX_V512, EVEX_CD8; + []>, + EVEX_4V, EVEX_V512, EVEX_CD8; } - - // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for - // vinserti32x4 - def : Pat<(vinsert_insert:$ins - (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)), - (AltTo.VT (!cast(NAME # From.EltSize # "x4rr") - VR512:$src1, From.RC:$src2, - (INSERT_get_vinsert_imm VR512:$ins)))>; } -multiclass vinsert_for_type { - defm NAME # "32x4" : vinsert_for_size : + vinsert_for_size_no_alt { + // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for + // vinserti32x4. Only add this if 64x2 and friends are not supported + // natively via AVX512DQ. + let Predicates = [NoDQI] in + def : Pat<(vinsert_insert:$ins + (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)), + (AltTo.VT (!cast(NAME # From.EltSize # "x4rr") + VR512:$src1, From.RC:$src2, + (INSERT_get_vinsert_imm VR512:$ins)))>; +} + +multiclass vinsert_for_type { + defm NAME # "32x4" : vinsert_for_size, X86VectorVTInfo<16, EltVT32, VR512>, X86VectorVTInfo< 2, EltVT64, VR128X>, X86VectorVTInfo< 8, EltVT64, VR512>, vinsert128_insert, INSERT_get_vinsert128_imm>; - defm NAME # "64x4" : vinsert_for_size, + X86VectorVTInfo< 8, EltVT64, VR512>, + vinsert128_insert, + INSERT_get_vinsert128_imm>, VEX_W; + defm NAME # "64x4" : vinsert_for_size, X86VectorVTInfo< 8, EltVT64, VR512>, X86VectorVTInfo< 8, EltVT32, VR256>, X86VectorVTInfo<16, EltVT32, VR512>, vinsert256_insert, INSERT_get_vinsert256_imm>, VEX_W; + let Predicates = [HasDQI] in + defm NAME # "32x8" : vinsert_for_size_no_alt, + X86VectorVTInfo<16, EltVT32, VR512>, + vinsert256_insert, + INSERT_get_vinsert256_imm>; } defm VINSERTF : vinsert_for_type; @@ -405,13 +447,13 @@ multiclass vextract_for_size { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { - def rr : AVX512AIi8, - EVEX, EVEX_V512; + defm rr : AVX512_maskable_in_asm, + AVX512AIi8Base, EVEX, EVEX_V512; let mayStore = 1 in def rm : AVX512AIi8; + + // Intrinsic call with masking. + def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # + "x4_512") + VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask), + (!cast(NAME # To.EltSize # "x4rrk") To.RC:$src0, + (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)), + VR512:$src1, imm:$idx)>; + + // Intrinsic call with zero-masking. + def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # + "x4_512") + VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask), + (!cast(NAME # To.EltSize # "x4rrkz") + (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)), + VR512:$src1, imm:$idx)>; + + // Intrinsic call without masking. + def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # + "x4_512") + VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)), + (!cast(NAME # To.EltSize # "x4rr") + VR512:$src1, imm:$idx)>; } multiclass vextract_for_type; +def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), + (VBROADCASTSSZrr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; +def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))), + (VBROADCASTSDZrr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>; + +def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))), + (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>; +def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))), + (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>; + def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))), (VBROADCASTSSZrr VR128X:$src)>; def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))), @@ -670,48 +745,91 @@ def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))), //--- multiclass avx512_mask_broadcast opc, string OpcodeStr, - RegisterClass DstRC, RegisterClass KRC, - ValueType OpVT, ValueType SrcVT> { -def rr : AVX512XS8I { +let Predicates = [HasCDI] in +def Zrr : AVX512XS8I, EVEX; + []>, EVEX, EVEX_V512; + +let Predicates = [HasCDI, HasVLX] in { +def Z128rr : AVX512XS8I, EVEX, EVEX_V128; +def Z256rr : AVX512XS8I, EVEX, EVEX_V256; +} } let Predicates = [HasCDI] in { -defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512, - VK16, v16i32, v16i1>, EVEX_V512; -defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512, - VK8, v8i64, v8i1>, EVEX_V512, VEX_W; +defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", + VK16>; +defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", + VK8>, VEX_W; } //===----------------------------------------------------------------------===// // AVX-512 - VPERM // // -- immediate form -- -multiclass avx512_perm_imm opc, string OpcodeStr, RegisterClass RC, - SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT> { - def ri : AVX512AIi8 opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let ExeDomain = _.ExeDomain in { + def ri : AVX512AIi8, + [(set _.RC:$dst, + (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>, EVEX; - def mi : AVX512AIi8, EVEX; + [(set _.RC:$dst, + (_.VT (OpNode (_.MemOpFrag addr:$src1), + (i8 imm:$src2))))]>, + EVEX, EVEX_CD8<_.EltSize, CD8VF>; +} +} + +multiclass avx512_permil OpcImm, bits<8> OpcVar, X86VectorVTInfo _, + X86VectorVTInfo Ctrl> : + avx512_perm_imm { + let ExeDomain = _.ExeDomain in { + def rr : AVX5128I, + EVEX_4V; + def rm : AVX5128I, + EVEX_4V; + } } -defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64, - i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -let ExeDomain = SSEPackedDouble in -defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64, - f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", X86VPermi, v8i64_info>, + EVEX_V512, VEX_W; +defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", X86VPermi, v8f64_info>, + EVEX_V512, VEX_W; + +defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>, + EVEX_V512; +defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>, + EVEX_V512, VEX_W; + +def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), + (VPERMILPSZri VR512:$src1, imm:$imm)>; +def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), + (VPERMILPDZri VR512:$src1, imm:$imm)>; // -- VPERM - register form -- multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, @@ -2475,22 +2593,29 @@ defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore, multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, OpndItins itins, bit IsCommutable = 0> { - defm rr : AVX512_masking, AVX512BIBase, EVEX_4V; - let mayLoad = 1 in { - defm rm : AVX512_masking, AVX512BIBase, EVEX_4V; - defm rmb : AVX512_masking opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, OpndItins itins, + bit IsCommutable = 0> : + avx512_binop_rm { + let mayLoad = 1 in + defm rmb : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, (_.ScalarLdFrag addr:$src2)))), itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B; +} + +multiclass avx512_binop_rm_vl opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, OpndItins itins, + Predicate prd, bit IsCommutable = 0> { + let Predicates = [prd] in + defm Z : avx512_binop_rm, EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_binop_rm, EVEX_V256; + defm Z128 : avx512_binop_rm, EVEX_V128; + } +} + +multiclass avx512_binop_rmb_vl opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, OpndItins itins, + Predicate prd, bit IsCommutable = 0> { + let Predicates = [prd] in + defm Z : avx512_binop_rmb, EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_binop_rmb, EVEX_V256; + defm Z128 : avx512_binop_rmb, EVEX_V128; } } +multiclass avx512_binop_rm_vl_q opc, string OpcodeStr, SDNode OpNode, + OpndItins itins, Predicate prd, + bit IsCommutable = 0> { + defm NAME : avx512_binop_rmb_vl, + VEX_W, EVEX_CD8<64, CD8VF>; +} + +multiclass avx512_binop_rm_vl_d opc, string OpcodeStr, SDNode OpNode, + OpndItins itins, Predicate prd, + bit IsCommutable = 0> { + defm NAME : avx512_binop_rmb_vl, EVEX_CD8<32, CD8VF>; +} + +multiclass avx512_binop_rm_vl_w opc, string OpcodeStr, SDNode OpNode, + OpndItins itins, Predicate prd, + bit IsCommutable = 0> { + defm NAME : avx512_binop_rm_vl, EVEX_CD8<16, CD8VF>; +} + +multiclass avx512_binop_rm_vl_b opc, string OpcodeStr, SDNode OpNode, + OpndItins itins, Predicate prd, + bit IsCommutable = 0> { + defm NAME : avx512_binop_rm_vl, EVEX_CD8<8, CD8VF>; +} + +multiclass avx512_binop_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, + SDNode OpNode, OpndItins itins, Predicate prd, + bit IsCommutable = 0> { + defm Q : avx512_binop_rm_vl_q; + + defm D : avx512_binop_rm_vl_d; +} + +multiclass avx512_binop_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, + SDNode OpNode, OpndItins itins, Predicate prd, + bit IsCommutable = 0> { + defm W : avx512_binop_rm_vl_w; + + defm B : avx512_binop_rm_vl_b; +} + +multiclass avx512_binop_rm_vl_all opc_b, bits<8> opc_w, + bits<8> opc_d, bits<8> opc_q, + string OpcodeStr, SDNode OpNode, + OpndItins itins, bit IsCommutable = 0> { + defm NAME : avx512_binop_rm_vl_dq, + avx512_binop_rm_vl_bw; +} + multiclass avx512_binop_rm2 opc, string OpcodeStr, ValueType DstVT, ValueType SrcVT, RegisterClass KRC, RegisterClass RC, PatFrag memop_frag, X86MemOperand x86memop, @@ -2559,20 +2772,16 @@ multiclass avx512_binop_rm2 opc, string OpcodeStr, ValueType DstVT, } } -defm VPADDDZ : avx512_binop_rm<0xFE, "vpadd", add, v16i32_info, - SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>; - -defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsub", sub, v16i32_info, - SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; - -defm VPMULLDZ : avx512_binop_rm<0x40, "vpmull", mul, v16i32_info, - SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; - -defm VPADDQZ : avx512_binop_rm<0xD4, "vpadd", add, v8i64_info, - SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W; - -defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsub", sub, v8i64_info, - SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, + SSE_INTALU_ITINS_P, 1>; +defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub, + SSE_INTALU_ITINS_P, 0>; +defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; +defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, + SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512, memopv8i64, i512mem, loadi64, i64mem, "{1to8}", @@ -2593,33 +2802,33 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1), (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), (VPMULDQZrr VR512:$src1, VR512:$src2)>; -defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxu", X86umax, v16i32_info, - SSE_INTALU_ITINS_P, 1>, - T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxu", X86umax, v8i64_info, - SSE_INTALU_ITINS_P, 0>, - T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxs", X86smax, v16i32_info, - SSE_INTALU_ITINS_P, 1>, - T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxs", X86smax, v8i64_info, - SSE_INTALU_ITINS_P, 0>, - T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminu", X86umin, v16i32_info, - SSE_INTALU_ITINS_P, 1>, - T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminu", X86umin, v8i64_info, - SSE_INTALU_ITINS_P, 0>, - T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VPMINSDZ : avx512_binop_rm<0x39, "vpmins", X86smin, v16i32_info, - SSE_INTALU_ITINS_P, 1>, - T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPMINSQZ : avx512_binop_rm<0x39, "vpmins", X86smin, v8i64_info, - SSE_INTALU_ITINS_P, 0>, - T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax, + SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; +defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", X86smax, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + +defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", X86umax, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", X86umax, + SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; +defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", X86umax, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + +defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", X86smin, + SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; +defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", X86smin, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", X86smin, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + +defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", X86umin, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin, + SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; +defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1), (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), @@ -2732,40 +2941,18 @@ multiclass avx512_pshuf_imm opc, string OpcodeStr, RegisterClass RC, defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32, i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -let ExeDomain = SSEPackedSingle in -defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilpi, - memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512, - EVEX_CD8<32, CD8VF>; -let ExeDomain = SSEPackedDouble in -defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilpi, - memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512, - VEX_W, EVEX_CD8<32, CD8VF>; - -def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), - (VPERMILPSZri VR512:$src1, imm:$imm)>; -def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), - (VPERMILPDZri VR512:$src1, imm:$imm)>; - //===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// -defm VPANDDZ : avx512_binop_rm<0xDB, "vpand", and, v16i32_info, SSE_BIT_ITINS_P, 1>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPANDQZ : avx512_binop_rm<0xDB, "vpand", and, v8i64_info, SSE_BIT_ITINS_P, 1>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPORDZ : avx512_binop_rm<0xEB, "vpor", or, v16i32_info, SSE_BIT_ITINS_P, 1>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPORQZ : avx512_binop_rm<0xEB, "vpor", or, v8i64_info, SSE_BIT_ITINS_P, 1>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPXORDZ : avx512_binop_rm<0xEF, "vpxor", xor, v16i32_info, SSE_BIT_ITINS_P, 1>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPXORQZ : avx512_binop_rm<0xEF, "vpxor", xor, v8i64_info, SSE_BIT_ITINS_P, 1>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandn", X86andnp, v16i32_info, - SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandn", X86andnp, v8i64_info, - SSE_BIT_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and, + SSE_INTALU_ITINS_P, HasAVX512, 1>; +defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or, + SSE_INTALU_ITINS_P, HasAVX512, 1>; +defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, + SSE_INTALU_ITINS_P, HasAVX512, 1>; +defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, + SSE_INTALU_ITINS_P, HasAVX512, 1>; //===----------------------------------------------------------------------===// // AVX-512 FP arithmetic @@ -3182,10 +3369,12 @@ let Predicates = [HasAVX512] in { //===----------------------------------------------------------------------===// // FMA - Fused Multiply Operations // + let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3p_rm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - defm r: AVX512_masking_3src opc, string OpcodeStr, X86VectorVTInfo _, + SDPatternOperator OpNode = null_frag> { + defm r: AVX512_maskable_3src, @@ -3206,45 +3395,45 @@ multiclass avx512_fma3p_rm opc, string OpcodeStr, SDNode OpNode, } } // Constraints = "$src1 = $dst" +multiclass avx512_fma3p_forms opc213, bits<8> opc231, + string OpcodeStr, X86VectorVTInfo VTI, + SDPatternOperator OpNode> { + defm v213 : avx512_fma3p_rm, + EVEX_V512, EVEX_CD8; + + defm v231 : avx512_fma3p_rm, + EVEX_V512, EVEX_CD8; +} + let ExeDomain = SSEPackedSingle in { - defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", X86Fmadd, - v16f32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", X86Fmsub, - v16f32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", X86Fmaddsub, - v16f32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", X86Fmsubadd, - v16f32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", X86Fnmadd, - v16f32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", X86Fnmsub, - v16f32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFMADDPSZ : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd", + v16f32_info, X86Fmadd>; + defm VFMSUBPSZ : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub", + v16f32_info, X86Fmsub>; + defm VFMADDSUBPSZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub", + v16f32_info, X86Fmaddsub>; + defm VFMSUBADDPSZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd", + v16f32_info, X86Fmsubadd>; + defm VFNMADDPSZ : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd", + v16f32_info, X86Fnmadd>; + defm VFNMSUBPSZ : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub", + v16f32_info, X86Fnmsub>; } let ExeDomain = SSEPackedDouble in { - defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", X86Fmadd, - v8f64_info>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", X86Fmsub, - v8f64_info>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", X86Fmaddsub, - v8f64_info>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", X86Fmsubadd, - v8f64_info>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", X86Fnmadd, - v8f64_info>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", X86Fnmsub, - v8f64_info>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm VFMADDPDZ : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd", + v8f64_info, X86Fmadd>, VEX_W; + defm VFMSUBPDZ : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub", + v8f64_info, X86Fmsub>, VEX_W; + defm VFMADDSUBPDZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub", + v8f64_info, X86Fmaddsub>, VEX_W; + defm VFMSUBADDPDZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd", + v8f64_info, X86Fmsubadd>, VEX_W; + defm VFNMADDPDZ : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd", + v8f64_info, X86Fnmadd>, VEX_W; + defm VFNMSUBPDZ : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub", + v8f64_info, X86Fnmsub>, VEX_W; } let Constraints = "$src1 = $dst" in { @@ -4731,7 +4920,7 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1, (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>; multiclass avx512_valign { - defm rri : AVX512_masking<0x03, MRMSrcReg, _, (outs _.RC:$dst), + defm rri : AVX512_maskable<0x03, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, i8imm:$src3), "valign"##_.Suffix, "$src3, $src2, $src1", "$src1, $src2, $src3",