X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrAVX512.td;h=00b00aa75a117311930ea13d6610c6947b0d390d;hp=b205de058a6b9367e309442ed57d2d85f85c0b18;hb=33a95f24bbb55a53eb13a4a589c7d50f28b322c0;hpb=a0cb2c75b018c40e722667e28b1507fb0fe95810 diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index b205de058a6..00b00aa75a1 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -79,6 +79,11 @@ class X86VectorVTInfo("v" # !srl(Size, 5) # "i32"); + dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV))); } def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">; @@ -115,26 +120,33 @@ def avx512vl_i32_info : AVX512VLVectorVTInfo; - -// Common base class of AVX512_masking and AVX512_masking_3src. -multiclass AVX512_masking_common O, Format F, X86VectorVTInfo _, - dag Outs, - dag Ins, dag MaskingIns, dag ZeroMaskingIns, - string OpcodeStr, - string AttSrcAsm, string IntelSrcAsm, - dag RHS, dag MaskingRHS, - string MaskingConstraint = ""> { - def NAME: AVX512 O, Format F, + dag Outs, + dag Ins, dag MaskingIns, dag ZeroMaskingIns, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + list Pattern, + list MaskingPattern, + list ZeroMaskingPattern, + string MaskingConstraint = "", + InstrItinClass itin = NoItinerary, + bit IsCommutable = 0> { + let isCommutable = IsCommutable in + def NAME: AVX512; + Pattern, itin>; // Prefer over VMOV*rrk Pat<> let AddedComplexity = 20 in def NAME#k: AVX512, + MaskingPattern, itin>, EVEX_K { // In case of the 3src subclass this is overridden with a let. string Constraints = MaskingConstraint; @@ -143,41 +155,71 @@ multiclass AVX512_masking_common O, Format F, X86VectorVTInfo _, def NAME#kz: AVX512, + ZeroMaskingPattern, + itin>, EVEX_KZ; } + +// Common base class of AVX512_maskable and AVX512_maskable_3src. +multiclass AVX512_maskable_common O, Format F, X86VectorVTInfo _, + dag Outs, + dag Ins, dag MaskingIns, dag ZeroMaskingIns, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS, dag MaskingRHS, + string MaskingConstraint = "", + InstrItinClass itin = NoItinerary, + bit IsCommutable = 0> : + AVX512_maskable_custom; + // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the instruction. In the masking case, the // perserved vector elements come from a new dummy input operand tied to $dst. -multiclass AVX512_masking O, Format F, X86VectorVTInfo _, - dag Outs, dag Ins, string OpcodeStr, - string AttSrcAsm, string IntelSrcAsm, - dag RHS> : - AVX512_masking_common; - -// Similar to AVX512_masking but in this case one of the source operands +multiclass AVX512_maskable O, Format F, X86VectorVTInfo _, + dag Outs, dag Ins, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS, InstrItinClass itin = NoItinerary, + bit IsCommutable = 0> : + AVX512_maskable_common; + +// Similar to AVX512_maskable but in this case one of the source operands // ($src1) is already tied to $dst so we just use that for the preserved // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude // $src1. -multiclass AVX512_masking_3src O, Format F, X86VectorVTInfo _, - dag Outs, dag NonTiedIns, string OpcodeStr, - string AttSrcAsm, string IntelSrcAsm, - dag RHS> : - AVX512_masking_common; +multiclass AVX512_maskable_3src O, Format F, X86VectorVTInfo _, + dag Outs, dag NonTiedIns, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS> : + AVX512_maskable_common; + + +multiclass AVX512_maskable_in_asm O, Format F, X86VectorVTInfo _, + dag Outs, dag Ins, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + list Pattern> : + AVX512_maskable_custom; // Bitcasts between 512-bit vector types. Return the original type since // no instruction is needed for the conversion @@ -375,13 +417,13 @@ multiclass vextract_for_size { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { - def rr : AVX512AIi8, - EVEX, EVEX_V512; + defm rr : AVX512_maskable_in_asm, + AVX512AIi8Base, EVEX, EVEX_V512; let mayStore = 1 in def rm : AVX512AIi8; + + // Intrinsic call with masking. + def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # + "x4_512") + VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask), + (!cast(NAME # To.EltSize # "x4rrk") To.RC:$src0, + (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)), + VR512:$src1, imm:$idx)>; + + // Intrinsic call with zero-masking. + def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # + "x4_512") + VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask), + (!cast(NAME # To.EltSize # "x4rrkz") + (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)), + VR512:$src1, imm:$idx)>; + + // Intrinsic call without masking. + def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # + "x4_512") + VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)), + (!cast(NAME # To.EltSize # "x4rr") + VR512:$src1, imm:$idx)>; } multiclass vextract_for_type opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, RegisterClass KRC, - RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop, PatFrag scalar_mfrag, - X86MemOperand x86scalar_mop, string BrdcstStr, - OpndItins itins, bit IsCommutable = 0> { - let isCommutable = IsCommutable in - def rr : AVX512BI, EVEX_4V; - let AddedComplexity = 30 in { - let Constraints = "$src0 = $dst" in - def rrk : AVX512BI, EVEX_4V, EVEX_K; - def rrkz : AVX512BI, EVEX_4V, EVEX_KZ; + X86VectorVTInfo _, OpndItins itins, + bit IsCommutable = 0> { + defm rr : AVX512_maskable, + AVX512BIBase, EVEX_4V; + + let mayLoad = 1 in + defm rm : AVX512_maskable, + AVX512BIBase, EVEX_4V; +} + +multiclass avx512_binop_rmb opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, OpndItins itins, + bit IsCommutable = 0> : + avx512_binop_rm { + let mayLoad = 1 in + defm rmb : AVX512_maskable, + AVX512BIBase, EVEX_4V, EVEX_B; +} + +multiclass avx512_binop_rm_vl opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, OpndItins itins, + Predicate prd, bit IsCommutable = 0> { + let Predicates = [prd] in + defm Z : avx512_binop_rm, EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_binop_rm, EVEX_V256; + defm Z128 : avx512_binop_rm, EVEX_V128; } +} - let mayLoad = 1 in { - def rm : AVX512BI, EVEX_4V; - let AddedComplexity = 30 in { - let Constraints = "$src0 = $dst" in - def rmk : AVX512BI, EVEX_4V, EVEX_K; - def rmkz : AVX512BI, EVEX_4V, EVEX_KZ; - } - def rmb : AVX512BI, EVEX_4V, EVEX_B; - let AddedComplexity = 30 in { - let Constraints = "$src0 = $dst" in - def rmbk : AVX512BI, EVEX_4V, EVEX_B, EVEX_K; - def rmbkz : AVX512BI, EVEX_4V, EVEX_B, EVEX_KZ; - } +multiclass avx512_binop_rmb_vl opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, OpndItins itins, + Predicate prd, bit IsCommutable = 0> { + let Predicates = [prd] in + defm Z : avx512_binop_rmb, EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_binop_rmb, EVEX_V256; + defm Z128 : avx512_binop_rmb, EVEX_V128; } } +multiclass avx512_binop_rm_vl_q opc, string OpcodeStr, SDNode OpNode, + OpndItins itins, Predicate prd, + bit IsCommutable = 0> { + defm NAME : avx512_binop_rmb_vl, + VEX_W, EVEX_CD8<64, CD8VF>; +} + +multiclass avx512_binop_rm_vl_d opc, string OpcodeStr, SDNode OpNode, + OpndItins itins, Predicate prd, + bit IsCommutable = 0> { + defm NAME : avx512_binop_rmb_vl, EVEX_CD8<32, CD8VF>; +} + +multiclass avx512_binop_rm_vl_w opc, string OpcodeStr, SDNode OpNode, + OpndItins itins, Predicate prd, + bit IsCommutable = 0> { + defm NAME : avx512_binop_rm_vl, EVEX_CD8<16, CD8VF>; +} + +multiclass avx512_binop_rm_vl_b opc, string OpcodeStr, SDNode OpNode, + OpndItins itins, Predicate prd, + bit IsCommutable = 0> { + defm NAME : avx512_binop_rm_vl, EVEX_CD8<8, CD8VF>; +} + +multiclass avx512_binop_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, + SDNode OpNode, OpndItins itins, Predicate prd, + bit IsCommutable = 0> { + defm Q : avx512_binop_rm_vl_q; + + defm D : avx512_binop_rm_vl_d; +} + +multiclass avx512_binop_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, + SDNode OpNode, OpndItins itins, Predicate prd, + bit IsCommutable = 0> { + defm W : avx512_binop_rm_vl_w; + + defm B : avx512_binop_rm_vl_b; +} + +multiclass avx512_binop_rm_vl_all opc_b, bits<8> opc_w, + bits<8> opc_d, bits<8> opc_q, + string OpcodeStr, SDNode OpNode, + OpndItins itins, bit IsCommutable = 0> { + defm NAME : avx512_binop_rm_vl_dq, + avx512_binop_rm_vl_bw; +} + multiclass avx512_binop_rm2 opc, string OpcodeStr, ValueType DstVT, ValueType SrcVT, RegisterClass KRC, RegisterClass RC, PatFrag memop_frag, X86MemOperand x86memop, @@ -2589,25 +2689,14 @@ multiclass avx512_binop_rm2 opc, string OpcodeStr, ValueType DstVT, } } -defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", - SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>; - -defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", - SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; - -defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", - SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; - -defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W; - -defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, + SSE_INTALU_ITINS_P, 1>; +defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub, + SSE_INTALU_ITINS_P, 0>; +defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; +defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, + SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512, memopv8i64, i512mem, loadi64, i64mem, "{1to8}", @@ -2628,41 +2717,33 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1), (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), (VPMULDQZrr VR512:$src1, VR512:$src2)>; -defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", - SSE_INTALU_ITINS_P, 1>, - T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTALU_ITINS_P, 0>, - T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", - SSE_INTALU_ITINS_P, 1>, - T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTALU_ITINS_P, 0>, - T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", - SSE_INTALU_ITINS_P, 1>, - T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTALU_ITINS_P, 0>, - T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", - SSE_INTALU_ITINS_P, 1>, - T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTALU_ITINS_P, 0>, - T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax, + SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; +defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", X86smax, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + +defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", X86umax, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", X86umax, + SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; +defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", X86umax, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + +defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", X86smin, + SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; +defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", X86smin, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", X86smin, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + +defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", X86umin, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin, + SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; +defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1), (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), @@ -2793,30 +2874,14 @@ def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// -defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512, memopv16i32, - i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512, memopv8i64, - i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512, memopv16i32, - i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512, memopv8i64, - i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512, memopv16i32, - i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512, memopv8i64, - i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", - SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_BIT_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and, + SSE_INTALU_ITINS_P, HasAVX512, 1>; +defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or, + SSE_INTALU_ITINS_P, HasAVX512, 1>; +defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, + SSE_INTALU_ITINS_P, HasAVX512, 1>; +defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, + SSE_INTALU_ITINS_P, HasAVX512, 1>; //===----------------------------------------------------------------------===// // AVX-512 FP arithmetic @@ -3236,7 +3301,7 @@ let Predicates = [HasAVX512] in { let Constraints = "$src1 = $dst" in { multiclass avx512_fma3p_rm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { - defm r: AVX512_masking_3src, @@ -4782,7 +4847,7 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1, (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>; multiclass avx512_valign { - defm rri : AVX512_masking<0x03, MRMSrcReg, _, (outs _.RC:$dst), + defm rri : AVX512_maskable<0x03, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, i8imm:$src3), "valign"##_.Suffix, "$src3, $src2, $src1", "$src1, $src2, $src3",