X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrAVX512.td;h=5d5ab14cf460a56550d1a2262394cb8fc30de683;hp=421ddf9e1a50f916947ec3c19f2a1e120d75e825;hb=319483520644f593afac39b6e05132bfcfb0a70d;hpb=deb2e51099a71092ef765ce0e8be40616a89f7d9 diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 421ddf9e1a5..66316119498 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1,3 +1,18 @@ +//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 AVX512 instruction set, defining the +// instructions, and properties of the instructions which are needed for code +// generation, machine code emission, and analysis. +// +//===----------------------------------------------------------------------===// + // Group template arguments that can be derived from the vector type (EltNum x // EltVT). These are things like the register class for the writemask, etc. // The idea is to pass one of these as the template argument rather than the @@ -59,17 +74,16 @@ class X86VectorVTInfo("load" # EltVT); - // Load patterns used for memory operands. We only have this defined in - // case of i64 element types for sub-512 integer vectors. For now, keep - // MemOpFrag undefined in these cases. - PatFrag MemOpFrag = - !if (!eq (NumElts#EltTypeName, "1f32"), !cast("memopfsf32"), - !if (!eq (NumElts#EltTypeName, "1f64"), !cast("memopfsf64"), - !if (!eq (TypeVariantName, "f"), !cast("memop" # VTName), - !if (!eq (EltTypeName, "i64"), !cast("memop" # VTName), - !if (!eq (VTName, "v16i32"), !cast("memop" # VTName), ?))))); + PatFrag AlignedLdFrag = !cast("alignedload" # + !if (!eq (TypeVariantName, "i"), + !if (!eq (Size, 128), "v2i64", + !if (!eq (Size, 256), "v4i64", + !if (!eq (Size, 512), + !if (!eq (EltSize, 64), "v8i64", "v16i32"), + VTName))), VTName)); + + PatFrag ScalarLdFrag = !cast("load" # EltVT); // The corresponding float type, e.g. v16f32 for v16i32 // Note: For EltSize < 32, FloatVT is illegal and TableGen @@ -96,10 +110,15 @@ class X86VectorVTInfo("v" # !srl(Size, 5) # "i32"); dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV))); + + string ZSuffix = !if (!eq (Size, 128), "Z128", + !if (!eq (Size, 256), "Z256", "Z")); } def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">; @@ -126,6 +145,8 @@ def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">; // We map scalar types to the smallest (128-bit) vector type // with the appropriate element type. This allows to use the same masking logic. +def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">; +def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">; def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">; def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">; @@ -161,21 +182,20 @@ multiclass AVX512_maskable_custom O, Format F, list Pattern, list MaskingPattern, list ZeroMaskingPattern, - string Round = "", string MaskingConstraint = "", InstrItinClass itin = NoItinerary, bit IsCommutable = 0> { let isCommutable = IsCommutable in def NAME: AVX512; // Prefer over VMOV*rrk Pat<> let AddedComplexity = 20 in def NAME#k: AVX512, EVEX_K { // In case of the 3src subclass this is overridden with a let. @@ -183,8 +203,8 @@ multiclass AVX512_maskable_custom O, Format F, } let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<> def NAME#kz: AVX512, EVEX_KZ; @@ -198,7 +218,7 @@ multiclass AVX512_maskable_common O, Format F, X86VectorVTInfo _, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskingRHS, - SDNode Select = vselect, string Round = "", + SDNode Select = vselect, string MaskingConstraint = "", InstrItinClass itin = NoItinerary, bit IsCommutable = 0> : @@ -208,7 +228,7 @@ multiclass AVX512_maskable_common O, Format F, X86VectorVTInfo _, [(set _.RC:$dst, MaskingRHS)], [(set _.RC:$dst, (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))], - Round, MaskingConstraint, NoItinerary, IsCommutable>; + MaskingConstraint, NoItinerary, IsCommutable>; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the vector instruction. In the masking case, the @@ -216,7 +236,7 @@ multiclass AVX512_maskable_common O, Format F, X86VectorVTInfo _, multiclass AVX512_maskable O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, string Round = "", + dag RHS, InstrItinClass itin = NoItinerary, bit IsCommutable = 0> : AVX512_maskable_common O, Format F, X86VectorVTInfo _, !con((ins _.KRCWM:$mask), Ins), OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, (vselect _.KRCWM:$mask, RHS, _.RC:$src0), vselect, - Round, "$src0 = $dst", itin, IsCommutable>; + "$src0 = $dst", itin, IsCommutable>; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the scalar instruction. multiclass AVX512_maskable_scalar O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, string Round = "", + dag RHS, InstrItinClass itin = NoItinerary, bit IsCommutable = 0> : AVX512_maskable_common O, Format F, X86VectorVTInfo _, !con((ins _.KRCWM:$mask), Ins), OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, (X86select _.KRCWM:$mask, RHS, _.RC:$src0), X86select, - Round, "$src0 = $dst", itin, IsCommutable>; + "$src0 = $dst", itin, IsCommutable>; // Similar to AVX512_maskable but in this case one of the source operands // ($src1) is already tied to $dst so we just use that for the preserved @@ -256,6 +276,32 @@ multiclass AVX512_maskable_3src O, Format F, X86VectorVTInfo _, OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>; +// Similar to AVX512_maskable_3rc but in this case the input VT for the tied +// operand differs from the output VT. This requires a bitconvert on +// the preserved vector going into the vselect. +multiclass AVX512_maskable_3src_cast O, Format F, X86VectorVTInfo OutVT, + X86VectorVTInfo InVT, + dag Outs, dag NonTiedIns, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS> : + AVX512_maskable_common; + +multiclass AVX512_maskable_3src_scalar O, Format F, X86VectorVTInfo _, + dag Outs, dag NonTiedIns, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS> : + AVX512_maskable_common; multiclass AVX512_maskable_in_asm O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, @@ -265,9 +311,65 @@ multiclass AVX512_maskable_in_asm O, Format F, X86VectorVTInfo _, AVX512_maskable_custom; + +// Instruction with mask that puts result in mask register, +// like "compare" and "vptest" +multiclass AVX512_maskable_custom_cmp O, Format F, + dag Outs, + dag Ins, dag MaskingIns, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + list Pattern, + list MaskingPattern, + string Round = "", + InstrItinClass itin = NoItinerary> { + def NAME: AVX512; + + def NAME#k: AVX512, EVEX_K; +} + +multiclass AVX512_maskable_common_cmp O, Format F, X86VectorVTInfo _, + dag Outs, + dag Ins, dag MaskingIns, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS, dag MaskingRHS, + string Round = "", + InstrItinClass itin = NoItinerary> : + AVX512_maskable_custom_cmp; + +multiclass AVX512_maskable_cmp O, Format F, X86VectorVTInfo _, + dag Outs, dag Ins, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS, string Round = "", + InstrItinClass itin = NoItinerary> : + AVX512_maskable_common_cmp; + +multiclass AVX512_maskable_cmp_alt O, Format F, X86VectorVTInfo _, + dag Outs, dag Ins, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm> : + AVX512_maskable_custom_cmp; + // Bitcasts between 512-bit vector types. Return the original type since // no instruction is needed for the conversion let Predicates = [HasAVX512] in { @@ -387,84 +489,123 @@ def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; //===----------------------------------------------------------------------===// // AVX-512 - VECTOR INSERT // - -multiclass vinsert_for_size_no_alt { +multiclass vinsert_for_size { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { - def rr : AVX512AIi8, - EVEX_4V, EVEX_V512; + defm rr : AVX512_maskable, AVX512AIi8Base, EVEX_4V; + + let mayLoad = 1 in + defm rm : AVX512_maskable, AVX512AIi8Base, EVEX_4V, + EVEX_CD8; + } +} + +multiclass vinsert_for_size_lowering p> { + let Predicates = p in { + def : Pat<(vinsert_insert:$ins + (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)), + (To.VT (!cast(InstrStr#"rr") + To.RC:$src1, From.RC:$src2, + (INSERT_get_vinsert_imm To.RC:$ins)))>; - let mayLoad = 1 in - def rm : AVX512AIi8, - EVEX_4V, EVEX_V512, EVEX_CD8; - } -} - -multiclass vinsert_for_size : - vinsert_for_size_no_alt { - // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for - // vinserti32x4. Only add this if 64x2 and friends are not supported - // natively via AVX512DQ. - let Predicates = [NoDQI] in def : Pat<(vinsert_insert:$ins - (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)), - (AltTo.VT (!cast(NAME # From.EltSize # "x4rr") - VR512:$src1, From.RC:$src2, - (INSERT_get_vinsert_imm VR512:$ins)))>; + (To.VT To.RC:$src1), + (From.VT (bitconvert (From.LdFrag addr:$src2))), + (iPTR imm)), + (To.VT (!cast(InstrStr#"rm") + To.RC:$src1, addr:$src2, + (INSERT_get_vinsert_imm To.RC:$ins)))>; + } } multiclass vinsert_for_type { - defm NAME # "32x4" : vinsert_for_size, + X86VectorVTInfo< 8, EltVT32, VR256X>, + vinsert128_insert>, EVEX_V256; + + defm NAME # "32x4Z" : vinsert_for_size, X86VectorVTInfo<16, EltVT32, VR512>, - X86VectorVTInfo< 2, EltVT64, VR128X>, + vinsert128_insert>, EVEX_V512; + + defm NAME # "64x4Z" : vinsert_for_size, X86VectorVTInfo< 8, EltVT64, VR512>, - vinsert128_insert, - INSERT_get_vinsert128_imm>; - let Predicates = [HasDQI] in - defm NAME # "64x2" : vinsert_for_size_no_alt, VEX_W, EVEX_V512; + + let Predicates = [HasVLX, HasDQI] in + defm NAME # "64x2Z256" : vinsert_for_size, + X86VectorVTInfo< 4, EltVT64, VR256X>, + vinsert128_insert>, VEX_W, EVEX_V256; + + let Predicates = [HasDQI] in { + defm NAME # "64x2Z" : vinsert_for_size, X86VectorVTInfo< 8, EltVT64, VR512>, - vinsert128_insert, - INSERT_get_vinsert128_imm>, VEX_W; - defm NAME # "64x4" : vinsert_for_size, - X86VectorVTInfo< 8, EltVT64, VR512>, - X86VectorVTInfo< 8, EltVT32, VR256>, - X86VectorVTInfo<16, EltVT32, VR512>, - vinsert256_insert, - INSERT_get_vinsert256_imm>, VEX_W; - let Predicates = [HasDQI] in - defm NAME # "32x8" : vinsert_for_size_no_alt, - X86VectorVTInfo<16, EltVT32, VR512>, - vinsert256_insert, - INSERT_get_vinsert256_imm>; + vinsert128_insert>, VEX_W, EVEX_V512; + + defm NAME # "32x8Z" : vinsert_for_size, + X86VectorVTInfo<16, EltVT32, VR512>, + vinsert256_insert>, EVEX_V512; + } } defm VINSERTF : vinsert_for_type; defm VINSERTI : vinsert_for_type; +// Codegen pattern with the alternative types, +// Only add this if 64x2 and its friends are not supported natively via AVX512DQ. +defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>; + +defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>; + +defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>; +defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>; + +// Codegen pattern with the alternative types insert VEC128 into VEC256 +defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; +// Codegen pattern with the alternative types insert VEC128 into VEC512 +defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; +// Codegen pattern with the alternative types insert VEC256 into VEC512 +defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; +defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; + // vinsertps - insert f32 to XMM def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), @@ -482,90 +623,158 @@ def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), // AVX-512 VECTOR EXTRACT //--- +multiclass vextract_for_size_first_position_lowering { + // A subvector extract from the first vector position is + // a subregister copy that needs no instruction. + def NAME # To.NumElts: + Pat<(To.VT (extract_subvector (From.VT From.RC:$src),(iPTR 0))), + (To.VT (EXTRACT_SUBREG (From.VT From.RC:$src), To.SubRegIdx))>; +} + multiclass vextract_for_size { + X86VectorVTInfo From, X86VectorVTInfo To, + PatFrag vextract_extract> : + vextract_for_size_first_position_lowering { + let hasSideEffects = 0, ExeDomain = To.ExeDomain in { + // use AVX512_maskable_in_asm (AVX512_maskable can't be used due to + // vextract_extract), we interesting only in patterns without mask, + // intrinsics pattern match generated bellow. defm rr : AVX512_maskable_in_asm, - AVX512AIi8Base, EVEX, EVEX_V512; - let mayStore = 1 in - def rm : AVX512AIi8, EVEX, EVEX_V512, EVEX_CD8; - } - - // Codegen pattern with the alternative types, e.g. v8i64 -> v2i64 for - // vextracti32x4 - def : Pat<(vextract_extract:$ext (AltFrom.VT VR512:$src1), (iPTR imm)), - (AltTo.VT (!cast(NAME # To.EltSize # "x4rr") - VR512:$src1, - (EXTRACT_get_vextract_imm To.RC:$ext)))>; - - // A 128/256-bit subvector extract from the first 512-bit vector position is - // a subregister copy that needs no instruction. - def : Pat<(To.VT (extract_subvector (From.VT VR512:$src), (iPTR 0))), - (To.VT - (EXTRACT_SUBREG (From.VT VR512:$src), To.SubRegIdx))>; - - // And for the alternative types. - def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))), - (AltTo.VT - (EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>; + AVX512AIi8Base, EVEX; + let mayStore = 1 in { + def rm : AVX512AIi8, EVEX; + + def rmk : AVX512AIi8, EVEX_K, EVEX; + }//mayStore = 1 + } // Intrinsic call with masking. def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # - "x4_512") - VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask), - (!cast(NAME # To.EltSize # "x4rrk") To.RC:$src0, - (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)), - VR512:$src1, imm:$idx)>; + "x" # To.NumElts # "_" # From.Size) + From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask), + (!cast(NAME # To.EltSize # "x" # To.NumElts # + From.ZSuffix # "rrk") + To.RC:$src0, + (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM), + From.RC:$src1, imm:$idx)>; // Intrinsic call with zero-masking. def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # - "x4_512") - VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask), - (!cast(NAME # To.EltSize # "x4rrkz") - (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)), - VR512:$src1, imm:$idx)>; + "x" # To.NumElts # "_" # From.Size) + From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask), + (!cast(NAME # To.EltSize # "x" # To.NumElts # + From.ZSuffix # "rrkz") + (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM), + From.RC:$src1, imm:$idx)>; // Intrinsic call without masking. def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # - "x4_512") - VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)), - (!cast(NAME # To.EltSize # "x4rr") - VR512:$src1, imm:$idx)>; + "x" # To.NumElts # "_" # From.Size) + From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)), + (!cast(NAME # To.EltSize # "x" # To.NumElts # + From.ZSuffix # "rr") + From.RC:$src1, imm:$idx)>; +} + +// Codegen pattern for the alternative types +multiclass vextract_for_size_lowering p> : + vextract_for_size_first_position_lowering { + + let Predicates = p in + def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)), + (To.VT (!cast(InstrStr#"rr") + From.RC:$src1, + (EXTRACT_get_vextract_imm To.RC:$ext)))>; } -multiclass vextract_for_type { - defm NAME # "32x4" : vextract_for_size { + defm NAME # "32x4Z" : vextract_for_size, X86VectorVTInfo< 4, EltVT32, VR128X>, + vextract128_extract>, + EVEX_V512, EVEX_CD8<32, CD8VT4>; + defm NAME # "64x4Z" : vextract_for_size, + X86VectorVTInfo< 4, EltVT64, VR256X>, + vextract256_extract>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; + let Predicates = [HasVLX] in + defm NAME # "32x4Z256" : vextract_for_size, + X86VectorVTInfo< 4, EltVT32, VR128X>, + vextract128_extract>, + EVEX_V256, EVEX_CD8<32, CD8VT4>; + let Predicates = [HasVLX, HasDQI] in + defm NAME # "64x2Z256" : vextract_for_size, X86VectorVTInfo< 2, EltVT64, VR128X>, - vextract128_extract, - EXTRACT_get_vextract128_imm>; - defm NAME # "64x4" : vextract_for_size, + VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>; + let Predicates = [HasDQI] in { + defm NAME # "64x2Z" : vextract_for_size, - X86VectorVTInfo< 4, EltVT64, VR256X>, + X86VectorVTInfo< 2, EltVT64, VR128X>, + vextract128_extract>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; + defm NAME # "32x8Z" : vextract_for_size, - X86VectorVTInfo< 8, EltVT32, VR256>, - vextract256_extract, - EXTRACT_get_vextract256_imm>, VEX_W; + X86VectorVTInfo< 8, EltVT32, VR256X>, + vextract256_extract>, + EVEX_V512, EVEX_CD8<32, CD8VT8>; + } } defm VEXTRACTF : vextract_for_type; defm VEXTRACTI : vextract_for_type; +// extract_subvector codegen patterns with the alternative types. +// Only add this if 64x2 and its friends are not supported natively via AVX512DQ. +defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>; +defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>; + +defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info, + vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>; +defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info, + vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>; + +defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>; +defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>; + +// Codegen pattern with the alternative types extract VEC128 from VEC512 +defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; +defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; +// Codegen pattern with the alternative types extract VEC256 from VEC512 +defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info, + vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; +defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, + vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; + // A 128-bit subvector insert to the first 512-bit vector position // is a subregister copy that needs no instruction. def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)), @@ -593,16 +802,20 @@ def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)), (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)), (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; +def : Pat<(insert_subvector undef, (v16i16 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; +def : Pat<(insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; // vextractps - extract 32 bits from XMM def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), - (ins VR128X:$src1, i32i8imm:$src2), + (ins VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, EVEX; def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs), - (ins f32mem:$dst, VR128X:$src1, i32i8imm:$src2), + (ins f32mem:$dst, VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2), addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>; @@ -610,50 +823,49 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs), //===---------------------------------------------------------------------===// // AVX-512 BROADCAST //--- -multiclass avx512_fp_broadcast opc, SDNode OpNode, RegisterClass SrcRC, - ValueType svt, X86VectorVTInfo _> { - defm r : AVX512_maskable, - T8PD, EVEX; - let mayLoad = 1 in { - defm m : AVX512_maskable, - T8PD, EVEX; - } +multiclass avx512_broadcast_rm opc, string OpcodeStr, + X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> { + + defm r : AVX512_maskable, + T8PD, EVEX; + let mayLoad = 1 in + defm m : AVX512_maskable, + T8PD, EVEX, EVEX_CD8; } -multiclass avx512_fp_broadcast_vl opc, SDNode OpNode, - AVX512VLVectorVTInfo _> { - defm Z : avx512_fp_broadcast, +multiclass avx512_fp_broadcast_vl opc, string OpcodeStr, + AVX512VLVectorVTInfo _> { + defm Z : avx512_broadcast_rm, EVEX_V512; let Predicates = [HasVLX] in { - defm Z256 : avx512_fp_broadcast, - EVEX_V256; + defm Z256 : avx512_broadcast_rm, + EVEX_V256; } } let ExeDomain = SSEPackedSingle in { - defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast, - avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>; + defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, "vbroadcastss", + avx512vl_f32_info>; let Predicates = [HasVLX] in { - defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X, - v4f32, v4f32x_info>, EVEX_V128, - EVEX_CD8<32, CD8VT1>; + defm VBROADCASTSSZ128 : avx512_broadcast_rm<0x18, "vbroadcastss", + v4f32x_info, v4f32x_info>, EVEX_V128; } } let ExeDomain = SSEPackedDouble in { - defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast, - avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>; + defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, "vbroadcastsd", + avx512vl_f64_info>, VEX_W; } // avx512_broadcast_pat introduces patterns for broadcast with a scalar argument. -// Later, we can canonize broadcast instructions before ISel phase and +// Later, we can canonize broadcast instructions before ISel phase and // eliminate additional patterns on ISel. // SrcRC_v and SrcRC_s are RegisterClasses for vector and scalar // representations of source @@ -735,12 +947,8 @@ def : Pat <(v8i64 (X86vzext VK8WM:$mask)), def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))), (VPBROADCASTDrZr GR32:$src)>; -def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))), - (VPBROADCASTDrZrkz VK16WM:$mask, GR32:$src)>; def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))), (VPBROADCASTQrZr GR64:$src)>; -def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))), - (VPBROADCASTQrZrkz VK8WM:$mask, GR64:$src)>; def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))), (VPBROADCASTDrZr GR32:$src)>; @@ -754,78 +962,145 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src), (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))), (VPBROADCASTQrZrkz (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>; -multiclass avx512_int_broadcast_rm opc, string OpcodeStr, - X86MemOperand x86memop, PatFrag ld_frag, - RegisterClass DstRC, ValueType OpVT, ValueType SrcVT, - RegisterClass KRC> { - def rr : AVX5128I, EVEX; - def krr : AVX5128I, - EVEX, EVEX_KZ; - let mayLoad = 1 in { - def rm : AVX5128I, EVEX; - def krm : AVX5128I, EVEX, EVEX_KZ; - } -} - -defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem, - loadi32, VR512, v16i32, v4i32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; -defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem, - loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VT1>; - -multiclass avx512_int_subvec_broadcast_rm opc, string OpcodeStr, - X86MemOperand x86memop, PatFrag ld_frag, - RegisterClass KRC> { - let mayLoad = 1 in { - def rm : AVX5128I, EVEX; - def krm : AVX5128I, EVEX, EVEX_KZ; +// Provide aliases for broadcast from the same register class that +// automatically does the extract. +multiclass avx512_int_broadcast_rm_lowering { + def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))), + (!cast(NAME#DestInfo.ZSuffix#"r") + (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>; +} + +multiclass avx512_int_broadcast_rm_vl opc, string OpcodeStr, + AVX512VLVectorVTInfo _, Predicate prd> { + let Predicates = [prd] in { + defm Z : avx512_broadcast_rm, + avx512_int_broadcast_rm_lowering<_.info512, _.info256>, + EVEX_V512; + // Defined separately to avoid redefinition. + defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>; } + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_broadcast_rm, + avx512_int_broadcast_rm_lowering<_.info256, _.info256>, + EVEX_V256; + defm Z128 : avx512_broadcast_rm, + EVEX_V128; + } +} + +defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb", + avx512vl_i8_info, HasBWI>; +defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", + avx512vl_i16_info, HasBWI>; +defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", + avx512vl_i32_info, HasAVX512>; +defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", + avx512vl_i64_info, HasAVX512>, VEX_W; + +multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, + X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { + let mayLoad = 1 in + defm rm : AVX512_maskable, + AVX5128IBase, EVEX; } -defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", - i128mem, loadv2i64, VK16WM>, +defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", + v16i32_info, v4i32x_info>, + EVEX_V512, EVEX_CD8<32, CD8VT4>; +defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", + v16f32_info, v4f32x_info>, EVEX_V512, EVEX_CD8<32, CD8VT4>; -defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", - i256mem, loadv4i64, VK16WM>, VEX_W, +defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", + v8i64_info, v4i64x_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; +defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4", + v8f64_info, v4f64x_info>, VEX_W, + EVEX_V512, EVEX_CD8<64, CD8VT4>; + +let Predicates = [HasVLX] in { +defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", + v8i32x_info, v4i32x_info>, + EVEX_V256, EVEX_CD8<32, CD8VT4>; +defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", + v8f32x_info, v4f32x_info>, + EVEX_V256, EVEX_CD8<32, CD8VT4>; +} +let Predicates = [HasVLX, HasDQI] in { +defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2", + v4i64x_info, v2i64x_info>, VEX_W, + EVEX_V256, EVEX_CD8<64, CD8VT2>; +defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2", + v4f64x_info, v2f64x_info>, VEX_W, + EVEX_V256, EVEX_CD8<64, CD8VT2>; +} +let Predicates = [HasDQI] in { +defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2", + v8i64_info, v2i64x_info>, VEX_W, + EVEX_V512, EVEX_CD8<64, CD8VT2>; +defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti32x8", + v16i32_info, v8i32x_info>, + EVEX_V512, EVEX_CD8<32, CD8VT8>; +defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2", + v8f64_info, v2f64x_info>, VEX_W, + EVEX_V512, EVEX_CD8<64, CD8VT2>; +defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8", + v16f32_info, v8f32x_info>, + EVEX_V512, EVEX_CD8<32, CD8VT8>; +} + +multiclass avx512_broadcast_32x2 opc, string OpcodeStr, + X86VectorVTInfo _Dst, X86VectorVTInfo _Src, + SDNode OpNode = X86SubVBroadcast> { + + defm r : AVX512_maskable, + T8PD, EVEX; + let mayLoad = 1 in + defm m : AVX512_maskable, + T8PD, EVEX, EVEX_CD8<_Src.EltSize, CD8VT2>; +} + +multiclass avx512_common_broadcast_32x2 opc, string OpcodeStr, + AVX512VLVectorVTInfo _> { + let Predicates = [HasDQI] in + defm Z : avx512_broadcast_32x2, + EVEX_V512; + let Predicates = [HasDQI, HasVLX] in + defm Z256 : avx512_broadcast_32x2, + EVEX_V256; +} + +multiclass avx512_common_broadcast_i32x2 opc, string OpcodeStr, + AVX512VLVectorVTInfo _> : + avx512_common_broadcast_32x2 { + + let Predicates = [HasDQI, HasVLX] in + defm Z128 : avx512_broadcast_32x2, EVEX_V128; +} -def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))), - (VPBROADCASTDZrr VR128X:$src)>; -def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))), - (VPBROADCASTQZrr VR128X:$src)>; +defm VPBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", + avx512vl_i32_info>; +defm VPBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", + avx512vl_f32_info>; def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; +def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))), + (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>; + def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))), (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>; - -def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))), - (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>; -def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))), - (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>; +def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))), + (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>; def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))), (VBROADCASTSSZr VR128X:$src)>; @@ -840,247 +1115,181 @@ def : Pat<(v8f64 (X86VBroadcast FR64X:$src)), (VBROADCASTSDZr (COPY_TO_REGCLASS FR64X:$src, VR128X))>; -let Predicates = [HasAVX512] in { -def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))), - (EXTRACT_SUBREG - (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), - addr:$src)), sub_ymm)>; -} //===----------------------------------------------------------------------===// // AVX-512 BROADCAST MASK TO VECTOR REGISTER //--- - -multiclass avx512_mask_broadcast opc, string OpcodeStr, - RegisterClass KRC> { -let Predicates = [HasCDI] in -def Zrr : AVX512XS8I, EVEX, EVEX_V512; - -let Predicates = [HasCDI, HasVLX] in { -def Z128rr : AVX512XS8I opc, string OpcodeStr, + X86VectorVTInfo _, RegisterClass KRC> { + def rr : AVX512XS8I, EVEX, EVEX_V128; -def Z256rr : AVX512XS8I, EVEX, EVEX_V256; + [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, EVEX; } + +multiclass avx512_mask_broadcast opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> { + let Predicates = [HasCDI] in + defm Z : avx512_mask_broadcastm, EVEX_V512; + let Predicates = [HasCDI, HasVLX] in { + defm Z256 : avx512_mask_broadcastm, EVEX_V256; + defm Z128 : avx512_mask_broadcastm, EVEX_V128; + } } -let Predicates = [HasCDI] in { defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", - VK16>; + avx512vl_i32_info, VK16>; defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", - VK8>, VEX_W; -} + avx512vl_i64_info, VK8>, VEX_W; //===----------------------------------------------------------------------===// -// AVX-512 - VPERM -// -// -- immediate form -- -multiclass avx512_perm_imm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in { - def ri : AVX512AIi8, - EVEX; - def mi : AVX512AIi8, - EVEX, EVEX_CD8<_.EltSize, CD8VF>; +// -- VPERMI2 - 3 source operands form -- +multiclass avx512_perm_i opc, string OpcodeStr, + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { +let Constraints = "$src1 = $dst" in { + defm rr: AVX512_maskable_3src_cast, EVEX_4V, + AVX5128IBase; + + let mayLoad = 1 in + defm rm: AVX512_maskable_3src_cast, + EVEX_4V, AVX5128IBase; + } +} +multiclass avx512_perm_i_mb opc, string OpcodeStr, + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { + let mayLoad = 1, Constraints = "$src1 = $dst" in + defm rmb: AVX512_maskable_3src_cast, + AVX5128IBase, EVEX_4V, EVEX_B; +} + +multiclass avx512_perm_i_sizes opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo ShuffleMask> { + defm NAME: avx512_perm_i, + avx512_perm_i_mb, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#128: avx512_perm_i, + avx512_perm_i_mb, EVEX_V128; + defm NAME#256: avx512_perm_i, + avx512_perm_i_mb, EVEX_V256; + } } + +multiclass avx512_perm_i_sizes_w opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo Idx> { + let Predicates = [HasBWI] in + defm NAME: avx512_perm_i, EVEX_V512; + let Predicates = [HasBWI, HasVLX] in { + defm NAME#128: avx512_perm_i, EVEX_V128; + defm NAME#256: avx512_perm_i, EVEX_V256; + } } -multiclass avx512_permil OpcImm, bits<8> OpcVar, X86VectorVTInfo _, - X86VectorVTInfo Ctrl> : - avx512_perm_imm { - let ExeDomain = _.ExeDomain in { - def rr : AVX5128I, - EVEX_4V; - def rm : AVX5128I, - EVEX_4V; - } -} - -defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", X86VPermi, v8i64_info>, - EVEX_V512, VEX_W; -defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", X86VPermi, v8f64_info>, - EVEX_V512, VEX_W; - -defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>, - EVEX_V512; -defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>, - EVEX_V512, VEX_W; - -def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), - (VPERMILPSZri VR512:$src1, imm:$imm)>; -def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), - (VPERMILPDZri VR512:$src1, imm:$imm)>; - -// -- VPERM - register form -- -multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, - PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> { - - def rr : AVX5128I, EVEX_4V; - - def rm : AVX5128I, - EVEX_4V; -} - -defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem, - v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem, - v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -let ExeDomain = SSEPackedSingle in -defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem, - v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -let ExeDomain = SSEPackedDouble in -defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem, - v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -// -- VPERM2I - 3 source operands form -- -multiclass avx512_perm_3src opc, string OpcodeStr, RegisterClass RC, - PatFrag mem_frag, X86MemOperand x86memop, - SDNode OpNode, ValueType OpVT, RegisterClass KRC> { +defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", + avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", + avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2W : avx512_perm_i_sizes_w<0x75, "vpermi2w", + avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", + avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", + avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; + +// VPERMT2 +multiclass avx512_perm_t opc, string OpcodeStr, + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let Constraints = "$src1 = $dst" in { - def rr : AVX5128I, - EVEX_4V; + defm rr: AVX512_maskable_3src, EVEX_4V, + AVX5128IBase; - def rrk : AVX5128I, - EVEX_4V, EVEX_K; + let mayLoad = 1 in + defm rm: AVX512_maskable_3src, + EVEX_4V, AVX5128IBase; + } +} +multiclass avx512_perm_t_mb opc, string OpcodeStr, + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { + let mayLoad = 1, Constraints = "$src1 = $dst" in + defm rmb: AVX512_maskable_3src, + AVX5128IBase, EVEX_4V, EVEX_B; +} + +multiclass avx512_perm_t_sizes opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo ShuffleMask> { + defm NAME: avx512_perm_t, + avx512_perm_t_mb, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#128: avx512_perm_t, + avx512_perm_t_mb, EVEX_V128; + defm NAME#256: avx512_perm_t, + avx512_perm_t_mb, EVEX_V256; + } +} - let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<> - def rrkz : AVX5128I, - EVEX_4V, EVEX_KZ; - - def rm : AVX5128I, EVEX_4V; - - def rmk : AVX5128I, - EVEX_4V, EVEX_K; - - let AddedComplexity = 10 in // Prefer over the rrkz variant - def rmkz : AVX5128I, - EVEX_4V, EVEX_KZ; - } -} -defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, - i512mem, X86VPermiv3, v16i32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, - i512mem, X86VPermiv3, v8i64, VK8WM>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, - i512mem, X86VPermiv3, v16f32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, - i512mem, X86VPermiv3, v8f64, VK8WM>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -multiclass avx512_perm_table_3src opc, string Suffix, RegisterClass RC, - PatFrag mem_frag, X86MemOperand x86memop, - SDNode OpNode, ValueType OpVT, RegisterClass KRC, - ValueType MaskVT, RegisterClass MRC> : - avx512_perm_3src { - def : Pat<(OpVT (!cast("int_x86_avx512_mask_vpermt_"##Suffix##"_512") - VR512:$idx, VR512:$src1, VR512:$src2, -1)), - (!cast(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>; - - def : Pat<(OpVT (!cast("int_x86_avx512_mask_vpermt_"##Suffix##"_512") - VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)), - (!cast(NAME#rrk) VR512:$src1, - (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>; -} - -defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32, i512mem, - X86VPermv3, v16i32, VK16WM, v16i1, GR16>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64, i512mem, - X86VPermv3, v8i64, VK8WM, v8i1, GR8>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32, i512mem, - X86VPermv3, v16f32, VK16WM, v16i1, GR16>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, i512mem, - X86VPermv3, v8f64, VK8WM, v8i1, GR8>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +multiclass avx512_perm_t_sizes_w opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo Idx> { + let Predicates = [HasBWI] in + defm NAME: avx512_perm_t, EVEX_V512; + let Predicates = [HasBWI, HasVLX] in { + defm NAME#128: avx512_perm_t, EVEX_V128; + defm NAME#256: avx512_perm_t, EVEX_V256; + } +} + +defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", + avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", + avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMT2W : avx512_perm_t_sizes_w<0x7D, "vpermt2w", + avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", + avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", + avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask @@ -1197,37 +1406,85 @@ def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1), //===----------------------------------------------------------------------===// // avx512_cmp_scalar - AVX512 CMPSS and CMPSD -multiclass avx512_cmp_scalar { - def rr : AVX512Ii8<0xC2, MRMSrcReg, - (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, - [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))], - IIC_SSE_ALU_F32S_RR>, EVEX_4V; - def rm : AVX512Ii8<0xC2, MRMSrcMem, - (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, - [(set VK1:$dst, (OpNode (VT RC:$src1), - (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + +multiclass avx512_cmp_scalar{ + + defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "$src2, $src1", "$src1, $src2", + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc)>, EVEX_4V; + let mayLoad = 1 in + defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "$src2, $src1", "$src1, $src2", + (OpNode (_.VT _.RC:$src1), + (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))), + imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; + + defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "{sae}, $src2, $src1", "$src1, $src2,{sae}", + (OpNodeRnd (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc, + (i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B; + // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { - def rri_alt : AVX512Ii8<0xC2, MRMSrcReg, - (outs VK1:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), - asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V; + defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, + (outs VK1:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V; + defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc">, + EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; + + defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc,{sae}, $src2, $src1","$src1, $src2,{sae}, $cc">, + EVEX_4V, EVEX_B; + }// let isAsmParserOnly = 1, hasSideEffects = 0 + + let isCodeGenOnly = 1 in { + def rr : AVX512Ii8<0xC2, MRMSrcReg, + (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc), + !strconcat("vcmp${cc}", _.Suffix, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set _.KRC:$dst, (OpNode _.FRC:$src1, + _.FRC:$src2, + imm:$cc))], + IIC_SSE_ALU_F32S_RR>, EVEX_4V; let mayLoad = 1 in - def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem, - (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), - asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + def rm : AVX512Ii8<0xC2, MRMSrcMem, + (outs _.KRC:$dst), + (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc), + !strconcat("vcmp${cc}", _.Suffix, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set _.KRC:$dst, (OpNode _.FRC:$src1, + (_.ScalarLdFrag addr:$src2), + imm:$cc))], + IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; } } let Predicates = [HasAVX512] in { -defm VCMPSSZ : avx512_cmp_scalar, - XS; -defm VCMPSDZ : avx512_cmp_scalar, - XD, VEX_W; + defm VCMPSSZ : avx512_cmp_scalar, + AVX512XSIi8Base; + defm VCMPSDZ : avx512_cmp_scalar, + AVX512XDIi8Base, VEX_W; } multiclass avx512_icmp_packed opc, string OpcodeStr, SDNode OpNode, @@ -1362,7 +1619,7 @@ def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), multiclass avx512_icmp_cc opc, string Suffix, SDNode OpNode, X86VectorVTInfo _> { def rri : AVX512AIi8 opc, string Suffix, SDNode OpNode, IIC_SSE_ALU_F32P_RR>, EVEX_4V; let mayLoad = 1 in def rmi : AVX512AIi8 opc, string Suffix, SDNode OpNode, IIC_SSE_ALU_F32P_RM>, EVEX_4V; def rrik : AVX512AIi8 opc, string Suffix, SDNode OpNode, let mayLoad = 1 in def rmik : AVX512AIi8 opc, string Suffix, SDNode OpNode, avx512_icmp_cc { def rmib : AVX512AIi8 opc, string Suffix, SDNode OpNode, IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B; def rmibk : AVX512AIi8, VEX_W, EVEX_CD8<64, CD8VF>; -// avx512_cmp_packed - compare packed instructions -multiclass avx512_cmp_packed { - def rri : AVX512PIi8<0xC2, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc), - !strconcat("vcmp${cc}", suffix, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>; - let hasSideEffects = 0 in - def rrib: AVX512PIi8<0xC2, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc), - !strconcat("vcmp${cc}", suffix, - "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"), - [], d>, EVEX_B; - def rmi : AVX512PIi8<0xC2, MRMSrcMem, - (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc), - !strconcat("vcmp${cc}", suffix, - "\t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"), - [(set KRC:$dst, - (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>; +multiclass avx512_vcmp_common { + + defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "$src2, $src1", "$src1, $src2", + (X86cmpm (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc)>; + let mayLoad = 1 in { + defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "$src2, $src1", "$src1, $src2", + (X86cmpm (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))), + imm:$cc)>; + + defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr, + (X86cmpm (_.VT _.RC:$src1), + (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), + imm:$cc)>,EVEX_B; + } // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { - def rri_alt : AVX512PIi8<0xC2, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), - !strconcat("vcmp", suffix, - "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; - let mayLoad = 1 in - def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem, - (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), - !strconcat("vcmp", suffix, - "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; + defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc">; + + let mayLoad = 1 in { + defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc">; + + defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, ${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B; + } + } +} + +multiclass avx512_vcmp_sae { + // comparison code form (VCMP[EQ/LT/LE/...] + defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "{sae}, $src2, $src1", "$src1, $src2,{sae}", + (X86cmpmRnd (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc, + (i32 FROUND_NO_EXC))>, EVEX_B; + + let isAsmParserOnly = 1, hasSideEffects = 0 in { + defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc,{sae}, $src2, $src1", + "$src1, $src2,{sae}, $cc">, EVEX_B; + } +} + +multiclass avx512_vcmp { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcmp_common<_.info512>, + avx512_vcmp_sae<_.info512>, EVEX_V512; + + } + let Predicates = [HasAVX512,HasVLX] in { + defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128; + defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256; } } -defm VCMPPSZ : avx512_cmp_packed, PS, EVEX_4V, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VCMPPDZ : avx512_cmp_packed, PD, EVEX_4V, VEX_W, EVEX_V512, - EVEX_CD8<64, CD8VF>; +defm VCMPPD : avx512_vcmp, + AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VCMPPS : avx512_vcmp, + AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), (COPY_TO_REGCLASS (VCMPPSZrri @@ -1579,30 +1885,129 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), imm:$cc), VK8)>; -def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1), - FROUND_NO_EXC)), - (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2, - (I8Imm imm:$cc)), GR16)>; - -def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1), - FROUND_NO_EXC)), - (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2, - (I8Imm imm:$cc)), GR8)>; - -def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1), - FROUND_CURRENT)), - (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2, - (I8Imm imm:$cc)), GR16)>; - -def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1), - FROUND_CURRENT)), - (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2, - (I8Imm imm:$cc)), GR8)>; +// ---------------------------------------------------------------- +// FPClass +//handle fpclass instruction mask = op(reg_scalar,imm) +// op(mem_scalar,imm) +multiclass avx512_scalar_fpclass opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, Predicate prd> { + let Predicates = [prd] in { + def rr : AVX512; + def rrk : AVX512, EVEX_K; + let mayLoad = 1, AddedComplexity = 20 in { + def rm : AVX512; + def rmk : AVX512, EVEX_K; + } + } +} + +//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm) +// fpclass(reg_vec, mem_vec, imm) +// fpclass(reg_vec, broadcast(eltVt), imm) +multiclass avx512_vector_fpclass opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, string mem, string broadcast>{ + def rr : AVX512; + def rrk : AVX512, EVEX_K; + let mayLoad = 1 in { + def rm : AVX512; + def rmk : AVX512, EVEX_K; + def rmb : AVX512,EVEX_B; + def rmbk : AVX512, + EVEX_B, EVEX_K; + } +} + +multiclass avx512_vector_fpclass_all opc, SDNode OpNode, Predicate prd, + string broadcast>{ + let Predicates = [prd] in { + defm Z : avx512_vector_fpclass, EVEX_V512; + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_vector_fpclass, EVEX_V128; + defm Z256 : avx512_vector_fpclass, EVEX_V256; + } +} +multiclass avx512_fp_fpclass_all opcVec, + bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{ + defm PS : avx512_vector_fpclass_all, EVEX_CD8<32, CD8VF>; + defm PD : avx512_vector_fpclass_all,EVEX_CD8<64, CD8VF> , VEX_W; + defm SS : avx512_scalar_fpclass, EVEX_CD8<32, CD8VT1>; + defm SD : avx512_scalar_fpclass, EVEX_CD8<64, CD8VT1>, VEX_W; +} + +defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass, + X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX; + +//----------------------------------------------------------------- // Mask register copy, including // - copy between mask registers // - load/store mask registers @@ -1610,17 +2015,18 @@ def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), // multiclass avx512_mask_mov opc_kk, bits<8> opc_km, bits<8> opc_mk, string OpcodeStr, RegisterClass KRC, - ValueType vvt, ValueType ivt, X86MemOperand x86memop> { + ValueType vvt, X86MemOperand x86memop> { let hasSideEffects = 0 in { def kk : I; let mayLoad = 1 in def km : I; + [(set KRC:$dst, (vvt (load addr:$src)))]>; let mayStore = 1 in def mk : I; + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(store KRC:$src, addr:$dst)]>; } } @@ -1636,27 +2042,25 @@ multiclass avx512_mask_mov_gpr opc_kr, bits<8> opc_rk, } let Predicates = [HasDQI] in - defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8, - i8mem>, + defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>, avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>, VEX, PD; let Predicates = [HasAVX512] in - defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16, - i16mem>, + defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>, avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>, VEX, PS; let Predicates = [HasBWI] in { - defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32, - i32mem>, VEX, PD, VEX_W; + defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>, + VEX, PD, VEX_W; defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>, VEX, XD; } let Predicates = [HasBWI] in { - defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64, - i64mem>, VEX, PS, VEX_W; + defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>, + VEX, PS, VEX_W; defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>, VEX, XD, VEX_W; } @@ -1687,24 +2091,41 @@ let Predicates = [HasBWI] in { let Predicates = [HasDQI] in { def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst), (KMOVBmk addr:$dst, VK8:$src)>; + def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), + (KMOVBkm addr:$src)>; + + def : Pat<(store VK4:$src, addr:$dst), + (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>; + def : Pat<(store VK2:$src, addr:$dst), + (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>; } -let Predicates = [HasAVX512] in { - def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst), - (KMOVWmk addr:$dst, VK16:$src)>; +let Predicates = [HasAVX512, NoDQI] in { def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst), (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>; - def : Pat<(i1 (load addr:$src)), - (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>; def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>; } +let Predicates = [HasAVX512] in { + def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst), + (KMOVWmk addr:$dst, VK16:$src)>; + def : Pat<(i1 (load addr:$src)), + (COPY_TO_REGCLASS (AND16ri (i16 (SUBREG_TO_REG (i32 0), + (MOV8rm addr:$src), sub_8bit)), + (i16 1)), VK1)>; + def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))), + (KMOVWkm addr:$src)>; +} let Predicates = [HasBWI] in { def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst), (KMOVDmk addr:$dst, VK32:$src)>; + def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))), + (KMOVDkm addr:$src)>; } let Predicates = [HasBWI] in { def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst), (KMOVQmk addr:$dst, VK64:$src)>; + def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))), + (KMOVQkm addr:$src)>; } let Predicates = [HasAVX512] in { @@ -1726,10 +2147,17 @@ let Predicates = [HasAVX512] in { def : Pat<(i32 (zext VK1:$src)), (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>; + def : Pat<(i32 (anyext VK1:$src)), + (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>; + def : Pat<(i8 (zext VK1:$src)), (EXTRACT_SUBREG (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>; + def : Pat<(i8 (anyext VK1:$src)), + (EXTRACT_SUBREG + (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>; + def : Pat<(i64 (zext VK1:$src)), (AND64ri8 (SUBREG_TO_REG (i64 0), (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>; @@ -1737,31 +2165,34 @@ let Predicates = [HasAVX512] in { (EXTRACT_SUBREG (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_16bit)>; - def : Pat<(v16i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK16)>; - def : Pat<(v8i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK8)>; -} -let Predicates = [HasBWI] in { - def : Pat<(v32i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK32)>; - def : Pat<(v64i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK64)>; } +def : Pat<(v16i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK16)>; +def : Pat<(v8i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK8)>; +def : Pat<(v4i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK4)>; +def : Pat<(v2i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK2)>; +def : Pat<(v32i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK32)>; +def : Pat<(v64i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK64)>; // With AVX-512 only, 8-bit mask is promoted to 16-bit mask. -let Predicates = [HasAVX512] in { +let Predicates = [HasAVX512, NoDQI] in { // GR from/to 8-bit mask without native support def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), (COPY_TO_REGCLASS - (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)), - VK8)>; + (KMOVWkr (MOVZX32rr8 GR8 :$src)), VK8)>; def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)), sub_8bit)>; +} +let Predicates = [HasAVX512] in { def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))), (COPY_TO_REGCLASS VK16:$src, VK1)>; def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))), @@ -1818,21 +2249,24 @@ let Predicates = [HasBWI] in def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>; // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit -let Predicates = [HasAVX512] in { +let Predicates = [HasAVX512, NoDQI] in { def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>; - def : Pat<(not VK8:$src), (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; } +def : Pat<(xor VK4:$src1, (v4i1 immAllOnesV)), + (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src1, VK16)), VK4)>; +def : Pat<(xor VK2:$src1, (v2i1 immAllOnesV)), + (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src1, VK16)), VK2)>; // Mask binary operation // - KAND, KANDN, KOR, KXNOR, KXOR multiclass avx512_mask_binop opc, string OpcodeStr, RegisterClass KRC, SDPatternOperator OpNode, - Predicate prd> { - let Predicates = [prd] in + Predicate prd, bit IsCommutable> { + let Predicates = [prd], isCommutable = IsCommutable in def rr : I opc, string OpcodeStr, } multiclass avx512_mask_binop_all opc, string OpcodeStr, - SDPatternOperator OpNode> { + SDPatternOperator OpNode, bit IsCommutable, + Predicate prdW = HasAVX512> { defm B : avx512_mask_binop, VEX_4V, VEX_L, PD; + HasDQI, IsCommutable>, VEX_4V, VEX_L, PD; defm W : avx512_mask_binop, VEX_4V, VEX_L, PS; + prdW, IsCommutable>, VEX_4V, VEX_L, PS; defm D : avx512_mask_binop, VEX_4V, VEX_L, VEX_W, PD; + HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD; defm Q : avx512_mask_binop, VEX_4V, VEX_L, VEX_W, PS; + HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS; } def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>; def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>; -let isCommutable = 1 in { - defm KAND : avx512_mask_binop_all<0x41, "kand", and>; - defm KOR : avx512_mask_binop_all<0x45, "kor", or>; - defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>; - defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor>; -} -let isCommutable = 0 in - defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>; - -def : Pat<(xor VK1:$src1, VK1:$src2), - (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16), - (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; - -def : Pat<(or VK1:$src1, VK1:$src2), - (COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16), - (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; - -def : Pat<(and VK1:$src1, VK1:$src2), - (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16), - (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; +defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>; +defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>; +defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor, 1>; +defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>; +defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn, 0>; +defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>; multiclass avx512_mask_binop_int { let Predicates = [HasAVX512] in @@ -1890,13 +2311,28 @@ defm : avx512_mask_binop_int<"kor", "KOR">; defm : avx512_mask_binop_int<"kxnor", "KXNOR">; defm : avx512_mask_binop_int<"kxor", "KXOR">; -// With AVX-512, 8-bit mask is promoted to 16-bit mask. multiclass avx512_binop_pat { - let Predicates = [HasAVX512] in - def : Pat<(OpNode VK8:$src1, VK8:$src2), - (COPY_TO_REGCLASS - (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), - (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; + // With AVX512F, 8-bit mask is promoted to 16-bit mask, + // for the DQI set, this type is legal and KxxxB instruction is used + let Predicates = [NoDQI] in + def : Pat<(OpNode VK8:$src1, VK8:$src2), + (COPY_TO_REGCLASS + (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), + (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; + + // All types smaller than 8 bits require conversion anyway + def : Pat<(OpNode VK1:$src1, VK1:$src2), + (COPY_TO_REGCLASS (Inst + (COPY_TO_REGCLASS VK1:$src1, VK16), + (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; + def : Pat<(OpNode VK2:$src1, VK2:$src2), + (COPY_TO_REGCLASS (Inst + (COPY_TO_REGCLASS VK2:$src1, VK16), + (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>; + def : Pat<(OpNode VK4:$src1, VK4:$src2), + (COPY_TO_REGCLASS (Inst + (COPY_TO_REGCLASS VK4:$src1, VK16), + (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>; } defm : avx512_binop_pat; @@ -1905,55 +2341,75 @@ defm : avx512_binop_pat; defm : avx512_binop_pat; defm : avx512_binop_pat; -// Mask unpacking -multiclass avx512_mask_unpck opc, string OpcodeStr, - RegisterClass KRC> { - let Predicates = [HasAVX512] in - def rr : I; -} +def : Pat<(xor (xor VK16:$src1, VK16:$src2), (v16i1 immAllOnesV)), + (KXNORWrr VK16:$src1, VK16:$src2)>; +def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)), + (KXNORBrr VK8:$src1, VK8:$src2)>, Requires<[HasDQI]>; +def : Pat<(xor (xor VK32:$src1, VK32:$src2), (v32i1 immAllOnesV)), + (KXNORDrr VK32:$src1, VK32:$src2)>, Requires<[HasBWI]>; +def : Pat<(xor (xor VK64:$src1, VK64:$src2), (v64i1 immAllOnesV)), + (KXNORQrr VK64:$src1, VK64:$src2)>, Requires<[HasBWI]>; -multiclass avx512_mask_unpck_bw opc, string OpcodeStr> { - defm BW : avx512_mask_unpck, - VEX_4V, VEX_L, PD; -} +let Predicates = [NoDQI] in +def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)), + (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK8:$src1, VK16), + (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; -defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">; -def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))), - (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16), - (COPY_TO_REGCLASS VK8:$src1, VK16))>; +def : Pat<(xor (xor VK4:$src1, VK4:$src2), (v4i1 immAllOnesV)), + (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK4:$src1, VK16), + (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>; +def : Pat<(xor (xor VK2:$src1, VK2:$src2), (v2i1 immAllOnesV)), + (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK2:$src1, VK16), + (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>; -multiclass avx512_mask_unpck_int { - let Predicates = [HasAVX512] in - def : Pat<(!cast("int_x86_avx512_"##IntName##"_bw") - (i16 GR16:$src1), (i16 GR16:$src2)), - (COPY_TO_REGCLASS (!cast(InstName##"BWrr") - (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)), - (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>; +def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)), + (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK1:$src1, VK16), + (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; + +// Mask unpacking +multiclass avx512_mask_unpck { + let Predicates = [prd] in { + def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst), + (ins KRC:$src1, KRC:$src2), + "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + VEX_4V, VEX_L; + + def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)), + (!cast(NAME##rr) + (COPY_TO_REGCLASS KRCSrc:$src2, KRC), + (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>; + } } -defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">; + +defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD; +defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS; +defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W; // Mask bit testing multiclass avx512_mask_testop opc, string OpcodeStr, RegisterClass KRC, - SDNode OpNode> { - let Predicates = [HasAVX512], Defs = [EFLAGS] in + SDNode OpNode, Predicate prd> { + let Predicates = [prd], Defs = [EFLAGS] in def rr : I; } -multiclass avx512_mask_testop_w opc, string OpcodeStr, SDNode OpNode> { - defm W : avx512_mask_testop, - VEX, PS; +multiclass avx512_mask_testop_w opc, string OpcodeStr, SDNode OpNode, + Predicate prdW = HasAVX512> { + defm B : avx512_mask_testop, + VEX, PD; + defm W : avx512_mask_testop, + VEX, PS; + defm Q : avx512_mask_testop, + VEX, PS, VEX_W; + defm D : avx512_mask_testop, + VEX, PD, VEX_W; } defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>; - -def : Pat<(X86cmp VK1:$src1, (i1 0)), - (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16), - (COPY_TO_REGCLASS VK1:$src1, VK16))>; +defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>; // Mask shift multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, @@ -1968,7 +2424,17 @@ multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, SDNode OpNode> { defm W : avx512_mask_shiftop, - VEX, TAPD, VEX_W; + VEX, TAPD, VEX_W; + let Predicates = [HasDQI] in + defm B : avx512_mask_shiftop, + VEX, TAPD; + let Predicates = [HasBWI] in { + defm Q : avx512_mask_shiftop, + VEX, TAPD, VEX_W; + let Predicates = [HasDQI] in + defm D : avx512_mask_shiftop, + VEX, TAPD; + } } defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>; @@ -1985,6 +2451,8 @@ multiclass avx512_mask_setop { multiclass avx512_mask_setop_w { defm B : avx512_mask_setop; defm W : avx512_mask_setop; + defm D : avx512_mask_setop; + defm Q : avx512_mask_setop; } defm KSET0 : avx512_mask_setop_w; @@ -1994,9 +2462,11 @@ defm KSET1 : avx512_mask_setop_w; let Predicates = [HasAVX512] in { def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>; def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>; + def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>; + def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>; def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>; - def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>; - def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>; + def : Pat<(i1 1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>; + def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>; } def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))), (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>; @@ -2007,193 +2477,247 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))), def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))), (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>; -let Predicates = [HasVLX] in { - def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))), - (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>; - def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))), - (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>; - def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), - (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>; - def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), - (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>; -} +def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 0))), + (v16i1 (COPY_TO_REGCLASS VK32:$src, VK16))>; + +def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 16))), + (v16i1 (COPY_TO_REGCLASS (KSHIFTRDri VK32:$src, (i8 16)), VK16))>; + +def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK64:$src, VK32))>; + +def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))), + (v32i1 (COPY_TO_REGCLASS (KSHIFTRQri VK64:$src, (i8 32)), VK32))>; + +def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), + (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>; + +def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), + (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>; + +def : Pat<(v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))), + (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>; + +def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))), + (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>; +def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))), + (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>; + +def : Pat<(v32i1 (insert_subvector undef, VK2:$src, (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK2:$src, VK32))>; +def : Pat<(v32i1 (insert_subvector undef, VK4:$src, (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK4:$src, VK32))>; +def : Pat<(v32i1 (insert_subvector undef, VK8:$src, (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK8:$src, VK32))>; +def : Pat<(v32i1 (insert_subvector undef, VK16:$src, (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK16:$src, VK32))>; + +def : Pat<(v64i1 (insert_subvector undef, VK2:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK2:$src, VK64))>; +def : Pat<(v64i1 (insert_subvector undef, VK4:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK4:$src, VK64))>; +def : Pat<(v64i1 (insert_subvector undef, VK8:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK8:$src, VK64))>; +def : Pat<(v64i1 (insert_subvector undef, VK16:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK16:$src, VK64))>; +def : Pat<(v64i1 (insert_subvector undef, VK32:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK32:$src, VK64))>; + def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))), - (v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>; + (v8i1 (COPY_TO_REGCLASS + (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), + (I8Imm $imm)), VK8))>, Requires<[HasAVX512, NoDQI]>; def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))), - (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>; + (v8i1 (COPY_TO_REGCLASS + (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), + (I8Imm $imm)), VK8))>, Requires<[HasAVX512, NoDQI]>; + +def : Pat<(v4i1 (X86vshli VK4:$src, (i8 imm:$imm))), + (v4i1 (COPY_TO_REGCLASS + (KSHIFTLWri (COPY_TO_REGCLASS VK4:$src, VK16), + (I8Imm $imm)), VK4))>, Requires<[HasAVX512]>; + +def : Pat<(v4i1 (X86vsrli VK4:$src, (i8 imm:$imm))), + (v4i1 (COPY_TO_REGCLASS + (KSHIFTRWri (COPY_TO_REGCLASS VK4:$src, VK16), + (I8Imm $imm)), VK4))>, Requires<[HasAVX512]>; + //===----------------------------------------------------------------------===// // AVX-512 - Aligned and unaligned load and store // -multiclass avx512_load opc, string OpcodeStr, PatFrag ld_frag, - RegisterClass KRC, RegisterClass RC, - ValueType vt, ValueType zvt, X86MemOperand memop, - Domain d, bit IsReMaterializable = 1> { -let hasSideEffects = 0 in { - def rr : AVX512PI opc, string OpcodeStr, X86VectorVTInfo _, + PatFrag ld_frag, PatFrag mload, + bit IsReMaterializable = 1> { + let hasSideEffects = 0 in { + def rr : AVX512PI, EVEX; - def rrkz : AVX512PI, EVEX; + def rrkz : AVX512PI, EVEX, EVEX_KZ; - } + "${dst} {${mask}} {z}, $src}"), [], _.ExeDomain>, + EVEX, EVEX_KZ; + let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable, SchedRW = [WriteLoad] in - def rm : AVX512PI, EVEX; - - let AddedComplexity = 20 in { - let Constraints = "$src0 = $dst", hasSideEffects = 0 in { - let hasSideEffects = 0 in - def rrk : AVX512PI, EVEX, EVEX_K; + [(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))], + _.ExeDomain>, EVEX; + + let Constraints = "$src0 = $dst" in { + def rrk : AVX512PI, + EVEX, EVEX_K; let mayLoad = 1, SchedRW = [WriteLoad] in - def rmk : AVX512PI, EVEX, EVEX_K; + [(set _.RC:$dst, (_.VT + (vselect _.KRCWM:$mask, + (_.VT (bitconvert (ld_frag addr:$src1))), + (_.VT _.RC:$src0))))], _.ExeDomain>, EVEX, EVEX_K; } let mayLoad = 1, SchedRW = [WriteLoad] in - def rmkz : AVX512PI, EVEX, EVEX_KZ; + def rmkz : AVX512PI, EVEX, EVEX_KZ; } + def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)), + (!cast(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; + + def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)), + (!cast(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; + + def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))), + (!cast(NAME#_.ZSuffix##rmk) _.RC:$src0, + _.KRCWM:$mask, addr:$ptr)>; } -multiclass avx512_load_vl opc, string OpcodeStr, string ld_pat, - string elty, string elsz, string vsz512, - string vsz256, string vsz128, Domain d, - Predicate prd, bit IsReMaterializable = 1> { +multiclass avx512_alignedload_vl opc, string OpcodeStr, + AVX512VLVectorVTInfo _, + Predicate prd, + bit IsReMaterializable = 1> { let Predicates = [prd] in - defm Z : avx512_load(ld_pat##"v"##vsz512##elty##elsz), - !cast("VK"##vsz512##"WM"), VR512, - !cast("v"##vsz512##elty##elsz), v16i32, - !cast(elty##"512mem"), d, - IsReMaterializable>, EVEX_V512; + defm Z : avx512_load, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_load(ld_pat##!if(!eq(elty,"f"), - "v"##vsz256##elty##elsz, "v4i64")), - !cast("VK"##vsz256##"WM"), VR256X, - !cast("v"##vsz256##elty##elsz), v8i32, - !cast(elty##"256mem"), d, - IsReMaterializable>, EVEX_V256; - - defm Z128 : avx512_load(ld_pat##!if(!eq(elty,"f"), - "v"##vsz128##elty##elsz, "v2i64")), - !cast("VK"##vsz128##"WM"), VR128X, - !cast("v"##vsz128##elty##elsz), v4i32, - !cast(elty##"128mem"), d, - IsReMaterializable>, EVEX_V128; - } -} - - -multiclass avx512_store opc, string OpcodeStr, PatFrag st_frag, - ValueType OpVT, RegisterClass KRC, RegisterClass RC, - X86MemOperand memop, Domain d> { - let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { - def rr_alt : AVX512PI, - EVEX; - let Constraints = "$src1 = $dst" in - def rrk_alt : AVX512PI, - EVEX, EVEX_K; - def rrkz_alt : AVX512PI, EVEX, EVEX_KZ; + defm Z256 : avx512_load, EVEX_V256; + defm Z128 : avx512_load, EVEX_V128; + } +} + +multiclass avx512_load_vl opc, string OpcodeStr, + AVX512VLVectorVTInfo _, + Predicate prd, + bit IsReMaterializable = 1> { + let Predicates = [prd] in + defm Z : avx512_load, EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_load, EVEX_V256; + defm Z128 : avx512_load, EVEX_V128; } +} + +multiclass avx512_store opc, string OpcodeStr, X86VectorVTInfo _, + PatFrag st_frag, PatFrag mstore> { + + def rr_REV : AVX512PI, EVEX; + def rrk_REV : AVX512PI, EVEX, EVEX_K; + def rrkz_REV : AVX512PI, EVEX, EVEX_KZ; + let mayStore = 1 in { - def mr : AVX512PI, EVEX; + [(st_frag (_.VT _.RC:$src), addr:$dst)], _.ExeDomain>, EVEX; def mrk : AVX512PI, EVEX, EVEX_K; + (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), + OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", + [], _.ExeDomain>, EVEX, EVEX_K; } + + def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)), + (!cast(NAME#_.ZSuffix##mrk) addr:$ptr, + _.KRCWM:$mask, _.RC:$src)>; } -multiclass avx512_store_vl opc, string OpcodeStr, string st_pat, - string st_suff_512, string st_suff_256, - string st_suff_128, string elty, string elsz, - string vsz512, string vsz256, string vsz128, - Domain d, Predicate prd> { +multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, + AVX512VLVectorVTInfo _, Predicate prd> { let Predicates = [prd] in - defm Z : avx512_store(st_pat##st_suff_512), - !cast("v"##vsz512##elty##elsz), - !cast("VK"##vsz512##"WM"), VR512, - !cast(elty##"512mem"), d>, EVEX_V512; + defm Z : avx512_store, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_store(st_pat##st_suff_256), - !cast("v"##vsz256##elty##elsz), - !cast("VK"##vsz256##"WM"), VR256X, - !cast(elty##"256mem"), d>, EVEX_V256; + defm Z256 : avx512_store, EVEX_V256; + defm Z128 : avx512_store, EVEX_V128; + } +} + +multiclass avx512_alignedstore_vl opc, string OpcodeStr, + AVX512VLVectorVTInfo _, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_store, EVEX_V512; - defm Z128 : avx512_store(st_pat##st_suff_128), - !cast("v"##vsz128##elty##elsz), - !cast("VK"##vsz128##"WM"), VR128X, - !cast(elty##"128mem"), d>, EVEX_V128; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_store, EVEX_V256; + defm Z128 : avx512_store, EVEX_V128; } } -defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32", - "16", "8", "4", SSEPackedSingle, HasAVX512>, - avx512_store_vl<0x29, "vmovaps", "alignedstore", - "512", "256", "", "f", "32", "16", "8", "4", - SSEPackedSingle, HasAVX512>, - PS, EVEX_CD8<32, CD8VF>; +defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, + HasAVX512>, + avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, + HasAVX512>, PS, EVEX_CD8<32, CD8VF>; + +defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, + HasAVX512>, + avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, + HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; -defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64", - "8", "4", "2", SSEPackedDouble, HasAVX512>, - avx512_store_vl<0x29, "vmovapd", "alignedstore", - "512", "256", "", "f", "64", "8", "4", "2", - SSEPackedDouble, HasAVX512>, - PD, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32", - "16", "8", "4", SSEPackedSingle, HasAVX512>, - avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32", - "16", "8", "4", SSEPackedSingle, HasAVX512>, +defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512>, + avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512>, PS, EVEX_CD8<32, CD8VF>; -defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64", - "8", "4", "2", SSEPackedDouble, HasAVX512, 0>, - avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64", - "8", "4", "2", SSEPackedDouble, HasAVX512>, - PD, VEX_W, EVEX_CD8<64, CD8VF>; +defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 0>, + avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>, + PD, VEX_W, EVEX_CD8<64, CD8VF>; def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr, (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)), @@ -2237,82 +2761,30 @@ def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src), (VMOVAPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src)>; -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), - (VMOVUPSZmrk addr:$ptr, - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), - (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; - -def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; - -def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src)), - (VMOVUPSZmrk addr:$ptr, VK16WM:$mask, VR512:$src)>; - -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src)), - (VMOVUPDZmrk addr:$ptr, VK8WM:$mask, VR512:$src)>; - -def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, undef)), - (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, - (bc_v16f32 (v16i32 immAllZerosV)))), - (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src0))), - (VMOVUPSZrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>; - -def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, - (bc_v8f64 (v16i32 immAllZerosV)))), - (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>; - -def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src0))), - (VMOVUPDZrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>; - -def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))), - (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmk - (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src0, sub_ymm), - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; +defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, + HasAVX512>, + avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, + HasAVX512>, PD, EVEX_CD8<32, CD8VF>; -defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32", - "16", "8", "4", SSEPackedInt, HasAVX512>, - avx512_store_vl<0x7F, "vmovdqa32", "alignedstore", - "512", "256", "", "i", "32", "16", "8", "4", - SSEPackedInt, HasAVX512>, - PD, EVEX_CD8<32, CD8VF>; +defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, + HasAVX512>, + avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, + HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; -defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64", - "8", "4", "2", SSEPackedInt, HasAVX512>, - avx512_store_vl<0x7F, "vmovdqa64", "alignedstore", - "512", "256", "", "i", "64", "8", "4", "2", - SSEPackedInt, HasAVX512>, - PD, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8", - "64", "32", "16", SSEPackedInt, HasBWI>, - avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "", - "i", "8", "64", "32", "16", SSEPackedInt, +defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>, + avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI>, XD, EVEX_CD8<8, CD8VF>; -defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16", - "32", "16", "8", SSEPackedInt, HasBWI>, - avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "", - "i", "16", "32", "16", "8", SSEPackedInt, +defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>, + avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>; -defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32", - "16", "8", "4", SSEPackedInt, HasAVX512>, - avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "", - "i", "32", "16", "8", "4", SSEPackedInt, +defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512>, + avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512>, XS, EVEX_CD8<32, CD8VF>; -defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64", - "8", "4", "2", SSEPackedInt, HasAVX512>, - avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "", - "i", "64", "8", "4", "2", SSEPackedInt, +defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512>, + avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>; def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr, @@ -2351,78 +2823,43 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; } -def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 immAllZerosV))), - (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, undef)), - (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src0))), - (VMOVDQU32Zrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, - (bc_v8i64 (v16i32 immAllZerosV)))), - (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>; - -def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>; - -def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src0))), - (VMOVDQU64Zrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>; - -def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src)), - (VMOVDQU32Zmrk addr:$ptr, VK16WM:$mask, VR512:$src)>; - -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src)), - (VMOVDQU64Zmrk addr:$ptr, VK8WM:$mask, VR512:$src)>; - -// SKX replacement -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), - (VMOVDQU32Z256mrk addr:$ptr, VK8WM:$mask, VR256:$src)>; - -// KNL replacement -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), - (VMOVDQU32Zmrk addr:$ptr, - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), - (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; - -def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; - - // Move Int Doubleword to Packed Double Int // def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, - EVEX, VEX_LIG; + EVEX; def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], - IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>; def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG; + IIC_SSE_MOVDQ>, EVEX, VEX_W; +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in +def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), + (ins i64mem:$src), + "vmovq\t{$src, $dst|$dst, $src}", []>, + EVEX, VEX_W, EVEX_CD8<64, CD8VT1>; let isCodeGenOnly = 1 in { -def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), +def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), "vmovq\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (bitconvert GR64:$src))], + [(set FR64X:$dst, (bitconvert GR64:$src))], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; -def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), +def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), "vmovq\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (bitconvert FR64:$src))], + [(set GR64:$dst, (bitconvert FR64X:$src))], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; -} -def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), +def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src), "vmovq\t{$src, $dst|$dst, $src}", - [(store (i64 (bitconvert FR64:$src)), addr:$dst)], + [(store (i64 (bitconvert FR64X:$src)), addr:$dst)], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>, EVEX_CD8<64, CD8VT1>; +} // Move Int Doubleword to Single Scalar // @@ -2430,27 +2867,27 @@ let isCodeGenOnly = 1 in { def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>, EVEX, VEX_LIG; + IIC_SSE_MOVDQ>, EVEX; def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))], - IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>; } // Move doubleword from xmm register to r/m32 // def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), "vmovd\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src), + [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, - EVEX, VEX_LIG; + EVEX; def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128X:$src), "vmovd\t{$src, $dst|$dst, $src}", - [(store (i32 (vector_extract (v4i32 VR128X:$src), + [(store (i32 (extractelt (v4i32 VR128X:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, - EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + EVEX, EVEX_CD8<32, CD8VT1>; // Move quadword from xmm1 register to r/m64 // @@ -2458,16 +2895,28 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))], - IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_LIG, VEX_W, + IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Requires<[HasAVX512, In64BitMode]>; -def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs), - (ins i64mem:$dst, VR128X:$src), - "vmovq\t{$src, $dst|$dst, $src}", - [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), - addr:$dst)], IIC_SSE_MOVDQ>, - EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>, - Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>; +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in +def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, + Requires<[HasAVX512, In64BitMode]>; + +def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), + (ins i64mem:$dst, VR128X:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), + addr:$dst)], IIC_SSE_MOVDQ>, + EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>, + Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>; + +let hasSideEffects = 0 in +def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovq.s\t{$src, $dst|$dst, $src}",[]>, + EVEX, VEX_W; // Move Scalar Single to Double Int // @@ -2476,92 +2925,95 @@ def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32X:$src))], - IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG; + IIC_SSE_MOVD_ToGP>, EVEX; def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32X:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>; } // Move Quadword Int to Packed Quadword Int // -def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), +def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, - EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + EVEX, VEX_W, EVEX_CD8<8, CD8VT8>; //===----------------------------------------------------------------------===// // AVX-512 MOVSS, MOVSD //===----------------------------------------------------------------------===// -multiclass avx512_move_scalar { - let hasSideEffects = 0 in { - def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128X:$dst, (vt (OpNode VR128X:$src1, - (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG; - let Constraints = "$src1 = $dst" in - def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst), - (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3), - !strconcat(asm, - "\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"), - [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K; - def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>, - EVEX, VEX_LIG; +multiclass avx512_move_scalar { + defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2), + asm, "$src2, $src1","$src1, $src2", + (_.VT (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2))), + IIC_SSE_MOV_S_RR>, EVEX_4V; + let Constraints = "$src1 = $dst" , mayLoad = 1 in + defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _, + (outs _.RC:$dst), + (ins _.ScalarMemOp:$src), + asm,"$src","$src", + (_.VT (OpNode (_.VT _.RC:$src1), + (_.VT (scalar_to_vector + (_.ScalarLdFrag addr:$src)))))>, EVEX; + let isCodeGenOnly = 1 in { + def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), + (ins _.RC:$src1, _.FRC:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, + (scalar_to_vector _.FRC:$src2))))], + _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V; + let mayLoad = 1 in + def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))], + _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX; + } let mayStore = 1 in { - def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - EVEX, VEX_LIG; - def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src), - !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), - [], IIC_SSE_MOV_S_MR>, - EVEX, VEX_LIG, EVEX_K; + def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>, + EVEX; + def mrk: AVX512PI<0x11, MRMDestMem, (outs), + (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src), + !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), + [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K; } // mayStore - } //hasSideEffects = 0 } -let ExeDomain = SSEPackedSingle in -defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem, - loadf32>, XS, EVEX_CD8<32, CD8VT1>; +defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>, + VEX_LIG, XS, EVEX_CD8<32, CD8VT1>; -let ExeDomain = SSEPackedDouble in -defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem, - loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>, + VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), - (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), - VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>; + (COPY_TO_REGCLASS (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X), + VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>; def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), - (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X), - VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; + (COPY_TO_REGCLASS (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X), + VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>; def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; -// For the disassembler -let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { - def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), - (ins VR128X:$src1, FR32X:$src2), - "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XS, EVEX_4V, VEX_LIG; - def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), - (ins VR128X:$src1, FR64X:$src2), - "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XD, EVEX_4V, VEX_LIG, VEX_W; -} +defm VMOVSSZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f32x_info, + (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), + "vmovss.s", "$src2, $src1", "$src1, $src2", []>, + XS, EVEX_4V, VEX_LIG; + +defm VMOVSSDrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info, + (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), + "vmovsd.s", "$src2, $src1", "$src1, $src2", []>, + XD, EVEX_4V, VEX_LIG, VEX_W; let Predicates = [HasAVX512] in { let AddedComplexity = 15 in { @@ -2645,10 +3097,10 @@ let Predicates = [HasAVX512] in { (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>; // Extract and store. - def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))), + def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))), addr:$dst), (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>; - def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))), + def : Pat<(store (f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))), addr:$dst), (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>; @@ -2712,7 +3164,7 @@ def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (v2i64 VR128X:$src))))], IIC_SSE_MOVQ_RR>, EVEX, VEX_W; -let AddedComplexity = 20 in +let AddedComplexity = 20 , isCodeGenOnly = 1 in def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (ins i128mem:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -2841,19 +3293,19 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, OpndItins itins, bit IsCommutable = 0> { defm rr : AVX512_maskable, + itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V; let mayLoad = 1 in defm rm : AVX512_maskable, + itins.rm>, AVX512BIBase, EVEX_4V; } @@ -2863,13 +3315,13 @@ multiclass avx512_binop_rmb opc, string OpcodeStr, SDNode OpNode, avx512_binop_rm { let mayLoad = 1 in defm rmb : AVX512_maskable, + itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B; } @@ -2935,20 +3387,20 @@ multiclass avx512_binop_rm_vl_b opc, string OpcodeStr, SDNode OpNode, multiclass avx512_binop_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, bit IsCommutable = 0> { - defm Q : avx512_binop_rm_vl_q; - defm D : avx512_binop_rm_vl_d; } multiclass avx512_binop_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, bit IsCommutable = 0> { - defm W : avx512_binop_rm_vl_w; - defm B : avx512_binop_rm_vl_b; } @@ -2962,60 +3414,36 @@ multiclass avx512_binop_rm_vl_all opc_b, bits<8> opc_w, itins, HasBWI, IsCommutable>; } -multiclass avx512_binop_rm2 opc, string OpcodeStr, ValueType DstVT, - ValueType SrcVT, RegisterClass KRC, RegisterClass RC, - PatFrag memop_frag, X86MemOperand x86memop, - PatFrag scalar_mfrag, X86MemOperand x86scalar_mop, - string BrdcstStr, OpndItins itins, bit IsCommutable = 0> { - let isCommutable = IsCommutable in - { - def rr : AVX512BI, EVEX_4V; - def rrk : AVX512BI, EVEX_4V, EVEX_K; - def rrkz : AVX512BI, EVEX_4V, EVEX_KZ; - } +multiclass avx512_binop_rm2 opc, string OpcodeStr, OpndItins itins, + SDNode OpNode,X86VectorVTInfo _Src, + X86VectorVTInfo _Dst, bit IsCommutable = 0> { + defm rr : AVX512_maskable, + AVX512BIBase, EVEX_4V; let mayLoad = 1 in { - def rm : AVX512BI, EVEX_4V; - def rmk : AVX512BI, EVEX_4V, EVEX_K; - def rmkz : AVX512BI, EVEX_4V, EVEX_KZ; - def rmb : AVX512BI, EVEX_4V, EVEX_B; - def rmbk : AVX512BI, EVEX_4V, EVEX_B, EVEX_K; - def rmbkz : AVX512BI, EVEX_4V, EVEX_B, EVEX_KZ; + defm rm : AVX512_maskable, + AVX512BIBase, EVEX_4V; + + defm rmb : AVX512_maskable, + AVX512BIBase, EVEX_4V, EVEX_B; } } @@ -3023,171 +3451,166 @@ defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, SSE_INTALU_ITINS_P, 1>; defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub, SSE_INTALU_ITINS_P, 0>; -defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul, - SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, +defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs, + SSE_INTALU_ITINS_P, HasBWI, 0>; +defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, + SSE_INTALU_ITINS_P, HasBWI, 0>; +defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; +defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, + SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; +defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P, + HasBWI, 1>; +defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P, + HasBWI, 1>; +defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P, + HasBWI, 1>, T8PD; +defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, - SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; -defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, - EVEX_CD8<64, CD8VF>, VEX_W; +multiclass avx512_binop_all opc, string OpcodeStr, OpndItins itins, + SDNode OpNode, bit IsCommutable = 0> { + + defm NAME#Z : avx512_binop_rm2, + EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; + let Predicates = [HasVLX] in { + defm NAME#Z256 : avx512_binop_rm2, + EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W; + defm NAME#Z128 : avx512_binop_rm2, + EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W; + } +} + +defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, + X86pmuldq, 1>,T8PD; +defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P, + X86pmuludq, 1>; + +multiclass avx512_packs_rmb opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _Src, X86VectorVTInfo _Dst> { + let mayLoad = 1 in { + defm rmb : AVX512_maskable, + EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>; + } +} + +multiclass avx512_packs_rm opc, string OpcodeStr, + SDNode OpNode,X86VectorVTInfo _Src, + X86VectorVTInfo _Dst> { + defm rr : AVX512_maskable, + EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V; + let mayLoad = 1 in { + defm rm : AVX512_maskable, + EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>; + } +} + +multiclass avx512_packs_all_i32_i16 opc, string OpcodeStr, + SDNode OpNode> { + defm NAME#Z : avx512_packs_rm, + avx512_packs_rmb, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#Z256 : avx512_packs_rm, + avx512_packs_rmb, EVEX_V256; + defm NAME#Z128 : avx512_packs_rm, + avx512_packs_rmb, EVEX_V128; + } +} +multiclass avx512_packs_all_i16_i8 opc, string OpcodeStr, + SDNode OpNode> { + defm NAME#Z : avx512_packs_rm, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#Z256 : avx512_packs_rm, EVEX_V256; + defm NAME#Z128 : avx512_packs_rm, EVEX_V128; + } +} -defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; +multiclass avx512_vpmadd opc, string OpcodeStr, + SDNode OpNode, AVX512VLVectorVTInfo _Src, + AVX512VLVectorVTInfo _Dst> { + defm NAME#Z : avx512_packs_rm, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#Z256 : avx512_packs_rm, EVEX_V256; + defm NAME#Z128 : avx512_packs_rm, EVEX_V128; + } +} -def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))), - (VPMULUDQZrr VR512:$src1, VR512:$src2)>; +let Predicates = [HasBWI] in { + defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, PD; + defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, T8PD; + defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase, VEX_W; + defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W; -def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMULUDQZrr VR512:$src1, VR512:$src2)>; -def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMULDQZrr VR512:$src1, VR512:$src2)>; + defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw, + avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD; + defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd, + avx512vl_i16_info, avx512vl_i32_info>, AVX512BIBase; +} -defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax, +defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax, +defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", X86smax, +defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", X86umax, +defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", X86umax, +defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", X86umax, +defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", X86smin, +defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", X86smin, +defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", X86smin, +defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", X86umin, +defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin, +defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin, +defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; - -def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), - (VPMAXSDZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v16i32 (int_x86_avx512_mask_pmaxu_d_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), - (VPMAXUDZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v8i64 (int_x86_avx512_mask_pmaxs_q_512 (v8i64 VR512:$src1), - (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMAXSQZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v8i64 (int_x86_avx512_mask_pmaxu_q_512 (v8i64 VR512:$src1), - (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMAXUQZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v16i32 (int_x86_avx512_mask_pmins_d_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), - (VPMINSDZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v16i32 (int_x86_avx512_mask_pminu_d_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), - (VPMINUDZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v8i64 (int_x86_avx512_mask_pmins_q_512 (v8i64 VR512:$src1), - (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMINSQZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v8i64 (int_x86_avx512_mask_pminu_q_512 (v8i64 VR512:$src1), - (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMINUQZrr VR512:$src1, VR512:$src2)>; -//===----------------------------------------------------------------------===// -// AVX-512 - Unpack Instructions -//===----------------------------------------------------------------------===// - -multiclass avx512_unpack_fp opc, SDNode OpNode, ValueType vt, - PatFrag mem_frag, RegisterClass RC, - X86MemOperand x86memop, string asm, - Domain d> { - def rr : AVX512PI, EVEX_4V; - def rm : AVX512PI, EVEX_4V; -} - -defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64, - VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64, - VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64, - VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64, - VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop> { - def rr : AVX512BI, EVEX_4V; - def rm : AVX512BI, EVEX_4V; -} -defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32, - VR512, memopv16i32, i512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64, - VR512, memopv8i64, i512mem>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; -defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32, - VR512, memopv16i32, i512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, - VR512, memopv8i64, i512mem>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; -//===----------------------------------------------------------------------===// -// AVX-512 - PSHUFD -// - -multiclass avx512_pshuf_imm opc, string OpcodeStr, RegisterClass RC, - SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT> { - def ri : AVX512Ii8, - EVEX; - def mi : AVX512Ii8, EVEX; -} - -defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32, - i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; - //===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// @@ -3199,32 +3622,99 @@ defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or, defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, SSE_INTALU_ITINS_P, HasAVX512, 1>; defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, - SSE_INTALU_ITINS_P, HasAVX512, 1>; + SSE_INTALU_ITINS_P, HasAVX512, 0>; //===----------------------------------------------------------------------===// // AVX-512 FP arithmetic //===----------------------------------------------------------------------===// +multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, + SDNode OpNode, SDNode VecNode, OpndItins itins, + bit IsCommutable> { -multiclass avx512_binop_s opc, string OpcodeStr, SDNode OpNode, - SizeItins itins> { - defm SSZ : sse12_fp_scalar, XS, EVEX_4V, VEX_LIG, - EVEX_CD8<32, CD8VT1>; - defm SDZ : sse12_fp_scalar, XD, VEX_W, EVEX_4V, VEX_LIG, - EVEX_CD8<64, CD8VT1>; -} + defm rr_Int : AVX512_maskable_scalar; -let isCommutable = 1 in { -defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>; -defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>; -defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>; -defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>; + defm rm_Int : AVX512_maskable_scalar; + let isCodeGenOnly = 1, isCommutable = IsCommutable, + Predicates = [HasAVX512] in { + def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.FRC:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], + itins.rr>; + def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.ScalarMemOp:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set _.FRC:$dst, (OpNode _.FRC:$src1, + (_.ScalarLdFrag addr:$src2)))], itins.rr>; + } } -let isCommutable = 0 in { -defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>; -defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>; + +multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo _, + SDNode VecNode, OpndItins itins, bit IsCommutable = 0> { + + defm rrb : AVX512_maskable_scalar, + EVEX_B, EVEX_RC; } +multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, + SDNode VecNode, OpndItins itins, bit IsCommutable> { + + defm rrb : AVX512_maskable_scalar, EVEX_B; +} + +multiclass avx512_binop_s_round opc, string OpcodeStr, SDNode OpNode, + SDNode VecNode, + SizeItins itins, bit IsCommutable> { + defm SSZ : avx512_fp_scalar, + avx512_fp_scalar_round, + XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; + defm SDZ : avx512_fp_scalar, + avx512_fp_scalar_round, + XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; +} + +multiclass avx512_binop_s_sae opc, string OpcodeStr, SDNode OpNode, + SDNode VecNode, + SizeItins itins, bit IsCommutable> { + defm SSZ : avx512_fp_scalar, + avx512_fp_scalar_sae, + XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; + defm SDZ : avx512_fp_scalar, + avx512_fp_scalar_sae, + XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; +} +defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>; +defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_ALU_ITINS_S, 1>; +defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>; +defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_ALU_ITINS_S, 0>; +defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 1>; +defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 1>; multiclass avx512_fp_packed opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> { @@ -3247,6 +3737,25 @@ multiclass avx512_fp_packed opc, string OpcodeStr, SDNode OpNode, }//let mayLoad = 1 } +multiclass avx512_fp_round_packed opc, string OpcodeStr, SDNode OpNodeRnd, + X86VectorVTInfo _> { + defm rb: AVX512_maskable, + EVEX_4V, EVEX_B, EVEX_RC; +} + + +multiclass avx512_fp_sae_packed opc, string OpcodeStr, SDNode OpNodeRnd, + X86VectorVTInfo _> { + defm rb: AVX512_maskable, + EVEX_4V, EVEX_B; +} + multiclass avx512_fp_binop_p opc, string OpcodeStr, SDNode OpNode, bit IsCommutable = 0> { defm PSZ : avx512_fp_packed opc, string OpcodeStr, SDNode OpNode, } } -defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>; -defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>; -defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>; -defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>; -defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>; -defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>; +multiclass avx512_fp_binop_p_round opc, string OpcodeStr, SDNode OpNodeRnd> { + defm PSZ : avx512_fp_round_packed, + EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp_round_packed, + EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; +} + +multiclass avx512_fp_binop_p_sae opc, string OpcodeStr, SDNode OpNodeRnd> { + defm PSZ : avx512_fp_sae_packed, + EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp_sae_packed, + EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; +} + +defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>, + avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>; +defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>, + avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>; +defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>, + avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>; +defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>, + avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>; +defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>, + avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd>; +defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>, + avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd>; +let Predicates = [HasDQI] in { + defm VAND : avx512_fp_binop_p<0x54, "vand", X86fand, 1>; + defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, 0>; + defm VOR : avx512_fp_binop_p<0x56, "vor", X86for, 1>; + defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>; +} + +multiclass avx512_fp_scalef_p opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr: AVX512_maskable, EVEX_4V; + let mayLoad = 1 in { + defm rm: AVX512_maskable, EVEX_4V; + defm rmb: AVX512_maskable, + EVEX_4V, EVEX_B; + }//let mayLoad = 1 +} -def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)), - (i16 -1), FROUND_CURRENT)), - (VMAXPSZrr VR512:$src1, VR512:$src2)>; +multiclass avx512_fp_scalef_scalar opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr: AVX512_maskable_scalar; + let mayLoad = 1 in { + defm rm: AVX512_maskable_scalar; + }//let mayLoad = 1 +} -def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)), - (i8 -1), FROUND_CURRENT)), - (VMAXPDZrr VR512:$src1, VR512:$src2)>; +multiclass avx512_fp_scalef_all opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> { + defm PSZ : avx512_fp_scalef_p, + avx512_fp_round_packed, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp_scalef_p, + avx512_fp_round_packed, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm SSZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V,EVEX_CD8<32, CD8VT1>; + defm SDZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; -def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)), - (i16 -1), FROUND_CURRENT)), - (VMINPSZrr VR512:$src1, VR512:$src2)>; + // Define only if AVX512VL feature is present. + let Predicates = [HasVLX] in { + defm PSZ128 : avx512_fp_scalef_p, + EVEX_V128, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp_scalef_p, + EVEX_V256, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp_scalef_p, + EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp_scalef_p, + EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; + } +} +defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD; -def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)), - (i8 -1), FROUND_CURRENT)), - (VMINPDZrr VR512:$src1, VR512:$src2)>; //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions //===----------------------------------------------------------------------===// -multiclass avx512_vptest opc, string OpcodeStr, RegisterClass KRC, - RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, - SDNode OpNode, ValueType vt> { - def rr : AVX512PI, EVEX_4V; - def rm : AVX512PI, EVEX_4V; +multiclass avx512_vptest opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr : AVX512_maskable_cmp, + EVEX_4V; + let mayLoad = 1 in + defm rm : AVX512_maskable_cmp, + EVEX_4V, + EVEX_CD8<_.EltSize, CD8VF>; } -defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem, - memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem, - memopv8i64, X86testm, v8i64>, T8PD, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; +multiclass avx512_vptest_mb opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let mayLoad = 1 in + defm rmb : AVX512_maskable_cmp, + EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; +} +multiclass avx512_vptest_dq_sizes opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in + defm Z : avx512_vptest, + avx512_vptest_mb, EVEX_V512; -let Predicates = [HasCDI] in { -defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem, - memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem, - memopv8i64, X86testnm, v8i64>, T8XS, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; + let Predicates = [HasAVX512, HasVLX] in { + defm Z256 : avx512_vptest, + avx512_vptest_mb, EVEX_V256; + defm Z128 : avx512_vptest, + avx512_vptest_mb, EVEX_V128; + } +} + +multiclass avx512_vptest_dq opc, string OpcodeStr, SDNode OpNode> { + defm D : avx512_vptest_dq_sizes; + defm Q : avx512_vptest_dq_sizes, VEX_W; +} + +multiclass avx512_vptest_wb opc, string OpcodeStr, + SDNode OpNode> { + let Predicates = [HasBWI] in { + defm WZ: avx512_vptest, + EVEX_V512, VEX_W; + defm BZ: avx512_vptest, + EVEX_V512; + } + let Predicates = [HasVLX, HasBWI] in { + + defm WZ256: avx512_vptest, + EVEX_V256, VEX_W; + defm WZ128: avx512_vptest, + EVEX_V128, VEX_W; + defm BZ256: avx512_vptest, + EVEX_V256; + defm BZ128: avx512_vptest, + EVEX_V128; + } } +multiclass avx512_vptest_all_forms opc_wb, bits<8> opc_dq, string OpcodeStr, + SDNode OpNode> : + avx512_vptest_wb , + avx512_vptest_dq; + +defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm>, T8PD; +defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm>, T8XS; + def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1), (v16i32 VR512:$src2), (i16 -1))), (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>; @@ -3351,149 +3977,358 @@ multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, (ins _.RC:$src1, u8imm:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))), - " ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V; + SSE_INTSHIFT_ITINS_P.rr>; + let mayLoad = 1 in defm mi : AVX512_maskable, AVX512BIi8Base, EVEX_4V; + (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), + (i8 imm:$src2))), + SSE_INTSHIFT_ITINS_P.rm>; +} + +multiclass avx512_shift_rmbi opc, Format ImmFormM, + string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { + let mayLoad = 1 in + defm mbi : AVX512_maskable, EVEX_B; } multiclass avx512_shift_rrm opc, string OpcodeStr, SDNode OpNode, - ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> { + ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> { // src2 is always 128-bit defm rr : AVX512_maskable, AVX512BIBase, EVEX_4V; + SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V; defm rm : AVX512_maskable, AVX512BIBase, EVEX_4V; + (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))), + SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase, + EVEX_4V; } multiclass avx512_shift_sizes opc, string OpcodeStr, SDNode OpNode, - ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> { - defm Z : avx512_shift_rrm, EVEX_V512; + ValueType SrcVT, PatFrag bc_frag, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_shift_rrm, EVEX_V512, + EVEX_CD8 ; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_shift_rrm, EVEX_V256, + EVEX_CD8; + defm Z128 : avx512_shift_rrm, EVEX_V128, + EVEX_CD8; + } } -multiclass avx512_shift_types opcd, bits<8> opcq, string OpcodeStr, - SDNode OpNode> { +multiclass avx512_shift_types opcd, bits<8> opcq, bits<8> opcw, + string OpcodeStr, SDNode OpNode> { defm D : avx512_shift_sizes, EVEX_CD8<32, CD8VQ>; + avx512vl_i32_info, HasAVX512>; defm Q : avx512_shift_sizes, EVEX_CD8<64, CD8VQ>, VEX_W; -} - -defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli, - v16i32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli, - v8i64_info>, EVEX_V512, - EVEX_CD8<64, CD8VF>, VEX_W; - -defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli, - v16i32_info>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli, - v8i64_info>, EVEX_V512, - EVEX_CD8<64, CD8VF>, VEX_W; - -defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai, - v16i32_info>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai, - v8i64_info>, EVEX_V512, - EVEX_CD8<64, CD8VF>, VEX_W; - -defm VPSLL : avx512_shift_types<0xF2, 0xF3, "vpsll", X86vshl>; -defm VPSRA : avx512_shift_types<0xE2, 0xE2, "vpsra", X86vsra>; -defm VPSRL : avx512_shift_types<0xD2, 0xD3, "vpsrl", X86vsrl>; + avx512vl_i64_info, HasAVX512>, VEX_W; + defm W : avx512_shift_sizes; +} -//===-------------------------------------------------------------------===// -// Variable Bit Shifts -//===-------------------------------------------------------------------===// -multiclass avx512_var_shift opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - defm rr : AVX512_maskable, AVX5128IBase, EVEX_4V; +multiclass avx512_shift_rmi_sizes opc, Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo> { + let Predicates = [HasAVX512] in + defm Z: avx512_shift_rmi, + avx512_shift_rmbi, EVEX_V512; + let Predicates = [HasAVX512, HasVLX] in { + defm Z256: avx512_shift_rmi, + avx512_shift_rmbi, EVEX_V256; + defm Z128: avx512_shift_rmi, + avx512_shift_rmbi, EVEX_V128; + } +} + +multiclass avx512_shift_rmi_w opcw, + Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode> { + let Predicates = [HasBWI] in + defm WZ: avx512_shift_rmi, EVEX_V512; + let Predicates = [HasVLX, HasBWI] in { + defm WZ256: avx512_shift_rmi, EVEX_V256; + defm WZ128: avx512_shift_rmi, EVEX_V128; + } +} + +multiclass avx512_shift_rmi_dq opcd, bits<8> opcq, + Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode> { + defm D: avx512_shift_rmi_sizes, EVEX_CD8<32, CD8VF>; + defm Q: avx512_shift_rmi_sizes, EVEX_CD8<64, CD8VF>, VEX_W; +} + +defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>, + avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>, AVX512BIi8Base, EVEX_4V; + +defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>, + avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>, AVX512BIi8Base, EVEX_4V; + +defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>, + avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V; + +defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>, AVX512BIi8Base, EVEX_4V; +defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>, AVX512BIi8Base, EVEX_4V; + +defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>; +defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>; +defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>; + +//===-------------------------------------------------------------------===// +// Variable Bit Shifts +//===-------------------------------------------------------------------===// +multiclass avx512_var_shift opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr : AVX512_maskable, AVX5128IBase, EVEX_4V; + let mayLoad = 1 in defm rm : AVX512_maskable, AVX5128IBase, EVEX_4V; + (_.VT (OpNode _.RC:$src1, + (_.VT (bitconvert (_.LdFrag addr:$src2))))), + SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V, + EVEX_CD8<_.EltSize, CD8VF>; } +multiclass avx512_var_shift_mb opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let mayLoad = 1 in + defm rmb : AVX512_maskable, AVX5128IBase, EVEX_B, + EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; +} multiclass avx512_var_shift_sizes opc, string OpcodeStr, SDNode OpNode, AVX512VLVectorVTInfo _> { - defm Z : avx512_var_shift, EVEX_V512; + let Predicates = [HasAVX512] in + defm Z : avx512_var_shift, + avx512_var_shift_mb, EVEX_V512; + + let Predicates = [HasAVX512, HasVLX] in { + defm Z256 : avx512_var_shift, + avx512_var_shift_mb, EVEX_V256; + defm Z128 : avx512_var_shift, + avx512_var_shift_mb, EVEX_V128; + } } multiclass avx512_var_shift_types opc, string OpcodeStr, SDNode OpNode> { defm D : avx512_var_shift_sizes, EVEX_CD8<32, CD8VQ>; + avx512vl_i32_info>; defm Q : avx512_var_shift_sizes, EVEX_CD8<64, CD8VQ>, VEX_W; + avx512vl_i64_info>, VEX_W; +} + +// Use 512bit version to implement 128/256 bit in case NoVLX. +multiclass avx512_var_shift_w_lowering { + let Predicates = [HasBWI, NoVLX] in { + def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1), + (_.info256.VT _.info256.RC:$src2))), + (EXTRACT_SUBREG + (!cast(NAME#"WZrr") + (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), + (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), + sub_ymm)>; + + def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1), + (_.info128.VT _.info128.RC:$src2))), + (EXTRACT_SUBREG + (!cast(NAME#"WZrr") + (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), + (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), + sub_xmm)>; + } +} + +multiclass avx512_var_shift_w opc, string OpcodeStr, + SDNode OpNode> { + let Predicates = [HasBWI] in + defm WZ: avx512_var_shift, + EVEX_V512, VEX_W; + let Predicates = [HasVLX, HasBWI] in { + + defm WZ256: avx512_var_shift, + EVEX_V256, VEX_W; + defm WZ128: avx512_var_shift, + EVEX_V128, VEX_W; + } } -defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>; -defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>; -defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>; +defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>, + avx512_var_shift_w<0x12, "vpsllvw", shl>, + avx512_var_shift_w_lowering; +defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>, + avx512_var_shift_w<0x11, "vpsravw", sra>, + avx512_var_shift_w_lowering; +defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>, + avx512_var_shift_w<0x10, "vpsrlvw", srl>, + avx512_var_shift_w_lowering; +defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>; +defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>; + +//===-------------------------------------------------------------------===// +// 1-src variable permutation VPERMW/D/Q +//===-------------------------------------------------------------------===// +multiclass avx512_vperm_dq_sizes opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in + defm Z : avx512_var_shift, + avx512_var_shift_mb, EVEX_V512; + + let Predicates = [HasAVX512, HasVLX] in + defm Z256 : avx512_var_shift, + avx512_var_shift_mb, EVEX_V256; +} +multiclass avx512_vpermi_dq_sizes opc, Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo> { + let Predicates = [HasAVX512] in + defm Z: avx512_shift_rmi, + avx512_shift_rmbi, EVEX_V512; + let Predicates = [HasAVX512, HasVLX] in + defm Z256: avx512_shift_rmi, + avx512_shift_rmbi, EVEX_V256; +} + + +defm VPERM : avx512_var_shift_w<0x8D, "vpermw", X86VPermv>; + +defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, + avx512vl_i32_info>; +defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, + avx512vl_i64_info>, VEX_W; +defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, + avx512vl_f32_info>; +defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, + avx512vl_f64_info>, VEX_W; + +defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", + X86VPermi, avx512vl_i64_info>, + EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; +defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", + X86VPermi, avx512vl_f64_info>, + EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; //===----------------------------------------------------------------------===// -// AVX-512 - MOVDDUP +// AVX-512 - VPERMIL //===----------------------------------------------------------------------===// -multiclass avx512_movddup { -def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX; -def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, - (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX; +multiclass avx512_permil_vec OpcVar, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, X86VectorVTInfo Ctrl> { + defm rr: AVX512_maskable, + T8PD, EVEX_4V; + let mayLoad = 1 in { + defm rm: AVX512_maskable, + T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + defm rmb: AVX512_maskable, + T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; + }//let mayLoad = 1 } -defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>, - VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; -def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))), - (VMOVDDUPZrm addr:$src)>; +multiclass avx512_permil_vec_common OpcVar, + AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_permil_vec, EVEX_V512; + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_permil_vec, EVEX_V128; + defm Z256 : avx512_permil_vec, EVEX_V256; + } +} -//===---------------------------------------------------------------------===// -// Replicate Single FP - MOVSHDUP and MOVSLDUP -//===---------------------------------------------------------------------===// -multiclass avx512_replicate_sfp op, SDNode OpNode, string OpcodeStr, - ValueType vt, RegisterClass RC, PatFrag mem_frag, - X86MemOperand x86memop> { - def rr : AVX512XSI, EVEX; - let mayLoad = 1 in - def rm : AVX512XSI, EVEX; +multiclass avx512_permil OpcImm, bits<8> OpcVar, + AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ + + defm NAME: avx512_permil_vec_common; + defm NAME: avx512_shift_rmi_sizes, + EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; } -defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup", - v16f32, VR512, memopv16f32, f512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup", - v16f32, VR512, memopv16f32, f512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; +defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info, + avx512vl_i32_info>; +defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, + avx512vl_i64_info>, VEX_W; +//===----------------------------------------------------------------------===// +// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW +//===----------------------------------------------------------------------===// + +defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", + X86PShufd, avx512vl_i32_info>, + EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; +defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", + X86PShufhw>, EVEX, AVX512XSIi8Base; +defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", + X86PShuflw>, EVEX, AVX512XDIi8Base; + +multiclass avx512_pshufb_sizes opc, string OpcodeStr, SDNode OpNode> { + let Predicates = [HasBWI] in + defm Z: avx512_var_shift, EVEX_V512; + + let Predicates = [HasVLX, HasBWI] in { + defm Z256: avx512_var_shift, EVEX_V256; + defm Z128: avx512_var_shift, EVEX_V128; + } +} -def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>; -def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))), - (VMOVSHDUPZrm addr:$src)>; -def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>; -def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))), - (VMOVSLDUPZrm addr:$src)>; +defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>; //===----------------------------------------------------------------------===// // Move Low to High and High to Low packed FP Instructions @@ -3521,192 +4356,480 @@ let Predicates = [HasAVX512] in { (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>; } +//===----------------------------------------------------------------------===// +// VMOVHPS/PD VMOVLPS Instructions +// All patterns was taken from SSS implementation. +//===----------------------------------------------------------------------===// +multiclass avx512_mov_hilo_packed opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let mayLoad = 1 in + def rm : AVX512, EVEX_4V; +} + +defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps, + v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; +defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Movlhpd, + v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; +defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps, + v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; +defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd, + v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; + +let Predicates = [HasAVX512] in { + // VMOVHPS patterns + def : Pat<(X86Movlhps VR128X:$src1, + (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), + (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128X:$src1, + (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>; + // VMOVHPD patterns + def : Pat<(v2f64 (X86Unpckl VR128X:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpckl VR128X:$src1, + (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), + (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; + // VMOVLPS patterns + def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))), + (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86Movlps VR128X:$src1, (load addr:$src2))), + (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>; + // VMOVLPD patterns + def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))), + (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v2i64 (X86Movlpd VR128X:$src1, (load addr:$src2))), + (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Movsd VR128X:$src1, + (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; +} + +let mayStore = 1 in { +def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovhps\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)), + (bc_v2f64 (v4f32 VR128X:$src))), + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<32, CD8VT2>; +def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovhpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; +def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovlps\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128X:$src)), + (iPTR 0))), addr:$dst)], + IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<32, CD8VT2>; +def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovlpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (v2f64 VR128X:$src), + (iPTR 0))), addr:$dst)], + IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; +} +let Predicates = [HasAVX512] in { + // VMOVHPD patterns + def : Pat<(store (f64 (vector_extract + (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), + (iPTR 0))), addr:$dst), + (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; + // VMOVLPS patterns + def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)), + addr:$src1), + (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>; + def : Pat<(store (v4i32 (X86Movlps + (bc_v4i32 (loadv2i64 addr:$src1)), VR128X:$src2)), addr:$src1), + (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>; + // VMOVLPD patterns + def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)), + addr:$src1), + (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>; + def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128X:$src2)), + addr:$src1), + (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>; +} //===----------------------------------------------------------------------===// // FMA - Fused Multiply Operations // let Constraints = "$src1 = $dst" in { -// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching. -multiclass avx512_fma3p_rm opc, string OpcodeStr, X86VectorVTInfo _, - SDPatternOperator OpNode = null_frag> { +multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { defm r: AVX512_maskable_3src, AVX512FMA3Base; - let mayLoad = 1 in - defm m: AVX512_maskable_3src, - AVX512FMA3Base; + AVX512FMA3Base; - defm mb: AVX512_maskable_3src, + OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), + !strconcat("$src2, ${src3}", _.BroadcastStr ), + (OpNode _.RC:$src1, + _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, AVX512FMA3Base, EVEX_B; - } -} // Constraints = "$src1 = $dst" - -multiclass avx512_fma3p_forms opc213, bits<8> opc231, - string OpcodeStr, X86VectorVTInfo VTI, - SDPatternOperator OpNode> { - defm v213r : avx512_fma3p_rm, EVEX_CD8; + } +} - defm v231r : avx512_fma3p_rm, EVEX_CD8; +multiclass avx512_fma3_213_round opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src, + AVX512FMA3Base, EVEX_B, EVEX_RC; } +} // Constraints = "$src1 = $dst" -multiclass avx512_fma3p opc213, bits<8> opc231, - string OpcodeStr, - SDPatternOperator OpNode> { -let ExeDomain = SSEPackedSingle in { - defm NAME##PSZ : avx512_fma3p_forms, EVEX_V512; - defm NAME##PSZ256 : avx512_fma3p_forms, EVEX_V256; - defm NAME##PSZ128 : avx512_fma3p_forms, EVEX_V128; +multiclass avx512_fma3p_213_common opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_213_rm, + avx512_fma3_213_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } -let ExeDomain = SSEPackedDouble in { - defm NAME##PDZ : avx512_fma3p_forms, EVEX_V512, VEX_W; - defm NAME##PDZ256 : avx512_fma3p_forms, EVEX_V256, VEX_W; - defm NAME##PDZ128 : avx512_fma3p_forms, EVEX_V128, VEX_W; + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_213_rm, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_213_rm, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } -defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd>; -defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub>; -defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub>; -defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd>; -defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd>; -defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub>; +multiclass avx512_fma3p_213_f opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_213_common; + defm PD : avx512_fma3p_213_common, VEX_W; +} + +defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>; +defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>; + let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3p_m132 opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let mayLoad = 1 in - def m: AVX512FMA3; - def mb: AVX512FMA3, EVEX_B; +multiclass avx512_fma3p_231_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm r: AVX512_maskable_3src, + AVX512FMA3Base; + + let mayLoad = 1 in { + defm m: AVX512_maskable_3src, + AVX512FMA3Base; + + defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B; + } +} + +multiclass avx512_fma3_231_round opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src, + AVX512FMA3Base, EVEX_B, EVEX_RC; } } // Constraints = "$src1 = $dst" +multiclass avx512_fma3p_231_common opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_231_rm, + avx512_fma3_231_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + } + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_231_rm, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_231_rm, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; + } +} + +multiclass avx512_fma3p_231_f opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_231_common; + defm PD : avx512_fma3p_231_common, VEX_W; +} -multiclass avx512_fma3p_m132_f opc, - string OpcodeStr, - SDNode OpNode> { +defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>; +defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>; -let ExeDomain = SSEPackedSingle in { - defm NAME##PSZ : avx512_fma3p_m132, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm NAME##PSZ256 : avx512_fma3p_m132, EVEX_V256, EVEX_CD8<32, CD8VF>; - defm NAME##PSZ128 : avx512_fma3p_m132, EVEX_V128, EVEX_CD8<32, CD8VF>; +let Constraints = "$src1 = $dst" in { +multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm r: AVX512_maskable_3src, + AVX512FMA3Base; + + let mayLoad = 1 in { + defm m: AVX512_maskable_3src, + AVX512FMA3Base; + + defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B; } -let ExeDomain = SSEPackedDouble in { - defm NAME##PDZ : avx512_fma3p_m132, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VF>; - defm NAME##PDZ256 : avx512_fma3p_m132, EVEX_V256, VEX_W, EVEX_CD8<32, CD8VF>; - defm NAME##PDZ128 : avx512_fma3p_m132, EVEX_V128, VEX_W, EVEX_CD8<32, CD8VF>; +} + +multiclass avx512_fma3_132_round opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src, + AVX512FMA3Base, EVEX_B, EVEX_RC; +} +} // Constraints = "$src1 = $dst" + +multiclass avx512_fma3p_132_common opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_132_rm, + avx512_fma3_132_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + } + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_132_rm, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_132_rm, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } -defm VFMADD132 : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>; -defm VFMSUB132 : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>; -defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>; -defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>; -defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>; -defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>; +multiclass avx512_fma3p_132_f opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_132_common; + defm PD : avx512_fma3p_132_common, VEX_W; +} +defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>; +defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>; // Scalar FMA let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3s_rm opc, string OpcodeStr, SDNode OpNode, - RegisterClass RC, ValueType OpVT, - X86MemOperand x86memop, Operand memop, - PatFrag mem_frag> { - let isCommutable = 1 in - def r : AVX512FMA3; +multiclass avx512_fma3s_common opc, string OpcodeStr, X86VectorVTInfo _, + dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb, + dag RHS_r, dag RHS_m > { + defm r_Int: AVX512_maskable_3src_scalar, AVX512FMA3Base; + let mayLoad = 1 in - def m : AVX512FMA3, AVX512FMA3Base; + + defm rb_Int: AVX512_maskable_3src_scalar, + AVX512FMA3Base, EVEX_B, EVEX_RC; + + let isCodeGenOnly = 1 in { + def r : AVX512FMA3; + [RHS_r]>; + let mayLoad = 1 in + def m : AVX512FMA3; + }// isCodeGenOnly = 1 +} +}// Constraints = "$src1 = $dst" + +multiclass avx512_fma3s_all opc213, bits<8> opc231, bits<8> opc132, + string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86VectorVTInfo _ , + string SUFF> { + + defm NAME#213#SUFF: avx512_fma3s_common; + + defm NAME#231#SUFF: avx512_fma3s_common; + + defm NAME#132#SUFF: avx512_fma3s_common; +} + +multiclass avx512_fma3s opc213, bits<8> opc231, bits<8> opc132, + string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd>{ + let Predicates = [HasAVX512] in { + defm NAME : avx512_fma3s_all, + EVEX_CD8<32, CD8VT1>, VEX_LIG; + defm NAME : avx512_fma3s_all, + EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W; + } } -} // Constraints = "$src1 = $dst" - -defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>; +defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>; +defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>; //===----------------------------------------------------------------------===// // AVX-512 Scalar convert from sign integer to float/double //===----------------------------------------------------------------------===// -multiclass avx512_vcvtsi opc, RegisterClass SrcRC, RegisterClass DstRC, - X86MemOperand x86memop, string asm> { -let hasSideEffects = 0 in { - def rr : SI opc, SDNode OpNode, RegisterClass SrcRC, + X86VectorVTInfo DstVT, X86MemOperand x86memop, + PatFrag ld_frag, string asm> { + let hasSideEffects = 0 in { + def rr : SI, EVEX_4V; - let mayLoad = 1 in - def rm : SI, EVEX_4V; -} // hasSideEffects = 0 + } // hasSideEffects = 0 + let isCodeGenOnly = 1 in { + def rr_Int : SI, EVEX_4V; + + def rm_Int : SI, EVEX_4V; + }//isCodeGenOnly = 1 +} + +multiclass avx512_vcvtsi_round opc, SDNode OpNode, RegisterClass SrcRC, + X86VectorVTInfo DstVT, string asm> { + def rrb_Int : SI, EVEX_4V, EVEX_B, EVEX_RC; +} + +multiclass avx512_vcvtsi_common opc, SDNode OpNode, RegisterClass SrcRC, + X86VectorVTInfo DstVT, X86MemOperand x86memop, + PatFrag ld_frag, string asm> { + defm NAME : avx512_vcvtsi_round, + avx512_vcvtsi, + VEX_LIG; } + let Predicates = [HasAVX512] in { -defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">, - XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">, - XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; -defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">, - XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">, - XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; +defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, + v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">, + XS, EVEX_CD8<32, CD8VT1>; +defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, + v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">, + XS, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, + v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">, + XD, EVEX_CD8<32, CD8VT1>; +defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, + v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">, + XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; @@ -3726,14 +4849,18 @@ def : Pat<(f64 (sint_to_fp GR32:$src)), def : Pat<(f64 (sint_to_fp GR64:$src)), (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; -defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">, - XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">, - XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; -defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">, +defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR32, + v4f32x_info, i32mem, loadi32, + "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; +defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64, + v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">, + XS, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, GR32, v2f64x_info, + i32mem, loadi32, "cvtusi2sd{l}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">, - XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; +defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64, + v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">, + XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))), (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; @@ -3757,50 +4884,55 @@ def : Pat<(f64 (uint_to_fp GR64:$src)), //===----------------------------------------------------------------------===// // AVX-512 Scalar convert from float/double to integer //===----------------------------------------------------------------------===// -multiclass avx512_cvt_s_int opc, RegisterClass SrcRC, RegisterClass DstRC, - Intrinsic Int, Operand memop, ComplexPattern mem_cpat, - string asm> { -let hasSideEffects = 0 in { - def rr : SI, EVEX, VEX_LIG, - Requires<[HasAVX512]>; - let mayLoad = 1 in - def rm : SI, EVEX, VEX_LIG, - Requires<[HasAVX512]>; -} // hasSideEffects = 0 +multiclass avx512_cvt_s_int_round opc, RegisterClass SrcRC, + RegisterClass DstRC, Intrinsic Int, + Operand memop, ComplexPattern mem_cpat, string asm> { + let hasSideEffects = 0, Predicates = [HasAVX512] in { + def rr : SI, EVEX, VEX_LIG; + def rb : SI, + EVEX, VEX_LIG, EVEX_B, EVEX_RC; + let mayLoad = 1 in + def rm : SI, EVEX, VEX_LIG; + } // hasSideEffects = 0, Predicates = [HasAVX512] } -let Predicates = [HasAVX512] in { + // Convert float/double to signed/unsigned int 32/64 -defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si, +defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse_cvtss2si, ssmem, sse_load_f32, "cvtss2si">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64, +defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64, + int_x86_sse_cvtss2si64, ssmem, sse_load_f32, "cvtss2si">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi, +defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32, + int_x86_avx512_cvtss2usi, ssmem, sse_load_f32, "cvtss2usi">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64, +defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64, int_x86_avx512_cvtss2usi64, ssmem, sse_load_f32, "cvtss2usi">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; -defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si, +defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si">, XD, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64, +defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64, + int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi, +defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32, + int_x86_avx512_cvtsd2usi, sdmem, sse_load_f64, "cvtsd2usi">, XD, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64, +defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64, int_x86_avx512_cvtsd2usi64, sdmem, sse_load_f64, "cvtsd2usi">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1 , Predicates = [HasAVX512] in { defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", SSE_CVT_Scalar, 0>, XS, EVEX_4V; @@ -3814,246 +4946,558 @@ let isCodeGenOnly = 1 in { int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; - defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, - int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}", - SSE_CVT_Scalar, 0>, XS, EVEX_4V; - defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}", - SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W; defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}", SSE_CVT_Scalar, 0>, XD, EVEX_4V; - defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}", - SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; -} // isCodeGenOnly = 1 +} // isCodeGenOnly = 1, Predicates = [HasAVX512] // Convert float/double to signed/unsigned int 32/64 with truncation -let isCodeGenOnly = 1 in { - defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si, - ssmem, sse_load_f32, "cvttss2si">, - XS, EVEX_CD8<32, CD8VT1>; - defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, - int_x86_sse_cvttss2si64, ssmem, sse_load_f32, - "cvttss2si">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; - defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si, - sdmem, sse_load_f64, "cvttsd2si">, XD, - EVEX_CD8<64, CD8VT1>; - defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, - int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, - "cvttsd2si">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; - defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, - int_x86_avx512_cvttss2usi, ssmem, sse_load_f32, - "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>; - defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, - int_x86_avx512_cvttss2usi64, ssmem, - sse_load_f32, "cvttss2usi">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; - defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, - int_x86_avx512_cvttsd2usi, - sdmem, sse_load_f64, "cvttsd2usi">, XD, - EVEX_CD8<64, CD8VT1>; - defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, - int_x86_avx512_cvttsd2usi64, sdmem, - sse_load_f64, "cvttsd2usi">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; -} // isCodeGenOnly = 1 - -multiclass avx512_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, - SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, - string asm> { - def rr : SI opc, string asm, X86VectorVTInfo _SrcRC, + X86VectorVTInfo _DstRC, SDNode OpNode, + SDNode OpNodeRnd>{ +let Predicates = [HasAVX512] in { + def rr : SI, EVEX; - def rm : SI, EVEX; + def rb : SI, EVEX, EVEX_B; + def rm : SI, EVEX; -} - -defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem, - loadf32, "cvttss2si">, XS, - EVEX_CD8<32, CD8VT1>; -defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem, - loadf32, "cvttss2usi">, XS, - EVEX_CD8<32, CD8VT1>; -defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem, - loadf32, "cvttss2si">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; -defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem, - loadf32, "cvttss2usi">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; -defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem, - loadf64, "cvttsd2si">, XD, - EVEX_CD8<64, CD8VT1>; -defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem, - loadf64, "cvttsd2usi">, XD, - EVEX_CD8<64, CD8VT1>; -defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem, - loadf64, "cvttsd2si">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; -defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem, - loadf64, "cvttsd2usi">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; + [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>, + EVEX; + + let isCodeGenOnly = 1,hasSideEffects = 0 in { + def rr_Int : SI, EVEX, VEX_LIG; + def rb_Int : SI, + EVEX,VEX_LIG , EVEX_B; + let mayLoad = 1 in + def rm_Int : SI, EVEX, VEX_LIG; + + } // isCodeGenOnly = 1, hasSideEffects = 0 +} //HasAVX512 +} + + +defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i32x_info, + fp_to_sint,X86cvttss2IntRnd>, + XS, EVEX_CD8<32, CD8VT1>; +defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i64x_info, + fp_to_sint,X86cvttss2IntRnd>, + VEX_W, XS, EVEX_CD8<32, CD8VT1>; +defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i32x_info, + fp_to_sint,X86cvttsd2IntRnd>, + XD, EVEX_CD8<64, CD8VT1>; +defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i64x_info, + fp_to_sint,X86cvttsd2IntRnd>, + VEX_W, XD, EVEX_CD8<64, CD8VT1>; + +defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i32x_info, + fp_to_uint,X86cvttss2UIntRnd>, + XS, EVEX_CD8<32, CD8VT1>; +defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i64x_info, + fp_to_uint,X86cvttss2UIntRnd>, + XS,VEX_W, EVEX_CD8<32, CD8VT1>; +defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i32x_info, + fp_to_uint,X86cvttsd2UIntRnd>, + XD, EVEX_CD8<64, CD8VT1>; +defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i64x_info, + fp_to_uint,X86cvttsd2UIntRnd>, + XD, VEX_W, EVEX_CD8<64, CD8VT1>; +let Predicates = [HasAVX512] in { + def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))), + (VCVTTSS2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>; + def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))), + (VCVTTSS2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>; + def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))), + (VCVTTSD2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>; + def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))), + (VCVTTSD2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>; + } // HasAVX512 //===----------------------------------------------------------------------===// // AVX-512 Convert form float to double and back //===----------------------------------------------------------------------===// -let hasSideEffects = 0 in { -def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst), - (ins FR32X:$src1, FR32X:$src2), - "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; -let mayLoad = 1 in -def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst), - (ins FR32X:$src1, f32mem:$src2), - "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, - EVEX_CD8<32, CD8VT1>; - -// Convert scalar double to scalar single -def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst), - (ins FR64X:$src1, FR64X:$src2), - "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>; -let mayLoad = 1 in -def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst), - (ins FR64X:$src1, f64mem:$src2), - "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX_4V, VEX_LIG, VEX_W, - Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>; -} - -def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>, - Requires<[HasAVX512]>; -def : Pat<(fextend (loadf32 addr:$src)), - (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>; - -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, +multiclass avx512_cvt_fp_scalar opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNode> { + defm rr : AVX512_maskable_scalar, + EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; + defm rm : AVX512_maskable_scalar, + EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>; +} + +// Scalar Coversion with SAE - suppress all exceptions +multiclass avx512_cvt_fp_sae_scalar opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNodeRnd> { + defm rrb : AVX512_maskable_scalar, + EVEX_4V, VEX_LIG, EVEX_B; +} + +// Scalar Conversion with rounding control (RC) +multiclass avx512_cvt_fp_rc_scalar opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNodeRnd> { + defm rrb : AVX512_maskable_scalar, + EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, + EVEX_B, EVEX_RC; +} +multiclass avx512_cvt_fp_scalar_sd2ss opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, X86VectorVTInfo _src, + X86VectorVTInfo _dst> { + let Predicates = [HasAVX512] in { + defm Z : avx512_cvt_fp_scalar, + avx512_cvt_fp_rc_scalar, VEX_W, EVEX_CD8<64, CD8VT1>, + EVEX_V512, XD; + } +} + +multiclass avx512_cvt_fp_scalar_ss2sd opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, X86VectorVTInfo _src, + X86VectorVTInfo _dst> { + let Predicates = [HasAVX512] in { + defm Z : avx512_cvt_fp_scalar, + avx512_cvt_fp_sae_scalar, + EVEX_CD8<32, CD8VT1>, XS, EVEX_V512; + } +} +defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86fround, + X86froundRnd, f64x_info, f32x_info>; +defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpext, + X86fpextRnd,f32x_info, f64x_info >; + +def : Pat<(f64 (fextend FR32X:$src)), + (COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X), + (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>, + Requires<[HasAVX512]>; +def : Pat<(f64 (fextend (loadf32 addr:$src))), + (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, + Requires<[HasAVX512]>; + +def : Pat<(f64 (extloadf32 addr:$src)), + (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, Requires<[HasAVX512, OptForSize]>; -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>, - Requires<[HasAVX512, OptForSpeed]>; +def : Pat<(f64 (extloadf32 addr:$src)), + (COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>, + Requires<[HasAVX512, OptForSpeed]>; -def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>, +def : Pat<(f32 (fround FR64X:$src)), + (COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X), + (COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>, Requires<[HasAVX512]>; +//===----------------------------------------------------------------------===// +// AVX-512 Vector convert from signed/unsigned integer to float/double +// and from float/double to signed/unsigned integer +//===----------------------------------------------------------------------===// -multiclass avx512_vcvt_fp_with_rc opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT, ValueType InVT, - Domain d> { -let hasSideEffects = 0 in { - def rr : AVX512PI, EVEX; - def rrb : AVX512PI, EVEX, EVEX_B, EVEX_RC; - let mayLoad = 1 in - def rm : AVX512PI, EVEX; -} // hasSideEffects = 0 +multiclass avx512_vcvt_fp opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNode, + string Broadcast = _.BroadcastStr, + string Alias = ""> { + + defm rr : AVX512_maskable, EVEX; + + defm rm : AVX512_maskable, EVEX; + + defm rmb : AVX512_maskable, EVEX, EVEX_B; +} +// Coversion with SAE - suppress all exceptions +multiclass avx512_vcvt_fp_sae opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNodeRnd> { + defm rrb : AVX512_maskable, + EVEX, EVEX_B; +} + +// Conversion with rounding control (RC) +multiclass avx512_vcvt_fp_rc opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNodeRnd> { + defm rrb : AVX512_maskable, + EVEX, EVEX_B, EVEX_RC; +} + +// Extend Float to Double +multiclass avx512_cvtps2pd opc, string OpcodeStr> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_sae, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } } -multiclass avx512_vcvt_fp opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT, ValueType InVT, - Domain d> { -let hasSideEffects = 0 in { - def rr : AVX512PI, EVEX; - let mayLoad = 1 in - def rm : AVX512PI, EVEX; -} // hasSideEffects = 0 +// Truncate Double to Float +multiclass avx512_cvtpd2ps opc, string OpcodeStr> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; + } } -defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround, - memopv8f64, f512mem, v8f32, v8f64, - SSEPackedSingle>, EVEX_V512, VEX_W, PD, - EVEX_CD8<64, CD8VF>; +defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps">, + VEX_W, PD, EVEX_CD8<64, CD8VF>; +defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">, + PS, EVEX_CD8<32, CD8VH>; -defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, - memopv4f64, f256mem, v8f64, v8f32, - SSEPackedDouble>, EVEX_V512, PS, - EVEX_CD8<32, CD8VH>; def : Pat<(v8f64 (extloadv8f32 addr:$src)), (VCVTPS2PDZrm addr:$src)>; -def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src), - (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))), - (VCVTPD2PSZrr VR512:$src)>; +let Predicates = [HasVLX] in { + def : Pat<(v4f64 (extloadv4f32 addr:$src)), + (VCVTPS2PDZ256rm addr:$src)>; +} -def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src), - (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), imm:$rc)), - (VCVTPD2PSZrrb VR512:$src, imm:$rc)>; +// Convert Signed/Unsigned Doubleword to Double +multiclass avx512_cvtdq2pd opc, string OpcodeStr, SDNode OpNode, + SDNode OpNode128> { + // No rounding in this op + let Predicates = [HasAVX512] in + defm Z : avx512_vcvt_fp, + EVEX_V512; -//===----------------------------------------------------------------------===// -// AVX-512 Vector convert from sign integer to float/double -//===----------------------------------------------------------------------===// + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Signed/Unsigned Doubleword to Float +multiclass avx512_cvtdq2ps opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp, + EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Doubleword with truncation +multiclass avx512_cvttps2dq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_sae, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp, + EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Doubleword +multiclass avx512_cvtps2dq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp, + EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Doubleword with truncation +multiclass avx512_cvttpd2dq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_sae, EVEX_V512; + } + let Predicates = [HasVLX] in { + // we need "x"/"y" suffixes in order to distinguish between 128 and 256 + // memory forms of these instructions in Asm Parcer. They have the same + // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly + // due to the same reason. + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Doubleword +multiclass avx512_cvtpd2dq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasVLX] in { + // we need "x"/"y" suffixes in order to distinguish between 128 and 256 + // memory forms of these instructions in Asm Parcer. They have the same + // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly + // due to the same reason. + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Quardword +multiclass avx512_cvtpd2qq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + defm Z128 : avx512_vcvt_fp, + EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Quardword with truncation +multiclass avx512_cvttpd2qq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_sae, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + defm Z128 : avx512_vcvt_fp, + EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Signed/Unsigned Quardword to Double +multiclass avx512_cvtqq2pd opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + defm Z128 : avx512_vcvt_fp, + EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Quardword +multiclass avx512_cvtps2qq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + // Explicitly specified broadcast string, since we take only 2 elements + // from v4f32x_info source + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Quardword with truncation +multiclass avx512_cvttps2qq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_sae, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + // Explicitly specified broadcast string, since we take only 2 elements + // from v4f32x_info source + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} -defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp, - memopv8i64, i512mem, v16f32, v16i32, - SSEPackedSingle>, EVEX_V512, PS, - EVEX_CD8<32, CD8VF>; +// Convert Signed/Unsigned Quardword to Float +multiclass avx512_cvtqq2ps opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + // we need "x"/"y" suffixes in order to distinguish between 128 and 256 + // memory forms of these instructions in Asm Parcer. They have the same + // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly + // due to the same reason. + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; + } +} -defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp, - memopv4i64, i256mem, v8f64, v8i32, - SSEPackedDouble>, EVEX_V512, XS, +defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86cvtdq2pd>, XS, EVEX_CD8<32, CD8VH>; -defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, - memopv16f32, f512mem, v16i32, v16f32, - SSEPackedSingle>, EVEX_V512, XS, +defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, + X86VSintToFpRnd>, + PS, EVEX_CD8<32, CD8VF>; + +defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint, + X86VFpToSintRnd>, + XS, EVEX_CD8<32, CD8VF>; + +defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, + X86VFpToSintRnd>, + PD, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint, + X86VFpToUintRnd>, PS, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, - memopv8f64, f512mem, v8i32, v8f64, - SSEPackedDouble>, EVEX_V512, PD, VEX_W, +defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, + X86VFpToUintRnd>, PS, VEX_W, EVEX_CD8<64, CD8VF>; -defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, - memopv16f32, f512mem, v16i32, v16f32, - SSEPackedSingle>, EVEX_V512, PS, +defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>, + XS, EVEX_CD8<32, CD8VH>; + +defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp, + X86VUintToFpRnd>, XD, EVEX_CD8<32, CD8VF>; -// cvttps2udq (src, 0, mask-all-ones, sae-current) -def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)), - (VCVTTPS2UDQZrr VR512:$src)>; +defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtps2Int, + X86cvtps2IntRnd>, PD, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint, - memopv8f64, f512mem, v8i32, v8f64, - SSEPackedDouble>, EVEX_V512, PS, VEX_W, +defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtpd2Int, + X86cvtpd2IntRnd>, XD, VEX_W, EVEX_CD8<64, CD8VF>; -// cvttpd2udq (src, 0, mask-all-ones, sae-current) -def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src), - (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)), - (VCVTTPD2UDQZrr VR512:$src)>; +defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtps2UInt, + X86cvtps2UIntRnd>, + PS, EVEX_CD8<32, CD8VF>; +defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtpd2UInt, + X86cvtpd2UIntRnd>, VEX_W, + PS, EVEX_CD8<64, CD8VF>; -defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp, - memopv4i64, f256mem, v8f64, v8i32, - SSEPackedDouble>, EVEX_V512, XS, - EVEX_CD8<32, CD8VH>; +defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtpd2Int, + X86cvtpd2IntRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; -defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp, - memopv16i32, f512mem, v16f32, v16i32, - SSEPackedSingle>, EVEX_V512, XD, - EVEX_CD8<32, CD8VF>; +defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtps2Int, + X86cvtps2IntRnd>, PD, EVEX_CD8<32, CD8VH>; -def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), - (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr +defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtpd2UInt, + X86cvtpd2UIntRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; + +defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtps2UInt, + X86cvtps2UIntRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint, + X86VFpToSlongRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, + X86VFpToSlongRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint, + X86VFpToUlongRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, + X86VFpToUlongRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, + X86VSlongToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>; + +defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp, + X86VUlongToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>; + +defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, + X86VSlongToFpRnd>, VEX_W, PS, EVEX_CD8<64, CD8VF>; + +defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, + X86VUlongToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>; + +let Predicates = [HasAVX512, NoVLX] in { +def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), + (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))), @@ -4071,67 +5515,8 @@ def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>; - -def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)), - (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>; -def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512 (v8i32 VR256X:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VCVTDQ2PDZrr VR256X:$src)>; -def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512 (v16i32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)), - (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>; -def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512 (v8i32 VR256X:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VCVTUDQ2PDZrr VR256X:$src)>; - -multiclass avx512_vcvt_fp2int opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, PatFrag mem_frag, - X86MemOperand x86memop, Domain d> { -let hasSideEffects = 0 in { - def rr : AVX512PI, EVEX; - def rrb : AVX512PI, EVEX, EVEX_B, EVEX_RC; - let mayLoad = 1 in - def rm : AVX512PI, EVEX; -} // hasSideEffects = 0 } -defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512, - memopv16f32, f512mem, SSEPackedSingle>, PD, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X, - memopv8f64, f512mem, SSEPackedDouble>, XD, VEX_W, - EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1), imm:$rc)), - (VCVTPS2DQZrrb VR512:$src, imm:$rc)>; - -def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src), - (v8i32 immAllZerosV), (i8 -1), imm:$rc)), - (VCVTPD2DQZrrb VR512:$src, imm:$rc)>; - -defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512, - memopv16f32, f512mem, SSEPackedSingle>, - PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X, - memopv8f64, f512mem, SSEPackedDouble>, VEX_W, - PS, EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1), imm:$rc)), - (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>; - -def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src), - (v8i32 immAllZerosV), (i8 -1), imm:$rc)), - (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>; - let Predicates = [HasAVX512] in { def : Pat<(v8f32 (fround (loadv8f64 addr:$src))), (VCVTPD2PSZrm addr:$src)>; @@ -4142,40 +5527,102 @@ let Predicates = [HasAVX512] in { //===----------------------------------------------------------------------===// // Half precision conversion instructions //===----------------------------------------------------------------------===// -multiclass avx512_cvtph2ps { - def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", - []>, EVEX; - let hasSideEffects = 0, mayLoad = 1 in - def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX; -} - -multiclass avx512_cvtps2ph { - def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst), - (ins srcRC:$src1, i32i8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX; - let hasSideEffects = 0, mayStore = 1 in - def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), - (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX; +multiclass avx512_cvtph2ps { + defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), + "vcvtph2ps", "$src", "$src", + (X86cvtph2ps (_src.VT _src.RC:$src), + (i32 FROUND_CURRENT))>, T8PD; + let hasSideEffects = 0, mayLoad = 1 in { + defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src), + "vcvtph2ps", "$src", "$src", + (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))), + (i32 FROUND_CURRENT))>, T8PD; + } +} + +multiclass avx512_cvtph2ps_sae { + defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), + "vcvtph2ps", "{sae}, $src", "$src, {sae}", + (X86cvtph2ps (_src.VT _src.RC:$src), + (i32 FROUND_NO_EXC))>, T8PD, EVEX_B; + +} + +let Predicates = [HasAVX512] in { + defm VCVTPH2PSZ : avx512_cvtph2ps, + avx512_cvtph2ps_sae, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; + let Predicates = [HasVLX] in { + defm VCVTPH2PSZ256 : avx512_cvtph2ps,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; + defm VCVTPH2PSZ128 : avx512_cvtph2ps, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; + } } -defm VCVTPH2PSZ : avx512_cvtph2ps, EVEX_V512, - EVEX_CD8<32, CD8VH>; -defm VCVTPS2PHZ : avx512_cvtps2ph, EVEX_V512, - EVEX_CD8<32, CD8VH>; +multiclass avx512_cvtps2ph { + defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst), + (ins _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph", "$src2, $src1", "$src1, $src2", + (X86cvtps2ph (_src.VT _src.RC:$src1), + (i32 imm:$src2), + (i32 FROUND_CURRENT))>, AVX512AIi8Base; + let hasSideEffects = 0, mayStore = 1 in { + def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1), + (i32 imm:$src2), (i32 FROUND_CURRENT) )), + addr:$dst)]>; + def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", + []>, EVEX_K; + } +} +multiclass avx512_cvtps2ph_sae { + defm rb : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst), + (ins _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph", "$src2, {sae}, $src1", "$src1, $src2, {sae}", + (X86cvtps2ph (_src.VT _src.RC:$src1), + (i32 imm:$src2), + (i32 FROUND_NO_EXC))>, EVEX_B, AVX512AIi8Base; +} +let Predicates = [HasAVX512] in { + defm VCVTPS2PHZ : avx512_cvtps2ph, + avx512_cvtps2ph_sae, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; + let Predicates = [HasVLX] in { + defm VCVTPS2PHZ256 : avx512_cvtps2ph, + EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; + defm VCVTPS2PHZ128 : avx512_cvtps2ph, + EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; + } +} -def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src), - imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))), - (VCVTPS2PHZrr VR512:$src, imm:$rc)>; +// Unordered/Ordered scalar fp compare with Sea and set EFLAGS +multiclass avx512_ord_cmp_sae opc, X86VectorVTInfo _, SDNode OpNode, + string OpcodeStr> { + def rb: AVX512, EVEX, EVEX_B, VEX_LIG, EVEX_V128, + Sched<[WriteFAdd]>; +} -def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src), - (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))), - (VCVTPH2PSZrr VR256X:$src)>; +let Defs = [EFLAGS], Predicates = [HasAVX512] in { + defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, X86ucomiSae, "vucomiss">, + AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; + defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, X86ucomiSae, "vucomisd">, + AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; + defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, X86comiSae, "vcomiss">, + AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; + defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, X86comiSae, "vcomisd">, + AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; +} let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, @@ -4185,10 +5632,10 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { "ucomisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { - defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load, + defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32, "comiss">, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; - defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load, + defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64, "comisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } @@ -4210,50 +5657,31 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { } /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd -multiclass avx512_fp14_s opc, string OpcodeStr, RegisterClass RC, - X86MemOperand x86memop> { - let hasSideEffects = 0 in { - def rr : AVX5128I, EVEX_4V; +multiclass avx512_fp14_s opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let hasSideEffects = 0, AddedComplexity = 20 , Predicates = [HasAVX512] in { + defm rr : AVX512_maskable_scalar, EVEX_4V; let mayLoad = 1 in { - def rm : AVX5128I, EVEX_4V; + defm rm : AVX512_maskable_scalar, EVEX_4V; } } } -defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>, - EVEX_CD8<32, CD8VT1>; -defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>, - VEX_W, EVEX_CD8<64, CD8VT1>; -defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>, - EVEX_CD8<32, CD8VT1>; -defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>, - VEX_W, EVEX_CD8<64, CD8VT1>; - -def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1), - (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))), - (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X), - (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>; - -def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1), - (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))), - (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X), - (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; - -def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1), - (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))), - (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X), - (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>; - -def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1), - (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))), - (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X), - (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; +defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>, + EVEX_CD8<32, CD8VT1>, T8PD; +defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>, + VEX_W, EVEX_CD8<64, CD8VT1>, T8PD; +defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>, + EVEX_CD8<32, CD8VT1>, T8PD; +defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>, + VEX_W, EVEX_CD8<64, CD8VT1>, T8PD; /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, @@ -4301,20 +5729,6 @@ multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode> { defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>; defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>; -def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))), - (VRSQRT14PSZr VR512:$src)>; -def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VRSQRT14PDZr VR512:$src)>; - -def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))), - (VRCP14PSZr VR512:$src)>; -def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VRCP14PDZr VR512:$src)>; - /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode> { @@ -4327,9 +5741,9 @@ multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, defm rb : AVX512_maskable_scalar, EVEX_B; + (i32 FROUND_NO_EXC))>, EVEX_B; defm m : AVX512_maskable_scalar, T8PD, EVEX_4V; defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V; } + +defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V; /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, @@ -4359,12 +5775,6 @@ multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, (ins _.RC:$src), OpcodeStr, "$src", "$src", (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>; - defm rb : AVX512_maskable, EVEX_B; - defm m : AVX512_maskable opc, string OpcodeStr, X86VectorVTInfo _, (i32 FROUND_CURRENT))>; defm mb : AVX512_maskable, EVEX_B; } +multiclass avx512_fp28_p_round opc, string OpcodeStr, X86VectorVTInfo _, + SDNode OpNode> { + defm rb : AVX512_maskable, EVEX_B; +} multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode> { defm PS : avx512_fp28_p, - EVEX_CD8<32, CD8VF>; + avx512_fp28_p_round, + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PD : avx512_fp28_p, - VEX_W, EVEX_CD8<32, CD8VF>; + avx512_fp28_p_round, + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } +multiclass avx512_fp_unaryop_packed opc, string OpcodeStr, + SDNode OpNode> { + // Define only if AVX512VL feature is present. + let Predicates = [HasVLX] in { + defm PSZ128 : avx512_fp28_p, + EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp28_p, + EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp28_p, + EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp28_p, + EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + } +} let Predicates = [HasERI], hasSideEffects = 0 in { - defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD; - defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD; - defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD; + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX; +} +defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>, + avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX; + +multiclass avx512_sqrt_packed_round opc, string OpcodeStr, + SDNode OpNodeRnd, X86VectorVTInfo _>{ + defm rb: AVX512_maskable, + EVEX, EVEX_B, EVEX_RC; } multiclass avx512_sqrt_packed opc, string OpcodeStr, @@ -4412,67 +5856,6 @@ multiclass avx512_sqrt_packed opc, string OpcodeStr, } } -multiclass avx512_sqrt_scalar opc, string OpcodeStr, - Intrinsic F32Int, Intrinsic F64Int, - OpndItins itins_s, OpndItins itins_d> { - def SSZr : SI, XS, EVEX_4V; - let isCodeGenOnly = 1 in - def SSZr_Int : SIi8, XS, EVEX_4V; - let mayLoad = 1 in { - def SSZm : SI, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; - let isCodeGenOnly = 1 in - def SSZm_Int : SIi8, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; - } - def SDZr : SI, - XD, EVEX_4V, VEX_W; - let isCodeGenOnly = 1 in - def SDZr_Int : SIi8, XD, EVEX_4V, VEX_W; - let mayLoad = 1 in { - def SDZm : SI, - XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; - let isCodeGenOnly = 1 in - def SDZm_Int : SIi8, - XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; - } -} - multiclass avx512_sqrt_packed_all opc, string OpcodeStr, SDNode OpNode> { defm PSZ : avx512_sqrt_packed opc, string OpcodeStr, } } -defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>; +multiclass avx512_sqrt_packed_all_round opc, string OpcodeStr, + SDNode OpNodeRnd> { + defm PSZ : avx512_sqrt_packed_round, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_sqrt_packed_round, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; +} -defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", - int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, - SSE_SQRTSS, SSE_SQRTSD>; +multiclass avx512_sqrt_scalar opc, string OpcodeStr,X86VectorVTInfo _, + string SUFF, SDNode OpNode, SDNode OpNodeRnd> { -let Predicates = [HasAVX512] in { - def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)), - (VSQRTPSZr VR512:$src1)>; - def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)), - (VSQRTPDZr VR512:$src1)>; - - def : Pat<(f32 (fsqrt FR32X:$src)), - (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; - def : Pat<(f32 (fsqrt (load addr:$src))), - (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; - def : Pat<(f64 (fsqrt FR64X:$src)), - (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>; - def : Pat<(f64 (fsqrt (load addr:$src))), - (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; + defm r_Int : AVX512_maskable_scalar; + let mayLoad = 1 in + defm m_Int : AVX512_maskable_scalar; + + defm rb_Int : AVX512_maskable_scalar, + EVEX_B, EVEX_RC; - def : Pat<(f32 (X86frsqrt FR32X:$src)), - (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>; - def : Pat<(f32 (X86frsqrt (load addr:$src))), - (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; + let isCodeGenOnly = 1 in { + def r : I; - def : Pat<(f32 (X86frcp FR32X:$src)), - (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>; - def : Pat<(f32 (X86frcp (load addr:$src))), - (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; + let mayLoad = 1 in + def m : I; + } - def : Pat<(int_x86_sse_sqrt_ss VR128X:$src), - (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)), - (COPY_TO_REGCLASS VR128X:$src, FR32)), - VR128X)>; - def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src), - (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; - - def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src), - (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)), - (COPY_TO_REGCLASS VR128X:$src, FR64)), - VR128X)>; - def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src), - (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>; -} - - -multiclass avx512_rndscale opc, string OpcodeStr, - X86MemOperand x86memop, RegisterClass RC, - PatFrag mem_frag, Domain d> { -let ExeDomain = d in { - // Intrinsic operation, reg. - // Vector intrinsic operation, reg - def r : AVX512AIi8, EVEX; + def : Pat<(_.EltVT (OpNode _.FRC:$src)), + (!cast(NAME#SUFF#Zr) + (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; - // Vector intrinsic operation, mem - def m : AVX512AIi8, EVEX; -} // ExeDomain + def : Pat<(_.EltVT (OpNode (load addr:$src))), + (!cast(NAME#SUFF#Zm) + (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[OptForSize]>; } +multiclass avx512_sqrt_scalar_all opc, string OpcodeStr> { + defm SSZ : avx512_sqrt_scalar, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS; + defm SDZ : avx512_sqrt_scalar, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W; +} -defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512, - memopv16f32, SSEPackedSingle>, EVEX_V512, - EVEX_CD8<32, CD8VF>; +defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>, + avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>; -def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1), - imm:$src2, (v16f32 VR512:$src1), (i16 -1), - FROUND_CURRENT)), - (VRNDSCALEPSZr VR512:$src1, imm:$src2)>; +defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG; +let Predicates = [HasAVX512] in { + def : Pat<(f32 (X86frsqrt FR32X:$src)), + (COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>; + def : Pat<(f32 (X86frsqrt (load addr:$src))), + (COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, + Requires<[OptForSize]>; + def : Pat<(f32 (X86frcp FR32X:$src)), + (COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>; + def : Pat<(f32 (X86frcp (load addr:$src))), + (COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, + Requires<[OptForSize]>; +} -defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512, - memopv8f64, SSEPackedDouble>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; +multiclass +avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { -def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1), - imm:$src2, (v8f64 VR512:$src1), (i8 -1), - FROUND_CURRENT)), - (VRNDSCALEPDZr VR512:$src1, imm:$src2)>; + let ExeDomain = _.ExeDomain in { + defm r : AVX512_maskable_scalar; -multiclass avx512_rndscale_scalar opc, string OpcodeStr, - Operand x86memop, RegisterClass RC, Domain d> { -let ExeDomain = d in { - def r : AVX512AIi8, EVEX_4V; + defm rb : AVX512_maskable_scalar, EVEX_B; - def m : AVX512AIi8, EVEX_4V; -} // ExeDomain -} - -defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X, - SSEPackedSingle>, EVEX_CD8<32, CD8VT1>; - -defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X, - SSEPackedDouble>, EVEX_CD8<64, CD8VT1>; - -def : Pat<(ffloor FR32X:$src), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>; -def : Pat<(f64 (ffloor FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>; -def : Pat<(f32 (fnearbyint FR32X:$src)), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>; -def : Pat<(f64 (fnearbyint FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>; -def : Pat<(f32 (fceil FR32X:$src)), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>; -def : Pat<(f64 (fceil FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>; -def : Pat<(f32 (frint FR32X:$src)), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>; -def : Pat<(f64 (frint FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>; -def : Pat<(f32 (ftrunc FR32X:$src)), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>; -def : Pat<(f64 (ftrunc FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>; + let mayLoad = 1 in + defm m : AVX512_maskable_scalar; + } + let Predicates = [HasAVX512] in { + def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS + (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x1))), _.FRC)>; + def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS + (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x2))), _.FRC)>; + def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS + (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x3))), _.FRC)>; + def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS + (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>; + def : Pat<(fnearbyint _.FRC:$src), (COPY_TO_REGCLASS + (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xc))), _.FRC)>; + + def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS + (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), + addr:$src, (i32 0x1))), _.FRC)>; + def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS + (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), + addr:$src, (i32 0x2))), _.FRC)>; + def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS + (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), + addr:$src, (i32 0x3))), _.FRC)>; + def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS + (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), + addr:$src, (i32 0x4))), _.FRC)>; + def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS + (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), + addr:$src, (i32 0xc))), _.FRC)>; + } +} -def : Pat<(v16f32 (ffloor VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x1))>; -def : Pat<(v16f32 (fnearbyint VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0xC))>; -def : Pat<(v16f32 (fceil VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x2))>; -def : Pat<(v16f32 (frint VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x4))>; -def : Pat<(v16f32 (ftrunc VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x3))>; +defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>; -def : Pat<(v8f64 (ffloor VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x1))>; -def : Pat<(v8f64 (fnearbyint VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0xC))>; -def : Pat<(v8f64 (fceil VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x2))>; -def : Pat<(v8f64 (frint VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x4))>; -def : Pat<(v8f64 (ftrunc VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x3))>; +defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>; //------------------------------------------------- // Integer truncate and extend operations //------------------------------------------------- -multiclass avx512_trunc_sat opc, string OpcodeStr, - RegisterClass dstRC, RegisterClass srcRC, - RegisterClass KRC, X86MemOperand x86memop> { - def rr : AVX512XS8I opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo, + X86MemOperand x86memop> { + + defm rr : AVX512_maskable, + EVEX, T8XS; + + // for intrinsic patter match + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + undef)), + (!cast(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + DestInfo.ImmAllZerosV)), + (!cast(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + DestInfo.RC:$src0)), + (!cast(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0, + DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + let mayStore = 1 in { + def mr : AVX512XS8I, EVEX; - def rrk : AVX512XS8I, EVEX, EVEX_K; + }//mayStore = 1 +} - def rrkz : AVX512XS8I, EVEX, EVEX_KZ; +multiclass avx512_trunc_mr_lowering { - def mr : AVX512XS8I, EVEX; + def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), + (!cast(NAME#SrcInfo.ZSuffix##mr) + addr:$dst, SrcInfo.RC:$src)>; - def mrk : AVX512XS8I, EVEX, EVEX_K; + def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask, + (SrcInfo.VT SrcInfo.RC:$src)), + (!cast(NAME#SrcInfo.ZSuffix##mrk) + addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; +} + +multiclass avx512_trunc_sat_mr_lowering { + + def: Pat<(!cast("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# + DestInfo.Suffix#"_mem_"#SrcInfo.Size) + addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask), + (!cast(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr, + (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM), + (SrcInfo.VT SrcInfo.RC:$src))>; + + def: Pat<(!cast("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# + DestInfo.Suffix#"_mem_"#SrcInfo.Size) + addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1), + (!cast(NAME#SrcInfo.ZSuffix##mr) addr:$ptr, + (SrcInfo.VT SrcInfo.RC:$src))>; +} + +multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, + X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, + X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, + X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag, + Predicate prd = HasAVX512>{ + + let Predicates = [HasVLX, prd] in { + defm Z128: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V128; + + defm Z256: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V256; + } + let Predicates = [prd] in + defm Z: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V512; +} + +multiclass avx512_trunc_sat opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, + X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, + X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, + X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{ + + let Predicates = [HasVLX, prd] in { + defm Z128: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V128; + + defm Z256: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V256; + } + let Predicates = [prd] in + defm Z: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V512; +} + +multiclass avx512_trunc_qb opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VO>; +} +multiclass avx512_trunc_sat_qb opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<8, CD8VO>; +} +multiclass avx512_trunc_qw opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VQ>; } -defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; - -def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>; -def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>; -def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>; -def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>; -def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>; - -def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>; -def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>; -def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>; -def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>; - - -multiclass avx512_extend opc, string OpcodeStr, RegisterClass KRC, - RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode, - PatFrag mem_frag, X86MemOperand x86memop, - ValueType OpVT, ValueType InVT> { - - def rr : AVX5128I, EVEX; - - def rrk : AVX5128I, EVEX, EVEX_K; - - def rrkz : AVX5128I, EVEX, EVEX_KZ; +multiclass avx512_trunc_sat_qw opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VQ>; +} + +multiclass avx512_trunc_qd opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<32, CD8VH>; +} +multiclass avx512_trunc_sat_qd opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<32, CD8VH>; +} + +multiclass avx512_trunc_db opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VQ>; +} +multiclass avx512_trunc_sat_db opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<8, CD8VQ>; +} + +multiclass avx512_trunc_dw opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; +} +multiclass avx512_trunc_sat_dw opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VH>; +} + +multiclass avx512_trunc_wb opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; +} +multiclass avx512_trunc_sat_wb opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VH>; +} + +defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>; +defm VPMOVSQB : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>; +defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>; + +defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>; +defm VPMOVSQW : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>; +defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>; + +defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>; +defm VPMOVSQD : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>; +defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>; + +defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>; +defm VPMOVSDB : avx512_trunc_sat_db<0x21, "s", X86vtruncs>; +defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>; + +defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>; +defm VPMOVSDW : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>; +defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>; + +defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>; +defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>; +defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>; + +let Predicates = [HasAVX512, NoVLX] in { +def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))), + (v8i16 (EXTRACT_SUBREG + (v16i16 (VPMOVDWZrr (v16i32 (SUBREG_TO_REG (i32 0), + VR256X:$src, sub_ymm)))), sub_xmm))>; +def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))), + (v4i32 (EXTRACT_SUBREG + (v8i32 (VPMOVQDZrr (v8i64 (SUBREG_TO_REG (i32 0), + VR256X:$src, sub_ymm)))), sub_xmm))>; +} + +let Predicates = [HasBWI, NoVLX] in { +def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))), + (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (SUBREG_TO_REG (i32 0), + VR256X:$src, sub_ymm))), sub_xmm))>; +} + +multiclass avx512_extend_common opc, string OpcodeStr, + X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, + X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{ + + defm rr : AVX512_maskable, + EVEX; let mayLoad = 1 in { - def rm : AVX5128I, - EVEX; + defm rm : AVX512_maskable, + EVEX; + } +} - def rmk : AVX5128I, - EVEX, EVEX_K; - - def rmkz : AVX5128I, - EVEX, EVEX_KZ; - } -} - -defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext, - memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VQ>; -defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext, - memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VO>; -defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext, - memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, - EVEX_CD8<16, CD8VH>; -defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext, - memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, - EVEX_CD8<16, CD8VQ>; -defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext, - memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, - EVEX_CD8<32, CD8VH>; - -defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext, - memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VQ>; -defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext, - memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VO>; -defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext, - memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, - EVEX_CD8<16, CD8VH>; -defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext, - memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, - EVEX_CD8<16, CD8VQ>; -defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext, - memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, - EVEX_CD8<32, CD8VH>; +multiclass avx512_extend_BW opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + let Predicates = [HasVLX, HasBWI] in { + defm Z128: avx512_extend_common, + EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128; -//===----------------------------------------------------------------------===// -// GATHER - SCATTER Operations + defm Z256: avx512_extend_common, + EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256; + } + let Predicates = [HasBWI] in { + defm Z : avx512_extend_common, + EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512; + } +} -multiclass avx512_gather opc, string OpcodeStr, RegisterClass KRC, - RegisterClass RC, X86MemOperand memop> { -let mayLoad = 1, - Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in - def rm : AVX5128I, EVEX, EVEX_K; +multiclass avx512_extend_BD opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512; + } } -let ExeDomain = SSEPackedDouble in { -defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +multiclass avx512_extend_BQ opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512; + } } -let ExeDomain = SSEPackedSingle in { -defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; -defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; -} - -defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -multiclass avx512_scatter opc, string OpcodeStr, RegisterClass KRC, - RegisterClass RC, X86MemOperand memop> { -let mayStore = 1, Constraints = "$mask = $mask_wb" in - def mr : AVX5128I opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512; + } +} + +multiclass avx512_extend_WQ opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512; + } +} + +multiclass avx512_extend_DQ opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi32")> { + + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512; + } +} + +defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">; +defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">; +defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">; +defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">; +defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">; +defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">; + + +defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">; +defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">; +defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">; +defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">; +defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">; +defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">; + +//===----------------------------------------------------------------------===// +// GATHER - SCATTER Operations + +multiclass avx512_gather opc, string OpcodeStr, X86VectorVTInfo _, + X86MemOperand memop, PatFrag GatherNode> { + let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb", + ExeDomain = _.ExeDomain in + def rm : AVX5128I, EVEX, EVEX_K; + [(set _.RC:$dst, _.KRCWM:$mask_wb, + (GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask, + vectoraddr:$src2))]>, EVEX, EVEX_K, + EVEX_CD8<_.EltSize, CD8VT1>; +} + +multiclass avx512_gather_q_pd dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_gather, EVEX_V512, VEX_W; + defm NAME##Q##SUFF##Z: avx512_gather, EVEX_V512, VEX_W; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_gather, EVEX_V256, VEX_W; + defm NAME##Q##SUFF##Z256: avx512_gather, EVEX_V256, VEX_W; + defm NAME##D##SUFF##Z128: avx512_gather, EVEX_V128, VEX_W; + defm NAME##Q##SUFF##Z128: avx512_gather, EVEX_V128, VEX_W; +} +} + +multiclass avx512_gather_d_ps dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_gather, EVEX_V512; + defm NAME##Q##SUFF##Z: avx512_gather, EVEX_V512; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_gather, EVEX_V256; + defm NAME##Q##SUFF##Z256: avx512_gather, EVEX_V256; + defm NAME##D##SUFF##Z128: avx512_gather, EVEX_V128; + defm NAME##Q##SUFF##Z128: avx512_gather, EVEX_V128; +} } -let ExeDomain = SSEPackedDouble in { -defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; + +defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">, + avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">; + +defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">, + avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">; + +multiclass avx512_scatter opc, string OpcodeStr, X86VectorVTInfo _, + X86MemOperand memop, PatFrag ScatterNode> { + +let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in + + def mr : AVX5128I, + EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; } -let ExeDomain = SSEPackedSingle in { -defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; -defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; +multiclass avx512_scatter_q_pd dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_scatter, EVEX_V512, VEX_W; + defm NAME##Q##SUFF##Z: avx512_scatter, EVEX_V512, VEX_W; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_scatter, EVEX_V256, VEX_W; + defm NAME##Q##SUFF##Z256: avx512_scatter, EVEX_V256, VEX_W; + defm NAME##D##SUFF##Z128: avx512_scatter, EVEX_V128, VEX_W; + defm NAME##Q##SUFF##Z128: avx512_scatter, EVEX_V128, VEX_W; +} +} + +multiclass avx512_scatter_d_ps dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_scatter, EVEX_V512; + defm NAME##Q##SUFF##Z: avx512_scatter, EVEX_V512; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_scatter, EVEX_V256; + defm NAME##Q##SUFF##Z256: avx512_scatter, EVEX_V256; + defm NAME##D##SUFF##Z128: avx512_scatter, EVEX_V128; + defm NAME##Q##SUFF##Z128: avx512_scatter, EVEX_V128; +} } -defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; +defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">, + avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">; -defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; +defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">, + avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">; // prefetch multiclass avx512_gather_scatter_prefetch opc, Format F, string OpcodeStr, @@ -4941,256 +6571,24 @@ defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd", VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -//===----------------------------------------------------------------------===// -// VSHUFPS - VSHUFPD Operations - -multiclass avx512_shufp { - def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2, u8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), - (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, - EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>; - def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, u8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, - EVEX_4V, Sched<[WriteShuffle]>; -} - -defm VSHUFPSZ : avx512_shufp, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VSHUFPDZ : avx512_shufp, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))), - (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>; -def : Pat<(v16i32 (X86Shufp VR512:$src1, - (memopv16i32 addr:$src2), (i8 imm:$imm))), - (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>; - -def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))), - (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>; -def : Pat<(v8i64 (X86Shufp VR512:$src1, - (memopv8i64 addr:$src2), (i8 imm:$imm))), - (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>; - -multiclass avx512_valign { - defm rri : AVX512_maskable<0x03, MRMSrcReg, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), - "valign"##_.Suffix, - "$src3, $src2, $src1", "$src1, $src2, $src3", - (_.VT (X86VAlign _.RC:$src2, _.RC:$src1, - (i8 imm:$src3)))>, - AVX512AIi8Base, EVEX_4V; - - // Also match valign of packed floats. - def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))), - (!cast(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>; - - let mayLoad = 1 in - def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst), - (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), - !strconcat("valign"##_.Suffix, - "\t{$src3, $src2, $src1, $dst|" - "$dst, $src1, $src2, $src3}"), - []>, EVEX_4V; -} -defm VALIGND : avx512_valign, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VALIGNQ : avx512_valign, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; // Helper fragments to match sext vXi1 to vXiY. def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>; def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>; -multiclass avx512_vpabs opc, string OpcodeStr, ValueType OpVT, - RegisterClass KRC, RegisterClass RC, - X86MemOperand x86memop, X86MemOperand x86scalar_mop, - string BrdcstStr> { - def rr : AVX5128I, EVEX; - def rrk : AVX5128I, EVEX, EVEX_K; - def rrkz : AVX5128I, EVEX, EVEX_KZ; - let mayLoad = 1 in { - def rm : AVX5128I, EVEX; - def rmk : AVX5128I, EVEX, EVEX_K; - def rmkz : AVX5128I, EVEX, EVEX_KZ; - def rmb : AVX5128I, EVEX, EVEX_B; - def rmbk : AVX5128I, EVEX, EVEX_B, EVEX_K; - def rmbkz : AVX5128I, EVEX, EVEX_B, EVEX_KZ; - } -} - -defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512, - i512mem, i32mem, "{1to16}">, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512, - i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - -def : Pat<(xor - (bc_v16i32 (v16i1sextv16i32)), - (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))), - (VPABSDZrr VR512:$src)>; -def : Pat<(xor - (bc_v8i64 (v8i1sextv8i64)), - (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), - (VPABSQZrr VR512:$src)>; - -def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1))), - (VPABSDZrr VR512:$src)>; -def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src), - (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPABSQZrr VR512:$src)>; - -multiclass avx512_conflict opc, string OpcodeStr, - RegisterClass RC, RegisterClass KRC, - X86MemOperand x86memop, - X86MemOperand x86scalar_mop, string BrdcstStr> { - def rr : AVX5128I, EVEX; - def rm : AVX5128I, EVEX; - def rmb : AVX5128I, EVEX, EVEX_B; - def rrkz : AVX5128I, EVEX, EVEX_KZ; - def rmkz : AVX5128I, EVEX, EVEX_KZ; - def rmbkz : AVX5128I, EVEX, EVEX_KZ, EVEX_B; - - let Constraints = "$src1 = $dst" in { - def rrk : AVX5128I, EVEX, EVEX_K; - def rmk : AVX5128I, EVEX, EVEX_K; - def rmbk : AVX5128I, EVEX, EVEX_K, EVEX_B; - } -} - -let Predicates = [HasCDI] in { -defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM, - i512mem, i32mem, "{1to16}">, - EVEX_V512, EVEX_CD8<32, CD8VF>; - - -defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM, - i512mem, i64mem, "{1to8}">, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -} - -def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1, - GR16:$mask), - (VPCONFLICTDrrk VR512:$src1, - (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>; - -def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1, - GR8:$mask), - (VPCONFLICTQrrk VR512:$src1, - (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; - -let Predicates = [HasCDI] in { -defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM, - i512mem, i32mem, "{1to16}">, - EVEX_V512, EVEX_CD8<32, CD8VF>; - - -defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM, - i512mem, i64mem, "{1to8}">, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -} - -def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1, - GR16:$mask), - (VPLZCNTDrrk VR512:$src1, - (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>; - -def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1, - GR8:$mask), - (VPLZCNTQrrk VR512:$src1, - (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; - -def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))), - (VPLZCNTDrm addr:$src)>; -def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))), - (VPLZCNTDrr VR512:$src)>; -def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))), - (VPLZCNTQrm addr:$src)>; -def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))), - (VPLZCNTQrr VR512:$src)>; - -def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; -def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; -def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>; +def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; +def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; +def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>; def : Pat<(store VK1:$src, addr:$dst), - (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK16))>; + (MOV8mr addr:$dst, + (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), + sub_8bit))>, Requires<[HasAVX512, NoDQI]>; + +def : Pat<(store VK8:$src, addr:$dst), + (MOV8mr addr:$dst, + (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)), + sub_8bit))>, Requires<[HasAVX512, NoDQI]>; def truncstorei1 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr), [{ @@ -5201,7 +6599,7 @@ def : Pat<(truncstorei1 GR8:$src, addr:$dst), (MOV8mr addr:$dst, GR8:$src)>; multiclass cvt_by_vec_width opc, X86VectorVTInfo Vec, string OpcodeStr > { -def rr : AVX512XS8I, EVEX; } @@ -5230,29 +6628,56 @@ multiclass avx512_convert_mask_to_vector { defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">; +multiclass convert_vector_to_mask_common opc, X86VectorVTInfo _, string OpcodeStr > { +def rr : AVX512XS8I, EVEX; +} + +multiclass avx512_convert_vector_to_mask opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { +let Predicates = [prd] in + defm Z : convert_vector_to_mask_common , + EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : convert_vector_to_mask_common, + EVEX_V256; + defm Z128 : convert_vector_to_mask_common, + EVEX_V128; + } +} + +defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m", + avx512vl_i8_info, HasBWI>; +defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m", + avx512vl_i16_info, HasBWI>, VEX_W; +defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m", + avx512vl_i32_info, HasDQI>; +defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", + avx512vl_i64_info, HasDQI>, VEX_W; + //===----------------------------------------------------------------------===// // AVX-512 - COMPRESS and EXPAND // + multiclass compress_by_vec_width opc, X86VectorVTInfo _, string OpcodeStr> { - def rrkz : AVX5128I, EVEX_KZ; - - let Constraints = "$src0 = $dst" in - def rrk : AVX5128I, EVEX_K; + defm rr : AVX512_maskable, AVX5128IBase; let mayStore = 1 in { + def mr : AVX5128I, EVEX_CD8<_.EltSize, CD8VT1>; + def mrk : AVX5128I, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; } @@ -5280,38 +6705,16 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info // expand multiclass expand_by_vec_width opc, X86VectorVTInfo _, string OpcodeStr> { - def rrkz : AVX5128I, EVEX_KZ; - - let Constraints = "$src0 = $dst" in - def rrk : AVX5128I, EVEX_K; + defm rr : AVX512_maskable, AVX5128IBase; - let mayLoad = 1, Constraints = "$src0 = $dst" in - def rmk : AVX5128I, - EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; - let mayLoad = 1 in - def rmkz : AVX5128I, - EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>; - + defm rm : AVX512_maskable, + AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>; } multiclass expand_by_elt_width opc, string OpcodeStr, @@ -5332,3 +6735,789 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>, EVEX; defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, EVEX, VEX_W; + +//handle instruction reg_vec1 = op(reg_vec,imm) +// op(mem_vec,imm) +// op(broadcast(eltVt),imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_unary_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable; + let mayLoad = 1 in { + defm rmi : AVX512_maskable; + defm rmbi : AVX512_maskable, EVEX_B; + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_unary_fp_sae_packed_imm opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _>{ + defm rrib : AVX512_maskable, EVEX_B; +} + +multiclass avx512_common_unary_fp_sae_packed_imm opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z : avx512_unary_fp_packed_imm, + avx512_unary_fp_sae_packed_imm, + EVEX_V512; + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_unary_fp_packed_imm, + EVEX_V128; + defm Z256 : avx512_unary_fp_packed_imm, + EVEX_V256; + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_vec,imm) +// op(reg_vec2,broadcast(eltVt),imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable; + let mayLoad = 1 in { + defm rmi : AVX512_maskable; + defm rmbi : AVX512_maskable, EVEX_B; + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_vec,imm) +multiclass avx512_3Op_rm_imm8 opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{ + + defm rri : AVX512_maskable; + let mayLoad = 1 in + defm rmi : AVX512_maskable; +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_vec,imm) +// op(reg_vec2,broadcast(eltVt),imm) +multiclass avx512_3Op_imm8 opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>: + avx512_3Op_rm_imm8{ + + let mayLoad = 1 in + defm rmbi : AVX512_maskable, EVEX_B; +} + +//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_scalar,imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + + defm rri : AVX512_maskable_scalar; + let mayLoad = 1 in { + defm rmi : AVX512_maskable_scalar; + + let isAsmParserOnly = 1 in { + defm rmi_alt :AVX512_maskable_in_asm; + } + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_fp_sae_packed_imm opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _>{ + defm rrib : AVX512_maskable, EVEX_B; +} +//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _> { + defm NAME#rrib : AVX512_maskable_scalar, EVEX_B; +} + +multiclass avx512_common_fp_sae_packed_imm opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z : avx512_fp_packed_imm, + avx512_fp_sae_packed_imm, + EVEX_V512; + + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_fp_packed_imm, + EVEX_V128; + defm Z256 : avx512_fp_packed_imm, + EVEX_V256; + } +} + +multiclass avx512_common_3Op_rm_imm8 opc, SDNode OpNode, string OpStr, + AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo>{ + let Predicates = [HasBWI] in { + defm Z : avx512_3Op_rm_imm8, EVEX_V512, AVX512AIi8Base, EVEX_4V; + } + let Predicates = [HasBWI, HasVLX] in { + defm Z128 : avx512_3Op_rm_imm8, EVEX_V128, AVX512AIi8Base, EVEX_4V; + defm Z256 : avx512_3Op_rm_imm8, EVEX_V256, AVX512AIi8Base, EVEX_4V; + } +} + +multiclass avx512_common_3Op_imm8 opc, SDNode OpNode>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_3Op_imm8, EVEX_V512; + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_3Op_imm8, EVEX_V128; + defm Z256 : avx512_3Op_imm8, EVEX_V256; + } +} + +multiclass avx512_common_fp_sae_scalar_imm opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z128 : avx512_fp_scalar_imm, + avx512_fp_sae_scalar_imm; + } +} + +multiclass avx512_common_unary_fp_sae_packed_imm_all opcPs, bits<8> opcPd, SDNode OpNode, Predicate prd>{ + defm PS : avx512_common_unary_fp_sae_packed_imm, EVEX_CD8<32, CD8VF>; + defm PD : avx512_common_unary_fp_sae_packed_imm, EVEX_CD8<64, CD8VF>, VEX_W; +} + +defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd", + avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VFIXUPIMMPS : avx512_common_fp_sae_packed_imm<"vfixupimmps", + avx512vl_f32_info, 0x54, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + +defm VFIXUPIMMSD: avx512_common_fp_sae_scalar_imm<"vfixupimmsd", f64x_info, + 0x55, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info, + 0x55, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, + X86VReduce, HasDQI>, AVX512AIi8Base, EVEX; +defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, + X86VRndScale, HasAVX512>, AVX512AIi8Base, EVEX; +defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, + X86VGetMant, HasAVX512>, AVX512AIi8Base, EVEX; + + +defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, + 0x50, X86VRange, HasDQI>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, + 0x50, X86VRange, HasDQI>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + +defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info, + 0x51, X86VRange, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, + 0x51, X86VRange, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, + 0x57, X86Reduces, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, + 0x57, X86Reduces, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, + 0x27, X86GetMants, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, + 0x27, X86GetMants, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +multiclass avx512_shuff_packed_128 opc, SDNode OpNode = X86Shuf128>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_3Op_imm8, EVEX_V512; + + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z256 : avx512_3Op_imm8, EVEX_V256; + } +} +let Predicates = [HasAVX512] in { +def : Pat<(v16f32 (ffloor VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>; +def : Pat<(v16f32 (fnearbyint VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>; +def : Pat<(v16f32 (fceil VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>; +def : Pat<(v16f32 (frint VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>; +def : Pat<(v16f32 (ftrunc VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>; + +def : Pat<(v8f64 (ffloor VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>; +def : Pat<(v8f64 (fnearbyint VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>; +def : Pat<(v8f64 (fceil VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>; +def : Pat<(v8f64 (frint VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>; +def : Pat<(v8f64 (ftrunc VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>; +} + +defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; + +multiclass avx512_valign { + defm NAME: avx512_common_3Op_imm8, + AVX512AIi8Base, EVEX_4V; +} + +defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info>, + EVEX_CD8<32, CD8VF>; +defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info>, + EVEX_CD8<64, CD8VF>, VEX_W; + +multiclass avx512_vpalign_lowering p>{ + let Predicates = p in + def NAME#_.VTName#rri: + Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))), + (!cast(NAME#_.ZSuffix#rri) + _.RC:$src1, _.RC:$src2, imm:$imm)>; +} + +multiclass avx512_vpalign_lowering_common: + avx512_vpalign_lowering<_.info512, [HasBWI]>, + avx512_vpalign_lowering<_.info128, [HasBWI, HasVLX]>, + avx512_vpalign_lowering<_.info256, [HasBWI, HasVLX]>; + +defm VPALIGN: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" , + avx512vl_i8_info, avx512vl_i8_info>, + avx512_vpalign_lowering_common, + avx512_vpalign_lowering_common, + avx512_vpalign_lowering_common, + avx512_vpalign_lowering_common, + avx512_vpalign_lowering_common, + EVEX_CD8<8, CD8VF>; + +defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" , + avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; + +multiclass avx512_unary_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr : AVX512_maskable, EVEX, AVX5128IBase; + + let mayLoad = 1 in + defm rm : AVX512_maskable, + EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>; +} + +multiclass avx512_unary_rmb opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> : + avx512_unary_rm { + let mayLoad = 1 in + defm rmb : AVX512_maskable, + EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; +} + +multiclass avx512_unary_rm_vl opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_unary_rm, EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_unary_rm, + EVEX_V256; + defm Z128 : avx512_unary_rm, + EVEX_V128; + } +} + +multiclass avx512_unary_rmb_vl opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_unary_rmb, + EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_unary_rmb, + EVEX_V256; + defm Z128 : avx512_unary_rmb, + EVEX_V128; + } +} + +multiclass avx512_unary_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, + SDNode OpNode, Predicate prd> { + defm Q : avx512_unary_rmb_vl, VEX_W; + defm D : avx512_unary_rmb_vl; +} + +multiclass avx512_unary_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, + SDNode OpNode, Predicate prd> { + defm W : avx512_unary_rm_vl; + defm B : avx512_unary_rm_vl; +} + +multiclass avx512_unary_rm_vl_all opc_b, bits<8> opc_w, + bits<8> opc_d, bits<8> opc_q, + string OpcodeStr, SDNode OpNode> { + defm NAME : avx512_unary_rm_vl_dq, + avx512_unary_rm_vl_bw; +} + +defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>; + +def : Pat<(xor + (bc_v16i32 (v16i1sextv16i32)), + (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))), + (VPABSDZrr VR512:$src)>; +def : Pat<(xor + (bc_v8i64 (v8i1sextv8i64)), + (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), + (VPABSQZrr VR512:$src)>; + +multiclass avx512_ctlz opc, string OpcodeStr, Predicate prd>{ + + defm NAME : avx512_unary_rm_vl_dq; +} + +defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>; +defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>; + +//===---------------------------------------------------------------------===// +// Replicate Single FP - MOVSHDUP and MOVSLDUP +//===---------------------------------------------------------------------===// +multiclass avx512_replicate opc, string OpcodeStr, SDNode OpNode>{ + defm NAME: avx512_unary_rm_vl, XS; +} + +defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>; +defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>; + +//===----------------------------------------------------------------------===// +// AVX-512 - MOVDDUP +//===----------------------------------------------------------------------===// + +multiclass avx512_movddup_128 opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr : AVX512_maskable, EVEX; + let mayLoad = 1 in + defm rm : AVX512_maskable, + EVEX, EVEX_CD8<_.EltSize, CD8VH>; +} + +multiclass avx512_movddup_common opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo> { + + defm Z : avx512_unary_rm, EVEX_V512; + + let Predicates = [HasAVX512, HasVLX] in { + defm Z256 : avx512_unary_rm, + EVEX_V256; + defm Z128 : avx512_movddup_128, + EVEX_V128; + } +} + +multiclass avx512_movddup opc, string OpcodeStr, SDNode OpNode>{ + defm NAME: avx512_movddup_common, XD, VEX_W; +} + +defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>; + +def : Pat<(X86Movddup (loadv2f64 addr:$src)), + (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>; +def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), + (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>; + +//===----------------------------------------------------------------------===// +// AVX-512 - Unpack Instructions +//===----------------------------------------------------------------------===// +defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh>; +defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl>; + +defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh, + SSE_INTALU_ITINS_P, HasBWI>; + +defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh, + SSE_INTALU_ITINS_P, HasAVX512>; + +//===----------------------------------------------------------------------===// +// AVX-512 - Extract & Insert Integer Instructions +//===----------------------------------------------------------------------===// + +multiclass avx512_extract_elt_bw_m opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let mayStore = 1 in + def mr : AVX512Ii8, + EVEX, EVEX_CD8<_.EltSize, CD8VT1>; +} + +multiclass avx512_extract_elt_b { + let Predicates = [HasBWI] in { + def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst), + (ins _.RC:$src1, u8imm:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32orGR64:$dst, + (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>, + EVEX, TAPD; + + defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD; + } +} + +multiclass avx512_extract_elt_w { + let Predicates = [HasBWI] in { + def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), + (ins _.RC:$src1, u8imm:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32orGR64:$dst, + (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>, + EVEX, PD; + + def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), + (ins _.RC:$src1, u8imm:$src2), + OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + EVEX, TAPD; + + defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; + } +} + +multiclass avx512_extract_elt_dq { + let Predicates = [HasDQI] in { + def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst), + (ins _.RC:$src1, u8imm:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GRC:$dst, + (extractelt (_.VT _.RC:$src1), imm:$src2))]>, + EVEX, TAPD; + + let mayStore = 1 in + def mr : AVX512Ii8<0x16, MRMDestMem, (outs), + (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(store (extractelt (_.VT _.RC:$src1), + imm:$src2),addr:$dst)]>, + EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD; + } +} + +defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>; +defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>; +defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>; +defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W; + +multiclass avx512_insert_elt_m opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, PatFrag LdFrag> { + def rm : AVX512Ii8, + EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; +} + +multiclass avx512_insert_elt_bw opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, PatFrag LdFrag> { + let Predicates = [HasBWI] in { + def rr : AVX512Ii8, EVEX_4V; + + defm NAME : avx512_insert_elt_m; + } +} + +multiclass avx512_insert_elt_dq opc, string OpcodeStr, + X86VectorVTInfo _, RegisterClass GRC> { + let Predicates = [HasDQI] in { + def rr : AVX512Ii8, + EVEX_4V, TAPD; + + defm NAME : avx512_insert_elt_m, TAPD; + } +} + +defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info, + extloadi8>, TAPD; +defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info, + extloadi16>, PD; +defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>; +defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W; +//===----------------------------------------------------------------------===// +// VSHUFPS - VSHUFPD Operations +//===----------------------------------------------------------------------===// +multiclass avx512_shufp{ + defm NAME: avx512_common_3Op_imm8, + EVEX_CD8, + AVX512AIi8Base, EVEX_4V; +} + +defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS; +defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W; +//===----------------------------------------------------------------------===// +// AVX-512 - Byte shift Left/Right +//===----------------------------------------------------------------------===// + +multiclass avx512_shift_packed opc, SDNode OpNode, Format MRMr, + Format MRMm, string OpcodeStr, X86VectorVTInfo _>{ + def rr : AVX512; + let mayLoad = 1 in + def rm : AVX512; +} + +multiclass avx512_shift_packed_all opc, SDNode OpNode, Format MRMr, + Format MRMm, string OpcodeStr, Predicate prd>{ + let Predicates = [prd] in + defm Z512 : avx512_shift_packed, EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_shift_packed, EVEX_V256; + defm Z128 : avx512_shift_packed, EVEX_V128; + } +} +defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq", + HasBWI>, AVX512PDIi8Base, EVEX_4V; +defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", + HasBWI>, AVX512PDIi8Base, EVEX_4V; + + +multiclass avx512_psadbw_packed opc, SDNode OpNode, + string OpcodeStr, X86VectorVTInfo _dst, + X86VectorVTInfo _src>{ + def rr : AVX512BI; + let mayLoad = 1 in + def rm : AVX512BI; +} + +multiclass avx512_psadbw_packed_all opc, SDNode OpNode, + string OpcodeStr, Predicate prd> { + let Predicates = [prd] in + defm Z512 : avx512_psadbw_packed, EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_psadbw_packed, EVEX_V256; + defm Z128 : avx512_psadbw_packed, EVEX_V128; + } +} + +defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", + HasBWI>, EVEX_4V; + +multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + let Constraints = "$src1 = $dst" in { + defm rri : AVX512_maskable_3src, AVX512AIi8Base, EVEX_4V; + let mayLoad = 1 in { + defm rmi : AVX512_maskable_3src, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + defm rmbi : AVX512_maskable_3src, EVEX_B, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + } + }// Constraints = "$src1 = $dst" +} + +multiclass avx512_common_ternlog{ + let Predicates = [HasAVX512] in + defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>, EVEX_V512; + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>, EVEX_V128; + defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>, EVEX_V256; + } +} + +defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>; +defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W; +