X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrAVX512.td;h=8d66627c9271aef85ab5f2282ad4c685a6eb8461;hb=da135386a48b02596be55f9207caf144032a3d3b;hp=84990664d3f7a907bed1526350dccfaaa198e10b;hpb=af0e519127d28604923fd71d1fbc198e8c33feba;p=oota-llvm.git diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 84990664d3f..8d66627c927 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -79,7 +79,7 @@ class X86VectorVTInfo O, Format F, X86VectorVTInfo _, OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>; +multiclass AVX512_maskable_3src_scalar O, Format F, X86VectorVTInfo _, + dag Outs, dag NonTiedIns, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS> : + AVX512_maskable_common; multiclass AVX512_maskable_in_asm O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, @@ -727,7 +737,7 @@ let ExeDomain = SSEPackedDouble in { } // avx512_broadcast_pat introduces patterns for broadcast with a scalar argument. -// Later, we can canonize broadcast instructions before ISel phase and +// Later, we can canonize broadcast instructions before ISel phase and // eliminate additional patterns on ISel. // SrcRC_v and SrcRC_s are RegisterClasses for vector and scalar // representations of source @@ -857,7 +867,7 @@ multiclass avx512_int_broadcast_rm opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), [(set DstRC:$dst, (OpVT (vselect KRC:$mask, - (X86VBroadcast (ld_frag addr:$src)), + (X86VBroadcast (ld_frag addr:$src)), (OpVT (bitconvert (v16i32 immAllZerosV))))))]>, EVEX, EVEX_KZ; } } @@ -874,8 +884,8 @@ multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, let mayLoad = 1 in { def rm : AVX5128I, EVEX; def rmk : AVX5128I OpcImm, bits<8> OpcVar, X86VectorVTInfo _, EVEX_4V; } } - -defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", X86VPermi, v8i64_info>, - EVEX_V512, VEX_W; -defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", X86VPermi, v8f64_info>, - EVEX_V512, VEX_W; - defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>, EVEX_V512; defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>, @@ -1063,150 +1067,88 @@ def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), (VPERMILPDZri VR512:$src1, imm:$imm)>; -// -- VPERM - register form -- -multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, - PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> { - - def rr : AVX5128I, EVEX_4V; - - def rm : AVX5128I, - EVEX_4V; -} - -defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, loadv16i32, i512mem, - v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, loadv8i64, i512mem, - v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -let ExeDomain = SSEPackedSingle in -defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, loadv16f32, f512mem, - v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -let ExeDomain = SSEPackedDouble in -defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, loadv8f64, f512mem, - v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - // -- VPERM2I - 3 source operands form -- -multiclass avx512_perm_3src opc, string OpcodeStr, RegisterClass RC, - PatFrag mem_frag, X86MemOperand x86memop, - SDNode OpNode, ValueType OpVT, RegisterClass KRC> { +multiclass avx512_perm_3src opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _> { let Constraints = "$src1 = $dst" in { - def rr : AVX5128I, - EVEX_4V; - - def rrk : AVX5128I, - EVEX_4V, EVEX_K; - - let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<> - def rrkz : AVX5128I, - EVEX_4V, EVEX_KZ; + defm rr: AVX512_maskable_3src, EVEX_4V, + AVX5128IBase; - def rm : AVX5128I, EVEX_4V; + let mayLoad = 1 in + defm rm: AVX512_maskable_3src, + EVEX_4V, AVX5128IBase; + } +} +multiclass avx512_perm_3src_mb opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _> { + let mayLoad = 1, Constraints = "$src1 = $dst" in + defm rmb: AVX512_maskable_3src, + AVX5128IBase, EVEX_4V, EVEX_B; +} - def rmk : AVX5128I, - EVEX_4V, EVEX_K; - - let AddedComplexity = 10 in // Prefer over the rrkz variant - def rmkz : AVX5128I, - EVEX_4V, EVEX_KZ; - } -} -defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, loadv16i32, - i512mem, X86VPermiv3, v16i32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, loadv8i64, - i512mem, X86VPermiv3, v8i64, VK8WM>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, loadv16f32, - i512mem, X86VPermiv3, v16f32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, loadv8f64, - i512mem, X86VPermiv3, v8f64, VK8WM>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -multiclass avx512_perm_table_3src opc, string Suffix, RegisterClass RC, - PatFrag mem_frag, X86MemOperand x86memop, - SDNode OpNode, ValueType OpVT, RegisterClass KRC, - ValueType MaskVT, RegisterClass MRC> : - avx512_perm_3src { - def : Pat<(OpVT (!cast("int_x86_avx512_mask_vpermt_"##Suffix##"_512") - VR512:$idx, VR512:$src1, VR512:$src2, -1)), - (!cast(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>; - - def : Pat<(OpVT (!cast("int_x86_avx512_mask_vpermt_"##Suffix##"_512") - VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)), - (!cast(NAME#rrk) VR512:$src1, - (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>; -} - -defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, loadv16i32, i512mem, - X86VPermv3, v16i32, VK16WM, v16i1, GR16>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, loadv8i64, i512mem, - X86VPermv3, v8i64, VK8WM, v8i1, GR8>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, loadv16f32, i512mem, - X86VPermv3, v16f32, VK16WM, v16i1, GR16>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, loadv8f64, i512mem, - X86VPermv3, v8f64, VK8WM, v8i1, GR8>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +multiclass avx512_perm_3src_sizes opc, string OpcodeStr, + SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { + let Predicates = [HasAVX512] in + defm NAME: avx512_perm_3src, + avx512_perm_3src_mb, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#128: avx512_perm_3src, + avx512_perm_3src_mb, + EVEX_V128; + defm NAME#256: avx512_perm_3src, + avx512_perm_3src_mb, + EVEX_V256; + } +} +multiclass avx512_perm_3src_sizes_w opc, string OpcodeStr, + SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { + let Predicates = [HasBWI] in + defm NAME: avx512_perm_3src, + avx512_perm_3src_mb, + EVEX_V512; + let Predicates = [HasBWI, HasVLX] in { + defm NAME#128: avx512_perm_3src, + avx512_perm_3src_mb, + EVEX_V128; + defm NAME#256: avx512_perm_3src, + avx512_perm_3src_mb, + EVEX_V256; + } +} +defm VPERMI2D : avx512_perm_3src_sizes<0x76, "vpermi2d", X86VPermiv3, + avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2Q : avx512_perm_3src_sizes<0x76, "vpermi2q", X86VPermiv3, + avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2PS : avx512_perm_3src_sizes<0x77, "vpermi2ps", X86VPermiv3, + avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2PD : avx512_perm_3src_sizes<0x77, "vpermi2pd", X86VPermiv3, + avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VPERMT2D : avx512_perm_3src_sizes<0x7E, "vpermt2d", X86VPermv3, + avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2Q : avx512_perm_3src_sizes<0x7E, "vpermt2q", X86VPermv3, + avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMT2PS : avx512_perm_3src_sizes<0x7F, "vpermt2ps", X86VPermv3, + avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2PD : avx512_perm_3src_sizes<0x7F, "vpermt2pd", X86VPermv3, + avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VPERMT2W : avx512_perm_3src_sizes_w<0x7D, "vpermt2w", X86VPermv3, + avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +defm VPERMI2W : avx512_perm_3src_sizes_w<0x75, "vpermi2w", X86VPermiv3, + avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask @@ -2081,11 +2023,11 @@ defm : avx512_binop_pat; def : Pat<(xor (xor VK16:$src1, VK16:$src2), (v16i1 immAllOnesV)), (KXNORWrr VK16:$src1, VK16:$src2)>; def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)), - (KXNORBrr VK8:$src1, VK8:$src2)>; + (KXNORBrr VK8:$src1, VK8:$src2)>, Requires<[HasDQI]>; def : Pat<(xor (xor VK32:$src1, VK32:$src2), (v32i1 immAllOnesV)), - (KXNORDrr VK32:$src1, VK32:$src2)>; + (KXNORDrr VK32:$src1, VK32:$src2)>, Requires<[HasBWI]>; def : Pat<(xor (xor VK64:$src1, VK64:$src2), (v64i1 immAllOnesV)), - (KXNORQrr VK64:$src1, VK64:$src2)>; + (KXNORQrr VK64:$src1, VK64:$src2)>, Requires<[HasBWI]>; let Predicates = [NoDQI] in def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)), @@ -2182,7 +2124,7 @@ multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, let Predicates = [HasDQI] in defm D : avx512_mask_shiftop, VEX, TAPD; - } + } } defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>; @@ -2375,7 +2317,7 @@ multiclass avx512_store opc, string OpcodeStr, X86VectorVTInfo _, def rrkz_alt : AVX512PI, EVEX, EVEX_KZ; } @@ -2484,13 +2426,13 @@ def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src), VR512:$src)>; let Predicates = [HasAVX512, NoVLX] in { -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), +def: Pat<(X86mstore addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), (VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz + (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))), @@ -2562,13 +2504,13 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), } // NoVLX patterns let Predicates = [HasAVX512, NoVLX] in { -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), +def: Pat<(X86mstore addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), (VMOVDQU32Zmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz + (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; } @@ -3022,7 +2964,7 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, OpndItins itins, bit IsCommutable = 0> { defm rr : AVX512_maskable, @@ -3030,7 +2972,7 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, let mayLoad = 1 in defm rm : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, avx512_binop_rm { let mayLoad = 1 in defm rmb : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, multiclass avx512_binop_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, bit IsCommutable = 0> { - defm Q : avx512_binop_rm_vl_q; - defm D : avx512_binop_rm_vl_d; } multiclass avx512_binop_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, bit IsCommutable = 0> { - defm W : avx512_binop_rm_vl_w; - defm B : avx512_binop_rm_vl_b; } @@ -3144,15 +3086,15 @@ multiclass avx512_binop_rm_vl_all opc_b, bits<8> opc_w, } multiclass avx512_binop_rm2 opc, string OpcodeStr, OpndItins itins, - SDNode OpNode,X86VectorVTInfo _Src, + SDNode OpNode,X86VectorVTInfo _Src, X86VectorVTInfo _Dst, bit IsCommutable = 0> { - defm rr : AVX512_maskable, + itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V; let mayLoad = 1 in { defm rm : AVX512_maskable opc, string OpcodeStr, OpndItins itins, AVX512BIBase, EVEX_4V; defm rmb : AVX512_maskable, AVX512BIBase, EVEX_4V, EVEX_B; @@ -3185,17 +3127,24 @@ defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds, defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs, SSE_INTALU_ITINS_P, HasBWI, 0>; defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, - SSE_INTALU_ITINS_P, HasBWI, 1>; + SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, - SSE_INTALU_ITINS_P, HasBWI, 0>; -defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul, - SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, + SSE_INTALU_ITINS_P, HasBWI, 0>; +defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; +defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, + SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; +defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P, + HasBWI, 1>; +defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P, + HasBWI, 1>; +defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P, + HasBWI, 1>, T8PD; +defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, - SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; - multiclass avx512_binop_all opc, string OpcodeStr, OpndItins itins, SDNode OpNode, bit IsCommutable = 0> { @@ -3210,7 +3159,7 @@ multiclass avx512_binop_all opc, string OpcodeStr, OpndItins itins, v4i32x_info, v2i64x_info, IsCommutable>, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W; } -} +} defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, X86pmuldq, 1>,T8PD; @@ -3221,25 +3170,25 @@ multiclass avx512_packs_rmb opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _Src, X86VectorVTInfo _Dst> { let mayLoad = 1 in { defm rmb : AVX512_maskable, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>; } } -multiclass avx512_packs_rm opc, string OpcodeStr, - SDNode OpNode,X86VectorVTInfo _Src, +multiclass avx512_packs_rm opc, string OpcodeStr, + SDNode OpNode,X86VectorVTInfo _Src, X86VectorVTInfo _Dst> { - defm rr : AVX512_maskable, EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V; let mayLoad = 1 in { @@ -3280,126 +3229,59 @@ multiclass avx512_packs_all_i16_i8 opc, string OpcodeStr, v16i8x_info>, EVEX_V128; } } + +multiclass avx512_vpmadd opc, string OpcodeStr, + SDNode OpNode, AVX512VLVectorVTInfo _Src, + AVX512VLVectorVTInfo _Dst> { + defm NAME#Z : avx512_packs_rm, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#Z256 : avx512_packs_rm, EVEX_V256; + defm NAME#Z128 : avx512_packs_rm, EVEX_V128; + } +} + let Predicates = [HasBWI] in { defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, PD; defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, T8PD; defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase, VEX_W; defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W; + + defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw, + avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD; + defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd, + avx512vl_i16_info, avx512vl_i32_info>, AVX512BIBase; } -defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax, +defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax, +defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", X86smax, +defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", X86umax, +defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", X86umax, +defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", X86umax, +defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", X86smin, +defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", X86smin, +defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", X86smin, +defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", X86umin, +defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin, +defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin, +defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; - -def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), - (VPMAXSDZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v16i32 (int_x86_avx512_mask_pmaxu_d_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), - (VPMAXUDZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v8i64 (int_x86_avx512_mask_pmaxs_q_512 (v8i64 VR512:$src1), - (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMAXSQZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v8i64 (int_x86_avx512_mask_pmaxu_q_512 (v8i64 VR512:$src1), - (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMAXUQZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v16i32 (int_x86_avx512_mask_pmins_d_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), - (VPMINSDZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v16i32 (int_x86_avx512_mask_pminu_d_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), - (VPMINUDZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v8i64 (int_x86_avx512_mask_pmins_q_512 (v8i64 VR512:$src1), - (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMINSQZrr VR512:$src1, VR512:$src2)>; -def : Pat <(v8i64 (int_x86_avx512_mask_pminu_q_512 (v8i64 VR512:$src1), - (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMINUQZrr VR512:$src1, VR512:$src2)>; -//===----------------------------------------------------------------------===// -// AVX-512 - Unpack Instructions -//===----------------------------------------------------------------------===// - -multiclass avx512_unpack_fp opc, SDNode OpNode, ValueType vt, - PatFrag mem_frag, RegisterClass RC, - X86MemOperand x86memop, string asm, - Domain d> { - def rr : AVX512PI, EVEX_4V; - def rm : AVX512PI, EVEX_4V; -} - -defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, loadv8f64, - VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, loadv8f64, - VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, loadv8f64, - VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, loadv8f64, - VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop> { - def rr : AVX512BI, EVEX_4V; - def rm : AVX512BI, EVEX_4V; -} -defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32, - VR512, loadv16i32, i512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64, - VR512, loadv8i64, i512mem>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; -defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32, - VR512, loadv16i32, i512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, - VR512, loadv8i64, i512mem>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// @@ -3437,12 +3319,12 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, let isCodeGenOnly = 1, isCommutable = IsCommutable, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), - (ins _.FRC:$src1, _.FRC:$src2), + (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], itins.rr>; def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), - (ins _.FRC:$src1, _.ScalarMemOp:$src2), + (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))], itins.rr>; @@ -3450,7 +3332,7 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, } multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo _, - SDNode VecNode, OpndItins itins, bit IsCommutable> { + SDNode VecNode, OpndItins itins, bit IsCommutable = 0> { defm rrb : AVX512_maskable_scalar opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_fp_round_packed opc, string OpcodeStr, SDNode OpNodeRnd, - X86VectorVTInfo _, bit IsCommutable> { + X86VectorVTInfo _> { defm rb: AVX512_maskable opc, string OpcodeStr, SDNode OpNodeRn multiclass avx512_fp_sae_packed opc, string OpcodeStr, SDNode OpNodeRnd, - X86VectorVTInfo _, bit IsCommutable> { + X86VectorVTInfo _> { defm rb: AVX512_maskable opc, string OpcodeStr, SDNode OpNodeRnd, EVEX_4V, EVEX_B; } -multiclass avx512_fp_binop_p opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_fp_binop_p opc, string OpcodeStr, SDNode OpNode, bit IsCommutable = 0> { defm PSZ : avx512_fp_packed, EVEX_V512, PS, @@ -3572,16 +3454,16 @@ multiclass avx512_fp_binop_p opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_fp_binop_p_round opc, string OpcodeStr, SDNode OpNodeRnd> { - defm PSZ : avx512_fp_round_packed, + defm PSZ : avx512_fp_round_packed, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_round_packed, + defm PDZ : avx512_fp_round_packed, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_binop_p_sae opc, string OpcodeStr, SDNode OpNodeRnd> { - defm PSZ : avx512_fp_sae_packed, + defm PSZ : avx512_fp_sae_packed, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_sae_packed, + defm PDZ : avx512_fp_sae_packed, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } @@ -3589,7 +3471,7 @@ defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>, avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>; defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>, avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>; -defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>, +defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>, avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>; defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>, avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>; @@ -3604,6 +3486,69 @@ let Predicates = [HasDQI] in { defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>; } +multiclass avx512_fp_scalef_p opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr: AVX512_maskable, EVEX_4V; + let mayLoad = 1 in { + defm rm: AVX512_maskable, EVEX_4V; + defm rmb: AVX512_maskable, + EVEX_4V, EVEX_B; + }//let mayLoad = 1 +} + +multiclass avx512_fp_scalef_scalar opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr: AVX512_maskable_scalar; + let mayLoad = 1 in { + defm rm: AVX512_maskable_scalar; + }//let mayLoad = 1 +} + +multiclass avx512_fp_scalef_all opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> { + defm PSZ : avx512_fp_scalef_p, + avx512_fp_round_packed, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp_scalef_p, + avx512_fp_round_packed, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm SSZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V,EVEX_CD8<32, CD8VT1>; + defm SDZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; + + // Define only if AVX512VL feature is present. + let Predicates = [HasVLX] in { + defm PSZ128 : avx512_fp_scalef_p, + EVEX_V128, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp_scalef_p, + EVEX_V256, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp_scalef_p, + EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp_scalef_p, + EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; + } +} +defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD; + //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions //===----------------------------------------------------------------------===// @@ -3619,7 +3564,7 @@ multiclass avx512_vptest opc, string OpcodeStr, SDNode OpNode, defm rm : AVX512_maskable_cmp, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; @@ -3781,12 +3726,12 @@ multiclass avx512_shift_rmi_sizes opc, Format ImmFormR, Format ImmFormM, VTInfo.info256>, EVEX_V256; defm Z128: avx512_shift_rmi, - avx512_shift_rmbi, EVEX_V128; } } -multiclass avx512_shift_rmi_w opcw, +multiclass avx512_shift_rmi_w opcw, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in @@ -3839,7 +3784,8 @@ multiclass avx512_var_shift opc, string OpcodeStr, SDNode OpNode, defm rm : AVX512_maskable, AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; } @@ -3901,17 +3847,78 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>, defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>; defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>; +//===-------------------------------------------------------------------===// +// 1-src variable permutation VPERMW/D/Q +//===-------------------------------------------------------------------===// +multiclass avx512_vperm_dq_sizes opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in + defm Z : avx512_var_shift, + avx512_var_shift_mb, EVEX_V512; + + let Predicates = [HasAVX512, HasVLX] in + defm Z256 : avx512_var_shift, + avx512_var_shift_mb, EVEX_V256; +} + +multiclass avx512_vpermi_dq_sizes opc, Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo> { + let Predicates = [HasAVX512] in + defm Z: avx512_shift_rmi, + avx512_shift_rmbi, EVEX_V512; + let Predicates = [HasAVX512, HasVLX] in + defm Z256: avx512_shift_rmi, + avx512_shift_rmbi, EVEX_V256; +} + + +defm VPERM : avx512_var_shift_w<0x8D, "vpermw", X86VPermv>; + +defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, + avx512vl_i32_info>; +defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, + avx512vl_i64_info>, VEX_W; +defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, + avx512vl_f32_info>; +defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, + avx512vl_f64_info>, VEX_W; + +defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", + X86VPermi, avx512vl_i64_info>, + EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; +defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", + X86VPermi, avx512vl_f64_info>, + EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; + //===----------------------------------------------------------------------===// // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW //===----------------------------------------------------------------------===// defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", - X86PShufd, avx512vl_i32_info>, + X86PShufd, avx512vl_i32_info>, EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W; defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W; + +multiclass avx512_pshufb_sizes opc, string OpcodeStr, SDNode OpNode> { + let Predicates = [HasBWI] in + defm Z: avx512_var_shift, EVEX_V512; + + let Predicates = [HasVLX, HasBWI] in { + defm Z256: avx512_var_shift, EVEX_V256; + defm Z128: avx512_var_shift, EVEX_V128; + } +} + +defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>; + //===----------------------------------------------------------------------===// // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// @@ -3992,216 +3999,366 @@ let Predicates = [HasAVX512] in { // let Constraints = "$src1 = $dst" in { -// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching. -multiclass avx512_fma3p_rm opc, string OpcodeStr, X86VectorVTInfo _, - SDPatternOperator OpNode = null_frag> { +multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { defm r: AVX512_maskable_3src, AVX512FMA3Base; - let mayLoad = 1 in - defm m: AVX512_maskable_3src, - AVX512FMA3Base; + AVX512FMA3Base; - defm mb: AVX512_maskable_3src, + _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, AVX512FMA3Base, EVEX_B; - } -} // Constraints = "$src1 = $dst" + } +} -let Constraints = "$src1 = $dst" in { -// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching. -multiclass avx512_fma3_round_rrb opc, string OpcodeStr, - X86VectorVTInfo _, - SDPatternOperator OpNode> { - defm rb: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, EVEX_RC; - } +} } // Constraints = "$src1 = $dst" -multiclass avx512_fma3_round_forms opc213, string OpcodeStr, - X86VectorVTInfo VTI, SDPatternOperator OpNode> { - defm v213r : avx512_fma3_round_rrb, EVEX_CD8; +multiclass avx512_fma3p_213_common opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_213_rm, + avx512_fma3_213_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + } + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_213_rm, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_213_rm, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; + } } -multiclass avx512_fma3p_forms opc213, bits<8> opc231, - string OpcodeStr, X86VectorVTInfo VTI, - SDPatternOperator OpNode> { - defm v213r : avx512_fma3p_rm, EVEX_CD8; - defm v231r : avx512_fma3p_rm, EVEX_CD8; +multiclass avx512_fma3p_213_f opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_213_common; + defm PD : avx512_fma3p_213_common, VEX_W; } -multiclass avx512_fma3p opc213, bits<8> opc231, - string OpcodeStr, - SDPatternOperator OpNode, - SDPatternOperator OpNodeRnd> { -let ExeDomain = SSEPackedSingle in { - defm NAME##PSZ : avx512_fma3p_forms, - avx512_fma3_round_forms, EVEX_V512; - defm NAME##PSZ256 : avx512_fma3p_forms, EVEX_V256; - defm NAME##PSZ128 : avx512_fma3p_forms, EVEX_V128; - } -let ExeDomain = SSEPackedDouble in { - defm NAME##PDZ : avx512_fma3p_forms, - avx512_fma3_round_forms, EVEX_V512, VEX_W; - defm NAME##PDZ256 : avx512_fma3p_forms, - EVEX_V256, VEX_W; - defm NAME##PDZ128 : avx512_fma3p_forms, - EVEX_V128, VEX_W; - } -} - -defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>; -defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>; -defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, X86FmaddsubRnd>; -defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>; -defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>; -defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>; +defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>; +defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>; + let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3p_m132 opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let mayLoad = 1 in - def m: AVX512FMA3; - def mb: AVX512FMA3, EVEX_B; +multiclass avx512_fma3p_231_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm r: AVX512_maskable_3src, + AVX512FMA3Base; + + let mayLoad = 1 in { + defm m: AVX512_maskable_3src, + AVX512FMA3Base; + + defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B; + } +} + +multiclass avx512_fma3_231_round opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src, + AVX512FMA3Base, EVEX_B, EVEX_RC; } } // Constraints = "$src1 = $dst" -multiclass avx512_fma3p_m132_f opc, string OpcodeStr, SDNode OpNode> { +multiclass avx512_fma3p_231_common opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_231_rm, + avx512_fma3_231_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + } + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_231_rm, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_231_rm, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; + } +} -let ExeDomain = SSEPackedSingle in { - defm NAME##PSZ : avx512_fma3p_m132, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm NAME##PSZ256 : avx512_fma3p_m132, EVEX_V256, - EVEX_CD8<32, CD8VF>; - defm NAME##PSZ128 : avx512_fma3p_m132, EVEX_V128, - EVEX_CD8<32, CD8VF>; +multiclass avx512_fma3p_231_f opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_231_common; + defm PD : avx512_fma3p_231_common, VEX_W; +} + +defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>; +defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>; + +let Constraints = "$src1 = $dst" in { +multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm r: AVX512_maskable_3src, + AVX512FMA3Base; + + let mayLoad = 1 in { + defm m: AVX512_maskable_3src, + AVX512FMA3Base; + + defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B; } -let ExeDomain = SSEPackedDouble in { - defm NAME##PDZ : avx512_fma3p_m132, EVEX_V512, - VEX_W, EVEX_CD8<32, CD8VF>; - defm NAME##PDZ256 : avx512_fma3p_m132, EVEX_V256, - VEX_W, EVEX_CD8<32, CD8VF>; - defm NAME##PDZ128 : avx512_fma3p_m132, EVEX_V128, - VEX_W, EVEX_CD8<32, CD8VF>; +} + +multiclass avx512_fma3_132_round opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rb: AVX512_maskable_3src, + AVX512FMA3Base, EVEX_B, EVEX_RC; +} +} // Constraints = "$src1 = $dst" + +multiclass avx512_fma3p_132_common opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in { + defm Z : avx512_fma3p_132_rm, + avx512_fma3_132_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + } + let Predicates = [HasVLX, HasAVX512] in { + defm Z256 : avx512_fma3p_132_rm, + EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; + defm Z128 : avx512_fma3p_132_rm, + EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } -defm VFMADD132 : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>; -defm VFMSUB132 : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>; -defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>; -defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>; -defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>; -defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>; +multiclass avx512_fma3p_132_f opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd > { + defm PS : avx512_fma3p_132_common; + defm PD : avx512_fma3p_132_common, VEX_W; +} + +defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>; +defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>; // Scalar FMA let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3s_rm opc, string OpcodeStr, SDNode OpNode, - RegisterClass RC, ValueType OpVT, - X86MemOperand x86memop, Operand memop, - PatFrag mem_frag> { - let isCommutable = 1 in - def r : AVX512FMA3; +multiclass avx512_fma3s_common opc, string OpcodeStr, X86VectorVTInfo _, + dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb, + dag RHS_r, dag RHS_m > { + defm r_Int: AVX512_maskable_3src_scalar, AVX512FMA3Base; + let mayLoad = 1 in - def m : AVX512FMA3, AVX512FMA3Base; + + defm rb_Int: AVX512_maskable_3src_scalar, + AVX512FMA3Base, EVEX_B, EVEX_RC; + + let isCodeGenOnly = 1 in { + def r : AVX512FMA3; + [RHS_r]>; + let mayLoad = 1 in + def m : AVX512FMA3; + }// isCodeGenOnly = 1 +} +}// Constraints = "$src1 = $dst" + +multiclass avx512_fma3s_all opc213, bits<8> opc231, bits<8> opc132, + string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86VectorVTInfo _ , + string SUFF> { + + defm NAME#213#SUFF: avx512_fma3s_common; + + defm NAME#231#SUFF: avx512_fma3s_common; + + defm NAME#132#SUFF: avx512_fma3s_common; +} + +multiclass avx512_fma3s opc213, bits<8> opc231, bits<8> opc132, + string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd>{ + let Predicates = [HasAVX512] in { + defm NAME : avx512_fma3s_all, + EVEX_CD8<32, CD8VT1>, VEX_LIG; + defm NAME : avx512_fma3s_all, + EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W; + } } -} // Constraints = "$src1 = $dst" -defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>; +defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>; +defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>; //===----------------------------------------------------------------------===// // AVX-512 Scalar convert from sign integer to float/double //===----------------------------------------------------------------------===// -multiclass avx512_vcvtsi opc, RegisterClass SrcRC, RegisterClass DstRC, - X86MemOperand x86memop, string asm> { -let hasSideEffects = 0 in { - def rr : SI opc, SDNode OpNode, RegisterClass SrcRC, + X86VectorVTInfo DstVT, X86MemOperand x86memop, + PatFrag ld_frag, string asm> { + let hasSideEffects = 0 in { + def rr : SI, EVEX_4V; - let mayLoad = 1 in - def rm : SI, EVEX_4V; -} // hasSideEffects = 0 + } // hasSideEffects = 0 + let isCodeGenOnly = 1 in { + def rr_Int : SI, EVEX_4V; + + def rm_Int : SI, EVEX_4V; + }//isCodeGenOnly = 1 +} + +multiclass avx512_vcvtsi_round opc, SDNode OpNode, RegisterClass SrcRC, + X86VectorVTInfo DstVT, string asm> { + def rrb_Int : SI, EVEX_4V, EVEX_B, EVEX_RC; +} + +multiclass avx512_vcvtsi_common opc, SDNode OpNode, RegisterClass SrcRC, + X86VectorVTInfo DstVT, X86MemOperand x86memop, + PatFrag ld_frag, string asm> { + defm NAME : avx512_vcvtsi_round, + avx512_vcvtsi, + VEX_LIG; } let Predicates = [HasAVX512] in { -defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">, - XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">, - XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; -defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">, - XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">, - XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; +defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, + v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">, + XS, EVEX_CD8<32, CD8VT1>; +defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, + v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">, + XS, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, + v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">, + XD, EVEX_CD8<32, CD8VT1>; +defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, + v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">, + XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; @@ -4221,14 +4378,18 @@ def : Pat<(f64 (sint_to_fp GR32:$src)), def : Pat<(f64 (sint_to_fp GR64:$src)), (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; -defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">, - XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">, - XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; -defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">, +defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR32, + v4f32x_info, i32mem, loadi32, + "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; +defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64, + v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">, + XS, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, GR32, v2f64x_info, + i32mem, loadi32, "cvtusi2sd{l}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">, - XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; +defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64, + v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">, + XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))), (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; @@ -4309,18 +4470,9 @@ let isCodeGenOnly = 1 in { int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; - defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, - int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}", - SSE_CVT_Scalar, 0>, XS, EVEX_4V; - defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}", - SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W; defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}", SSE_CVT_Scalar, 0>, XD, EVEX_4V; - defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}", - SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; } // isCodeGenOnly = 1 // Convert float/double to signed/unsigned int 32/64 with truncation @@ -4436,117 +4588,389 @@ def : Pat<(extloadf32 addr:$src), def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>, Requires<[HasAVX512]>; -multiclass avx512_vcvt_fp_with_rc opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT, ValueType InVT, - Domain d> { -let hasSideEffects = 0 in { - def rr : AVX512PI, EVEX; - def rrb : AVX512PI, EVEX, EVEX_B, EVEX_RC; - let mayLoad = 1 in - def rm : AVX512PI, EVEX; -} // hasSideEffects = 0 -} - -multiclass avx512_vcvt_fp opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT, ValueType InVT, - Domain d> { -let hasSideEffects = 0 in { - def rr : AVX512PI, EVEX; - let mayLoad = 1 in - def rm : AVX512PI, EVEX; -} // hasSideEffects = 0 -} - -defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround, - loadv8f64, f512mem, v8f32, v8f64, - SSEPackedSingle>, EVEX_V512, VEX_W, PD, - EVEX_CD8<64, CD8VF>; - -defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, - loadv4f64, f256mem, v8f64, v8f32, - SSEPackedDouble>, EVEX_V512, PS, - EVEX_CD8<32, CD8VH>; -def : Pat<(v8f64 (extloadv8f32 addr:$src)), - (VCVTPS2PDZrm addr:$src)>; - -def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src), - (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))), - (VCVTPD2PSZrr VR512:$src)>; - -def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src), - (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), imm:$rc)), - (VCVTPD2PSZrrb VR512:$src, imm:$rc)>; - //===----------------------------------------------------------------------===// -// AVX-512 Vector convert from sign integer to float/double +// AVX-512 Vector convert from signed/unsigned integer to float/double +// and from float/double to signed/unsigned integer //===----------------------------------------------------------------------===// -defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp, - loadv8i64, i512mem, v16f32, v16i32, - SSEPackedSingle>, EVEX_V512, PS, - EVEX_CD8<32, CD8VF>; +multiclass avx512_vcvt_fp opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNode, + string Broadcast = _.BroadcastStr, + string Alias = ""> { -defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp, - loadv4i64, i256mem, v8f64, v8i32, - SSEPackedDouble>, EVEX_V512, XS, - EVEX_CD8<32, CD8VH>; + defm rr : AVX512_maskable, EVEX; -defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, - loadv16f32, f512mem, v16i32, v16f32, - SSEPackedSingle>, EVEX_V512, XS, - EVEX_CD8<32, CD8VF>; + defm rm : AVX512_maskable, EVEX; -defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, - loadv8f64, f512mem, v8i32, v8f64, - SSEPackedDouble>, EVEX_V512, PD, VEX_W, + defm rmb : AVX512_maskable, EVEX, EVEX_B; +} +// Coversion with SAE - suppress all exceptions +multiclass avx512_vcvt_fp_sae opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNodeRnd> { + defm rrb : AVX512_maskable, + EVEX, EVEX_B; +} + +// Conversion with rounding control (RC) +multiclass avx512_vcvt_fp_rc opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNodeRnd> { + defm rrb : AVX512_maskable, + EVEX, EVEX_B, EVEX_RC; +} + +// Extend Float to Double +multiclass avx512_cvtps2pd opc, string OpcodeStr> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_sae, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Truncate Double to Float +multiclass avx512_cvtpd2ps opc, string OpcodeStr> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; + } +} + +defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps">, + VEX_W, PD, EVEX_CD8<64, CD8VF>; +defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">, + PS, EVEX_CD8<32, CD8VH>; + +def : Pat<(v8f64 (extloadv8f32 addr:$src)), + (VCVTPS2PDZrm addr:$src)>; + +let Predicates = [HasVLX] in { + def : Pat<(v4f64 (extloadv4f32 addr:$src)), + (VCVTPS2PDZ256rm addr:$src)>; +} + +// Convert Signed/Unsigned Doubleword to Double +multiclass avx512_cvtdq2pd opc, string OpcodeStr, SDNode OpNode, + SDNode OpNode128> { + // No rounding in this op + let Predicates = [HasAVX512] in + defm Z : avx512_vcvt_fp, + EVEX_V512; + + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Signed/Unsigned Doubleword to Float +multiclass avx512_cvtdq2ps opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp, + EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Doubleword with truncation +multiclass avx512_cvttps2dq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_sae, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp, + EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Doubleword +multiclass avx512_cvtps2dq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp, + EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Doubleword with truncation +multiclass avx512_cvttpd2dq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_sae, EVEX_V512; + } + let Predicates = [HasVLX] in { + // we need "x"/"y" suffixes in order to distinguish between 128 and 256 + // memory forms of these instructions in Asm Parcer. They have the same + // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly + // due to the same reason. + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Doubleword +multiclass avx512_cvtpd2dq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasVLX] in { + // we need "x"/"y" suffixes in order to distinguish between 128 and 256 + // memory forms of these instructions in Asm Parcer. They have the same + // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly + // due to the same reason. + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Quardword +multiclass avx512_cvtpd2qq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + defm Z128 : avx512_vcvt_fp, + EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Quardword with truncation +multiclass avx512_cvttpd2qq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_sae, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + defm Z128 : avx512_vcvt_fp, + EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Signed/Unsigned Quardword to Double +multiclass avx512_cvtqq2pd opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + defm Z128 : avx512_vcvt_fp, + EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Quardword +multiclass avx512_cvtps2qq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + // Explicitly specified broadcast string, since we take only 2 elements + // from v4f32x_info source + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Quardword with truncation +multiclass avx512_cvttps2qq opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_sae, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + // Explicitly specified broadcast string, since we take only 2 elements + // from v4f32x_info source + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, + EVEX_V256; + } +} + +// Convert Signed/Unsigned Quardword to Float +multiclass avx512_cvtqq2ps opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp, + avx512_vcvt_fp_rc, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + // we need "x"/"y" suffixes in order to distinguish between 128 and 256 + // memory forms of these instructions in Asm Parcer. They have the same + // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly + // due to the same reason. + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; + } +} + +defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86cvtdq2pd>, XS, + EVEX_CD8<32, CD8VH>; + +defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, + X86VSintToFpRnd>, + PS, EVEX_CD8<32, CD8VF>; + +defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint, + X86VFpToSintRnd>, + XS, EVEX_CD8<32, CD8VF>; + +defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, + X86VFpToSintRnd>, + PD, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint, + X86VFpToUintRnd>, PS, + EVEX_CD8<32, CD8VF>; + +defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, + X86VFpToUintRnd>, PS, VEX_W, EVEX_CD8<64, CD8VF>; -defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, - loadv16f32, f512mem, v16i32, v16f32, - SSEPackedSingle>, EVEX_V512, PS, +defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>, + XS, EVEX_CD8<32, CD8VH>; + +defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp, + X86VUintToFpRnd>, XD, EVEX_CD8<32, CD8VF>; -// cvttps2udq (src, 0, mask-all-ones, sae-current) -def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)), - (VCVTTPS2UDQZrr VR512:$src)>; +defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtps2Int, + X86cvtps2IntRnd>, PD, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint, - loadv8f64, f512mem, v8i32, v8f64, - SSEPackedDouble>, EVEX_V512, PS, VEX_W, +defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtpd2Int, + X86cvtpd2IntRnd>, XD, VEX_W, EVEX_CD8<64, CD8VF>; -// cvttpd2udq (src, 0, mask-all-ones, sae-current) -def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src), - (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)), - (VCVTTPD2UDQZrr VR512:$src)>; +defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtps2UInt, + X86cvtps2UIntRnd>, + PS, EVEX_CD8<32, CD8VF>; +defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtpd2UInt, + X86cvtpd2UIntRnd>, VEX_W, + PS, EVEX_CD8<64, CD8VF>; -defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp, - loadv4i64, f256mem, v8f64, v8i32, - SSEPackedDouble>, EVEX_V512, XS, - EVEX_CD8<32, CD8VH>; +defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtpd2Int, + X86cvtpd2IntRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; -defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp, - loadv16i32, f512mem, v16f32, v16i32, - SSEPackedSingle>, EVEX_V512, XD, - EVEX_CD8<32, CD8VF>; +defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtps2Int, + X86cvtps2IntRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtpd2UInt, + X86cvtpd2UIntRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; + +defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtps2UInt, + X86cvtps2UIntRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint, + X86VFpToSlongRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, + X86VFpToSlongRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint, + X86VFpToUlongRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, + X86VFpToUlongRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, + X86VSlongToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>; + +defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp, + X86VUlongToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>; +defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, + X86VSlongToFpRnd>, VEX_W, PS, EVEX_CD8<64, CD8VF>; + +defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, + X86VUlongToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>; + +let Predicates = [NoVLX] in { def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; @@ -4566,67 +4990,8 @@ def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>; - -def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)), - (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>; -def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512 (v8i32 VR256X:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VCVTDQ2PDZrr VR256X:$src)>; -def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512 (v16i32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)), - (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>; -def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512 (v8i32 VR256X:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VCVTUDQ2PDZrr VR256X:$src)>; - -multiclass avx512_vcvt_fp2int opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, PatFrag mem_frag, - X86MemOperand x86memop, Domain d> { -let hasSideEffects = 0 in { - def rr : AVX512PI, EVEX; - def rrb : AVX512PI, EVEX, EVEX_B, EVEX_RC; - let mayLoad = 1 in - def rm : AVX512PI, EVEX; -} // hasSideEffects = 0 } -defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512, - loadv16f32, f512mem, SSEPackedSingle>, PD, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X, - loadv8f64, f512mem, SSEPackedDouble>, XD, VEX_W, - EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1), imm:$rc)), - (VCVTPS2DQZrrb VR512:$src, imm:$rc)>; - -def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src), - (v8i32 immAllZerosV), (i8 -1), imm:$rc)), - (VCVTPD2DQZrrb VR512:$src, imm:$rc)>; - -defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512, - loadv16f32, f512mem, SSEPackedSingle>, - PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X, - loadv8f64, f512mem, SSEPackedDouble>, VEX_W, - PS, EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1), imm:$rc)), - (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>; - -def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src), - (v8i32 immAllZerosV), (i8 -1), imm:$rc)), - (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>; - let Predicates = [HasAVX512] in { def : Pat<(v8f32 (fround (loadv8f64 addr:$src))), (VCVTPD2PSZrm addr:$src)>; @@ -4845,6 +5210,8 @@ let hasSideEffects = 0, Predicates = [HasERI] in { defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V; defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V; } + +defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V; /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, @@ -4854,11 +5221,6 @@ multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, (ins _.RC:$src), OpcodeStr, "$src", "$src", (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>; - defm rb : AVX512_maskable, EVEX_B; - defm m : AVX512_maskable opc, string OpcodeStr, X86VectorVTInfo _, (i32 FROUND_CURRENT))>; defm mb : AVX512_maskable, EVEX_B; } +multiclass avx512_fp28_p_round opc, string OpcodeStr, X86VectorVTInfo _, + SDNode OpNode> { + defm rb : AVX512_maskable, EVEX_B; +} multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode> { defm PS : avx512_fp28_p, - EVEX_CD8<32, CD8VF>; + avx512_fp28_p_round, + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PD : avx512_fp28_p, - VEX_W, EVEX_CD8<32, CD8VF>; + avx512_fp28_p_round, + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } +multiclass avx512_fp_unaryop_packed opc, string OpcodeStr, + SDNode OpNode> { + // Define only if AVX512VL feature is present. + let Predicates = [HasVLX] in { + defm PSZ128 : avx512_fp28_p, + EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp28_p, + EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp28_p, + EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp28_p, + EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + } +} let Predicates = [HasERI], hasSideEffects = 0 in { - defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD; - defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD; - defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD; + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX; +} +defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>, + avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX; + +multiclass avx512_sqrt_packed_round opc, string OpcodeStr, + SDNode OpNodeRnd, X86VectorVTInfo _>{ + defm rb: AVX512_maskable, + EVEX, EVEX_B, EVEX_RC; } multiclass avx512_sqrt_packed opc, string OpcodeStr, @@ -4992,20 +5388,22 @@ multiclass avx512_sqrt_packed_all opc, string OpcodeStr, } } -defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>; +multiclass avx512_sqrt_packed_all_round opc, string OpcodeStr, + SDNode OpNodeRnd> { + defm PSZ : avx512_sqrt_packed_round, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_sqrt_packed_round, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; +} + +defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>, + avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>; defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, SSE_SQRTSS, SSE_SQRTSD>; let Predicates = [HasAVX512] in { - def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)), - (VSQRTPSZr VR512:$src1)>; - def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)), - (VSQRTPDZr VR512:$src1)>; - def : Pat<(f32 (fsqrt FR32X:$src)), (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -5044,47 +5442,6 @@ let Predicates = [HasAVX512] in { (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>; } - -multiclass avx512_rndscale opc, string OpcodeStr, - X86MemOperand x86memop, RegisterClass RC, - PatFrag mem_frag, Domain d> { -let ExeDomain = d in { - // Intrinsic operation, reg. - // Vector intrinsic operation, reg - def r : AVX512AIi8, EVEX; - - // Vector intrinsic operation, mem - def m : AVX512AIi8, EVEX; -} // ExeDomain -} - -defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512, - loadv16f32, SSEPackedSingle>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - -def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1), - imm:$src2, (v16f32 VR512:$src1), (i16 -1), - FROUND_CURRENT)), - (VRNDSCALEPSZr VR512:$src1, imm:$src2)>; - - -defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512, - loadv8f64, SSEPackedDouble>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; - -def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1), - imm:$src2, (v8f64 VR512:$src1), (i8 -1), - FROUND_CURRENT)), - (VRNDSCALEPDZr VR512:$src1, imm:$src2)>; - multiclass avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { @@ -5092,20 +5449,20 @@ avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { defm r : AVX512_maskable_scalar; defm rb : AVX512_maskable_scalar, EVEX_B; let mayLoad = 1 in defm m : AVX512_maskable_scalar; } @@ -5150,109 +5507,221 @@ defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>, defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>; -let Predicates = [HasAVX512] in { -def : Pat<(v16f32 (ffloor VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x1))>; -def : Pat<(v16f32 (fnearbyint VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0xC))>; -def : Pat<(v16f32 (fceil VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x2))>; -def : Pat<(v16f32 (frint VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x4))>; -def : Pat<(v16f32 (ftrunc VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x3))>; - -def : Pat<(v8f64 (ffloor VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x1))>; -def : Pat<(v8f64 (fnearbyint VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0xC))>; -def : Pat<(v8f64 (fceil VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x2))>; -def : Pat<(v8f64 (frint VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x4))>; -def : Pat<(v8f64 (ftrunc VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x3))>; -} //------------------------------------------------- // Integer truncate and extend operations //------------------------------------------------- -multiclass avx512_trunc_sat opc, string OpcodeStr, - RegisterClass dstRC, RegisterClass srcRC, - RegisterClass KRC, X86MemOperand x86memop> { - def rr : AVX512XS8I opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo, + X86MemOperand x86memop> { + + defm rr : AVX512_maskable, + EVEX, T8XS; + + // for intrinsic patter match + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + undef)), + (!cast(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + DestInfo.ImmAllZerosV)), + (!cast(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + DestInfo.RC:$src0)), + (!cast(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0, + DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + let mayStore = 1 in { + def mr : AVX512XS8I, EVEX; - def rrk : AVX512XS8I, EVEX, EVEX_K; + }//mayStore = 1 +} - def rrkz : AVX512XS8I, EVEX, EVEX_KZ; +multiclass avx512_trunc_mr_lowering { - def mr : AVX512XS8I, EVEX; + def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), + (!cast(NAME#SrcInfo.ZSuffix##mr) + addr:$dst, SrcInfo.RC:$src)>; - def mrk : AVX512XS8I, EVEX, EVEX_K; + def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask, + (SrcInfo.VT SrcInfo.RC:$src)), + (!cast(NAME#SrcInfo.ZSuffix##mrk) + addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; +} + +multiclass avx512_trunc_sat_mr_lowering { + def: Pat<(!cast("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# + DestInfo.Suffix#"_mem_"#SrcInfo.Size) + addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask), + (!cast(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr, + (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM), + (SrcInfo.VT SrcInfo.RC:$src))>; + + def: Pat<(!cast("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# + DestInfo.Suffix#"_mem_"#SrcInfo.Size) + addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1), + (!cast(NAME#SrcInfo.ZSuffix##mr) addr:$ptr, + (SrcInfo.VT SrcInfo.RC:$src))>; } -defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; - -def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>; -def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>; -def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>; -def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>; -def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>; - -def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>; -def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>; -def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>; -def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>; +multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, + X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, + X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, + X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag, + Predicate prd = HasAVX512>{ + + let Predicates = [HasVLX, prd] in { + defm Z128: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V128; + + defm Z256: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V256; + } + let Predicates = [prd] in + defm Z: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V512; +} + +multiclass avx512_trunc_sat opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, + X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, + X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, + X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{ + + let Predicates = [HasVLX, prd] in { + defm Z128: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V128; + + defm Z256: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V256; + } + let Predicates = [prd] in + defm Z: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V512; +} + +multiclass avx512_trunc_qb opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VO>; +} +multiclass avx512_trunc_sat_qb opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<8, CD8VO>; +} + +multiclass avx512_trunc_qw opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VQ>; +} +multiclass avx512_trunc_sat_qw opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VQ>; +} + +multiclass avx512_trunc_qd opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<32, CD8VH>; +} +multiclass avx512_trunc_sat_qd opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<32, CD8VH>; +} + +multiclass avx512_trunc_db opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VQ>; +} +multiclass avx512_trunc_sat_db opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<8, CD8VQ>; +} + +multiclass avx512_trunc_dw opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; +} +multiclass avx512_trunc_sat_dw opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VH>; +} + +multiclass avx512_trunc_wb opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; +} +multiclass avx512_trunc_sat_wb opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VH>; +} + +defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>; +defm VPMOVSQB : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>; +defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>; + +defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>; +defm VPMOVSQW : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>; +defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>; + +defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>; +defm VPMOVSQD : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>; +defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>; + +defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>; +defm VPMOVSDB : avx512_trunc_sat_db<0x21, "s", X86vtruncs>; +defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>; + +defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>; +defm VPMOVSDW : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>; +defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>; + +defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>; +defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>; +defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>; multiclass avx512_extend_common opc, string OpcodeStr, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, @@ -5400,10 +5869,11 @@ defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">; multiclass avx512_gather opc, string OpcodeStr, X86VectorVTInfo _, X86MemOperand memop, PatFrag GatherNode> { - let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in + let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb", + ExeDomain = _.ExeDomain in def rm : AVX5128I opc, string OpcodeStr, X86VectorVTInfo _, EVEX_CD8<_.EltSize, CD8VT1>; } -let ExeDomain = SSEPackedDouble in { -defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", v8f64_info, vy64xmem, - mgatherv8i32>, EVEX_V512, VEX_W; -defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", v8f64_info, vz64mem, - mgatherv8i64>, EVEX_V512, VEX_W; +multiclass avx512_gather_q_pd dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_gather, EVEX_V512, VEX_W; + defm NAME##Q##SUFF##Z: avx512_gather, EVEX_V512, VEX_W; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_gather, EVEX_V256, VEX_W; + defm NAME##Q##SUFF##Z256: avx512_gather, EVEX_V256, VEX_W; + defm NAME##D##SUFF##Z128: avx512_gather, EVEX_V128, VEX_W; + defm NAME##Q##SUFF##Z128: avx512_gather, EVEX_V128, VEX_W; +} +} + +multiclass avx512_gather_d_ps dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_gather, EVEX_V512; + defm NAME##Q##SUFF##Z: avx512_gather, EVEX_V512; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_gather, EVEX_V256; + defm NAME##Q##SUFF##Z256: avx512_gather, EVEX_V256; + defm NAME##D##SUFF##Z128: avx512_gather, EVEX_V128; + defm NAME##Q##SUFF##Z128: avx512_gather, EVEX_V128; } - -let ExeDomain = SSEPackedSingle in { -defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", v16f32_info, vz32mem, - mgatherv16i32>, EVEX_V512; -defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", v8f32x_info, vz64mem, - mgatherv8i64>, EVEX_V512; } -defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", v8i64_info, vy64xmem, - mgatherv8i32>, EVEX_V512, VEX_W; -defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", v16i32_info, vz32mem, - mgatherv16i32>, EVEX_V512; -defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", v8i64_info, vz64mem, - mgatherv8i64>, EVEX_V512, VEX_W; -defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", v8i32x_info, vz64mem, - mgatherv8i64>, EVEX_V512; +defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">, + avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">; + +defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">, + avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">; multiclass avx512_scatter opc, string OpcodeStr, X86VectorVTInfo _, X86MemOperand memop, PatFrag ScatterNode> { -let mayStore = 1, Constraints = "$mask = $mask_wb" in +let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in def mr : AVX5128I, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; } -let ExeDomain = SSEPackedDouble in { -defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", v8f64_info, vy64xmem, - mscatterv8i32>, EVEX_V512, VEX_W; -defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", v8f64_info, vz64mem, - mscatterv8i64>, EVEX_V512, VEX_W; +multiclass avx512_scatter_q_pd dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_scatter, EVEX_V512, VEX_W; + defm NAME##Q##SUFF##Z: avx512_scatter, EVEX_V512, VEX_W; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_scatter, EVEX_V256, VEX_W; + defm NAME##Q##SUFF##Z256: avx512_scatter, EVEX_V256, VEX_W; + defm NAME##D##SUFF##Z128: avx512_scatter, EVEX_V128, VEX_W; + defm NAME##Q##SUFF##Z128: avx512_scatter, EVEX_V128, VEX_W; +} +} + +multiclass avx512_scatter_d_ps dopc, bits<8> qopc, + AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { + defm NAME##D##SUFF##Z: avx512_scatter, EVEX_V512; + defm NAME##Q##SUFF##Z: avx512_scatter, EVEX_V512; +let Predicates = [HasVLX] in { + defm NAME##D##SUFF##Z256: avx512_scatter, EVEX_V256; + defm NAME##Q##SUFF##Z256: avx512_scatter, EVEX_V256; + defm NAME##D##SUFF##Z128: avx512_scatter, EVEX_V128; + defm NAME##Q##SUFF##Z128: avx512_scatter, EVEX_V128; } - -let ExeDomain = SSEPackedSingle in { -defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", v16f32_info, vz32mem, - mscatterv16i32>, EVEX_V512; -defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", v8f32x_info, vz64mem, - mscatterv8i64>, EVEX_V512; } -defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", v8i64_info, vy64xmem, - mscatterv8i32>, EVEX_V512, VEX_W; -defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", v16i32_info, vz32mem, - mscatterv16i32>, EVEX_V512; +defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">, + avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">; -defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", v8i64_info, vz64mem, - mscatterv8i64>, EVEX_V512, VEX_W; -defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", v8i32x_info, vz64mem, - mscatterv8i64>, EVEX_V512; +defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">, + avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">; // prefetch multiclass avx512_gather_scatter_prefetch opc, Format F, string OpcodeStr, @@ -5568,105 +6075,10 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1, (loadv8i64 addr:$src2), (i8 imm:$imm))), (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>; -multiclass avx512_valign { - defm rri : AVX512_maskable<0x03, MRMSrcReg, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), - "valign"##_.Suffix, - "$src3, $src2, $src1", "$src1, $src2, $src3", - (_.VT (X86VAlign _.RC:$src2, _.RC:$src1, - (i8 imm:$src3)))>, - AVX512AIi8Base, EVEX_4V; - - // Also match valign of packed floats. - def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))), - (!cast(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>; - - let mayLoad = 1 in - def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst), - (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), - !strconcat("valign"##_.Suffix, - "\t{$src3, $src2, $src1, $dst|" - "$dst, $src1, $src2, $src3}"), - []>, EVEX_4V; -} -defm VALIGND : avx512_valign, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VALIGNQ : avx512_valign, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; - // Helper fragments to match sext vXi1 to vXiY. def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>; def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>; -multiclass avx512_vpabs opc, string OpcodeStr, ValueType OpVT, - RegisterClass KRC, RegisterClass RC, - X86MemOperand x86memop, X86MemOperand x86scalar_mop, - string BrdcstStr> { - def rr : AVX5128I, EVEX; - def rrk : AVX5128I, EVEX, EVEX_K; - def rrkz : AVX5128I, EVEX, EVEX_KZ; - let mayLoad = 1 in { - def rm : AVX5128I, EVEX; - def rmk : AVX5128I, EVEX, EVEX_K; - def rmkz : AVX5128I, EVEX, EVEX_KZ; - def rmb : AVX5128I, EVEX, EVEX_B; - def rmbk : AVX5128I, EVEX, EVEX_B, EVEX_K; - def rmbkz : AVX5128I, EVEX, EVEX_B, EVEX_KZ; - } -} - -defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512, - i512mem, i32mem, "{1to16}">, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512, - i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - -def : Pat<(xor - (bc_v16i32 (v16i1sextv16i32)), - (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))), - (VPABSDZrr VR512:$src)>; -def : Pat<(xor - (bc_v8i64 (v8i1sextv8i64)), - (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), - (VPABSQZrr VR512:$src)>; - -def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1))), - (VPABSDZrr VR512:$src)>; -def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src), - (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPABSQZrr VR512:$src)>; - multiclass avx512_conflict opc, string OpcodeStr, RegisterClass RC, RegisterClass KRC, X86MemOperand x86memop, @@ -5865,26 +6277,24 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", //===----------------------------------------------------------------------===// // AVX-512 - COMPRESS and EXPAND // + multiclass compress_by_vec_width opc, X86VectorVTInfo _, string OpcodeStr> { - def rrkz : AVX5128I, EVEX_KZ; - - let Constraints = "$src0 = $dst" in - def rrk : AVX5128I, EVEX_K; + defm rr : AVX512_maskable, AVX5128IBase; let mayStore = 1 in { + def mr : AVX5128I, EVEX_CD8<_.EltSize, CD8VT1>; + def mrk : AVX5128I, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; } @@ -5912,37 +6322,16 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info // expand multiclass expand_by_vec_width opc, X86VectorVTInfo _, string OpcodeStr> { - def rrkz : AVX5128I, EVEX_KZ; - - let Constraints = "$src0 = $dst" in - def rrk : AVX5128I, EVEX_K; - - let mayLoad = 1, Constraints = "$src0 = $dst" in - def rmk : AVX5128I, - EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; + defm rr : AVX512_maskable, AVX5128IBase; let mayLoad = 1 in - def rmkz : AVX5128I, - EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>; + defm rm : AVX512_maskable, + AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>; } multiclass expand_by_elt_width opc, string OpcodeStr, @@ -5964,6 +6353,62 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>, defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, EVEX, VEX_W; +//handle instruction reg_vec1 = op(reg_vec,imm) +// op(mem_vec,imm) +// op(broadcast(eltVt),imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_unary_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable; + let mayLoad = 1 in { + defm rmi : AVX512_maskable; + defm rmbi : AVX512_maskable, EVEX_B; + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_unary_fp_sae_packed_imm opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _>{ + defm rrib : AVX512_maskable, EVEX_B; +} + +multiclass avx512_common_unary_fp_sae_packed_imm opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z : avx512_unary_fp_packed_imm, + avx512_unary_fp_sae_packed_imm, + EVEX_V512; + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_unary_fp_packed_imm, + EVEX_V128; + defm Z256 : avx512_unary_fp_packed_imm, + EVEX_V256; + } +} + //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_vec,imm) // op(reg_vec2,broadcast(eltVt),imm) @@ -5971,58 +6416,157 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, multiclass avx512_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rri : AVX512_maskable; let mayLoad = 1 in { defm rmi : AVX512_maskable; defm rmbi : AVX512_maskable, EVEX_B; } } +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_vec,imm) +// op(reg_vec2,broadcast(eltVt),imm) +multiclass avx512_3Op_imm8 opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable; + let mayLoad = 1 in { + defm rmi : AVX512_maskable; + defm rmbi : AVX512_maskable, EVEX_B; + } +} + +//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_scalar,imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + + defm rri : AVX512_maskable_scalar; + let mayLoad = 1 in { + defm rmi : AVX512_maskable_scalar; + + let isAsmParserOnly = 1 in { + defm rmi_alt :AVX512_maskable_in_asm; + } + } +} + //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_fp_sae_packed_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rrib : AVX512_maskable, EVEX_B; +} +//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _> { + defm NAME#rrib : AVX512_maskable_scalar, EVEX_B; } multiclass avx512_common_fp_sae_packed_imm opc, SDNode OpNode, Predicate prd>{ - let Predicates = [prd] in { - defm Z : avx512_fp_packed_imm, - avx512_fp_sae_packed_imm, + let Predicates = [prd] in { + defm Z : avx512_fp_packed_imm, + avx512_fp_sae_packed_imm, EVEX_V512; - } - let Predicates = [prd, HasVLX] in { - defm Z128 : avx512_fp_packed_imm, + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_fp_packed_imm, EVEX_V128; - defm Z256 : avx512_fp_packed_imm, + defm Z256 : avx512_fp_packed_imm, EVEX_V256; - } + } +} + +multiclass avx512_common_3Op_imm8 opc, SDNode OpNode>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_3Op_imm8, EVEX_V512; + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_3Op_imm8, EVEX_V128; + defm Z256 : avx512_3Op_imm8, EVEX_V256; + } +} + +multiclass avx512_common_fp_sae_scalar_imm opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z128 : avx512_fp_scalar_imm, + avx512_fp_sae_scalar_imm; + } +} + +multiclass avx512_common_fp_sae_packed_imm_all opcPs, + bits<8> opcPd, SDNode OpNode, Predicate prd>{ + defm PS : avx512_common_unary_fp_sae_packed_imm, EVEX_CD8<32, CD8VF>; + defm PD : avx512_common_unary_fp_sae_packed_imm,EVEX_CD8<64, CD8VF> , VEX_W; } defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd", @@ -6032,6 +6576,15 @@ defm VFIXUPIMMPS : avx512_common_fp_sae_packed_imm<"vfixupimmps", avx512vl_f32_info, 0x54, X86VFixupimm, HasAVX512>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VFIXUPIMMSD: avx512_common_fp_sae_scalar_imm<"vfixupimmsd", f64x_info, + 0x55, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info, + 0x55, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +defm VREDUCE : avx512_common_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, X86VReduce, HasDQI>,AVX512AIi8Base,EVEX; +defm VRNDSCALE : avx512_common_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, X86VRndScale, HasAVX512>,AVX512AIi8Base, EVEX; defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 0x50, X86VRange, HasDQI>, @@ -6040,17 +6593,186 @@ defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, 0x50, X86VRange, HasDQI>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info, + 0x51, X86VRange, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, + 0x51, X86VRange, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, + 0x57, X86Reduces, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, + 0x57, X86Reduces, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +multiclass avx512_shuff_packed_128 opc, SDNode OpNode = X86Shuf128>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_3Op_imm8, EVEX_V512; + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z256 : avx512_3Op_imm8, EVEX_V256; + } +} +let Predicates = [HasAVX512] in { +def : Pat<(v16f32 (ffloor VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>; +def : Pat<(v16f32 (fnearbyint VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>; +def : Pat<(v16f32 (fceil VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>; +def : Pat<(v16f32 (frint VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>; +def : Pat<(v16f32 (ftrunc VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>; +def : Pat<(v8f64 (ffloor VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>; +def : Pat<(v8f64 (fnearbyint VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>; +def : Pat<(v8f64 (fceil VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>; +def : Pat<(v8f64 (frint VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>; +def : Pat<(v8f64 (ftrunc VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>; +} +defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +multiclass avx512_valign{ + defm NAME: avx512_common_3Op_imm8, + AVX512AIi8Base, EVEX_4V; + let isCodeGenOnly = 1 in { + defm NAME#_FP: avx512_common_3Op_imm8, + AVX512AIi8Base, EVEX_4V; + } +} + +defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>, + EVEX_CD8<32, CD8VF>; +defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>, + EVEX_CD8<64, CD8VF>, VEX_W; + +multiclass avx512_unary_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr : AVX512_maskable, EVEX, AVX5128IBase; + + let mayLoad = 1 in + defm rm : AVX512_maskable, + EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>; +} + +multiclass avx512_unary_rmb opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> : + avx512_unary_rm { + let mayLoad = 1 in + defm rmb : AVX512_maskable, + EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; +} +multiclass avx512_unary_rm_vl opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_unary_rm, EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_unary_rm, + EVEX_V256; + defm Z128 : avx512_unary_rm, + EVEX_V128; + } +} +multiclass avx512_unary_rmb_vl opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_unary_rmb, + EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_unary_rmb, + EVEX_V256; + defm Z128 : avx512_unary_rmb, + EVEX_V128; + } +} +multiclass avx512_unary_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, + SDNode OpNode, Predicate prd> { + defm Q : avx512_unary_rmb_vl, VEX_W; + defm D : avx512_unary_rmb_vl; +} +multiclass avx512_unary_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, + SDNode OpNode, Predicate prd> { + defm W : avx512_unary_rm_vl; + defm B : avx512_unary_rm_vl; +} +multiclass avx512_unary_rm_vl_all opc_b, bits<8> opc_w, + bits<8> opc_d, bits<8> opc_q, + string OpcodeStr, SDNode OpNode> { + defm NAME : avx512_unary_rm_vl_dq, + avx512_unary_rm_vl_bw; +} +defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>; +def : Pat<(xor + (bc_v16i32 (v16i1sextv16i32)), + (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))), + (VPABSDZrr VR512:$src)>; +def : Pat<(xor + (bc_v8i64 (v8i1sextv8i64)), + (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), + (VPABSQZrr VR512:$src)>; +//===----------------------------------------------------------------------===// +// AVX-512 - Unpack Instructions +//===----------------------------------------------------------------------===// +defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh>; +defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl>; + +defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh, + SSE_INTALU_ITINS_P, HasBWI>; + +defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh, + SSE_INTALU_ITINS_P, HasAVX512>;