X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrAVX512.td;h=8d66627c9271aef85ab5f2282ad4c685a6eb8461;hb=da135386a48b02596be55f9207caf144032a3d3b;hp=58334a86b6655f7b8ec3927049ebdddef02199d1;hpb=7d40c4215d7439422a18e23790ba56546b483e8e;p=oota-llvm.git diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 58334a86b66..8d66627c927 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -79,7 +79,7 @@ class X86VectorVTInfo opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), [(set DstRC:$dst, (OpVT (vselect KRC:$mask, - (X86VBroadcast (ld_frag addr:$src)), + (X86VBroadcast (ld_frag addr:$src)), (OpVT (bitconvert (v16i32 immAllZerosV))))))]>, EVEX, EVEX_KZ; } } @@ -884,8 +884,8 @@ multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, let mayLoad = 1 in { def rm : AVX5128I, EVEX; def rmk : AVX5128I opc, string OpcodeStr, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (_.VT (OpNode _.RC:$src1, - _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>, + _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>, AVX5128IBase, EVEX_4V, EVEX_B; } multiclass avx512_perm_3src_sizes opc, string OpcodeStr, SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasAVX512] in - defm NAME: avx512_perm_3src, + defm NAME: avx512_perm_3src, avx512_perm_3src_mb, EVEX_V512; let Predicates = [HasVLX] in { - defm NAME#128: avx512_perm_3src, + defm NAME#128: avx512_perm_3src, avx512_perm_3src_mb, EVEX_V128; - defm NAME#256: avx512_perm_3src, + defm NAME#256: avx512_perm_3src, avx512_perm_3src_mb, EVEX_V256; } } -multiclass avx512_perm_3src_sizes_w opc, string OpcodeStr, +multiclass avx512_perm_3src_sizes_w opc, string OpcodeStr, SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasBWI] in - defm NAME: avx512_perm_3src, + defm NAME: avx512_perm_3src, avx512_perm_3src_mb, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { - defm NAME#128: avx512_perm_3src, + defm NAME#128: avx512_perm_3src, avx512_perm_3src_mb, EVEX_V128; - defm NAME#256: avx512_perm_3src, + defm NAME#256: avx512_perm_3src, avx512_perm_3src_mb, EVEX_V256; } @@ -2124,7 +2124,7 @@ multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, let Predicates = [HasDQI] in defm D : avx512_mask_shiftop, VEX, TAPD; - } + } } defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>; @@ -2317,7 +2317,7 @@ multiclass avx512_store opc, string OpcodeStr, X86VectorVTInfo _, def rrkz_alt : AVX512PI, EVEX, EVEX_KZ; } @@ -2426,13 +2426,13 @@ def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src), VR512:$src)>; let Predicates = [HasAVX512, NoVLX] in { -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), +def: Pat<(X86mstore addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), (VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz + (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))), @@ -2504,13 +2504,13 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), } // NoVLX patterns let Predicates = [HasAVX512, NoVLX] in { -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), +def: Pat<(X86mstore addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), (VMOVDQU32Zmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz + (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; } @@ -2964,7 +2964,7 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, OpndItins itins, bit IsCommutable = 0> { defm rr : AVX512_maskable, @@ -2972,7 +2972,7 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, let mayLoad = 1 in defm rm : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, avx512_binop_rm { let mayLoad = 1 in defm rmb : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, multiclass avx512_binop_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, bit IsCommutable = 0> { - defm Q : avx512_binop_rm_vl_q; - defm D : avx512_binop_rm_vl_d; } multiclass avx512_binop_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, bit IsCommutable = 0> { - defm W : avx512_binop_rm_vl_w; - defm B : avx512_binop_rm_vl_b; } @@ -3086,15 +3086,15 @@ multiclass avx512_binop_rm_vl_all opc_b, bits<8> opc_w, } multiclass avx512_binop_rm2 opc, string OpcodeStr, OpndItins itins, - SDNode OpNode,X86VectorVTInfo _Src, + SDNode OpNode,X86VectorVTInfo _Src, X86VectorVTInfo _Dst, bit IsCommutable = 0> { - defm rr : AVX512_maskable, + itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V; let mayLoad = 1 in { defm rm : AVX512_maskable opc, string OpcodeStr, OpndItins itins, AVX512BIBase, EVEX_4V; defm rmb : AVX512_maskable, AVX512BIBase, EVEX_4V, EVEX_B; @@ -3127,24 +3127,24 @@ defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds, defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs, SSE_INTALU_ITINS_P, HasBWI, 0>; defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, - SSE_INTALU_ITINS_P, HasBWI, 1>; + SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, - SSE_INTALU_ITINS_P, HasBWI, 0>; -defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul, - SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, - SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, - SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; -defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulh", mulhs, SSE_INTALU_ITINS_P, - HasBWI, 1>; -defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhu", mulhu, SSE_INTMUL_ITINS_P, + SSE_INTALU_ITINS_P, HasBWI, 0>; +defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; +defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, + SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; +defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrs", X86mulhrs, SSE_INTMUL_ITINS_P, - HasBWI, 1>, T8PD; +defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P, + HasBWI, 1>; +defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P, + HasBWI, 1>, T8PD; defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, - SSE_INTALU_ITINS_P, HasBWI, 1>; - + SSE_INTALU_ITINS_P, HasBWI, 1>; + multiclass avx512_binop_all opc, string OpcodeStr, OpndItins itins, SDNode OpNode, bit IsCommutable = 0> { @@ -3159,7 +3159,7 @@ multiclass avx512_binop_all opc, string OpcodeStr, OpndItins itins, v4i32x_info, v2i64x_info, IsCommutable>, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W; } -} +} defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, X86pmuldq, 1>,T8PD; @@ -3170,25 +3170,25 @@ multiclass avx512_packs_rmb opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _Src, X86VectorVTInfo _Dst> { let mayLoad = 1 in { defm rmb : AVX512_maskable, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>; } } -multiclass avx512_packs_rm opc, string OpcodeStr, - SDNode OpNode,X86VectorVTInfo _Src, +multiclass avx512_packs_rm opc, string OpcodeStr, + SDNode OpNode,X86VectorVTInfo _Src, X86VectorVTInfo _Dst> { - defm rr : AVX512_maskable, EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V; let mayLoad = 1 in { @@ -3255,95 +3255,33 @@ let Predicates = [HasBWI] in { avx512vl_i16_info, avx512vl_i32_info>, AVX512BIBase; } -defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", smax, +defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", smax, +defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", umax, +defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", umax, +defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", smin, +defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", smin, +defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", umin, +defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", umin, +defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; - -//===----------------------------------------------------------------------===// -// AVX-512 - Unpack Instructions -//===----------------------------------------------------------------------===// - -multiclass avx512_unpack_fp opc, SDNode OpNode, ValueType vt, - PatFrag mem_frag, RegisterClass RC, - X86MemOperand x86memop, string asm, - Domain d> { - def rr : AVX512PI, EVEX_4V; - def rm : AVX512PI, EVEX_4V; -} - -defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, loadv8f64, - VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, loadv8f64, - VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, loadv8f64, - VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, loadv8f64, - VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop> { - def rr : AVX512BI, EVEX_4V; - def rm : AVX512BI, EVEX_4V; -} -defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32, - VR512, loadv16i32, i512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64, - VR512, loadv8i64, i512mem>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; -defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32, - VR512, loadv16i32, i512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, - VR512, loadv8i64, i512mem>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// @@ -3381,12 +3319,12 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, let isCodeGenOnly = 1, isCommutable = IsCommutable, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), - (ins _.FRC:$src1, _.FRC:$src2), + (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], itins.rr>; def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), - (ins _.FRC:$src1, _.ScalarMemOp:$src2), + (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))], itins.rr>; @@ -3394,7 +3332,7 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, } multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo _, - SDNode VecNode, OpndItins itins, bit IsCommutable> { + SDNode VecNode, OpndItins itins, bit IsCommutable = 0> { defm rrb : AVX512_maskable_scalar opc, string OpcodeStr, SDNode OpNodeRnd, EVEX_4V, EVEX_B; } -multiclass avx512_fp_binop_p opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_fp_binop_p opc, string OpcodeStr, SDNode OpNode, bit IsCommutable = 0> { defm PSZ : avx512_fp_packed, EVEX_V512, PS, @@ -3533,7 +3471,7 @@ defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>, avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>; defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>, avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>; -defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>, +defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>, avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>; defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>, avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>; @@ -3569,13 +3507,34 @@ multiclass avx512_fp_scalef_p opc, string OpcodeStr, SDNode OpNode, }//let mayLoad = 1 } -multiclass avx512_fp_scalef_all opc, string OpcodeStr, SDNode OpNode> { - defm PSZ : avx512_fp_scalef_p, +multiclass avx512_fp_scalef_scalar opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr: AVX512_maskable_scalar; + let mayLoad = 1 in { + defm rm: AVX512_maskable_scalar; + }//let mayLoad = 1 +} + +multiclass avx512_fp_scalef_all opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> { + defm PSZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_scalef_p, + defm PDZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm SSZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V,EVEX_CD8<32, CD8VT1>; + defm SDZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; + // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp_scalef_p, @@ -3588,7 +3547,7 @@ multiclass avx512_fp_scalef_all opc, string OpcodeStr, SDNode OpNode> { EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } -defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD; +defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD; //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions @@ -3605,7 +3564,7 @@ multiclass avx512_vptest opc, string OpcodeStr, SDNode OpNode, defm rm : AVX512_maskable_cmp, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; @@ -3767,12 +3726,12 @@ multiclass avx512_shift_rmi_sizes opc, Format ImmFormR, Format ImmFormM, VTInfo.info256>, EVEX_V256; defm Z128: avx512_shift_rmi, - avx512_shift_rmbi, EVEX_V128; } } -multiclass avx512_shift_rmi_w opcw, +multiclass avx512_shift_rmi_w opcw, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in @@ -3941,13 +3900,13 @@ defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", //===----------------------------------------------------------------------===// defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", - X86PShufd, avx512vl_i32_info>, + X86PShufd, avx512vl_i32_info>, EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W; defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W; - + multiclass avx512_pshufb_sizes opc, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in defm Z: avx512_var_shift, EVEX_V512; @@ -4053,7 +4012,7 @@ multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>, - AVX512FMA3Base; + AVX512FMA3Base; defm mb: AVX512_maskable_3src, T8PD, EVEX_4V; defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V; } + +defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V; /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, @@ -5481,47 +5442,6 @@ let Predicates = [HasAVX512] in { (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>; } - -multiclass avx512_rndscale opc, string OpcodeStr, - X86MemOperand x86memop, RegisterClass RC, - PatFrag mem_frag, Domain d> { -let ExeDomain = d in { - // Intrinsic operation, reg. - // Vector intrinsic operation, reg - def r : AVX512AIi8, EVEX; - - // Vector intrinsic operation, mem - def m : AVX512AIi8, EVEX; -} // ExeDomain -} - -defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512, - loadv16f32, SSEPackedSingle>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - -def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1), - imm:$src2, (v16f32 VR512:$src1), (i16 -1), - FROUND_CURRENT)), - (VRNDSCALEPSZr VR512:$src1, imm:$src2)>; - - -defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512, - loadv8f64, SSEPackedDouble>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; - -def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1), - imm:$src2, (v8f64 VR512:$src1), (i8 -1), - FROUND_CURRENT)), - (VRNDSCALEPDZr VR512:$src1, imm:$src2)>; - multiclass avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { @@ -5529,20 +5449,20 @@ avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { defm r : AVX512_maskable_scalar; defm rb : AVX512_maskable_scalar, EVEX_B; let mayLoad = 1 in defm m : AVX512_maskable_scalar; } @@ -5587,109 +5507,221 @@ defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>, defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>; -let Predicates = [HasAVX512] in { -def : Pat<(v16f32 (ffloor VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x1))>; -def : Pat<(v16f32 (fnearbyint VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0xC))>; -def : Pat<(v16f32 (fceil VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x2))>; -def : Pat<(v16f32 (frint VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x4))>; -def : Pat<(v16f32 (ftrunc VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x3))>; - -def : Pat<(v8f64 (ffloor VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x1))>; -def : Pat<(v8f64 (fnearbyint VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0xC))>; -def : Pat<(v8f64 (fceil VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x2))>; -def : Pat<(v8f64 (frint VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x4))>; -def : Pat<(v8f64 (ftrunc VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x3))>; -} //------------------------------------------------- // Integer truncate and extend operations //------------------------------------------------- -multiclass avx512_trunc_sat opc, string OpcodeStr, - RegisterClass dstRC, RegisterClass srcRC, - RegisterClass KRC, X86MemOperand x86memop> { - def rr : AVX512XS8I opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo, + X86MemOperand x86memop> { + + defm rr : AVX512_maskable, + EVEX, T8XS; + + // for intrinsic patter match + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + undef)), + (!cast(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + DestInfo.ImmAllZerosV)), + (!cast(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + DestInfo.RC:$src0)), + (!cast(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0, + DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + let mayStore = 1 in { + def mr : AVX512XS8I, EVEX; - def rrk : AVX512XS8I, EVEX, EVEX_K; + }//mayStore = 1 +} - def rrkz : AVX512XS8I, EVEX, EVEX_KZ; +multiclass avx512_trunc_mr_lowering { - def mr : AVX512XS8I, EVEX; + def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), + (!cast(NAME#SrcInfo.ZSuffix##mr) + addr:$dst, SrcInfo.RC:$src)>; - def mrk : AVX512XS8I, EVEX, EVEX_K; + def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask, + (SrcInfo.VT SrcInfo.RC:$src)), + (!cast(NAME#SrcInfo.ZSuffix##mrk) + addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; +} + +multiclass avx512_trunc_sat_mr_lowering { + + def: Pat<(!cast("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# + DestInfo.Suffix#"_mem_"#SrcInfo.Size) + addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask), + (!cast(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr, + (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM), + (SrcInfo.VT SrcInfo.RC:$src))>; + def: Pat<(!cast("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# + DestInfo.Suffix#"_mem_"#SrcInfo.Size) + addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1), + (!cast(NAME#SrcInfo.ZSuffix##mr) addr:$ptr, + (SrcInfo.VT SrcInfo.RC:$src))>; } -defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; - -def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>; -def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>; -def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>; -def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>; -def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>; - -def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>; -def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>; -def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>; -def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>; +multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, + X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, + X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, + X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag, + Predicate prd = HasAVX512>{ + + let Predicates = [HasVLX, prd] in { + defm Z128: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V128; + + defm Z256: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V256; + } + let Predicates = [prd] in + defm Z: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V512; +} + +multiclass avx512_trunc_sat opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, + X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, + X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, + X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{ + + let Predicates = [HasVLX, prd] in { + defm Z128: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V128; + + defm Z256: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V256; + } + let Predicates = [prd] in + defm Z: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V512; +} + +multiclass avx512_trunc_qb opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VO>; +} +multiclass avx512_trunc_sat_qb opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<8, CD8VO>; +} + +multiclass avx512_trunc_qw opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VQ>; +} +multiclass avx512_trunc_sat_qw opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VQ>; +} + +multiclass avx512_trunc_qd opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<32, CD8VH>; +} +multiclass avx512_trunc_sat_qd opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<32, CD8VH>; +} + +multiclass avx512_trunc_db opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VQ>; +} +multiclass avx512_trunc_sat_db opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<8, CD8VQ>; +} + +multiclass avx512_trunc_dw opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; +} +multiclass avx512_trunc_sat_dw opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VH>; +} + +multiclass avx512_trunc_wb opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; +} +multiclass avx512_trunc_sat_wb opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VH>; +} + +defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>; +defm VPMOVSQB : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>; +defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>; + +defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>; +defm VPMOVSQW : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>; +defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>; + +defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>; +defm VPMOVSQD : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>; +defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>; + +defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>; +defm VPMOVSDB : avx512_trunc_sat_db<0x21, "s", X86vtruncs>; +defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>; + +defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>; +defm VPMOVSDW : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>; +defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>; + +defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>; +defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>; +defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>; multiclass avx512_extend_common opc, string OpcodeStr, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, @@ -6249,7 +6281,7 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", multiclass compress_by_vec_width opc, X86VectorVTInfo _, string OpcodeStr> { defm rr : AVX512_maskable, AVX5128IBase; let mayStore = 1 in { @@ -6261,7 +6293,7 @@ multiclass compress_by_vec_width opc, X86VectorVTInfo _, def mrk : AVX5128I, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; @@ -6291,7 +6323,7 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info multiclass expand_by_vec_width opc, X86VectorVTInfo _, string OpcodeStr> { defm rr : AVX512_maskable, AVX5128IBase; let mayLoad = 1 in @@ -6321,6 +6353,62 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>, defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, EVEX, VEX_W; +//handle instruction reg_vec1 = op(reg_vec,imm) +// op(mem_vec,imm) +// op(broadcast(eltVt),imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_unary_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable; + let mayLoad = 1 in { + defm rmi : AVX512_maskable; + defm rmbi : AVX512_maskable, EVEX_B; + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_unary_fp_sae_packed_imm opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _>{ + defm rrib : AVX512_maskable, EVEX_B; +} + +multiclass avx512_common_unary_fp_sae_packed_imm opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z : avx512_unary_fp_packed_imm, + avx512_unary_fp_sae_packed_imm, + EVEX_V512; + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_unary_fp_packed_imm, + EVEX_V128; + defm Z256 : avx512_unary_fp_packed_imm, + EVEX_V256; + } +} + //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_vec,imm) // op(reg_vec2,broadcast(eltVt),imm) @@ -6328,27 +6416,27 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, multiclass avx512_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rri : AVX512_maskable; let mayLoad = 1 in { defm rmi : AVX512_maskable; defm rmbi : AVX512_maskable, EVEX_B; } } @@ -6388,20 +6476,20 @@ multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { defm rri : AVX512_maskable_scalar; let mayLoad = 1 in { defm rmi : AVX512_maskable_scalar; let isAsmParserOnly = 1 in { @@ -6417,18 +6505,25 @@ multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, multiclass avx512_fp_sae_packed_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rrib : AVX512_maskable, EVEX_B; } //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { - defm NAME: avx512_fp_sae_packed_imm; + defm NAME#rrib : AVX512_maskable_scalar, EVEX_B; } multiclass avx512_common_fp_sae_packed_imm opcPs, + bits<8> opcPd, SDNode OpNode, Predicate prd>{ + defm PS : avx512_common_unary_fp_sae_packed_imm, EVEX_CD8<32, CD8VF>; + defm PD : avx512_common_unary_fp_sae_packed_imm,EVEX_CD8<64, CD8VF> , VEX_W; +} + defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd", avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; @@ -6480,6 +6583,9 @@ defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info, 0x55, X86VFixupimm, HasAVX512>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; +defm VREDUCE : avx512_common_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, X86VReduce, HasDQI>,AVX512AIi8Base,EVEX; +defm VRNDSCALE : avx512_common_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, X86VRndScale, HasAVX512>,AVX512AIi8Base, EVEX; + defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 0x50, X86VRange, HasDQI>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; @@ -6494,6 +6600,12 @@ defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 0x51, X86VRange, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; +defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, + 0x57, X86Reduces, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, + 0x57, X86Reduces, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; multiclass avx512_shuff_packed_128 opc, SDNode OpNode = X86Shuf128>{ @@ -6505,6 +6617,29 @@ multiclass avx512_shuff_packed_128, EVEX_V256; } } +let Predicates = [HasAVX512] in { +def : Pat<(v16f32 (ffloor VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>; +def : Pat<(v16f32 (fnearbyint VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>; +def : Pat<(v16f32 (fceil VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>; +def : Pat<(v16f32 (frint VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>; +def : Pat<(v16f32 (ftrunc VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>; + +def : Pat<(v8f64 (ffloor VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>; +def : Pat<(v8f64 (fnearbyint VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>; +def : Pat<(v8f64 (fceil VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>; +def : Pat<(v8f64 (frint VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>; +def : Pat<(v8f64 (ftrunc VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>; +} defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; @@ -6617,3 +6752,27 @@ def : Pat<(xor (bc_v8i64 (v8i1sextv8i64)), (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), (VPABSQZrr VR512:$src)>; + +//===----------------------------------------------------------------------===// +// AVX-512 - Unpack Instructions +//===----------------------------------------------------------------------===// +defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh>; +defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl>; + +defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh, + SSE_INTALU_ITINS_P, HasBWI>; + +defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh, + SSE_INTALU_ITINS_P, HasAVX512>;