X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrAVX512.td;h=987ff6cab2ccd29edc4878a2da595865f8589f1b;hp=4ab1b2994ce21724f116b21aa25c1540a775f1b5;hb=e868005a274ca7d7bc12b75771fb85625c86749f;hpb=d0d5b08fbd68e725996734352bf427eba61c4352 diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 4ab1b2994ce..987ff6cab2c 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -79,6 +79,11 @@ class X86VectorVTInfo("v" # !srl(Size, 5) # "i32"); + dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV))); } def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">; @@ -115,26 +120,33 @@ def avx512vl_i32_info : AVX512VLVectorVTInfo; - -// Common base class of AVX512_masking and AVX512_masking_3src. -multiclass AVX512_masking_common O, Format F, X86VectorVTInfo _, +// This multiclass generates the masking variants from the non-masking +// variant. It only provides the assembly pieces for the masking variants. +// It assumes custom ISel patterns for masking which can be provided as +// template arguments. +multiclass AVX512_masking_custom O, Format F, dag Outs, dag Ins, dag MaskingIns, dag ZeroMaskingIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, dag MaskingRHS, - string MaskingConstraint = ""> { - def NAME: AVX512 Pattern, + list MaskingPattern, + list ZeroMaskingPattern, + string MaskingConstraint = "", + InstrItinClass itin = NoItinerary, + bit IsCommutable = 0> { + let isCommutable = IsCommutable in + def NAME: AVX512; + Pattern, itin>; // Prefer over VMOV*rrk Pat<> let AddedComplexity = 20 in def NAME#k: AVX512, + MaskingPattern, itin>, EVEX_K { // In case of the 3src subclass this is overridden with a let. string Constraints = MaskingConstraint; @@ -143,26 +155,44 @@ multiclass AVX512_masking_common O, Format F, X86VectorVTInfo _, def NAME#kz: AVX512, + ZeroMaskingPattern, + itin>, EVEX_KZ; } + +// Common base class of AVX512_masking and AVX512_masking_3src. +multiclass AVX512_masking_common O, Format F, X86VectorVTInfo _, + dag Outs, + dag Ins, dag MaskingIns, dag ZeroMaskingIns, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS, dag MaskingRHS, + string MaskingConstraint = "", + InstrItinClass itin = NoItinerary, + bit IsCommutable = 0> : + AVX512_masking_custom; + // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the instruction. In the masking case, the // perserved vector elements come from a new dummy input operand tied to $dst. multiclass AVX512_masking O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS> : + dag RHS, InstrItinClass itin = NoItinerary, + bit IsCommutable = 0> : AVX512_masking_common; + "$src0 = $dst", itin, IsCommutable>; // Similar to AVX512_masking but in this case one of the source operands // ($src1) is already tied to $dst so we just use that for the preserved @@ -179,6 +209,18 @@ multiclass AVX512_masking_3src O, Format F, X86VectorVTInfo _, OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>; + +multiclass AVX512_masking_in_asm O, Format F, X86VectorVTInfo _, + dag Outs, dag Ins, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + list Pattern> : + AVX512_masking_custom; + // Bitcasts between 512-bit vector types. Return the original type since // no instruction is needed for the conversion let Predicates = [HasAVX512] in { @@ -298,110 +340,59 @@ def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; //===----------------------------------------------------------------------===// // AVX-512 - VECTOR INSERT // -// -- 32x8 form -- -let hasSideEffects = 0, ExeDomain = SSEPackedSingle in { -def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst), - (ins VR512:$src1, VR128X:$src2, i8imm:$src3), - "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512; -let mayLoad = 1 in -def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst), - (ins VR512:$src1, f128mem:$src2, i8imm:$src3), - "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>; -} - -// -- 64x4 fp form -- -let hasSideEffects = 0, ExeDomain = SSEPackedDouble in { -def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst), - (ins VR512:$src1, VR256X:$src2, i8imm:$src3), - "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, VEX_W; -let mayLoad = 1 in -def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst), - (ins VR512:$src1, i256mem:$src2, i8imm:$src3), - "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; + +multiclass vinsert_for_size { + let hasSideEffects = 0, ExeDomain = To.ExeDomain in { + def rr : AVX512AIi8, + EVEX_4V, EVEX_V512; + + let mayLoad = 1 in + def rm : AVX512AIi8, EVEX_4V, EVEX_V512, EVEX_CD8; + } + + // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for + // vinserti32x4 + def : Pat<(vinsert_insert:$ins + (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)), + (AltTo.VT (!cast(NAME # From.EltSize # "x4rr") + VR512:$src1, From.RC:$src2, + (INSERT_get_vinsert_imm VR512:$ins)))>; } -// -- 32x4 integer form -- -let hasSideEffects = 0 in { -def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst), - (ins VR512:$src1, VR128X:$src2, i8imm:$src3), - "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512; -let mayLoad = 1 in -def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst), - (ins VR512:$src1, i128mem:$src2, i8imm:$src3), - "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>; + +multiclass vinsert_for_type { + defm NAME # "32x4" : vinsert_for_size, + X86VectorVTInfo<16, EltVT32, VR512>, + X86VectorVTInfo< 2, EltVT64, VR128X>, + X86VectorVTInfo< 8, EltVT64, VR512>, + vinsert128_insert, + INSERT_get_vinsert128_imm>; + defm NAME # "64x4" : vinsert_for_size, + X86VectorVTInfo< 8, EltVT64, VR512>, + X86VectorVTInfo< 8, EltVT32, VR256>, + X86VectorVTInfo<16, EltVT32, VR512>, + vinsert256_insert, + INSERT_get_vinsert256_imm>, VEX_W; } -let hasSideEffects = 0 in { -// -- 64x4 form -- -def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst), - (ins VR512:$src1, VR256X:$src2, i8imm:$src3), - "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, VEX_W; -let mayLoad = 1 in -def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst), - (ins VR512:$src1, i256mem:$src2, i8imm:$src3), - "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; -} - -def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2), - (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2), - (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2), - (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2), - (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; - -def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2), - (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), - (bc_v4i32 (loadv2i64 addr:$src2)), - (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2), - (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2), - (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; - -def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2), - (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2), - (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2), - (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2), - (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; - -def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2), - (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2), - (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2), - (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), - (bc_v8i32 (loadv4i64 addr:$src2)), - (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; +defm VINSERTF : vinsert_for_type; +defm VINSERTI : vinsert_for_type; // vinsertps - insert f32 to XMM def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), @@ -426,13 +417,13 @@ multiclass vextract_for_size { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { - def rr : AVX512AIi8, - EVEX, EVEX_V512; + defm rr : AVX512_masking_in_asm, + AVX512AIi8Base, EVEX, EVEX_V512; let mayStore = 1 in def rm : AVX512AIi8 opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, RegisterClass KRC, - RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop, PatFrag scalar_mfrag, - X86MemOperand x86scalar_mop, string BrdcstStr, - OpndItins itins, bit IsCommutable = 0> { - let isCommutable = IsCommutable in - def rr : AVX512BI, EVEX_4V; - let AddedComplexity = 30 in { - let Constraints = "$src0 = $dst" in - def rrk : AVX512BI, EVEX_4V, EVEX_K; - def rrkz : AVX512BI, EVEX_4V, EVEX_KZ; - } + X86VectorVTInfo _, OpndItins itins, + bit IsCommutable = 0> { + defm rr : AVX512_masking, + AVX512BIBase, EVEX_4V; let mayLoad = 1 in { - def rm : AVX512BI, EVEX_4V; - let AddedComplexity = 30 in { - let Constraints = "$src0 = $dst" in - def rmk : AVX512BI, EVEX_4V, EVEX_K; - def rmkz : AVX512BI, EVEX_4V, EVEX_KZ; - } - def rmb : AVX512BI, EVEX_4V, EVEX_B; - let AddedComplexity = 30 in { - let Constraints = "$src0 = $dst" in - def rmbk : AVX512BI, EVEX_4V, EVEX_B, EVEX_K; - def rmbkz : AVX512BI, EVEX_4V, EVEX_B, EVEX_KZ; - } + defm rm : AVX512_masking, + AVX512BIBase, EVEX_4V; + defm rmb : AVX512_masking, + AVX512BIBase, EVEX_4V, EVEX_B; } } @@ -2640,24 +2571,19 @@ multiclass avx512_binop_rm2 opc, string OpcodeStr, ValueType DstVT, } } -defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", +defm VPADDDZ : avx512_binop_rm<0xFE, "vpadd", add, v16i32_info, SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", +defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsub", sub, v16i32_info, SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", +defm VPMULLDZ : avx512_binop_rm<0x40, "vpmull", mul, v16i32_info, SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", +defm VPADDQZ : avx512_binop_rm<0xD4, "vpadd", add, v8i64_info, SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W; -defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", +defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsub", sub, v8i64_info, SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512, @@ -2679,39 +2605,31 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1), (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), (VPMULDQZrr VR512:$src1, VR512:$src2)>; -defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", +defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxu", X86umax, v16i32_info, SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", +defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxu", X86umax, v8i64_info, SSE_INTALU_ITINS_P, 0>, T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", +defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxs", X86smax, v16i32_info, SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", +defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxs", X86smax, v8i64_info, SSE_INTALU_ITINS_P, 0>, T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", +defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminu", X86umin, v16i32_info, SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", +defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminu", X86umin, v8i64_info, SSE_INTALU_ITINS_P, 0>, T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", +defm VPMINSDZ : avx512_binop_rm<0x39, "vpmins", X86smin, v16i32_info, SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", +defm VPMINSQZ : avx512_binop_rm<0x39, "vpmins", X86smin, v8i64_info, SSE_INTALU_ITINS_P, 0>, T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; @@ -2844,29 +2762,21 @@ def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// -defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512, memopv16i32, - i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>, +defm VPANDDZ : avx512_binop_rm<0xDB, "vpand", and, v16i32_info, SSE_BIT_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512, memopv8i64, - i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>, +defm VPANDQZ : avx512_binop_rm<0xDB, "vpand", and, v8i64_info, SSE_BIT_ITINS_P, 1>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512, memopv16i32, - i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>, +defm VPORDZ : avx512_binop_rm<0xEB, "vpor", or, v16i32_info, SSE_BIT_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512, memopv8i64, - i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>, +defm VPORQZ : avx512_binop_rm<0xEB, "vpor", or, v8i64_info, SSE_BIT_ITINS_P, 1>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512, memopv16i32, - i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>, +defm VPXORDZ : avx512_binop_rm<0xEF, "vpxor", xor, v16i32_info, SSE_BIT_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512, memopv8i64, - i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>, +defm VPXORQZ : avx512_binop_rm<0xEF, "vpxor", xor, v8i64_info, SSE_BIT_ITINS_P, 1>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", +defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandn", X86andnp, v16i32_info, SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", +defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandn", X86andnp, v8i64_info, SSE_BIT_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// @@ -5052,3 +4962,32 @@ def truncstorei1 : PatFrag<(ops node:$val, node:$ptr), def : Pat<(truncstorei1 GR8:$src, addr:$dst), (MOV8mr addr:$dst, GR8:$src)>; +multiclass cvt_by_vec_width opc, X86VectorVTInfo Vec, string OpcodeStr > { +def rr : AVX512XS8I, EVEX; +} + +multiclass cvt_mask_by_elt_width opc, AVX512VLVectorVTInfo VTInfo, + string OpcodeStr, Predicate prd> { +let Predicates = [prd] in + defm Z : cvt_by_vec_width, EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : cvt_by_vec_width, EVEX_V256; + defm Z128 : cvt_by_vec_width, EVEX_V128; + } +} + +multiclass avx512_convert_mask_to_vector { + defm NAME##B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, OpcodeStr, + HasBWI>; + defm NAME##W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, OpcodeStr, + HasBWI>, VEX_W; + defm NAME##D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, OpcodeStr, + HasDQI>; + defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr, + HasDQI>, VEX_W; +} + +defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;