[(set VR512:$dst, (v16f32 immAllZerosV))]>;
}
+let Predicates = [HasAVX512] in {
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
-def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
+}
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
(ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
- addr:$dst)]>, EVEX;
+ addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
X86VPermiv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERM2D : avx512_perm_3src<0x7E, "vperm2d", VR512, memopv16i32, i512mem,
+defm VPERMT2D : avx512_perm_3src<0x7E, "vpermt2d", VR512, memopv16i32, i512mem,
X86VPermv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERM2Q : avx512_perm_3src<0x7E, "vperm2q", VR512, memopv8i64, i512mem,
+defm VPERMT2Q : avx512_perm_3src<0x7E, "vpermt2q", VR512, memopv8i64, i512mem,
X86VPermv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERM2PS : avx512_perm_3src<0x7F, "vperm2ps", VR512, memopv16f32, i512mem,
+defm VPERMT2PS : avx512_perm_3src<0x7F, "vpermt2ps", VR512, memopv16f32, i512mem,
X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERM2PD : avx512_perm_3src<0x7F, "vperm2pd", VR512, memopv8f64, i512mem,
+defm VPERMT2PD : avx512_perm_3src<0x7F, "vpermt2pd", VR512, memopv8f64, i512mem,
X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
}
defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
- "ps", SSEPackedSingle>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ "ps", SSEPackedSingle>, TB, EVEX_4V, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
"pd", SSEPackedDouble>, PD, EVEX_4V, VEX_W, EVEX_V512,
EVEX_CD8<64, CD8VF>;
(KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
sub_8bit)>;
- def : Pat<(i1 (extractelt VK16:$src, (iPTR 0))),
+ def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK16:$src, VK1)>;
- def : Pat<(i1 (extractelt VK8:$src, (iPTR 0))),
+ def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK1)>;
}
defm VMOVAPSZ : avx512_mov_packed<0x28, VR512, VK16WM, f512mem, alignedloadv16f32,
"vmovaps", SSEPackedSingle>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
+ TB, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VMOVAPDZ : avx512_mov_packed<0x28, VR512, VK8WM, f512mem, alignedloadv8f64,
"vmovapd", SSEPackedDouble>,
PD, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>;
defm VMOVUPSZ : avx512_mov_packed<0x10, VR512, VK16WM, f512mem, loadv16f32,
"vmovups", SSEPackedSingle>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
+ TB, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VMOVUPDZ : avx512_mov_packed<0x10, VR512, VK8WM, f512mem, loadv8f64,
"vmovupd", SSEPackedDouble, 0>,
PD, EVEX_V512, VEX_W,
def VMOVAPSZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
"vmovaps\t{$src, $dst|$dst, $src}",
[(alignedstore512 (v16f32 VR512:$src), addr:$dst)],
- SSEPackedSingle>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ SSEPackedSingle>, EVEX, EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
def VMOVAPDZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
"vmovapd\t{$src, $dst|$dst, $src}",
[(alignedstore512 (v8f64 VR512:$src), addr:$dst)],
def VMOVUPSZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
"vmovups\t{$src, $dst|$dst, $src}",
[(store (v16f32 VR512:$src), addr:$dst)],
- SSEPackedSingle>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ SSEPackedSingle>, EVEX, EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
"vmovupd\t{$src, $dst|$dst, $src}",
[(store (v8f64 VR512:$src), addr:$dst)],
" \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[]>, EVEX, EVEX_K;
}
+ def rrkz : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src),
+ !strconcat(asm,
+ " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), []>,
+ EVEX, EVEX_KZ;
}
defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM,
(VMOVDQU32mr addr:$dst, VR512:$src)>;
let AddedComplexity = 20 in {
+def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
+ (bc_v8i64 (v16i32 immAllZerosV)))),
+ (VMOVDQU64rrkz VK8WM:$mask, VR512:$src)>;
+
+def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
+ (v8i64 VR512:$src))),
+ (VMOVDQU64rrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
+ VK8), VR512:$src)>;
+
+def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
+ (v16i32 immAllZerosV))),
+ (VMOVDQU32rrkz VK16WM:$mask, VR512:$src)>;
+
+def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
+ (v16i32 VR512:$src))),
+ (VMOVDQU32rrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
+
def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
(v16f32 VR512:$src2))),
(VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ SSEPackedSingle>, TB, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ SSEPackedSingle>, TB, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
- EVEX_4V, TB;
+ EVEX_4V;
let mayLoad = 1 in {
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
- itins.rm, d>, EVEX_4V, TB;
+ itins.rm, d>, EVEX_4V;
def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86scalar_mop:$src2),
!strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
[(set RC:$dst, (OpNode RC:$src1,
(vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
- itins.rm, d>, EVEX_4V, EVEX_B, TB;
+ itins.rm, d>, EVEX_4V, EVEX_B;
}
}
defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VR512, v16f32, f512mem,
memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ SSE_ALU_ITINS_P.s, 1>, EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VR512, v8f64, f512mem,
memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VR512, v16f32, f512mem,
memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ SSE_ALU_ITINS_P.s, 1>, EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VR512, v8f64, f512mem,
memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
SSE_ALU_ITINS_P.d, 1>,
defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VR512, v16f32, f512mem,
memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
SSE_ALU_ITINS_P.s, 1>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
+ EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VR512, v16f32, f512mem,
memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
SSE_ALU_ITINS_P.s, 1>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
+ EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VR512, v8f64, f512mem,
memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VR512, v16f32, f512mem,
memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ SSE_ALU_ITINS_P.s, 0>, EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VR512, v16f32, f512mem,
memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ SSE_ALU_ITINS_P.s, 0>, EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VR512, v8f64, f512mem,
memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
SDNode OpNode, ValueType vt> {
- def rr : AVX5128I<opc, MRMSrcReg,
+ def rr : AVX512PI<opc, MRMSrcReg,
(outs KRC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))]>, EVEX_4V;
- def rm : AVX5128I<opc, MRMSrcMem,
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
+ SSEPackedInt>, EVEX_4V;
+ def rm : AVX512PI<opc, MRMSrcMem,
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode (vt RC:$src1),
- (bitconvert (memop_frag addr:$src2))))]>, EVEX_4V;
+ (bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
}
defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
- memopv16i32, X86testm, v16i32>, EVEX_V512,
+ memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512,
EVEX_CD8<32, CD8VF>;
defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
- memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W,
+ memopv8i64, X86testm, v8i64>, T8XS, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>;
+let Predicates = [HasCDI] in {
+defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
+ memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
+ memopv8i64, X86testnm, v8i64>, T8PD, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+}
+
def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
(v16i32 VR512:$src2), (i16 -1))),
(COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
memopv4f64, f256mem, v8f64, v8f32,
- SSEPackedDouble>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+ SSEPackedDouble>, EVEX_V512, TB,
+ EVEX_CD8<32, CD8VH>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
memopv8i64, i512mem, v16f32, v16i32,
- SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ SSEPackedSingle>, EVEX_V512, TB,
+ EVEX_CD8<32, CD8VF>;
defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
memopv4i64, i256mem, v8f64, v8i32,
defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
memopv16f32, f512mem, v16i32, v16f32,
- SSEPackedSingle>, EVEX_V512,
+ SSEPackedSingle>, EVEX_V512, TB,
EVEX_CD8<32, CD8VF>;
// cvttps2udq (src, 0, mask-all-ones, sae-current)
defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
memopv8f64, f512mem, v8i32, v8f64,
- SSEPackedDouble>, EVEX_V512, VEX_W,
+ SSEPackedDouble>, EVEX_V512, TB, VEX_W,
EVEX_CD8<64, CD8VF>;
// cvttpd2udq (src, 0, mask-all-ones, sae-current)
defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
memopv16f32, f512mem, SSEPackedSingle>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
+ TB, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
memopv8f64, f512mem, SSEPackedDouble>, VEX_W,
- EVEX_V512, EVEX_CD8<64, CD8VF>;
+ TB, EVEX_V512, EVEX_CD8<64, CD8VF>;
def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src),
(v16i32 immAllZerosV), (i16 -1), imm:$rc)),
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
-multiclass avx512_f16c_ph2ps<RegisterClass destRC, RegisterClass srcRC,
- X86MemOperand x86memop, Intrinsic Int> {
+multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
+ X86MemOperand x86memop> {
def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
- [(set destRC:$dst, (Int srcRC:$src))]>, EVEX;
+ []>, EVEX;
let hasSideEffects = 0, mayLoad = 1 in
def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
}
-multiclass avx512_f16c_ps2ph<RegisterClass destRC, RegisterClass srcRC,
- X86MemOperand x86memop, Intrinsic Int> {
+multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
+ X86MemOperand x86memop> {
def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
(ins srcRC:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set destRC:$dst, (Int srcRC:$src1, imm:$src2))]>, EVEX;
+ "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, EVEX;
let hasSideEffects = 0, mayStore = 1 in
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
+ "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
}
-defm VCVTPH2PSZ : avx512_f16c_ph2ps<VR512, VR256X, f256mem,
- int_x86_avx512_vcvtph2ps_512>, EVEX_V512,
+defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
EVEX_CD8<32, CD8VH>;
-defm VCVTPS2PHZ : avx512_f16c_ps2ph<VR256X, VR512, f256mem,
- int_x86_avx512_vcvtps2ph_512>, EVEX_V512,
+defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
EVEX_CD8<32, CD8VH>;
+def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
+ imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
+ (VCVTPS2PHZrr VR512:$src, imm:$rc)>;
+
+def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
+ (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))),
+ (VCVTPH2PSZrr VR256X:$src)>;
+
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
"ucomiss">, TB, EVEX, VEX_LIG,
}
defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
- SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ SSEPackedSingle>, TB, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
SSEPackedDouble>, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;