AVX512: Implemented encoding and intrinsics for vplzcntq, vplzcntd, vpconflictq, vpconflictd
author Igor Breger <igor.breger@intel.com>
Thu, 3 Sep 2015 09:05:31 +0000 (09:05 +0000)
committer Igor Breger <igor.breger@intel.com>
Thu, 3 Sep 2015 09:05:31 +0000 (09:05 +0000)
Added tests for intrinsics and encoding.

Differential Revision: http://reviews.llvm.org/D11931

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246750 91177308-0d34-0410-b5e6-96231b3b80d8

13 files changed:
include/llvm/IR/IntrinsicsX86.td
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86ISelLowering.h
lib/Target/X86/X86InstrAVX512.td
lib/Target/X86/X86InstrFragmentsSIMD.td
lib/Target/X86/X86IntrinsicsInfo.h
test/CodeGen/X86/avx512-intrinsics.ll
test/CodeGen/X86/avx512cdvl-intrinsics.ll [new file with mode: 0644]
test/CodeGen/X86/vector-lzcnt-128.ll
test/CodeGen/X86/vector-lzcnt-256.ll
test/CodeGen/X86/vector-lzcnt-512.ll [new file with mode: 0644]
test/MC/X86/x86-64-avx512cd.s [new file with mode: 0644]
test/MC/X86/x86-64-avx512cd_vl.s [new file with mode: 0644]

index c9d584bd4ae4abd14d75f6c8804f83a7efb22237..82d57e344bcb80938313ba8a573540943e605c82 100644 (file)
@@ -5638,27 +5638,71 @@ let TargetPrefix = "x86" in {
                      llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
 }
 
-// AVX-512 conflict detection
+// AVX-512 conflict detection instructions, and
+// instructions that count the number of leading zero bits
 let TargetPrefix = "x86" in {
+  def int_x86_avx512_mask_conflict_d_128 :
+          GCCBuiltin<"__builtin_ia32_vpconflictsi_128_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_conflict_d_256 :
+          GCCBuiltin<"__builtin_ia32_vpconflictsi_256_mask">,
+          Intrinsic<[llvm_v8i32_ty],
+                    [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
   def int_x86_avx512_mask_conflict_d_512 :
           GCCBuiltin<"__builtin_ia32_vpconflictsi_512_mask">,
-          Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
-                    llvm_v16i32_ty, llvm_i16_ty],
+          Intrinsic<[llvm_v16i32_ty],
+                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+                    [IntrNoMem]>;
+
+  def int_x86_avx512_mask_conflict_q_128 :
+          GCCBuiltin<"__builtin_ia32_vpconflictdi_128_mask">,
+          Intrinsic<[llvm_v2i64_ty],
+                    [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_conflict_q_256 :
+          GCCBuiltin<"__builtin_ia32_vpconflictdi_256_mask">,
+          Intrinsic<[llvm_v4i64_ty],
+                    [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
                     [IntrNoMem]>;
   def int_x86_avx512_mask_conflict_q_512 :
           GCCBuiltin<"__builtin_ia32_vpconflictdi_512_mask">,
-          Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
-                    llvm_v8i64_ty, llvm_i8_ty],
+          Intrinsic<[llvm_v8i64_ty],
+                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+
+  def int_x86_avx512_mask_lzcnt_d_128 :
+          GCCBuiltin<"__builtin_ia32_vplzcntd_128_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_lzcnt_d_256 :
+          GCCBuiltin<"__builtin_ia32_vplzcntd_256_mask">,
+          Intrinsic<[llvm_v8i32_ty],
+                    [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
                     [IntrNoMem]>;
   def int_x86_avx512_mask_lzcnt_d_512 :
           GCCBuiltin<"__builtin_ia32_vplzcntd_512_mask">,
-          Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
-                    llvm_v16i32_ty, llvm_i16_ty],
+          Intrinsic<[llvm_v16i32_ty],
+                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+                    [IntrNoMem]>;
+
+  def int_x86_avx512_mask_lzcnt_q_128 :
+          GCCBuiltin<"__builtin_ia32_vplzcntq_128_mask">,
+          Intrinsic<[llvm_v2i64_ty],
+                    [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+ def int_x86_avx512_mask_lzcnt_q_256 :
+          GCCBuiltin<"__builtin_ia32_vplzcntq_256_mask">,
+          Intrinsic<[llvm_v4i64_ty],
+                    [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
                     [IntrNoMem]>;
   def int_x86_avx512_mask_lzcnt_q_512 :
           GCCBuiltin<"__builtin_ia32_vplzcntq_512_mask">,
-          Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
-                    llvm_v8i64_ty, llvm_i8_ty],
+          Intrinsic<[llvm_v8i64_ty],
+                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
                     [IntrNoMem]>;
 }
 
index fc6a904a5936233ae7bb68ee93d62c1dd05c17a0..4ec6133d6538f1b42bce506b263635934288cbc7 100644 (file)
@@ -1496,6 +1496,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     if (Subtarget->hasCDI()) {
       setOperationAction(ISD::CTLZ,             MVT::v8i64, Legal);
       setOperationAction(ISD::CTLZ,             MVT::v16i32, Legal);
+      setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v8i64, Legal);
+      setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v16i32, Legal);
+    }
+    if (Subtarget->hasVLX() && Subtarget->hasCDI()) {
+      setOperationAction(ISD::CTLZ,             MVT::v4i64, Legal);
+      setOperationAction(ISD::CTLZ,             MVT::v8i32, Legal);
+      setOperationAction(ISD::CTLZ,             MVT::v2i64, Legal);
+      setOperationAction(ISD::CTLZ,             MVT::v4i32, Legal);
+      setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v4i64, Legal);
+      setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v8i32, Legal);
+      setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v2i64, Legal);
+      setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v4i32, Legal);
     }
     if (Subtarget->hasDQI()) {
       setOperationAction(ISD::MUL,             MVT::v2i64, Legal);
@@ -19282,6 +19294,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FHADD:              return "X86ISD::FHADD";
   case X86ISD::FHSUB:              return "X86ISD::FHSUB";
   case X86ISD::ABS:                return "X86ISD::ABS";
+  case X86ISD::CONFLICT:           return "X86ISD::CONFLICT";
   case X86ISD::FMAX:               return "X86ISD::FMAX";
   case X86ISD::FMAX_RND:           return "X86ISD::FMAX_RND";
   case X86ISD::FMIN:               return "X86ISD::FMIN";
index ac518a29f3706ac00fe69267296aaee9e582d025..ba9d0ce69e2d27b525d332d6c6426eb1b75da32c 100644 (file)
@@ -240,6 +240,9 @@ namespace llvm {
       // Integer absolute value
       ABS,
 
+      // Detect Conflicts Within a Vector
+      CONFLICT,
+
       /// Floating point max and min.
       FMAX, FMIN,
 
index d93deaec55d57739fa688563371ab54d67b810c1..b87ce7c6c5feea23848b29b9c15a40168fc9e9e8 100644 (file)
@@ -6047,120 +6047,6 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd
 def v16i1sextv16i32  : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
 def v8i1sextv8i64  : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
 
-multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
-                        RegisterClass RC, RegisterClass KRC,
-                        X86MemOperand x86memop,
-                        X86MemOperand x86scalar_mop, string BrdcstStr> {
-  let hasSideEffects = 0 in {
-  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
-       (ins RC:$src),
-       !strconcat(OpcodeStr, "\t{$src, ${dst} |${dst}, $src}"),
-       []>, EVEX;
-  let mayLoad = 1 in
-  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
-       (ins x86memop:$src),
-       !strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
-       []>, EVEX;
-  let mayLoad = 1 in
-  def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
-       (ins x86scalar_mop:$src),
-       !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
-                  ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
-       []>, EVEX, EVEX_B;
-  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
-       (ins KRC:$mask, RC:$src),
-       !strconcat(OpcodeStr,
-                  "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
-       []>, EVEX, EVEX_KZ;
-  let mayLoad = 1 in
-  def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
-       (ins KRC:$mask, x86memop:$src),
-       !strconcat(OpcodeStr,
-                  "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
-       []>, EVEX, EVEX_KZ;
-  let mayLoad = 1 in
-  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
-       (ins KRC:$mask, x86scalar_mop:$src),
-       !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
-                  ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
-                  BrdcstStr, "}"),
-       []>, EVEX, EVEX_KZ, EVEX_B;
-
-  let Constraints = "$src1 = $dst" in {
-  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
-       (ins RC:$src1, KRC:$mask, RC:$src2),
-       !strconcat(OpcodeStr,
-                  "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
-       []>, EVEX, EVEX_K;
-  let mayLoad = 1 in
-  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
-       (ins RC:$src1, KRC:$mask, x86memop:$src2),
-       !strconcat(OpcodeStr,
-                  "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
-       []>, EVEX, EVEX_K;
-  let mayLoad = 1 in
-  def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
-       (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
-       !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
-                  ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
-       []>, EVEX, EVEX_K, EVEX_B;
-  }
-  }
-}
-
-let Predicates = [HasCDI] in {
-defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
-                    i512mem, i32mem, "{1to16}">,
-                    EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-
-defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
-                    i512mem, i64mem, "{1to8}">,
-                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-}
-
-def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
-                                              GR16:$mask),
-          (VPCONFLICTDrrk VR512:$src1,
-           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
-
-def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
-                                              GR8:$mask),
-          (VPCONFLICTQrrk VR512:$src1,
-           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
-
-let Predicates = [HasCDI] in {
-defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
-                    i512mem, i32mem, "{1to16}">,
-                    EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-
-defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
-                    i512mem, i64mem, "{1to8}">,
-                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-}
-
-def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
-                                              GR16:$mask),
-          (VPLZCNTDrrk VR512:$src1,
-           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
-
-def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
-                                              GR8:$mask),
-          (VPLZCNTQrrk VR512:$src1,
-           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
-
-def : Pat<(v16i32 (ctlz (loadv16i32 addr:$src))),
-          (VPLZCNTDrm addr:$src)>;
-def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
-          (VPLZCNTDrr VR512:$src)>;
-def : Pat<(v8i64 (ctlz (loadv8i64 addr:$src))),
-          (VPLZCNTQrm addr:$src)>;
-def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
-          (VPLZCNTQrr VR512:$src)>;
-
 def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
 def : Pat<(store (i1  1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
 def : Pat<(store (i1  0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
@@ -6782,6 +6668,17 @@ def : Pat<(xor
           (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
           (VPABSQZrr VR512:$src)>;
 
+multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
+
+  defm NAME :          avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, prd>;
+  let isCodeGenOnly = 1 in
+    defm NAME#_UNDEF : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr,
+                                             ctlz_zero_undef, prd>;
+}
+
+defm VPLZCNT    : avx512_ctlz<0x44, "vplzcnt", HasCDI>;
+defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>;
+
 //===----------------------------------------------------------------------===//
 // AVX-512 - Unpack Instructions
 //===----------------------------------------------------------------------===//
index 19bf986c33c951809c4ee3921686551eb21a0dc7..73653202382ae14eea05b6cba58df12b975675fb 100644 (file)
@@ -262,7 +262,9 @@ def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
 
 def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
 def X86VAlign  : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
-def X86Abs     : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
+
+def X86Abs      : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
+def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
 
 def X86PShufd  : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
 def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
index 409ba59b1dc44146ddda55632296fd412b9a0c1d..e2384958083b6da45dbe7bb5a0dc5a418866a46d 100644 (file)
@@ -455,7 +455,18 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::COMPRESS, 0),
   X86_INTRINSIC_DATA(avx512_mask_compress_q_512,  COMPRESS_EXPAND_IN_REG,
                      X86ISD::COMPRESS, 0),
-
+  X86_INTRINSIC_DATA(avx512_mask_conflict_d_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_conflict_d_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_conflict_d_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_conflict_q_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_conflict_q_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::CONFLICT, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTDQ2PD, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_256, INTR_TYPE_1OP_MASK,
@@ -686,6 +697,18 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::VGETMANT, 0),
   X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
                      X86ISD::VGETMANT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
+                     ISD::CTLZ, 0),
+  X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK,
+                     ISD::CTLZ, 0),
+  X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_512, INTR_TYPE_1OP_MASK,
+                     ISD::CTLZ, 0),
+  X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_128, INTR_TYPE_1OP_MASK,
+                     ISD::CTLZ, 0),
+  X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_256, INTR_TYPE_1OP_MASK,
+                     ISD::CTLZ, 0),
+  X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_512, INTR_TYPE_1OP_MASK,
+                     ISD::CTLZ, 0),
   X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
   X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
   X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
@@ -1288,7 +1311,6 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
                      X86ISD::FNMSUB_RND),
 
-
   X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
                     X86ISD::VPERMIV3, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
index 6e61c263ef0a14176b944065205daf86bdd6f023..6d18b25b7702f88fc8e379a903b776567d7129c1 100644 (file)
@@ -287,9 +287,10 @@ define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
 
 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
-  ; CHECK: movw $-1, %ax
-  ; CHECK: vpxor
-  ; CHECK: vpconflictd
+; CHECK-LABEL: test_conflict_d:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpconflictd %zmm0, %zmm0
+; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
   ret <16 x i32> %res
 }
@@ -297,9 +298,10 @@ define <16 x i32> @test_conflict_d(<16 x i32> %a) {
 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
 
 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
-  ; CHECK: movb $-1, %al
-  ; CHECK: vpxor
-  ; CHECK: vpconflictq
+; CHECK-LABEL: test_conflict_q:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpconflictq %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
   ret <8 x i64> %res
 }
@@ -307,21 +309,32 @@ define <8 x i64> @test_conflict_q(<8 x i64> %a) {
 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
 
 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
-  ; CHECK: vpconflictd
+; CHECK-LABEL: test_maskz_conflict_d:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpconflictd %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
   %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
   ret <16 x i32> %res
 }
 
 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
-  ; CHECK: vpconflictq
+; CHECK-LABEL: test_mask_conflict_q:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpconflictq %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
   %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
   ret <8 x i64> %res
 }
 
 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
-  ; CHECK: movw $-1, %ax
-  ; CHECK: vpxor
-  ; CHECK: vplzcntd
+; CHECK-LABEL: test_lzcnt_d:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vplzcntd %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
   ret <16 x i32> %res
 }
@@ -329,9 +342,10 @@ define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
 
 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
-  ; CHECK: movb $-1, %al
-  ; CHECK: vpxor
-  ; CHECK: vplzcntq
+; CHECK-LABEL: test_lzcnt_q:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vplzcntq %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
   ret <8 x i64> %res
 }
@@ -340,35 +354,28 @@ declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) no
 
 
 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
-  ; CHECK: vplzcntd
+; CHECK-LABEL: test_mask_lzcnt_d:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vplzcntd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
   %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
   ret <16 x i32> %res
 }
 
 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
-  ; CHECK: vplzcntq
+; CHECK-LABEL: test_mask_lzcnt_q:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vplzcntq %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
   ret <8 x i64> %res
 }
 
-define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
-  ; CHECK-LABEL: test_ctlz_d
-  ; CHECK: vplzcntd
-  %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
-  ret <16 x i32> %res
-}
-
-declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly
-
-define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
-  ; CHECK-LABEL: test_ctlz_q
-  ; CHECK: vplzcntq
-  %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
-  ret <8 x i64> %res
-}
-
-declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
-
 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK: vblendmps %zmm1, %zmm0
   %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/avx512cdvl-intrinsics.ll b/test/CodeGen/X86/avx512cdvl-intrinsics.ll
new file mode 100644 (file)
index 0000000..b82782c
--- /dev/null
@@ -0,0 +1,148 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl| FileCheck %s
+
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readonly
+
+declare <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vplzcntd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT:    vplzcntd %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
+  %res3 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
+  %res2 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res2, %res3
+  ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vplzcntd %ymm0, %ymm0
+; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vplzcntq %xmm0, %xmm0
+; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
+  %res1 = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
+  %res2 = add <2 x i64> %res, %res1
+  ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vplzcntq %ymm0, %ymm0
+; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
+  %res1 = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
+  %res2 = add <4 x i64> %res, %res1
+  ret <4 x i64> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vpconflictd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT:    vpconflictd %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
+  %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
+  %res2 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res2, %res3
+  ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vpconflictd %ymm0, %ymm0
+; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vpconflictq %xmm0, %xmm0
+; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
+  %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
+  %res2 = add <2 x i64> %res, %res1
+  ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vpconflictq %ymm0, %ymm0
+; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
+  %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
+  %res2 = add <4 x i64> %res, %res1
+  ret <4 x i64> %res2
+}
+
index c18bcfb95b7e63ec065272e9baa7593d824c1b21..e8922c31e82d16a79d49c2fd3847b5e428e5e5ad 100644 (file)
@@ -4,6 +4,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl| FileCheck %s --check-prefix=AVX512VLCD
 
 target triple = "x86_64-unknown-unknown"
 
@@ -93,6 +94,11 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
 ; AVX-NEXT:    vmovq %rax, %xmm0
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
+
+; AVX512VLCD-LABEL: testv2i64:
+; AVX512VLCD:       ## BB#0:
+; AVX512VLCD-NEXT:    vplzcntq %xmm0, %xmm0
+; AVX512VLCD-NEXT:    retq
   %out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 0)
   ret <2 x i64> %out
 }
@@ -168,6 +174,11 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
 ; AVX-NEXT:    vmovq %rax, %xmm0
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
+
+; AVX512VLCD-LABEL: testv2i64u:
+; AVX512VLCD:       ## BB#0:
+; AVX512VLCD-NEXT:    vplzcntq %xmm0, %xmm0
+; AVX512VLCD-NEXT:    retq
   %out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 -1)
   ret <2 x i64> %out
 }
@@ -319,6 +330,11 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
 ; AVX-NEXT:    xorl $31, %eax
 ; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
 ; AVX-NEXT:    retq
+
+; AVX512VLCD-LABEL: testv4i32:
+; AVX512VLCD:       ## BB#0:
+; AVX512VLCD-NEXT:    vplzcntd %xmm0, %xmm0
+; AVX512VLCD-NEXT:    retq
   %out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %in, i1 0)
   ret <4 x i32> %out
 }
@@ -445,6 +461,11 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
 ; AVX-NEXT:    xorl $31, %eax
 ; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
 ; AVX-NEXT:    retq
+
+; AVX512VLCD-LABEL: testv4i32u:
+; AVX512VLCD:       ## BB#0:
+; AVX512VLCD-NEXT:    vplzcntd %xmm0, %xmm0
+; AVX512VLCD-NEXT:    retq
   %out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %in, i1 -1)
   ret <4 x i32> %out
 }
index 2083fa8d436f8c1b933b4cb52ab22e1ae2252907..66b0ab24b02218795b7f2a6ff3fb77b0f89f0f75 100644 (file)
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl| FileCheck %s --check-prefix=AVX512VLCD
 
 target triple = "x86_64-unknown-unknown"
 
@@ -61,6 +62,11 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
+
+; AVX512VLCD-LABEL: testv4i64:
+; AVX512VLCD:       ## BB#0:
+; AVX512VLCD-NEXT:    vplzcntq %ymm0, %ymm0
+; AVX512VLCD-NEXT:    retq
   %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 0)
   ret <4 x i64> %out
 }
@@ -113,6 +119,11 @@ define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
+
+; AVX512VLCD-LABEL: testv4i64u:
+; AVX512VLCD:       ## BB#0:
+; AVX512VLCD-NEXT:    vplzcntq %ymm0, %ymm0
+; AVX512VLCD-NEXT:    retq
   %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 -1)
   ret <4 x i64> %out
 }
@@ -211,6 +222,11 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
 ; AVX2-NEXT:    vpinsrd $3, %ecx, %xmm2, %xmm0
 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
+
+; AVX512VLCD-LABEL: testv8i32:
+; AVX512VLCD:       ## BB#0:
+; AVX512VLCD-NEXT:    vplzcntd %ymm0, %ymm0
+; AVX512VLCD-NEXT:    retq
   %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 0)
   ret <8 x i32> %out
 }
@@ -291,6 +307,11 @@ define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
 ; AVX2-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
+
+; AVX512VLCD-LABEL: testv8i32u:
+; AVX512VLCD:       ## BB#0:
+; AVX512VLCD-NEXT:    vplzcntd %ymm0, %ymm0
+; AVX512VLCD-NEXT:    retq
   %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 -1)
   ret <8 x i32> %out
 }
diff --git a/test/CodeGen/X86/vector-lzcnt-512.ll b/test/CodeGen/X86/vector-lzcnt-512.ll
new file mode 100644 (file)
index 0000000..8fd995d
--- /dev/null
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=AVX512CD
+
+define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
+; AVX512CD-LABEL: test_ctlz_d:
+; AVX512CD:       ## BB#0:
+; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT:    retq
+  %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
+  ret <16 x i32> %res
+}
+
+define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
+; AVX512CD-LABEL: test_ctlz_q:
+; AVX512CD:       ## BB#0:
+; AVX512CD-NEXT:    vplzcntq %zmm0, %zmm0
+; AVX512CD-NEXT:    retq
+  %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
+  ret <8 x i64> %res
+}
+
+define <16 x i32> @test_ctlz_d_undef(<16 x i32> %a) {
+; AVX512CD-LABEL: test_ctlz_d_undef:
+; AVX512CD:       ## BB#0:
+; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT:    retq
+  %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 -1)
+  ret <16 x i32> %res
+}
+
+define <8 x i64> @test_ctlz_q_undef(<8 x i64> %a) {
+; AVX512CD-LABEL: test_ctlz_q_undef:
+; AVX512CD:       ## BB#0:
+; AVX512CD-NEXT:    vplzcntq %zmm0, %zmm0
+; AVX512CD-NEXT:    retq
+  %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 -1)
+  ret <8 x i64> %res
+}
+
+declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly
+declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
diff --git a/test/MC/X86/x86-64-avx512cd.s b/test/MC/X86/x86-64-avx512cd.s
new file mode 100644 (file)
index 0000000..49c5ccf
--- /dev/null
@@ -0,0 +1,450 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512cd --show-encoding %s | FileCheck %s
+
+// CHECK: vplzcntq  %zmm22, %zmm21  
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x44,0xee]
+          vplzcntq  %zmm22, %zmm21  
+
+// CHECK: vplzcntq  %zmm22, %zmm21 {%k7} 
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4f,0x44,0xee]
+          vplzcntq  %zmm22, %zmm21 {%k7} 
+
+// CHECK: vplzcntq  %zmm22, %zmm21 {%k7} {z} 
+// CHECK: encoding: [0x62,0xa2,0xfd,0xcf,0x44,0xee]
+          vplzcntq  %zmm22, %zmm21 {%k7} {z} 
+
+// CHECK: vplzcntq  (%rcx), %zmm21  
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x29]
+          vplzcntq  (%rcx), %zmm21  
+
+// CHECK: vplzcntq  291(%rax,%r14,8), %zmm21 
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x44,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vplzcntq  291(%rax,%r14,8), %zmm21 
+
+// CHECK: vplzcntq  (%rcx){1to8}, %zmm21 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x29]
+          vplzcntq  (%rcx){1to8}, %zmm21 
+
+// CHECK: vplzcntq  4064(%rdx), %zmm21 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0xaa,0xe0,0x0f,0x00,0x00]
+          vplzcntq  4064(%rdx), %zmm21 
+
+// CHECK: vplzcntq  4096(%rdx), %zmm21 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x6a,0x40]
+          vplzcntq  4096(%rdx), %zmm21 
+
+// CHECK: vplzcntq  -4096(%rdx), %zmm21 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x6a,0xc0]
+          vplzcntq  -4096(%rdx), %zmm21 
+
+// CHECK: vplzcntq  -4128(%rdx), %zmm21 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0xaa,0xe0,0xef,0xff,0xff]
+          vplzcntq  -4128(%rdx), %zmm21 
+
+// CHECK: vplzcntq  1016(%rdx){1to8}, %zmm21 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x6a,0x7f]
+          vplzcntq  1016(%rdx){1to8}, %zmm21 
+
+// CHECK: vplzcntq  1024(%rdx){1to8}, %zmm21 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0xaa,0x00,0x04,0x00,0x00]
+          vplzcntq  1024(%rdx){1to8}, %zmm21 
+
+// CHECK: vplzcntq  -1024(%rdx){1to8}, %zmm21 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x6a,0x80]
+          vplzcntq  -1024(%rdx){1to8}, %zmm21 
+
+// CHECK: vplzcntq  -1032(%rdx){1to8}, %zmm21 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0xaa,0xf8,0xfb,0xff,0xff]
+          vplzcntq  -1032(%rdx){1to8}, %zmm21 
+
+// CHECK: vplzcntq  %zmm27, %zmm23  
+// CHECK: encoding: [0x62,0x82,0xfd,0x48,0x44,0xfb]
+          vplzcntq  %zmm27, %zmm23  
+
+// CHECK: vplzcntq  %zmm27, %zmm23 {%k5} 
+// CHECK: encoding: [0x62,0x82,0xfd,0x4d,0x44,0xfb]
+          vplzcntq  %zmm27, %zmm23 {%k5} 
+
+// CHECK: vplzcntq  %zmm27, %zmm23 {%k5} {z} 
+// CHECK: encoding: [0x62,0x82,0xfd,0xcd,0x44,0xfb]
+          vplzcntq  %zmm27, %zmm23 {%k5} {z} 
+
+// CHECK: vplzcntq  (%rcx), %zmm23  
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x39]
+          vplzcntq  (%rcx), %zmm23  
+
+// CHECK: vplzcntq  4660(%rax,%r14,8), %zmm23 
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x44,0xbc,0xf0,0x34,0x12,0x00,0x00]
+          vplzcntq  4660(%rax,%r14,8), %zmm23 
+
+// CHECK: vplzcntq  (%rcx){1to8}, %zmm23 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x39]
+          vplzcntq  (%rcx){1to8}, %zmm23 
+
+// CHECK: vplzcntq  4064(%rdx), %zmm23 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0xba,0xe0,0x0f,0x00,0x00]
+          vplzcntq  4064(%rdx), %zmm23 
+
+// CHECK: vplzcntq  4096(%rdx), %zmm23 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x7a,0x40]
+          vplzcntq  4096(%rdx), %zmm23 
+
+// CHECK: vplzcntq  -4096(%rdx), %zmm23 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x7a,0xc0]
+          vplzcntq  -4096(%rdx), %zmm23 
+
+// CHECK: vplzcntq  -4128(%rdx), %zmm23 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0xba,0xe0,0xef,0xff,0xff]
+          vplzcntq  -4128(%rdx), %zmm23 
+
+// CHECK: vplzcntq  1016(%rdx){1to8}, %zmm23 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x7a,0x7f]
+          vplzcntq  1016(%rdx){1to8}, %zmm23 
+
+// CHECK: vplzcntq  1024(%rdx){1to8}, %zmm23 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0xba,0x00,0x04,0x00,0x00]
+          vplzcntq  1024(%rdx){1to8}, %zmm23 
+
+// CHECK: vplzcntq  -1024(%rdx){1to8}, %zmm23 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x7a,0x80]
+          vplzcntq  -1024(%rdx){1to8}, %zmm23 
+
+// CHECK: vplzcntq  -1032(%rdx){1to8}, %zmm23 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0xba,0xf8,0xfb,0xff,0xff]
+          vplzcntq  -1032(%rdx){1to8}, %zmm23 
+
+// CHECK: vplzcntd  %zmm22, %zmm25  
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x44,0xce]
+          vplzcntd  %zmm22, %zmm25  
+
+// CHECK: vplzcntd  %zmm22, %zmm25 {%k2} 
+// CHECK: encoding: [0x62,0x22,0x7d,0x4a,0x44,0xce]
+          vplzcntd  %zmm22, %zmm25 {%k2} 
+
+// CHECK: vplzcntd  %zmm22, %zmm25 {%k2} {z} 
+// CHECK: encoding: [0x62,0x22,0x7d,0xca,0x44,0xce]
+          vplzcntd  %zmm22, %zmm25 {%k2} {z} 
+
+// CHECK: vplzcntd  (%rcx), %zmm25  
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x09]
+          vplzcntd  (%rcx), %zmm25  
+
+// CHECK: vplzcntd  291(%rax,%r14,8), %zmm25 
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x44,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vplzcntd  291(%rax,%r14,8), %zmm25 
+
+// CHECK: vplzcntd  (%rcx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x09]
+          vplzcntd  (%rcx){1to16}, %zmm25 
+
+// CHECK: vplzcntd  4064(%rdx), %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x8a,0xe0,0x0f,0x00,0x00]
+          vplzcntd  4064(%rdx), %zmm25 
+
+// CHECK: vplzcntd  4096(%rdx), %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x4a,0x40]
+          vplzcntd  4096(%rdx), %zmm25 
+
+// CHECK: vplzcntd  -4096(%rdx), %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x4a,0xc0]
+          vplzcntd  -4096(%rdx), %zmm25 
+
+// CHECK: vplzcntd  -4128(%rdx), %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x8a,0xe0,0xef,0xff,0xff]
+          vplzcntd  -4128(%rdx), %zmm25 
+
+// CHECK: vplzcntd  508(%rdx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x4a,0x7f]
+          vplzcntd  508(%rdx){1to16}, %zmm25 
+
+// CHECK: vplzcntd  512(%rdx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x8a,0x00,0x02,0x00,0x00]
+          vplzcntd  512(%rdx){1to16}, %zmm25 
+
+// CHECK: vplzcntd  -512(%rdx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x4a,0x80]
+          vplzcntd  -512(%rdx){1to16}, %zmm25 
+
+// CHECK: vplzcntd  -516(%rdx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x8a,0xfc,0xfd,0xff,0xff]
+          vplzcntd  -516(%rdx){1to16}, %zmm25 
+
+// CHECK: vplzcntd  %zmm22, %zmm21  
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x44,0xee]
+          vplzcntd  %zmm22, %zmm21  
+
+// CHECK: vplzcntd  %zmm22, %zmm21 {%k3} 
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4b,0x44,0xee]
+          vplzcntd  %zmm22, %zmm21 {%k3} 
+
+// CHECK: vplzcntd  %zmm22, %zmm21 {%k3} {z} 
+// CHECK: encoding: [0x62,0xa2,0x7d,0xcb,0x44,0xee]
+          vplzcntd  %zmm22, %zmm21 {%k3} {z} 
+
+// CHECK: vplzcntd  (%rcx), %zmm21  
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0x29]
+          vplzcntd  (%rcx), %zmm21  
+
+// CHECK: vplzcntd  4660(%rax,%r14,8), %zmm21 
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x44,0xac,0xf0,0x34,0x12,0x00,0x00]
+          vplzcntd  4660(%rax,%r14,8), %zmm21 
+
+// CHECK: vplzcntd  (%rcx){1to16}, %zmm21 
+// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0x29]
+          vplzcntd  (%rcx){1to16}, %zmm21 
+
+// CHECK: vplzcntd  4064(%rdx), %zmm21 
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0xaa,0xe0,0x0f,0x00,0x00]
+          vplzcntd  4064(%rdx), %zmm21 
+
+// CHECK: vplzcntd  4096(%rdx), %zmm21 
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0x6a,0x40]
+          vplzcntd  4096(%rdx), %zmm21 
+
+// CHECK: vplzcntd  -4096(%rdx), %zmm21 
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0x6a,0xc0]
+          vplzcntd  -4096(%rdx), %zmm21 
+
+// CHECK: vplzcntd  -4128(%rdx), %zmm21 
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0xaa,0xe0,0xef,0xff,0xff]
+          vplzcntd  -4128(%rdx), %zmm21 
+
+// CHECK: vplzcntd  508(%rdx){1to16}, %zmm21 
+// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0x6a,0x7f]
+          vplzcntd  508(%rdx){1to16}, %zmm21 
+
+// CHECK: vplzcntd  512(%rdx){1to16}, %zmm21 
+// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0xaa,0x00,0x02,0x00,0x00]
+          vplzcntd  512(%rdx){1to16}, %zmm21 
+
+// CHECK: vplzcntd  -512(%rdx){1to16}, %zmm21 
+// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0x6a,0x80]
+          vplzcntd  -512(%rdx){1to16}, %zmm21 
+
+// CHECK: vplzcntd  -516(%rdx){1to16}, %zmm21 
+// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0xaa,0xfc,0xfd,0xff,0xff]
+          vplzcntd  -516(%rdx){1to16}, %zmm21 
+
+// CHECK: vpconflictq   %zmm25, %zmm20  
+// CHECK: encoding: [0x62,0x82,0xfd,0x48,0xc4,0xe1]
+          vpconflictq   %zmm25, %zmm20  
+
+// CHECK: vpconflictq   %zmm25, %zmm20 {%k6} 
+// CHECK: encoding: [0x62,0x82,0xfd,0x4e,0xc4,0xe1]
+          vpconflictq   %zmm25, %zmm20 {%k6} 
+
+// CHECK: vpconflictq   %zmm25, %zmm20 {%k6} {z} 
+// CHECK: encoding: [0x62,0x82,0xfd,0xce,0xc4,0xe1]
+          vpconflictq   %zmm25, %zmm20 {%k6} {z} 
+
+// CHECK: vpconflictq   (%rcx), %zmm20  
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x21]
+          vpconflictq   (%rcx), %zmm20  
+
+// CHECK: vpconflictq   291(%rax,%r14,8), %zmm20 
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0xc4,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpconflictq   291(%rax,%r14,8), %zmm20 
+
+// CHECK: vpconflictq   (%rcx){1to8}, %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x21]
+          vpconflictq   (%rcx){1to8}, %zmm20 
+
+// CHECK: vpconflictq   4064(%rdx), %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0xa2,0xe0,0x0f,0x00,0x00]
+          vpconflictq   4064(%rdx), %zmm20 
+
+// CHECK: vpconflictq   4096(%rdx), %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x62,0x40]
+          vpconflictq   4096(%rdx), %zmm20 
+
+// CHECK: vpconflictq   -4096(%rdx), %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x62,0xc0]
+          vpconflictq   -4096(%rdx), %zmm20 
+
+// CHECK: vpconflictq   -4128(%rdx), %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0xa2,0xe0,0xef,0xff,0xff]
+          vpconflictq   -4128(%rdx), %zmm20 
+
+// CHECK: vpconflictq   1016(%rdx){1to8}, %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x62,0x7f]
+          vpconflictq   1016(%rdx){1to8}, %zmm20 
+
+// CHECK: vpconflictq   1024(%rdx){1to8}, %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0xa2,0x00,0x04,0x00,0x00]
+          vpconflictq   1024(%rdx){1to8}, %zmm20 
+
+// CHECK: vpconflictq   -1024(%rdx){1to8}, %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x62,0x80]
+          vpconflictq   -1024(%rdx){1to8}, %zmm20 
+
+// CHECK: vpconflictq   -1032(%rdx){1to8}, %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0xa2,0xf8,0xfb,0xff,0xff]
+          vpconflictq   -1032(%rdx){1to8}, %zmm20 
+
+// CHECK: vpconflictq   %zmm21, %zmm17  
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0xc4,0xcd]
+          vpconflictq   %zmm21, %zmm17  
+
+// CHECK: vpconflictq   %zmm21, %zmm17 {%k6} 
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4e,0xc4,0xcd]
+          vpconflictq   %zmm21, %zmm17 {%k6} 
+
+// CHECK: vpconflictq   %zmm21, %zmm17 {%k6} {z} 
+// CHECK: encoding: [0x62,0xa2,0xfd,0xce,0xc4,0xcd]
+          vpconflictq   %zmm21, %zmm17 {%k6} {z} 
+
+// CHECK: vpconflictq   (%rcx), %zmm17  
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x09]
+          vpconflictq   (%rcx), %zmm17  
+
+// CHECK: vpconflictq   4660(%rax,%r14,8), %zmm17 
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0xc4,0x8c,0xf0,0x34,0x12,0x00,0x00]
+          vpconflictq   4660(%rax,%r14,8), %zmm17 
+
+// CHECK: vpconflictq   (%rcx){1to8}, %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x09]
+          vpconflictq   (%rcx){1to8}, %zmm17 
+
+// CHECK: vpconflictq   4064(%rdx), %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x8a,0xe0,0x0f,0x00,0x00]
+          vpconflictq   4064(%rdx), %zmm17 
+
+// CHECK: vpconflictq   4096(%rdx), %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x4a,0x40]
+          vpconflictq   4096(%rdx), %zmm17 
+
+// CHECK: vpconflictq   -4096(%rdx), %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x4a,0xc0]
+          vpconflictq   -4096(%rdx), %zmm17 
+
+// CHECK: vpconflictq   -4128(%rdx), %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x8a,0xe0,0xef,0xff,0xff]
+          vpconflictq   -4128(%rdx), %zmm17 
+
+// CHECK: vpconflictq   1016(%rdx){1to8}, %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x4a,0x7f]
+          vpconflictq   1016(%rdx){1to8}, %zmm17 
+
+// CHECK: vpconflictq   1024(%rdx){1to8}, %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x8a,0x00,0x04,0x00,0x00]
+          vpconflictq   1024(%rdx){1to8}, %zmm17 
+
+// CHECK: vpconflictq   -1024(%rdx){1to8}, %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x4a,0x80]
+          vpconflictq   -1024(%rdx){1to8}, %zmm17 
+
+// CHECK: vpconflictq   -1032(%rdx){1to8}, %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x8a,0xf8,0xfb,0xff,0xff]
+          vpconflictq   -1032(%rdx){1to8}, %zmm17 
+
+// CHECK: vpconflictd   %zmm19, %zmm25  
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0xc4,0xcb]
+          vpconflictd   %zmm19, %zmm25  
+
+// CHECK: vpconflictd   %zmm19, %zmm25 {%k4} 
+// CHECK: encoding: [0x62,0x22,0x7d,0x4c,0xc4,0xcb]
+          vpconflictd   %zmm19, %zmm25 {%k4} 
+
+// CHECK: vpconflictd   %zmm19, %zmm25 {%k4} {z} 
+// CHECK: encoding: [0x62,0x22,0x7d,0xcc,0xc4,0xcb]
+          vpconflictd   %zmm19, %zmm25 {%k4} {z} 
+
+// CHECK: vpconflictd   (%rcx), %zmm25  
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x09]
+          vpconflictd   (%rcx), %zmm25  
+
+// CHECK: vpconflictd   291(%rax,%r14,8), %zmm25 
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0xc4,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpconflictd   291(%rax,%r14,8), %zmm25 
+
+// CHECK: vpconflictd   (%rcx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x09]
+          vpconflictd   (%rcx){1to16}, %zmm25 
+
+
+// CHECK: vpconflictd   4064(%rdx), %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x8a,0xe0,0x0f,0x00,0x00]
+          vpconflictd   4064(%rdx), %zmm25 
+
+// CHECK: vpconflictd   4096(%rdx), %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x4a,0x40]
+          vpconflictd   4096(%rdx), %zmm25 
+
+// CHECK: vpconflictd   -4096(%rdx), %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x4a,0xc0]
+          vpconflictd   -4096(%rdx), %zmm25 
+
+// CHECK: vpconflictd   -4128(%rdx), %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x8a,0xe0,0xef,0xff,0xff]
+          vpconflictd   -4128(%rdx), %zmm25 
+
+// CHECK: vpconflictd   508(%rdx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x4a,0x7f]
+          vpconflictd   508(%rdx){1to16}, %zmm25 
+
+// CHECK: vpconflictd   512(%rdx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x8a,0x00,0x02,0x00,0x00]
+          vpconflictd   512(%rdx){1to16}, %zmm25 
+
+// CHECK: vpconflictd   -512(%rdx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x4a,0x80]
+          vpconflictd   -512(%rdx){1to16}, %zmm25 
+
+// CHECK: vpconflictd   -516(%rdx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x8a,0xfc,0xfd,0xff,0xff]
+          vpconflictd   -516(%rdx){1to16}, %zmm25 
+
+// CHECK: vpconflictd   %zmm21, %zmm26  
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0xc4,0xd5]
+          vpconflictd   %zmm21, %zmm26  
+
+// CHECK: vpconflictd   %zmm21, %zmm26 {%k4} 
+// CHECK: encoding: [0x62,0x22,0x7d,0x4c,0xc4,0xd5]
+          vpconflictd   %zmm21, %zmm26 {%k4} 
+
+// CHECK: vpconflictd   %zmm21, %zmm26 {%k4} {z} 
+// CHECK: encoding: [0x62,0x22,0x7d,0xcc,0xc4,0xd5]
+          vpconflictd   %zmm21, %zmm26 {%k4} {z} 
+
+// CHECK: vpconflictd   (%rcx), %zmm26  
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x11]
+          vpconflictd   (%rcx), %zmm26  
+
+// CHECK: vpconflictd   4660(%rax,%r14,8), %zmm26 
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0xc4,0x94,0xf0,0x34,0x12,0x00,0x00]
+          vpconflictd   4660(%rax,%r14,8), %zmm26 
+
+// CHECK: vpconflictd   (%rcx){1to16}, %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x11]
+          vpconflictd   (%rcx){1to16}, %zmm26 
+
+// CHECK: vpconflictd   4064(%rdx), %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x92,0xe0,0x0f,0x00,0x00]
+          vpconflictd   4064(%rdx), %zmm26 
+
+// CHECK: vpconflictd   4096(%rdx), %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x52,0x40]
+          vpconflictd   4096(%rdx), %zmm26 
+
+// CHECK: vpconflictd   -4096(%rdx), %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x52,0xc0]
+          vpconflictd   -4096(%rdx), %zmm26 
+
+// CHECK: vpconflictd   -4128(%rdx), %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x92,0xe0,0xef,0xff,0xff]
+          vpconflictd   -4128(%rdx), %zmm26 
+
+// CHECK: vpconflictd   508(%rdx){1to16}, %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x52,0x7f]
+          vpconflictd   508(%rdx){1to16}, %zmm26 
+
+// CHECK: vpconflictd   512(%rdx){1to16}, %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x92,0x00,0x02,0x00,0x00]
+          vpconflictd   512(%rdx){1to16}, %zmm26 
+
+// CHECK: vpconflictd   -512(%rdx){1to16}, %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x52,0x80]
+          vpconflictd   -512(%rdx){1to16}, %zmm26 
+
+// CHECK: vpconflictd   -516(%rdx){1to16}, %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x92,0xfc,0xfd,0xff,0xff]
+          vpconflictd   -516(%rdx){1to16}, %zmm26 
diff --git a/test/MC/X86/x86-64-avx512cd_vl.s b/test/MC/X86/x86-64-avx512cd_vl.s
new file mode 100644 (file)
index 0000000..13126ec
--- /dev/null
@@ -0,0 +1,898 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl  --show-encoding %s | FileCheck %s
+
+// CHECK: vplzcntq %xmm20, %xmm18
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0x44,0xd4]
+          vplzcntq %xmm20, %xmm18
+
+// CHECK: vplzcntq %xmm20, %xmm18 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x09,0x44,0xd4]
+          vplzcntq %xmm20, %xmm18 {%k1}
+
+// CHECK: vplzcntq %xmm20, %xmm18 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x89,0x44,0xd4]
+          vplzcntq %xmm20, %xmm18 {%k1} {z}
+
+// CHECK: vplzcntq (%rcx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x44,0x11]
+          vplzcntq (%rcx), %xmm18
+
+// CHECK: vplzcntq 291(%rax,%r14,8), %xmm18
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0x44,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vplzcntq 291(%rax,%r14,8), %xmm18
+
+// CHECK: vplzcntq (%rcx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x44,0x11]
+          vplzcntq (%rcx){1to2}, %xmm18
+
+// CHECK: vplzcntq 2032(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x44,0x52,0x7f]
+          vplzcntq 2032(%rdx), %xmm18
+
+// CHECK: vplzcntq 2048(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x44,0x92,0x00,0x08,0x00,0x00]
+          vplzcntq 2048(%rdx), %xmm18
+
+// CHECK: vplzcntq -2048(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x44,0x52,0x80]
+          vplzcntq -2048(%rdx), %xmm18
+
+// CHECK: vplzcntq -2064(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x44,0x92,0xf0,0xf7,0xff,0xff]
+          vplzcntq -2064(%rdx), %xmm18
+
+// CHECK: vplzcntq 1016(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x44,0x52,0x7f]
+          vplzcntq 1016(%rdx){1to2}, %xmm18
+
+// CHECK: vplzcntq 1024(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x44,0x92,0x00,0x04,0x00,0x00]
+          vplzcntq 1024(%rdx){1to2}, %xmm18
+
+// CHECK: vplzcntq -1024(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x44,0x52,0x80]
+          vplzcntq -1024(%rdx){1to2}, %xmm18
+
+// CHECK: vplzcntq -1032(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x44,0x92,0xf8,0xfb,0xff,0xff]
+          vplzcntq -1032(%rdx){1to2}, %xmm18
+
+// CHECK: vplzcntq %ymm22, %ymm21
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x44,0xee]
+          vplzcntq %ymm22, %ymm21
+
+// CHECK: vplzcntq %ymm22, %ymm21 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x2f,0x44,0xee]
+          vplzcntq %ymm22, %ymm21 {%k7}
+
+// CHECK: vplzcntq %ymm22, %ymm21 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0xaf,0x44,0xee]
+          vplzcntq %ymm22, %ymm21 {%k7} {z}
+
+// CHECK: vplzcntq (%rcx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x44,0x29]
+          vplzcntq (%rcx), %ymm21
+
+// CHECK: vplzcntq 291(%rax,%r14,8), %ymm21
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x44,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vplzcntq 291(%rax,%r14,8), %ymm21
+
+// CHECK: vplzcntq (%rcx){1to4}, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x44,0x29]
+          vplzcntq (%rcx){1to4}, %ymm21
+
+// CHECK: vplzcntq 4064(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x44,0x6a,0x7f]
+          vplzcntq 4064(%rdx), %ymm21
+
+// CHECK: vplzcntq 4096(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x44,0xaa,0x00,0x10,0x00,0x00]
+          vplzcntq 4096(%rdx), %ymm21
+
+// CHECK: vplzcntq -4096(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x44,0x6a,0x80]
+          vplzcntq -4096(%rdx), %ymm21
+
+// CHECK: vplzcntq -4128(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x44,0xaa,0xe0,0xef,0xff,0xff]
+          vplzcntq -4128(%rdx), %ymm21
+
+// CHECK: vplzcntq 1016(%rdx){1to4}, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x44,0x6a,0x7f]
+          vplzcntq 1016(%rdx){1to4}, %ymm21
+
+// CHECK: vplzcntq 1024(%rdx){1to4}, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x44,0xaa,0x00,0x04,0x00,0x00]
+          vplzcntq 1024(%rdx){1to4}, %ymm21
+
+// CHECK: vplzcntq -1024(%rdx){1to4}, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x44,0x6a,0x80]
+          vplzcntq -1024(%rdx){1to4}, %ymm21
+
+// CHECK: vplzcntq -1032(%rdx){1to4}, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x44,0xaa,0xf8,0xfb,0xff,0xff]
+          vplzcntq -1032(%rdx){1to4}, %ymm21
+
+// CHECK: vplzcntq %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x22,0xfd,0x08,0x44,0xc4]
+          vplzcntq %xmm20, %xmm24
+
+// CHECK: vplzcntq %xmm20, %xmm24 {%k3}
+// CHECK:  encoding: [0x62,0x22,0xfd,0x0b,0x44,0xc4]
+          vplzcntq %xmm20, %xmm24 {%k3}
+
+// CHECK: vplzcntq %xmm20, %xmm24 {%k3} {z}
+// CHECK:  encoding: [0x62,0x22,0xfd,0x8b,0x44,0xc4]
+          vplzcntq %xmm20, %xmm24 {%k3} {z}
+
+// CHECK: vplzcntq (%rcx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x08,0x44,0x01]
+          vplzcntq (%rcx), %xmm24
+
+// CHECK: vplzcntq 4660(%rax,%r14,8), %xmm24
+// CHECK:  encoding: [0x62,0x22,0xfd,0x08,0x44,0x84,0xf0,0x34,0x12,0x00,0x00]
+          vplzcntq 4660(%rax,%r14,8), %xmm24
+
+// CHECK: vplzcntq (%rcx){1to2}, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x18,0x44,0x01]
+          vplzcntq (%rcx){1to2}, %xmm24
+
+// CHECK: vplzcntq 2032(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x08,0x44,0x42,0x7f]
+          vplzcntq 2032(%rdx), %xmm24
+
+// CHECK: vplzcntq 2048(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x08,0x44,0x82,0x00,0x08,0x00,0x00]
+          vplzcntq 2048(%rdx), %xmm24
+
+// CHECK: vplzcntq -2048(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x08,0x44,0x42,0x80]
+          vplzcntq -2048(%rdx), %xmm24
+
+// CHECK: vplzcntq -2064(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x08,0x44,0x82,0xf0,0xf7,0xff,0xff]
+          vplzcntq -2064(%rdx), %xmm24
+
+// CHECK: vplzcntq 1016(%rdx){1to2}, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x18,0x44,0x42,0x7f]
+          vplzcntq 1016(%rdx){1to2}, %xmm24
+
+// CHECK: vplzcntq 1024(%rdx){1to2}, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x18,0x44,0x82,0x00,0x04,0x00,0x00]
+          vplzcntq 1024(%rdx){1to2}, %xmm24
+
+// CHECK: vplzcntq -1024(%rdx){1to2}, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x18,0x44,0x42,0x80]
+          vplzcntq -1024(%rdx){1to2}, %xmm24
+
+// CHECK: vplzcntq -1032(%rdx){1to2}, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x18,0x44,0x82,0xf8,0xfb,0xff,0xff]
+          vplzcntq -1032(%rdx){1to2}, %xmm24
+
+// CHECK: vplzcntq %ymm27, %ymm23
+// CHECK:  encoding: [0x62,0x82,0xfd,0x28,0x44,0xfb]
+          vplzcntq %ymm27, %ymm23
+
+// CHECK: vplzcntq %ymm27, %ymm23 {%k5}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x2d,0x44,0xfb]
+          vplzcntq %ymm27, %ymm23 {%k5}
+
+// CHECK: vplzcntq %ymm27, %ymm23 {%k5} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0xad,0x44,0xfb]
+          vplzcntq %ymm27, %ymm23 {%k5} {z}
+
+// CHECK: vplzcntq (%rcx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x44,0x39]
+          vplzcntq (%rcx), %ymm23
+
+// CHECK: vplzcntq 4660(%rax,%r14,8), %ymm23
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x44,0xbc,0xf0,0x34,0x12,0x00,0x00]
+          vplzcntq 4660(%rax,%r14,8), %ymm23
+
+// CHECK: vplzcntq (%rcx){1to4}, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x44,0x39]
+          vplzcntq (%rcx){1to4}, %ymm23
+
+// CHECK: vplzcntq 4064(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x44,0x7a,0x7f]
+          vplzcntq 4064(%rdx), %ymm23
+
+// CHECK: vplzcntq 4096(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x44,0xba,0x00,0x10,0x00,0x00]
+          vplzcntq 4096(%rdx), %ymm23
+
+// CHECK: vplzcntq -4096(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x44,0x7a,0x80]
+          vplzcntq -4096(%rdx), %ymm23
+
+// CHECK: vplzcntq -4128(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x44,0xba,0xe0,0xef,0xff,0xff]
+          vplzcntq -4128(%rdx), %ymm23
+
+// CHECK: vplzcntq 1016(%rdx){1to4}, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x44,0x7a,0x7f]
+          vplzcntq 1016(%rdx){1to4}, %ymm23
+
+// CHECK: vplzcntq 1024(%rdx){1to4}, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x44,0xba,0x00,0x04,0x00,0x00]
+          vplzcntq 1024(%rdx){1to4}, %ymm23
+
+// CHECK: vplzcntq -1024(%rdx){1to4}, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x44,0x7a,0x80]
+          vplzcntq -1024(%rdx){1to4}, %ymm23
+
+// CHECK: vplzcntq -1032(%rdx){1to4}, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x44,0xba,0xf8,0xfb,0xff,0xff]
+          vplzcntq -1032(%rdx){1to4}, %ymm23
+
+// CHECK: vplzcntd %xmm26, %xmm26
+// CHECK:  encoding: [0x62,0x02,0x7d,0x08,0x44,0xd2]
+          vplzcntd %xmm26, %xmm26
+
+// CHECK: vplzcntd %xmm26, %xmm26 {%k4}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x0c,0x44,0xd2]
+          vplzcntd %xmm26, %xmm26 {%k4}
+
+// CHECK: vplzcntd %xmm26, %xmm26 {%k4} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x8c,0x44,0xd2]
+          vplzcntd %xmm26, %xmm26 {%k4} {z}
+
+// CHECK: vplzcntd (%rcx), %xmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x44,0x11]
+          vplzcntd (%rcx), %xmm26
+
+// CHECK: vplzcntd 291(%rax,%r14,8), %xmm26
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x44,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vplzcntd 291(%rax,%r14,8), %xmm26
+
+// CHECK: vplzcntd (%rcx){1to4}, %xmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x44,0x11]
+          vplzcntd (%rcx){1to4}, %xmm26
+
+// CHECK: vplzcntd 2032(%rdx), %xmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x44,0x52,0x7f]
+          vplzcntd 2032(%rdx), %xmm26
+
+// CHECK: vplzcntd 2048(%rdx), %xmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x44,0x92,0x00,0x08,0x00,0x00]
+          vplzcntd 2048(%rdx), %xmm26
+
+// CHECK: vplzcntd -2048(%rdx), %xmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x44,0x52,0x80]
+          vplzcntd -2048(%rdx), %xmm26
+
+// CHECK: vplzcntd -2064(%rdx), %xmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x44,0x92,0xf0,0xf7,0xff,0xff]
+          vplzcntd -2064(%rdx), %xmm26
+
+// CHECK: vplzcntd 508(%rdx){1to4}, %xmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x44,0x52,0x7f]
+          vplzcntd 508(%rdx){1to4}, %xmm26
+
+// CHECK: vplzcntd 512(%rdx){1to4}, %xmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x44,0x92,0x00,0x02,0x00,0x00]
+          vplzcntd 512(%rdx){1to4}, %xmm26
+
+// CHECK: vplzcntd -512(%rdx){1to4}, %xmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x44,0x52,0x80]
+          vplzcntd -512(%rdx){1to4}, %xmm26
+
+// CHECK: vplzcntd -516(%rdx){1to4}, %xmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x44,0x92,0xfc,0xfd,0xff,0xff]
+          vplzcntd -516(%rdx){1to4}, %xmm26
+
+// CHECK: vplzcntd %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x44,0xce]
+          vplzcntd %ymm22, %ymm25
+
+// CHECK: vplzcntd %ymm22, %ymm25 {%k2}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2a,0x44,0xce]
+          vplzcntd %ymm22, %ymm25 {%k2}
+
+// CHECK: vplzcntd %ymm22, %ymm25 {%k2} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xaa,0x44,0xce]
+          vplzcntd %ymm22, %ymm25 {%k2} {z}
+
+// CHECK: vplzcntd (%rcx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x44,0x09]
+          vplzcntd (%rcx), %ymm25
+
+// CHECK: vplzcntd 291(%rax,%r14,8), %ymm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x44,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vplzcntd 291(%rax,%r14,8), %ymm25
+
+// CHECK: vplzcntd (%rcx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x44,0x09]
+          vplzcntd (%rcx){1to8}, %ymm25
+
+// CHECK: vplzcntd 4064(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x44,0x4a,0x7f]
+          vplzcntd 4064(%rdx), %ymm25
+
+// CHECK: vplzcntd 4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x44,0x8a,0x00,0x10,0x00,0x00]
+          vplzcntd 4096(%rdx), %ymm25
+
+// CHECK: vplzcntd -4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x44,0x4a,0x80]
+          vplzcntd -4096(%rdx), %ymm25
+
+// CHECK: vplzcntd -4128(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x44,0x8a,0xe0,0xef,0xff,0xff]
+          vplzcntd -4128(%rdx), %ymm25
+
+// CHECK: vplzcntd 508(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x44,0x4a,0x7f]
+          vplzcntd 508(%rdx){1to8}, %ymm25
+
+// CHECK: vplzcntd 512(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x44,0x8a,0x00,0x02,0x00,0x00]
+          vplzcntd 512(%rdx){1to8}, %ymm25
+
+// CHECK: vplzcntd -512(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x44,0x4a,0x80]
+          vplzcntd -512(%rdx){1to8}, %ymm25
+
+// CHECK: vplzcntd -516(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x44,0x8a,0xfc,0xfd,0xff,0xff]
+          vplzcntd -516(%rdx){1to8}, %ymm25
+
+// CHECK: vplzcntd %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x44,0xf6]
+          vplzcntd %xmm22, %xmm30
+
+// CHECK: vplzcntd %xmm22, %xmm30 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x0f,0x44,0xf6]
+          vplzcntd %xmm22, %xmm30 {%k7}
+
+// CHECK: vplzcntd %xmm22, %xmm30 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x8f,0x44,0xf6]
+          vplzcntd %xmm22, %xmm30 {%k7} {z}
+
+// CHECK: vplzcntd (%rcx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x44,0x31]
+          vplzcntd (%rcx), %xmm30
+
+// CHECK: vplzcntd 4660(%rax,%r14,8), %xmm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x44,0xb4,0xf0,0x34,0x12,0x00,0x00]
+          vplzcntd 4660(%rax,%r14,8), %xmm30
+
+// CHECK: vplzcntd (%rcx){1to4}, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x44,0x31]
+          vplzcntd (%rcx){1to4}, %xmm30
+
+// CHECK: vplzcntd 2032(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x44,0x72,0x7f]
+          vplzcntd 2032(%rdx), %xmm30
+
+// CHECK: vplzcntd 2048(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x44,0xb2,0x00,0x08,0x00,0x00]
+          vplzcntd 2048(%rdx), %xmm30
+
+// CHECK: vplzcntd -2048(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x44,0x72,0x80]
+          vplzcntd -2048(%rdx), %xmm30
+
+// CHECK: vplzcntd -2064(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x44,0xb2,0xf0,0xf7,0xff,0xff]
+          vplzcntd -2064(%rdx), %xmm30
+
+// CHECK: vplzcntd 508(%rdx){1to4}, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x44,0x72,0x7f]
+          vplzcntd 508(%rdx){1to4}, %xmm30
+
+// CHECK: vplzcntd 512(%rdx){1to4}, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x44,0xb2,0x00,0x02,0x00,0x00]
+          vplzcntd 512(%rdx){1to4}, %xmm30
+
+// CHECK: vplzcntd -512(%rdx){1to4}, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x44,0x72,0x80]
+          vplzcntd -512(%rdx){1to4}, %xmm30
+
+// CHECK: vplzcntd -516(%rdx){1to4}, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x44,0xb2,0xfc,0xfd,0xff,0xff]
+          vplzcntd -516(%rdx){1to4}, %xmm30
+
+// CHECK: vplzcntd %ymm22, %ymm21
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x44,0xee]
+          vplzcntd %ymm22, %ymm21
+
+// CHECK: vplzcntd %ymm22, %ymm21 {%k3}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x2b,0x44,0xee]
+          vplzcntd %ymm22, %ymm21 {%k3}
+
+// CHECK: vplzcntd %ymm22, %ymm21 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0xab,0x44,0xee]
+          vplzcntd %ymm22, %ymm21 {%k3} {z}
+
+// CHECK: vplzcntd (%rcx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x44,0x29]
+          vplzcntd (%rcx), %ymm21
+
+// CHECK: vplzcntd 4660(%rax,%r14,8), %ymm21
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x44,0xac,0xf0,0x34,0x12,0x00,0x00]
+          vplzcntd 4660(%rax,%r14,8), %ymm21
+
+// CHECK: vplzcntd (%rcx){1to8}, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x38,0x44,0x29]
+          vplzcntd (%rcx){1to8}, %ymm21
+
+// CHECK: vplzcntd 4064(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x44,0x6a,0x7f]
+          vplzcntd 4064(%rdx), %ymm21
+
+// CHECK: vplzcntd 4096(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x44,0xaa,0x00,0x10,0x00,0x00]
+          vplzcntd 4096(%rdx), %ymm21
+
+// CHECK: vplzcntd -4096(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x44,0x6a,0x80]
+          vplzcntd -4096(%rdx), %ymm21
+
+// CHECK: vplzcntd -4128(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x44,0xaa,0xe0,0xef,0xff,0xff]
+          vplzcntd -4128(%rdx), %ymm21
+
+// CHECK: vplzcntd 508(%rdx){1to8}, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x38,0x44,0x6a,0x7f]
+          vplzcntd 508(%rdx){1to8}, %ymm21
+
+// CHECK: vplzcntd 512(%rdx){1to8}, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x38,0x44,0xaa,0x00,0x02,0x00,0x00]
+          vplzcntd 512(%rdx){1to8}, %ymm21
+
+// CHECK: vplzcntd -512(%rdx){1to8}, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x38,0x44,0x6a,0x80]
+          vplzcntd -512(%rdx){1to8}, %ymm21
+
+// CHECK: vplzcntd -516(%rdx){1to8}, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x38,0x44,0xaa,0xfc,0xfd,0xff,0xff]
+          vplzcntd -516(%rdx){1to8}, %ymm21
+
+// CHECK: vpconflictq %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0x82,0xfd,0x08,0xc4,0xd8]
+          vpconflictq %xmm24, %xmm19
+
+// CHECK: vpconflictq %xmm24, %xmm19 {%k7}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x0f,0xc4,0xd8]
+          vpconflictq %xmm24, %xmm19 {%k7}
+
+// CHECK: vpconflictq %xmm24, %xmm19 {%k7} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x8f,0xc4,0xd8]
+          vpconflictq %xmm24, %xmm19 {%k7} {z}
+
+// CHECK: vpconflictq (%rcx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x19]
+          vpconflictq (%rcx), %xmm19
+
+// CHECK: vpconflictq 291(%rax,%r14,8), %xmm19
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0xc4,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpconflictq 291(%rax,%r14,8), %xmm19
+
+// CHECK: vpconflictq (%rcx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x19]
+          vpconflictq (%rcx){1to2}, %xmm19
+
+// CHECK: vpconflictq 2032(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x5a,0x7f]
+          vpconflictq 2032(%rdx), %xmm19
+
+// CHECK: vpconflictq 2048(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x9a,0x00,0x08,0x00,0x00]
+          vpconflictq 2048(%rdx), %xmm19
+
+// CHECK: vpconflictq -2048(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x5a,0x80]
+          vpconflictq -2048(%rdx), %xmm19
+
+// CHECK: vpconflictq -2064(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x9a,0xf0,0xf7,0xff,0xff]
+          vpconflictq -2064(%rdx), %xmm19
+
+// CHECK: vpconflictq 1016(%rdx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x5a,0x7f]
+          vpconflictq 1016(%rdx){1to2}, %xmm19
+
+// CHECK: vpconflictq 1024(%rdx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x9a,0x00,0x04,0x00,0x00]
+          vpconflictq 1024(%rdx){1to2}, %xmm19
+
+// CHECK: vpconflictq -1024(%rdx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x5a,0x80]
+          vpconflictq -1024(%rdx){1to2}, %xmm19
+
+// CHECK: vpconflictq -1032(%rdx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x9a,0xf8,0xfb,0xff,0xff]
+          vpconflictq -1032(%rdx){1to2}, %xmm19
+
+// CHECK: vpconflictq %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0x82,0xfd,0x28,0xc4,0xe1]
+          vpconflictq %ymm25, %ymm20
+
+// CHECK: vpconflictq %ymm25, %ymm20 {%k6}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x2e,0xc4,0xe1]
+          vpconflictq %ymm25, %ymm20 {%k6}
+
+// CHECK: vpconflictq %ymm25, %ymm20 {%k6} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0xae,0xc4,0xe1]
+          vpconflictq %ymm25, %ymm20 {%k6} {z}
+
+// CHECK: vpconflictq (%rcx), %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x21]
+          vpconflictq (%rcx), %ymm20
+
+// CHECK: vpconflictq 291(%rax,%r14,8), %ymm20
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0xc4,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpconflictq 291(%rax,%r14,8), %ymm20
+
+// CHECK: vpconflictq (%rcx){1to4}, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x21]
+          vpconflictq (%rcx){1to4}, %ymm20
+
+// CHECK: vpconflictq 4064(%rdx), %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x62,0x7f]
+          vpconflictq 4064(%rdx), %ymm20
+
+// CHECK: vpconflictq 4096(%rdx), %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0xc4,0xa2,0x00,0x10,0x00,0x00]
+          vpconflictq 4096(%rdx), %ymm20
+
+// CHECK: vpconflictq -4096(%rdx), %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x62,0x80]
+          vpconflictq -4096(%rdx), %ymm20
+
+// CHECK: vpconflictq -4128(%rdx), %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0xc4,0xa2,0xe0,0xef,0xff,0xff]
+          vpconflictq -4128(%rdx), %ymm20
+
+// CHECK: vpconflictq 1016(%rdx){1to4}, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x62,0x7f]
+          vpconflictq 1016(%rdx){1to4}, %ymm20
+
+// CHECK: vpconflictq 1024(%rdx){1to4}, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0xc4,0xa2,0x00,0x04,0x00,0x00]
+          vpconflictq 1024(%rdx){1to4}, %ymm20
+
+// CHECK: vpconflictq -1024(%rdx){1to4}, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x62,0x80]
+          vpconflictq -1024(%rdx){1to4}, %ymm20
+
+// CHECK: vpconflictq -1032(%rdx){1to4}, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0xc4,0xa2,0xf8,0xfb,0xff,0xff]
+          vpconflictq -1032(%rdx){1to4}, %ymm20
+
+// CHECK: vpconflictq %xmm27, %xmm18
+// CHECK:  encoding: [0x62,0x82,0xfd,0x08,0xc4,0xd3]
+          vpconflictq %xmm27, %xmm18
+
+// CHECK: vpconflictq %xmm27, %xmm18 {%k4}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x0c,0xc4,0xd3]
+          vpconflictq %xmm27, %xmm18 {%k4}
+
+// CHECK: vpconflictq %xmm27, %xmm18 {%k4} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x8c,0xc4,0xd3]
+          vpconflictq %xmm27, %xmm18 {%k4} {z}
+
+// CHECK: vpconflictq (%rcx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x11]
+          vpconflictq (%rcx), %xmm18
+
+// CHECK: vpconflictq 4660(%rax,%r14,8), %xmm18
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0xc4,0x94,0xf0,0x34,0x12,0x00,0x00]
+          vpconflictq 4660(%rax,%r14,8), %xmm18
+
+// CHECK: vpconflictq (%rcx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x11]
+          vpconflictq (%rcx){1to2}, %xmm18
+
+// CHECK: vpconflictq 2032(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x52,0x7f]
+          vpconflictq 2032(%rdx), %xmm18
+
+// CHECK: vpconflictq 2048(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x92,0x00,0x08,0x00,0x00]
+          vpconflictq 2048(%rdx), %xmm18
+
+// CHECK: vpconflictq -2048(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x52,0x80]
+          vpconflictq -2048(%rdx), %xmm18
+
+// CHECK: vpconflictq -2064(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x92,0xf0,0xf7,0xff,0xff]
+          vpconflictq -2064(%rdx), %xmm18
+
+// CHECK: vpconflictq 1016(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x52,0x7f]
+          vpconflictq 1016(%rdx){1to2}, %xmm18
+
+// CHECK: vpconflictq 1024(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x92,0x00,0x04,0x00,0x00]
+          vpconflictq 1024(%rdx){1to2}, %xmm18
+
+// CHECK: vpconflictq -1024(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x52,0x80]
+          vpconflictq -1024(%rdx){1to2}, %xmm18
+
+// CHECK: vpconflictq -1032(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x92,0xf8,0xfb,0xff,0xff]
+          vpconflictq -1032(%rdx){1to2}, %xmm18
+
+// CHECK: vpconflictq %ymm21, %ymm17
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0xc4,0xcd]
+          vpconflictq %ymm21, %ymm17
+
+// CHECK: vpconflictq %ymm21, %ymm17 {%k6}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x2e,0xc4,0xcd]
+          vpconflictq %ymm21, %ymm17 {%k6}
+
+// CHECK: vpconflictq %ymm21, %ymm17 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0xae,0xc4,0xcd]
+          vpconflictq %ymm21, %ymm17 {%k6} {z}
+
+// CHECK: vpconflictq (%rcx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x09]
+          vpconflictq (%rcx), %ymm17
+
+// CHECK: vpconflictq 4660(%rax,%r14,8), %ymm17
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0xc4,0x8c,0xf0,0x34,0x12,0x00,0x00]
+          vpconflictq 4660(%rax,%r14,8), %ymm17
+
+// CHECK: vpconflictq (%rcx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x09]
+          vpconflictq (%rcx){1to4}, %ymm17
+
+// CHECK: vpconflictq 4064(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x4a,0x7f]
+          vpconflictq 4064(%rdx), %ymm17
+
+// CHECK: vpconflictq 4096(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x8a,0x00,0x10,0x00,0x00]
+          vpconflictq 4096(%rdx), %ymm17
+
+// CHECK: vpconflictq -4096(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x4a,0x80]
+          vpconflictq -4096(%rdx), %ymm17
+
+// CHECK: vpconflictq -4128(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x8a,0xe0,0xef,0xff,0xff]
+          vpconflictq -4128(%rdx), %ymm17
+
+// CHECK: vpconflictq 1016(%rdx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x4a,0x7f]
+          vpconflictq 1016(%rdx){1to4}, %ymm17
+
+// CHECK: vpconflictq 1024(%rdx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x8a,0x00,0x04,0x00,0x00]
+          vpconflictq 1024(%rdx){1to4}, %ymm17
+
+// CHECK: vpconflictq -1024(%rdx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x4a,0x80]
+          vpconflictq -1024(%rdx){1to4}, %ymm17
+
+// CHECK: vpconflictq -1032(%rdx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x8a,0xf8,0xfb,0xff,0xff]
+          vpconflictq -1032(%rdx){1to4}, %ymm17
+
+// CHECK: vpconflictd %xmm27, %xmm21
+// CHECK:  encoding: [0x62,0x82,0x7d,0x08,0xc4,0xeb]
+          vpconflictd %xmm27, %xmm21
+
+// CHECK: vpconflictd %xmm27, %xmm21 {%k5}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x0d,0xc4,0xeb]
+          vpconflictd %xmm27, %xmm21 {%k5}
+
+// CHECK: vpconflictd %xmm27, %xmm21 {%k5} {z}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x8d,0xc4,0xeb]
+          vpconflictd %xmm27, %xmm21 {%k5} {z}
+
+// CHECK: vpconflictd (%rcx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0xc4,0x29]
+          vpconflictd (%rcx), %xmm21
+
+// CHECK: vpconflictd 291(%rax,%r14,8), %xmm21
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0xc4,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpconflictd 291(%rax,%r14,8), %xmm21
+
+// CHECK: vpconflictd (%rcx){1to4}, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x18,0xc4,0x29]
+          vpconflictd (%rcx){1to4}, %xmm21
+
+// CHECK: vpconflictd 2032(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0xc4,0x6a,0x7f]
+          vpconflictd 2032(%rdx), %xmm21
+
+// CHECK: vpconflictd 2048(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0xc4,0xaa,0x00,0x08,0x00,0x00]
+          vpconflictd 2048(%rdx), %xmm21
+
+// CHECK: vpconflictd -2048(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0xc4,0x6a,0x80]
+          vpconflictd -2048(%rdx), %xmm21
+
+// CHECK: vpconflictd -2064(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0xc4,0xaa,0xf0,0xf7,0xff,0xff]
+          vpconflictd -2064(%rdx), %xmm21
+
+// CHECK: vpconflictd 508(%rdx){1to4}, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x18,0xc4,0x6a,0x7f]
+          vpconflictd 508(%rdx){1to4}, %xmm21
+
+// CHECK: vpconflictd 512(%rdx){1to4}, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x18,0xc4,0xaa,0x00,0x02,0x00,0x00]
+          vpconflictd 512(%rdx){1to4}, %xmm21
+
+// CHECK: vpconflictd -512(%rdx){1to4}, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x18,0xc4,0x6a,0x80]
+          vpconflictd -512(%rdx){1to4}, %xmm21
+
+// CHECK: vpconflictd -516(%rdx){1to4}, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x18,0xc4,0xaa,0xfc,0xfd,0xff,0xff]
+          vpconflictd -516(%rdx){1to4}, %xmm21
+
+// CHECK: vpconflictd %ymm19, %ymm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0xc4,0xcb]
+          vpconflictd %ymm19, %ymm25
+
+// CHECK: vpconflictd %ymm19, %ymm25 {%k4}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2c,0xc4,0xcb]
+          vpconflictd %ymm19, %ymm25 {%k4}
+
+// CHECK: vpconflictd %ymm19, %ymm25 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xac,0xc4,0xcb]
+          vpconflictd %ymm19, %ymm25 {%k4} {z}
+
+// CHECK: vpconflictd (%rcx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0xc4,0x09]
+          vpconflictd (%rcx), %ymm25
+
+// CHECK: vpconflictd 291(%rax,%r14,8), %ymm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0xc4,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpconflictd 291(%rax,%r14,8), %ymm25
+
+// CHECK: vpconflictd (%rcx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0xc4,0x09]
+          vpconflictd (%rcx){1to8}, %ymm25
+
+// CHECK: vpconflictd 4064(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0xc4,0x4a,0x7f]
+          vpconflictd 4064(%rdx), %ymm25
+
+// CHECK: vpconflictd 4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0xc4,0x8a,0x00,0x10,0x00,0x00]
+          vpconflictd 4096(%rdx), %ymm25
+
+// CHECK: vpconflictd -4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0xc4,0x4a,0x80]
+          vpconflictd -4096(%rdx), %ymm25
+
+// CHECK: vpconflictd -4128(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0xc4,0x8a,0xe0,0xef,0xff,0xff]
+          vpconflictd -4128(%rdx), %ymm25
+
+// CHECK: vpconflictd 508(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0xc4,0x4a,0x7f]
+          vpconflictd 508(%rdx){1to8}, %ymm25
+
+// CHECK: vpconflictd 512(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0xc4,0x8a,0x00,0x02,0x00,0x00]
+          vpconflictd 512(%rdx){1to8}, %ymm25
+
+// CHECK: vpconflictd -512(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0xc4,0x4a,0x80]
+          vpconflictd -512(%rdx){1to8}, %ymm25
+
+// CHECK: vpconflictd -516(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0xc4,0x8a,0xfc,0xfd,0xff,0xff]
+          vpconflictd -516(%rdx){1to8}, %ymm25
+
+// CHECK: vpconflictd %xmm28, %xmm27
+// CHECK:  encoding: [0x62,0x02,0x7d,0x08,0xc4,0xdc]
+          vpconflictd %xmm28, %xmm27
+
+// CHECK: vpconflictd %xmm28, %xmm27 {%k3}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x0b,0xc4,0xdc]
+          vpconflictd %xmm28, %xmm27 {%k3}
+
+// CHECK: vpconflictd %xmm28, %xmm27 {%k3} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x8b,0xc4,0xdc]
+          vpconflictd %xmm28, %xmm27 {%k3} {z}
+
+// CHECK: vpconflictd (%rcx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0xc4,0x19]
+          vpconflictd (%rcx), %xmm27
+
+// CHECK: vpconflictd 4660(%rax,%r14,8), %xmm27
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0xc4,0x9c,0xf0,0x34,0x12,0x00,0x00]
+          vpconflictd 4660(%rax,%r14,8), %xmm27
+
+// CHECK: vpconflictd (%rcx){1to4}, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0xc4,0x19]
+          vpconflictd (%rcx){1to4}, %xmm27
+
+// CHECK: vpconflictd 2032(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0xc4,0x5a,0x7f]
+          vpconflictd 2032(%rdx), %xmm27
+
+// CHECK: vpconflictd 2048(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0xc4,0x9a,0x00,0x08,0x00,0x00]
+          vpconflictd 2048(%rdx), %xmm27
+
+// CHECK: vpconflictd -2048(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0xc4,0x5a,0x80]
+          vpconflictd -2048(%rdx), %xmm27
+
+// CHECK: vpconflictd -2064(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0xc4,0x9a,0xf0,0xf7,0xff,0xff]
+          vpconflictd -2064(%rdx), %xmm27
+
+// CHECK: vpconflictd 508(%rdx){1to4}, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0xc4,0x5a,0x7f]
+          vpconflictd 508(%rdx){1to4}, %xmm27
+
+// CHECK: vpconflictd 512(%rdx){1to4}, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0xc4,0x9a,0x00,0x02,0x00,0x00]
+          vpconflictd 512(%rdx){1to4}, %xmm27
+
+// CHECK: vpconflictd -512(%rdx){1to4}, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0xc4,0x5a,0x80]
+          vpconflictd -512(%rdx){1to4}, %xmm27
+
+// CHECK: vpconflictd -516(%rdx){1to4}, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0xc4,0x9a,0xfc,0xfd,0xff,0xff]
+          vpconflictd -516(%rdx){1to4}, %xmm27
+
+// CHECK: vpconflictd %ymm21, %ymm26
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0xc4,0xd5]
+          vpconflictd %ymm21, %ymm26
+
+// CHECK: vpconflictd %ymm21, %ymm26 {%k4}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2c,0xc4,0xd5]
+          vpconflictd %ymm21, %ymm26 {%k4}
+
+// CHECK: vpconflictd %ymm21, %ymm26 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xac,0xc4,0xd5]
+          vpconflictd %ymm21, %ymm26 {%k4} {z}
+
+// CHECK: vpconflictd (%rcx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0xc4,0x11]
+          vpconflictd (%rcx), %ymm26
+
+// CHECK: vpconflictd 4660(%rax,%r14,8), %ymm26
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0xc4,0x94,0xf0,0x34,0x12,0x00,0x00]
+          vpconflictd 4660(%rax,%r14,8), %ymm26
+
+// CHECK: vpconflictd (%rcx){1to8}, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0xc4,0x11]
+          vpconflictd (%rcx){1to8}, %ymm26
+
+// CHECK: vpconflictd 4064(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0xc4,0x52,0x7f]
+          vpconflictd 4064(%rdx), %ymm26
+
+// CHECK: vpconflictd 4096(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0xc4,0x92,0x00,0x10,0x00,0x00]
+          vpconflictd 4096(%rdx), %ymm26
+
+// CHECK: vpconflictd -4096(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0xc4,0x52,0x80]
+          vpconflictd -4096(%rdx), %ymm26
+
+// CHECK: vpconflictd -4128(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0xc4,0x92,0xe0,0xef,0xff,0xff]
+          vpconflictd -4128(%rdx), %ymm26
+
+// CHECK: vpconflictd 508(%rdx){1to8}, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0xc4,0x52,0x7f]
+          vpconflictd 508(%rdx){1to8}, %ymm26
+
+// CHECK: vpconflictd 512(%rdx){1to8}, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0xc4,0x92,0x00,0x02,0x00,0x00]
+          vpconflictd 512(%rdx){1to8}, %ymm26
+
+// CHECK: vpconflictd -512(%rdx){1to8}, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0xc4,0x52,0x80]
+          vpconflictd -512(%rdx){1to8}, %ymm26
+
+// CHECK: vpconflictd -516(%rdx){1to8}, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0xc4,0x92,0xfc,0xfd,0xff,0xff]
+          vpconflictd -516(%rdx){1to8}, %ymm26
+