AVX512: Implemented encoding and intrinsics for VPBROADCASTB/W/D/Q instructions.
authorIgor Breger <igor.breger@intel.com>
Tue, 20 Oct 2015 11:56:42 +0000 (11:56 +0000)
committerIgor Breger <igor.breger@intel.com>
Tue, 20 Oct 2015 11:56:42 +0000 (11:56 +0000)
Differential Revision: http://reviews.llvm.org/D13884

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250819 91177308-0d34-0410-b5e6-96231b3b80d8

12 files changed:
include/llvm/IR/IntrinsicsX86.td
lib/Target/X86/X86InstrAVX512.td
lib/Target/X86/X86InstrSSE.td
lib/Target/X86/X86IntrinsicsInfo.h
test/CodeGen/X86/avx-isa-check.ll
test/CodeGen/X86/avx512-intrinsics.ll
test/CodeGen/X86/avx512bwvl-intrinsics.ll
test/CodeGen/X86/avx512vl-intrinsics.ll
test/MC/X86/avx512-encodings.s
test/MC/X86/x86-64-avx512bw.s
test/MC/X86/x86-64-avx512bw_vl.s
test/MC/X86/x86-64-avx512f_vl.s

index 2bdca6d3b4ff24607ab8d6cc26cddaf9911326a9..d9c95f25ba55ae0e9dd0c4d094405a4374d37816 100644 (file)
@@ -4825,15 +4825,58 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd512">,
               Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
 
+  def int_x86_avx512_pbroadcastb_128 :
+          GCCBuiltin<"__builtin_ia32_pbroadcastb128_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_pbroadcastb_256 :
+          GCCBuiltin<"__builtin_ia32_pbroadcastb256_mask">,
+          Intrinsic<[llvm_v32i8_ty],
+                    [llvm_v16i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_pbroadcastb_512 :
+          GCCBuiltin<"__builtin_ia32_pbroadcastb512_mask">,
+          Intrinsic<[llvm_v64i8_ty],
+                    [llvm_v16i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_pbroadcastw_128 :
+          GCCBuiltin<"__builtin_ia32_pbroadcastw128_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_pbroadcastw_256 :
+          GCCBuiltin<"__builtin_ia32_pbroadcastw256_mask">,
+          Intrinsic<[llvm_v16i16_ty],
+                    [llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_pbroadcastw_512 :
+          GCCBuiltin<"__builtin_ia32_pbroadcastw512_mask">,
+          Intrinsic<[llvm_v32i16_ty],
+                    [llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_pbroadcastd_128 :
+          GCCBuiltin<"__builtin_ia32_pbroadcastd128_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_pbroadcastd_256 :
+          GCCBuiltin<"__builtin_ia32_pbroadcastd256_mask">,
+          Intrinsic<[llvm_v8i32_ty],
+                    [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_pbroadcastd_512 :
-         GCCBuiltin<"__builtin_ia32_pbroadcastd512">,
-         Intrinsic<[llvm_v16i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+          GCCBuiltin<"__builtin_ia32_pbroadcastd512">,
+          Intrinsic<[llvm_v16i32_ty],
+                    [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_pbroadcastq_128 :
+          GCCBuiltin<"__builtin_ia32_pbroadcastq128_mask">,
+          Intrinsic<[llvm_v2i64_ty],
+                    [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_pbroadcastq_256 :
+          GCCBuiltin<"__builtin_ia32_pbroadcastq256_mask">,
+          Intrinsic<[llvm_v4i64_ty],
+                    [llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_pbroadcastq_512 :
+          GCCBuiltin<"__builtin_ia32_pbroadcastq512">,
+          Intrinsic<[llvm_v8i64_ty],
+                    [llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
   def int_x86_avx512_pbroadcastd_i32_512 :
          Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_pbroadcastq_512 :
-         GCCBuiltin<"__builtin_ia32_pbroadcastq512">,
-         Intrinsic<[llvm_v8i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
   def int_x86_avx512_pbroadcastq_i64_512 :
          Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>;
 }
index a5a904873e531f85ab95d27e687d5e198deb39ed..9f57958b2da65441f36540c1d2cdc7e334c81fd7 100644 (file)
@@ -807,46 +807,45 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
 //===---------------------------------------------------------------------===//
 // AVX-512 BROADCAST
 //---
-multiclass avx512_fp_broadcast<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
-                              ValueType svt, X86VectorVTInfo _> {
-  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
-                   (ins SrcRC:$src), "vbroadcast"## !subst("p", "s", _.Suffix),
-                   "$src", "$src", (_.VT (OpNode (svt SrcRC:$src)))>,
-                   T8PD, EVEX;
 
-  let mayLoad = 1 in {
-    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
-                     (ins _.ScalarMemOp:$src),
-                     "vbroadcast"##!subst("p", "s", _.Suffix), "$src", "$src",
-                     (_.VT (OpNode (_.ScalarLdFrag addr:$src)))>,
-                     T8PD, EVEX;
-  }
+multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
+                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
+
+  defm r : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
+                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
+                   (DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))>,
+                   T8PD, EVEX;
+  let mayLoad = 1 in
+    defm m : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
+                     (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
+                     (DestInfo.VT (X86VBroadcast
+                                     (SrcInfo.ScalarLdFrag addr:$src)))>,
+                     T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>;
 }
 
-multiclass avx512_fp_broadcast_vl<bits<8> opc, SDNode OpNode,
-                                  AVX512VLVectorVTInfo _> {
-  defm Z  : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info512>,
+multiclass avx512_fp_broadcast_vl<bits<8> opc, string OpcodeStr,
+                                                       AVX512VLVectorVTInfo _> {
+  defm Z  : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
                              EVEX_V512;
 
   let Predicates = [HasVLX] in {
-    defm Z256  : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info256>,
-                                  EVEX_V256;
+    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
+                             EVEX_V256;
   }
 }
 
 let ExeDomain = SSEPackedSingle in {
-  defm VBROADCASTSS  : avx512_fp_broadcast_vl<0x18, X86VBroadcast,
-                              avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>;
+  defm VBROADCASTSS  : avx512_fp_broadcast_vl<0x18, "vbroadcastss",
+                                         avx512vl_f32_info>;
    let Predicates = [HasVLX] in {
-     defm VBROADCASTSSZ128  : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X,
-                                     v4f32, v4f32x_info>, EVEX_V128,
-                                     EVEX_CD8<32, CD8VT1>;
+     defm VBROADCASTSSZ128  : avx512_broadcast_rm<0x18, "vbroadcastss",
+                                         v4f32x_info, v4f32x_info>, EVEX_V128;
    }
 }
 
 let ExeDomain = SSEPackedDouble in {
-  defm VBROADCASTSD  : avx512_fp_broadcast_vl<0x19, X86VBroadcast,
-                              avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>;
+  defm VBROADCASTSD  : avx512_fp_broadcast_vl<0x19, "vbroadcastsd",
+                                         avx512vl_f64_info>, VEX_W;
 }
 
 // avx512_broadcast_pat introduces patterns for broadcast with a scalar argument.
@@ -947,50 +946,41 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
                    (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
           (VPBROADCASTQrZrkz (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
 
-multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
-                          X86MemOperand x86memop, PatFrag ld_frag,
-                          RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
-                          RegisterClass KRC> {
-  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins VR128X:$src),
-                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                  [(set DstRC:$dst,
-                    (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
-  def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
-                                                         VR128X:$src),
-                    !strconcat(OpcodeStr,
-                    "\t{$src, ${dst} {${mask}} |${dst} {${mask}}, $src}"),
-                    []>, EVEX, EVEX_K;
-  def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
-                                                         VR128X:$src),
-                    !strconcat(OpcodeStr,
-                    "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
-                    []>, EVEX, EVEX_KZ;
-  let mayLoad = 1 in {
-  def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
-                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                  [(set DstRC:$dst,
-                    (OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
-  def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
-                                                         x86memop:$src),
-                  !strconcat(OpcodeStr,
-                      "\t{$src, ${dst} {${mask}}|${dst} {${mask}} , $src}"),
-                  []>, EVEX, EVEX_K;
-  def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
-                                                         x86memop:$src),
-                  !strconcat(OpcodeStr,
-                      "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
-                  [(set DstRC:$dst, (OpVT (vselect KRC:$mask,
-                             (X86VBroadcast (ld_frag addr:$src)),
-                             (OpVT (bitconvert (v16i32 immAllZerosV))))))]>, EVEX, EVEX_KZ;
+// Provide aliases for broadcast from the same register class that
+// automatically does the extract.
+multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
+                                            X86VectorVTInfo SrcInfo> {
+  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
+            (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
+                (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
+}
+
+multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
+                                        AVX512VLVectorVTInfo _, Predicate prd> {
+  let Predicates = [prd] in {
+    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
+               avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
+                                  EVEX_V512;
+    // Defined separately to avoid redefinition.
+    defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
+  }
+  let Predicates = [prd, HasVLX] in {
+    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
+                avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
+                                 EVEX_V256;
+    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _.info128, _.info128>,
+                                 EVEX_V128;
   }
 }
 
-defm VPBROADCASTDZ  : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
-                      loadi32, VR512, v16i32, v4i32, VK16WM>,
-                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
-defm VPBROADCASTQZ  : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
-                      loadi64, VR512, v8i64, v2i64, VK8WM>,  EVEX_V512, VEX_W,
-                      EVEX_CD8<64, CD8VT1>;
+defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
+                                           avx512vl_i8_info, HasBWI>;
+defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
+                                           avx512vl_i16_info, HasBWI>;
+defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
+                                           avx512vl_i32_info, HasAVX512>;
+defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
+                                           avx512vl_i64_info, HasAVX512>, VEX_W;
 
 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
@@ -1057,11 +1047,6 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
 }
 
-def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
-          (VPBROADCASTDZrr VR128X:$src)>;
-def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
-          (VPBROADCASTQZrr VR128X:$src)>;
-
 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
           (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
 def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
@@ -1072,16 +1057,6 @@ def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
 def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
           (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
 
-def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))),
-          (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
-def : Pat<(v16i32 (X86VBroadcast (v8i32 VR256X:$src))),
-          (VPBROADCASTDZrr (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm))>;
-
-def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))),
-          (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
-def : Pat<(v8i64 (X86VBroadcast (v4i64 VR256X:$src))),
-          (VPBROADCASTQZrr (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm))>;
-
 def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
           (VBROADCASTSSZr VR128X:$src)>;
 def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
index dac9e35f4b8bfce8c42d3f98398b4bdb56627a0d..17edb500d66f9be7ac590945730bc133f3672772 100644 (file)
@@ -8318,29 +8318,45 @@ defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32,
 //
 multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
                           X86MemOperand x86memop, PatFrag ld_frag,
-                          ValueType OpVT128, ValueType OpVT256> {
-  def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                          ValueType OpVT128, ValueType OpVT256, Predicate prd> {
+  let Predicates = [HasAVX2, prd] in {
+    def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                  [(set VR128:$dst, (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
+                  [(set VR128:$dst,
+                   (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
                   Sched<[WriteShuffle]>, VEX;
-  def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
+    def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                  [(set VR128:$dst, (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>,
+                  [(set VR128:$dst,
+                   (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>,
                   Sched<[WriteLoad]>, VEX;
-  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+    def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set VR256:$dst, (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
+                   [(set VR256:$dst,
+                    (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
                    Sched<[WriteShuffle256]>, VEX, VEX_L;
-  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
+    def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set VR256:$dst, (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>,
+                   [(set VR256:$dst,
+                    (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>,
                    Sched<[WriteLoad]>, VEX, VEX_L;
+
+    // Provide aliases for broadcast from the same register class that
+    // automatically does the extract.
+    def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))),
+              (!cast<Instruction>(NAME#"Yrr")
+                  (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src),sub_xmm)))>;
+  }
 }
 
-defm VPBROADCASTB  : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8, v16i8, v32i8>;
-defm VPBROADCASTW  : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16, v8i16, v16i16>;
-defm VPBROADCASTD  : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, v4i32, v8i32>;
-defm VPBROADCASTQ  : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, v2i64, v4i64>;
+defm VPBROADCASTB  : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
+                                    v16i8, v32i8, NoVLX_Or_NoBWI>;
+defm VPBROADCASTW  : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
+                                    v8i16, v16i16, NoVLX_Or_NoBWI>;
+defm VPBROADCASTD  : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
+                                    v4i32, v8i32, NoVLX>;
+defm VPBROADCASTQ  : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
+                                    v2i64, v4i64, NoVLX>;
 
 let Predicates = [HasAVX2] in {
   // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
@@ -8352,18 +8368,6 @@ let Predicates = [HasAVX2] in {
 
   // Provide aliases for broadcast from the same register class that
   // automatically does the extract.
-  def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))),
-            (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src),
-                                                    sub_xmm)))>;
-  def : Pat<(v16i16 (X86VBroadcast (v16i16 VR256:$src))),
-            (VPBROADCASTWYrr (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src),
-                                                    sub_xmm)))>;
-  def : Pat<(v8i32 (X86VBroadcast (v8i32 VR256:$src))),
-            (VPBROADCASTDYrr (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src),
-                                                    sub_xmm)))>;
-  def : Pat<(v4i64 (X86VBroadcast (v4i64 VR256:$src))),
-            (VPBROADCASTQYrr (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src),
-                                                    sub_xmm)))>;
   def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
             (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src),
                                                     sub_xmm)))>;
index 6e5ca8de55504d2d10de88172b73fc6a6dbfbe17..5a2aa5b51a38501c7decc175808a468c3e9a3ab8 100644 (file)
@@ -1564,6 +1564,30 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::VPERMV3, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
                      X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_pbroadcastb_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VBROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_pbroadcastb_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VBROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_pbroadcastb_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VBROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_pbroadcastd_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VBROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_pbroadcastd_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VBROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_pbroadcastd_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VBROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_pbroadcastq_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VBROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_pbroadcastq_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VBROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_pbroadcastq_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VBROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_pbroadcastw_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VBROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_pbroadcastw_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VBROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_pbroadcastw_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VBROADCAST, 0),
   X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
   X86_INTRINSIC_DATA(avx512_psll_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSHLDQ, 0),
   X86_INTRINSIC_DATA(avx512_psrl_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSRLDQ, 0),
index 071891c148ff7a454787ef40890fbe2dd109a352..02b4f37f96a2e19955cf4c0bbb8d8af26aa89d73 100644 (file)
@@ -323,3 +323,24 @@ define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
   %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
   ret <16 x i8> %r2
 }
+
+define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
+  ret <4 x i32> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
index 7b3b62b36158daea6319045f565ce32395915763..e7b51434ebd39cca6f394ad7b510f95059a0df4e 100644 (file)
@@ -365,12 +365,24 @@ define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
 }
 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
 
-define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32>  %a0) {
-  ; CHECK: vpbroadcastd
-  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
-  ret <16 x i32> %res
+define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpbroadcastd %xmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vpbroadcastd %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT:    vpbroadcastd %xmm0, %zmm0
+; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
+  %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
+  %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
+  %res3 = add <16 x i32> %res, %res1
+  %res4 = add <16 x i32> %res2, %res3
+  ret <16 x i32> %res4
 }
-declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
+declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)
 
 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32  %a0) {
   ; CHECK: vpbroadcastd
@@ -379,12 +391,25 @@ define <16 x i32> @test_x86_pbroadcastd_i32_512(i32  %a0) {
 }
 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
 
-define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
-  ; CHECK: vpbroadcastq
-  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
-  ret <8 x i64> %res
+define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpbroadcastq %xmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vpbroadcastq %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT:    vpbroadcastq %xmm0, %zmm0
+; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1)
+  %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask)
+  %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
+  %res3 = add <8 x i64> %res, %res1
+  %res4 = add <8 x i64> %res2, %res3
+  ret <8 x i64> %res4
 }
-declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
+declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)
 
 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
   ; CHECK: vpbroadcastq
index bb8a1f51cb03158e19bcf7f9fc6cd6db6a47ac83..733cb01e7b0c250d21404ca06d3c7e2f6f778a48 100644 (file)
@@ -4276,3 +4276,124 @@ define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8>
   ret <16 x i16> %res4
 }
 
+declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
+; CHECK-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddb %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1)
+  %res1 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask)
+  %res2 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, i32 %mask)
+  %res3 = add <32 x i8> %res, %res1
+  %res4 = add <32 x i8> %res2, %res3
+  ret <32 x i8> %res4
+}
+
+declare <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
+; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddb %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
+  %res1 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask)
+  %res2 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, i16 %mask)
+  %res3 = add <16 x i8> %res, %res1
+  %res4 = add <16 x i8> %res2, %res3
+  ret <16 x i8> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
+; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1)
+  %res1 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask)
+  %res2 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> zeroinitializer, i16 %mask)
+  %res3 = add <16 x i16> %res, %res1
+  %res4 = add <16 x i16> %res2, %res3
+  ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
+; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
+  %res1 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask)
+  %res2 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> zeroinitializer, i8 %mask)
+  %res3 = add <8 x i16> %res, %res1
+  %res4 = add <8 x i16> %res2, %res3
+  ret <8 x i16> %res4
+}
+
+declare <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8>, <64 x i8>, i64)
+
+define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovq %rdi, %k1 ## encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vpbroadcastb %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
+; CHECK-NEXT:    vpbroadcastb %xmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xd0]
+; CHECK-NEXT:    vpbroadcastb %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x78,0xc0]
+; CHECK-NEXT:    vpaddb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
+; CHECK-NEXT:    vpaddb %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1)
+  %res1 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask)
+  %res2 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> zeroinitializer, i64 %mask)
+  %res3 = add <64 x i8> %res, %res1
+  %res4 = add <64 x i8> %res2, %res3
+  ret <64 x i8> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vpbroadcastw %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
+; CHECK-NEXT:    vpbroadcastw %xmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xd0]
+; CHECK-NEXT:    vpbroadcastw %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x79,0xc0]
+; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
+; CHECK-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1)
+  %res1 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask)
+  %res2 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> zeroinitializer, i32 %mask)
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res2, %res3
+  ret <32 x i16> %res4
+}
+
index a600057c9094428f8fdb33cc265cc2489c3e808b..96c860cf5d22bd52275258ac6510c1afa3926fc9 100644 (file)
@@ -5184,3 +5184,88 @@ define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i6
   %res2 = add <4 x i64> %res, %res1
   ret <4 x i64> %res2
 }
+
+declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm0
+; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 -1)
+  %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
+  %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
+  %res3 = add <8 x i32> %res, %res1
+  %res4 = add <8 x i32> %res2, %res3
+  ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
+  %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
+  %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
+  %res3 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res2, %res3
+  ret <4 x i32> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0
+; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 -1)
+  %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 %mask)
+  %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer,i8 %mask)
+  %res3 = add <4 x i64> %res, %res1
+  %res4 = add <4 x i64> %res2, %res3
+  ret <4 x i64> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
+; CHECK-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 -1)
+  %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 %mask)
+  %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer,i8 %mask)
+  %res3 = add <2 x i64> %res, %res1
+  %res4 = add <2 x i64> %res2, %res3
+  ret <2 x i64> %res4
+}
+
index 2a9bf16d37864e33652aaefb6173e8d3f6e434ce..d39b16ffa32afb0ebef7578f4b281808c25c7c5d 100644 (file)
@@ -17733,3 +17733,127 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
 // CHECK:  encoding: [0x62,0x73,0xed,0x58,0x25,0xba,0xf8,0xfb,0xff,0xff,0x7b]
           vpternlogq $0x7b, -1032(%rdx){1to8}, %zmm2, %zmm15
 
+// CHECK: vpbroadcastd (%rcx), %zmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x58,0x11]
+          vpbroadcastd (%rcx), %zmm26
+
+// CHECK: vpbroadcastd (%rcx), %zmm26 {%k2}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x4a,0x58,0x11]
+          vpbroadcastd (%rcx), %zmm26 {%k2}
+
+// CHECK: vpbroadcastd (%rcx), %zmm26 {%k2} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0xca,0x58,0x11]
+          vpbroadcastd (%rcx), %zmm26 {%k2} {z}
+
+// CHECK: vpbroadcastd 291(%rax,%r14,8), %zmm26
+// CHECK:  encoding: [0x62,0x22,0x7d,0x48,0x58,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpbroadcastd 291(%rax,%r14,8), %zmm26
+
+// CHECK: vpbroadcastd 508(%rdx), %zmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x58,0x52,0x7f]
+          vpbroadcastd 508(%rdx), %zmm26
+
+// CHECK: vpbroadcastd 512(%rdx), %zmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x58,0x92,0x00,0x02,0x00,0x00]
+          vpbroadcastd 512(%rdx), %zmm26
+
+// CHECK: vpbroadcastd -512(%rdx), %zmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x58,0x52,0x80]
+          vpbroadcastd -512(%rdx), %zmm26
+
+// CHECK: vpbroadcastd -516(%rdx), %zmm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x58,0x92,0xfc,0xfd,0xff,0xff]
+          vpbroadcastd -516(%rdx), %zmm26
+
+// CHECK: vpbroadcastd %xmm22, %zmm10
+// CHECK:  encoding: [0x62,0x32,0x7d,0x48,0x58,0xd6]
+          vpbroadcastd %xmm22, %zmm10
+
+// CHECK: vpbroadcastd %xmm22, %zmm10 {%k7}
+// CHECK:  encoding: [0x62,0x32,0x7d,0x4f,0x58,0xd6]
+          vpbroadcastd %xmm22, %zmm10 {%k7}
+
+// CHECK: vpbroadcastd %xmm22, %zmm10 {%k7} {z}
+// CHECK:  encoding: [0x62,0x32,0x7d,0xcf,0x58,0xd6]
+          vpbroadcastd %xmm22, %zmm10 {%k7} {z}
+
+// CHECK: vpbroadcastd %eax, %zmm11
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x7c,0xd8]
+          vpbroadcastd %eax, %zmm11
+
+// CHECK: vpbroadcastd %eax, %zmm11 {%k6}
+// CHECK:  encoding: [0x62,0x72,0x7d,0x4e,0x7c,0xd8]
+          vpbroadcastd %eax, %zmm11 {%k6}
+
+// CHECK: vpbroadcastd %eax, %zmm11 {%k6} {z}
+// CHECK:  encoding: [0x62,0x72,0x7d,0xce,0x7c,0xd8]
+          vpbroadcastd %eax, %zmm11 {%k6} {z}
+
+// CHECK: vpbroadcastd %ebp, %zmm11
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x7c,0xdd]
+          vpbroadcastd %ebp, %zmm11
+
+// CHECK: vpbroadcastd %r13d, %zmm11
+// CHECK:  encoding: [0x62,0x52,0x7d,0x48,0x7c,0xdd]
+          vpbroadcastd %r13d, %zmm11
+
+// CHECK: vpbroadcastq (%rcx), %zmm25
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x59,0x09]
+          vpbroadcastq (%rcx), %zmm25
+
+// CHECK: vpbroadcastq (%rcx), %zmm25 {%k2}
+// CHECK:  encoding: [0x62,0x62,0xfd,0x4a,0x59,0x09]
+          vpbroadcastq (%rcx), %zmm25 {%k2}
+
+// CHECK: vpbroadcastq (%rcx), %zmm25 {%k2} {z}
+// CHECK:  encoding: [0x62,0x62,0xfd,0xca,0x59,0x09]
+          vpbroadcastq (%rcx), %zmm25 {%k2} {z}
+
+// CHECK: vpbroadcastq 291(%rax,%r14,8), %zmm25
+// CHECK:  encoding: [0x62,0x22,0xfd,0x48,0x59,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpbroadcastq 291(%rax,%r14,8), %zmm25
+
+// CHECK: vpbroadcastq 1016(%rdx), %zmm25
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x59,0x4a,0x7f]
+          vpbroadcastq 1016(%rdx), %zmm25
+
+// CHECK: vpbroadcastq 1024(%rdx), %zmm25
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x59,0x8a,0x00,0x04,0x00,0x00]
+          vpbroadcastq 1024(%rdx), %zmm25
+
+// CHECK: vpbroadcastq -1024(%rdx), %zmm25
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x59,0x4a,0x80]
+          vpbroadcastq -1024(%rdx), %zmm25
+
+// CHECK: vpbroadcastq -1032(%rdx), %zmm25
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x59,0x8a,0xf8,0xfb,0xff,0xff]
+          vpbroadcastq -1032(%rdx), %zmm25
+
+// CHECK: vpbroadcastq %xmm5, %zmm3
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x48,0x59,0xdd]
+          vpbroadcastq %xmm5, %zmm3
+
+// CHECK: vpbroadcastq %xmm5, %zmm3 {%k5}
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x4d,0x59,0xdd]
+          vpbroadcastq %xmm5, %zmm3 {%k5}
+
+// CHECK: vpbroadcastq %xmm5, %zmm3 {%k5} {z}
+// CHECK:  encoding: [0x62,0xf2,0xfd,0xcd,0x59,0xdd]
+          vpbroadcastq %xmm5, %zmm3 {%k5} {z}
+
+// CHECK: vpbroadcastq %rax, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x48,0x7c,0xc8]
+          vpbroadcastq %rax, %zmm1
+
+// CHECK: vpbroadcastq %rax, %zmm1 {%k6}
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x4e,0x7c,0xc8]
+          vpbroadcastq %rax, %zmm1 {%k6}
+
+// CHECK: vpbroadcastq %rax, %zmm1 {%k6} {z}
+// CHECK:  encoding: [0x62,0xf2,0xfd,0xce,0x7c,0xc8]
+          vpbroadcastq %rax, %zmm1 {%k6} {z}
+
+// CHECK: vpbroadcastq %r8, %zmm1
+// CHECK:  encoding: [0x62,0xd2,0xfd,0x48,0x7c,0xc8]
+          vpbroadcastq %r8, %zmm1
+
index d968986f815108878078828a3c113e67785ec77c..d6be005735433021e03c42f68293407c79ca00fa 100644 (file)
 // CHECK:  encoding: [0x62,0xe1,0x7e,0x48,0x70,0x92,0xc0,0xdf,0xff,0xff,0x7b]
           vpshufhw $123, -8256(%rdx), %zmm18
 
+// CHECK: vpbroadcastb %xmm23, %zmm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x48,0x78,0xcf]
+          vpbroadcastb %xmm23, %zmm25
+
+// CHECK: vpbroadcastb %xmm23, %zmm25 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x4f,0x78,0xcf]
+          vpbroadcastb %xmm23, %zmm25 {%k7}
+
+// CHECK: vpbroadcastb %xmm23, %zmm25 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xcf,0x78,0xcf]
+          vpbroadcastb %xmm23, %zmm25 {%k7} {z}
+
+// CHECK: vpbroadcastb (%rcx), %zmm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x78,0x09]
+          vpbroadcastb (%rcx), %zmm25
+
+// CHECK: vpbroadcastb 291(%rax,%r14,8), %zmm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x48,0x78,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpbroadcastb 291(%rax,%r14,8), %zmm25
+
+// CHECK: vpbroadcastb 127(%rdx), %zmm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x78,0x4a,0x7f]
+          vpbroadcastb 127(%rdx), %zmm25
+
+// CHECK: vpbroadcastb 128(%rdx), %zmm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x78,0x8a,0x80,0x00,0x00,0x00]
+          vpbroadcastb 128(%rdx), %zmm25
+
+// CHECK: vpbroadcastb -128(%rdx), %zmm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x78,0x4a,0x80]
+          vpbroadcastb -128(%rdx), %zmm25
+
+// CHECK: vpbroadcastb -129(%rdx), %zmm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x78,0x8a,0x7f,0xff,0xff,0xff]
+          vpbroadcastb -129(%rdx), %zmm25
+
+// CHECK: vpbroadcastb %eax, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x7a,0xd8]
+          vpbroadcastb %eax, %zmm19
+
+// CHECK: vpbroadcastb %eax, %zmm19 {%k7}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x4f,0x7a,0xd8]
+          vpbroadcastb %eax, %zmm19 {%k7}
+
+// CHECK: vpbroadcastb %eax, %zmm19 {%k7} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0xcf,0x7a,0xd8]
+          vpbroadcastb %eax, %zmm19 {%k7} {z}
+
+// CHECK: vpbroadcastw %xmm19, %zmm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x48,0x79,0xf3]
+          vpbroadcastw %xmm19, %zmm30
+
+// CHECK: vpbroadcastw %xmm19, %zmm30 {%k4}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x4c,0x79,0xf3]
+          vpbroadcastw %xmm19, %zmm30 {%k4}
+
+// CHECK: vpbroadcastw %xmm19, %zmm30 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xcc,0x79,0xf3]
+          vpbroadcastw %xmm19, %zmm30 {%k4} {z}
+
+// CHECK: vpbroadcastw (%rcx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x79,0x31]
+          vpbroadcastw (%rcx), %zmm30
+
+// CHECK: vpbroadcastw 291(%rax,%r14,8), %zmm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x48,0x79,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpbroadcastw 291(%rax,%r14,8), %zmm30
+
+// CHECK: vpbroadcastw 254(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x79,0x72,0x7f]
+          vpbroadcastw 254(%rdx), %zmm30
+
+// CHECK: vpbroadcastw 256(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x79,0xb2,0x00,0x01,0x00,0x00]
+          vpbroadcastw 256(%rdx), %zmm30
+
+// CHECK: vpbroadcastw -256(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x79,0x72,0x80]
+          vpbroadcastw -256(%rdx), %zmm30
+
+// CHECK: vpbroadcastw -258(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x79,0xb2,0xfe,0xfe,0xff,0xff]
+          vpbroadcastw -258(%rdx), %zmm30
+
+// CHECK: vpbroadcastw %eax, %zmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x7b,0xc0]
+          vpbroadcastw %eax, %zmm24
+
+// CHECK: vpbroadcastw %eax, %zmm24 {%k1}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x49,0x7b,0xc0]
+          vpbroadcastw %eax, %zmm24 {%k1}
+
+// CHECK: vpbroadcastw %eax, %zmm24 {%k1} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0xc9,0x7b,0xc0]
+          vpbroadcastw %eax, %zmm24 {%k1} {z}
+
index cd57ce1d55a1ae2a6f4f87f88806023fd11deb19..75802059be748e41cfefa652cdd50d0bd4a58076 100644 (file)
 // CHECK: vpsadbw -4128(%rdx), %ymm26, %ymm20
 // CHECK:  encoding: [0x62,0xe1,0x2d,0x20,0xf6,0xa2,0xe0,0xef,0xff,0xff]
           vpsadbw -4128(%rdx), %ymm26, %ymm20
+
+// CHECK: vpbroadcastb %xmm28, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x7d,0x08,0x78,0xf4]
+          vpbroadcastb %xmm28, %xmm30
+
+// CHECK: vpbroadcastb %xmm28, %xmm30 {%k4}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x0c,0x78,0xf4]
+          vpbroadcastb %xmm28, %xmm30 {%k4}
+
+// CHECK: vpbroadcastb %xmm28, %xmm30 {%k4} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x8c,0x78,0xf4]
+          vpbroadcastb %xmm28, %xmm30 {%k4} {z}
+
+// CHECK: vpbroadcastb (%rcx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x78,0x31]
+          vpbroadcastb (%rcx), %xmm30
+
+// CHECK: vpbroadcastb 4660(%rax,%r14,8), %xmm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x78,0xb4,0xf0,0x34,0x12,0x00,0x00]
+          vpbroadcastb 4660(%rax,%r14,8), %xmm30
+
+// CHECK: vpbroadcastb 127(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x78,0x72,0x7f]
+          vpbroadcastb 127(%rdx), %xmm30
+
+// CHECK: vpbroadcastb 128(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x78,0xb2,0x80,0x00,0x00,0x00]
+          vpbroadcastb 128(%rdx), %xmm30
+
+// CHECK: vpbroadcastb -128(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x78,0x72,0x80]
+          vpbroadcastb -128(%rdx), %xmm30
+
+// CHECK: vpbroadcastb -129(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x78,0xb2,0x7f,0xff,0xff,0xff]
+          vpbroadcastb -129(%rdx), %xmm30
+
+// CHECK: vpbroadcastb %xmm25, %ymm17
+// CHECK:  encoding: [0x62,0x82,0x7d,0x28,0x78,0xc9]
+          vpbroadcastb %xmm25, %ymm17
+
+// CHECK: vpbroadcastb %xmm25, %ymm17 {%k2}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x2a,0x78,0xc9]
+          vpbroadcastb %xmm25, %ymm17 {%k2}
+
+// CHECK: vpbroadcastb %xmm25, %ymm17 {%k2} {z}
+// CHECK:  encoding: [0x62,0x82,0x7d,0xaa,0x78,0xc9]
+          vpbroadcastb %xmm25, %ymm17 {%k2} {z}
+
+// CHECK: vpbroadcastb (%rcx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x78,0x09]
+          vpbroadcastb (%rcx), %ymm17
+
+// CHECK: vpbroadcastb 4660(%rax,%r14,8), %ymm17
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x78,0x8c,0xf0,0x34,0x12,0x00,0x00]
+          vpbroadcastb 4660(%rax,%r14,8), %ymm17
+
+// CHECK: vpbroadcastb 127(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x78,0x4a,0x7f]
+          vpbroadcastb 127(%rdx), %ymm17
+
+// CHECK: vpbroadcastb 128(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x78,0x8a,0x80,0x00,0x00,0x00]
+          vpbroadcastb 128(%rdx), %ymm17
+
+// CHECK: vpbroadcastb -128(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x78,0x4a,0x80]
+          vpbroadcastb -128(%rdx), %ymm17
+
+// CHECK: vpbroadcastb -129(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x78,0x8a,0x7f,0xff,0xff,0xff]
+          vpbroadcastb -129(%rdx), %ymm17
+
+// CHECK: vpbroadcastb %eax, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x7a,0xe0]
+          vpbroadcastb %eax, %xmm20
+
+// CHECK: vpbroadcastb %eax, %xmm20 {%k3}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x0b,0x7a,0xe0]
+          vpbroadcastb %eax, %xmm20 {%k3}
+
+// CHECK: vpbroadcastb %eax, %xmm20 {%k3} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x8b,0x7a,0xe0]
+          vpbroadcastb %eax, %xmm20 {%k3} {z}
+
+// CHECK: vpbroadcastb %eax, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x7a,0xd8]
+          vpbroadcastb %eax, %ymm27
+
+// CHECK: vpbroadcastb %eax, %ymm27 {%k6}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x2e,0x7a,0xd8]
+          vpbroadcastb %eax, %ymm27 {%k6}
+
+// CHECK: vpbroadcastb %eax, %ymm27 {%k6} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0xae,0x7a,0xd8]
+          vpbroadcastb %eax, %ymm27 {%k6} {z}
+
+// CHECK: vpbroadcastw %xmm24, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x7d,0x08,0x79,0xf0]
+          vpbroadcastw %xmm24, %xmm30
+
+// CHECK: vpbroadcastw %xmm24, %xmm30 {%k1}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x09,0x79,0xf0]
+          vpbroadcastw %xmm24, %xmm30 {%k1}
+
+// CHECK: vpbroadcastw %xmm24, %xmm30 {%k1} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x89,0x79,0xf0]
+          vpbroadcastw %xmm24, %xmm30 {%k1} {z}
+
+// CHECK: vpbroadcastw (%rcx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x79,0x31]
+          vpbroadcastw (%rcx), %xmm30
+
+// CHECK: vpbroadcastw 4660(%rax,%r14,8), %xmm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x79,0xb4,0xf0,0x34,0x12,0x00,0x00]
+          vpbroadcastw 4660(%rax,%r14,8), %xmm30
+
+// CHECK: vpbroadcastw 254(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x79,0x72,0x7f]
+          vpbroadcastw 254(%rdx), %xmm30
+
+// CHECK: vpbroadcastw 256(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x79,0xb2,0x00,0x01,0x00,0x00]
+          vpbroadcastw 256(%rdx), %xmm30
+
+// CHECK: vpbroadcastw -256(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x79,0x72,0x80]
+          vpbroadcastw -256(%rdx), %xmm30
+
+// CHECK: vpbroadcastw -258(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x79,0xb2,0xfe,0xfe,0xff,0xff]
+          vpbroadcastw -258(%rdx), %xmm30
+
+// CHECK: vpbroadcastw %xmm18, %ymm28
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x79,0xe2]
+          vpbroadcastw %xmm18, %ymm28
+
+// CHECK: vpbroadcastw %xmm18, %ymm28 {%k3}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2b,0x79,0xe2]
+          vpbroadcastw %xmm18, %ymm28 {%k3}
+
+// CHECK: vpbroadcastw %xmm18, %ymm28 {%k3} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xab,0x79,0xe2]
+          vpbroadcastw %xmm18, %ymm28 {%k3} {z}
+
+// CHECK: vpbroadcastw (%rcx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x79,0x21]
+          vpbroadcastw (%rcx), %ymm28
+
+// CHECK: vpbroadcastw 4660(%rax,%r14,8), %ymm28
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x79,0xa4,0xf0,0x34,0x12,0x00,0x00]
+          vpbroadcastw 4660(%rax,%r14,8), %ymm28
+
+// CHECK: vpbroadcastw 254(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x79,0x62,0x7f]
+          vpbroadcastw 254(%rdx), %ymm28
+
+// CHECK: vpbroadcastw 256(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x79,0xa2,0x00,0x01,0x00,0x00]
+          vpbroadcastw 256(%rdx), %ymm28
+
+// CHECK: vpbroadcastw -256(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x79,0x62,0x80]
+          vpbroadcastw -256(%rdx), %ymm28
+
+// CHECK: vpbroadcastw -258(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x79,0xa2,0xfe,0xfe,0xff,0xff]
+          vpbroadcastw -258(%rdx), %ymm28
+
+// CHECK: vpbroadcastw %eax, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x7b,0xc0]
+          vpbroadcastw %eax, %xmm24
+
+// CHECK: vpbroadcastw %eax, %xmm24 {%k6}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x0e,0x7b,0xc0]
+          vpbroadcastw %eax, %xmm24 {%k6}
+
+// CHECK: vpbroadcastw %eax, %xmm24 {%k6} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x8e,0x7b,0xc0]
+          vpbroadcastw %eax, %xmm24 {%k6} {z}
+
+// CHECK: vpbroadcastw %eax, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x7b,0xd8]
+          vpbroadcastw %eax, %ymm19
+
+// CHECK: vpbroadcastw %eax, %ymm19 {%k3}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x2b,0x7b,0xd8]
+          vpbroadcastw %eax, %ymm19 {%k3}
+
+// CHECK: vpbroadcastw %eax, %ymm19 {%k3} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0xab,0x7b,0xd8]
+          vpbroadcastw %eax, %ymm19 {%k3} {z}
+
+// CHECK: vpbroadcastb %xmm20, %xmm20
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x78,0xe4]
+          vpbroadcastb %xmm20, %xmm20
+
+// CHECK: vpbroadcastb %xmm20, %xmm20 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x0f,0x78,0xe4]
+          vpbroadcastb %xmm20, %xmm20 {%k7}
+
+// CHECK: vpbroadcastb %xmm20, %xmm20 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x8f,0x78,0xe4]
+          vpbroadcastb %xmm20, %xmm20 {%k7} {z}
+
+// CHECK: vpbroadcastb (%rcx), %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x78,0x21]
+          vpbroadcastb (%rcx), %xmm20
+
+// CHECK: vpbroadcastb 291(%rax,%r14,8), %xmm20
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x78,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpbroadcastb 291(%rax,%r14,8), %xmm20
+
+// CHECK: vpbroadcastb 127(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x78,0x62,0x7f]
+          vpbroadcastb 127(%rdx), %xmm20
+
+// CHECK: vpbroadcastb 128(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x78,0xa2,0x80,0x00,0x00,0x00]
+          vpbroadcastb 128(%rdx), %xmm20
+
+// CHECK: vpbroadcastb -128(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x78,0x62,0x80]
+          vpbroadcastb -128(%rdx), %xmm20
+
+// CHECK: vpbroadcastb -129(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x78,0xa2,0x7f,0xff,0xff,0xff]
+          vpbroadcastb -129(%rdx), %xmm20
+
+// CHECK: vpbroadcastb %xmm27, %ymm30
+// CHECK:  encoding: [0x62,0x02,0x7d,0x28,0x78,0xf3]
+          vpbroadcastb %xmm27, %ymm30
+
+// CHECK: vpbroadcastb %xmm27, %ymm30 {%k6}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x2e,0x78,0xf3]
+          vpbroadcastb %xmm27, %ymm30 {%k6}
+
+// CHECK: vpbroadcastb %xmm27, %ymm30 {%k6} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0xae,0x78,0xf3]
+          vpbroadcastb %xmm27, %ymm30 {%k6} {z}
+
+// CHECK: vpbroadcastb (%rcx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x78,0x31]
+          vpbroadcastb (%rcx), %ymm30
+
+// CHECK: vpbroadcastb 291(%rax,%r14,8), %ymm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x78,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpbroadcastb 291(%rax,%r14,8), %ymm30
+
+// CHECK: vpbroadcastb 127(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x78,0x72,0x7f]
+          vpbroadcastb 127(%rdx), %ymm30
+
+// CHECK: vpbroadcastb 128(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x78,0xb2,0x80,0x00,0x00,0x00]
+          vpbroadcastb 128(%rdx), %ymm30
+
+// CHECK: vpbroadcastb -128(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x78,0x72,0x80]
+          vpbroadcastb -128(%rdx), %ymm30
+
+// CHECK: vpbroadcastb -129(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x78,0xb2,0x7f,0xff,0xff,0xff]
+          vpbroadcastb -129(%rdx), %ymm30
+
+// CHECK: vpbroadcastb %eax, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x7a,0xf0]
+          vpbroadcastb %eax, %xmm22
+
+// CHECK: vpbroadcastb %eax, %xmm22 {%k3}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x0b,0x7a,0xf0]
+          vpbroadcastb %eax, %xmm22 {%k3}
+
+// CHECK: vpbroadcastb %eax, %xmm22 {%k3} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x8b,0x7a,0xf0]
+          vpbroadcastb %eax, %xmm22 {%k3} {z}
+
+// CHECK: vpbroadcastb %eax, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x7a,0xc8]
+          vpbroadcastb %eax, %ymm17
+
+// CHECK: vpbroadcastb %eax, %ymm17 {%k1}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x29,0x7a,0xc8]
+          vpbroadcastb %eax, %ymm17 {%k1}
+
+// CHECK: vpbroadcastb %eax, %ymm17 {%k1} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0xa9,0x7a,0xc8]
+          vpbroadcastb %eax, %ymm17 {%k1} {z}
+
+// CHECK: vpbroadcastw %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x79,0xdc]
+          vpbroadcastw %xmm20, %xmm19
+
+// CHECK: vpbroadcastw %xmm20, %xmm19 {%k2}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x0a,0x79,0xdc]
+          vpbroadcastw %xmm20, %xmm19 {%k2}
+
+// CHECK: vpbroadcastw %xmm20, %xmm19 {%k2} {z}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x8a,0x79,0xdc]
+          vpbroadcastw %xmm20, %xmm19 {%k2} {z}
+
+// CHECK: vpbroadcastw (%rcx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x79,0x19]
+          vpbroadcastw (%rcx), %xmm19
+
+// CHECK: vpbroadcastw 291(%rax,%r14,8), %xmm19
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x79,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpbroadcastw 291(%rax,%r14,8), %xmm19
+
+// CHECK: vpbroadcastw 254(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x79,0x5a,0x7f]
+          vpbroadcastw 254(%rdx), %xmm19
+
+// CHECK: vpbroadcastw 256(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x79,0x9a,0x00,0x01,0x00,0x00]
+          vpbroadcastw 256(%rdx), %xmm19
+
+// CHECK: vpbroadcastw -256(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x79,0x5a,0x80]
+          vpbroadcastw -256(%rdx), %xmm19
+
+// CHECK: vpbroadcastw -258(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x79,0x9a,0xfe,0xfe,0xff,0xff]
+          vpbroadcastw -258(%rdx), %xmm19
+
+// CHECK: vpbroadcastw %xmm17, %ymm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x79,0xc9]
+          vpbroadcastw %xmm17, %ymm25
+
+// CHECK: vpbroadcastw %xmm17, %ymm25 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2f,0x79,0xc9]
+          vpbroadcastw %xmm17, %ymm25 {%k7}
+
+// CHECK: vpbroadcastw %xmm17, %ymm25 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xaf,0x79,0xc9]
+          vpbroadcastw %xmm17, %ymm25 {%k7} {z}
+
+// CHECK: vpbroadcastw (%rcx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x79,0x09]
+          vpbroadcastw (%rcx), %ymm25
+
+// CHECK: vpbroadcastw 291(%rax,%r14,8), %ymm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x79,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpbroadcastw 291(%rax,%r14,8), %ymm25
+
+// CHECK: vpbroadcastw 254(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x79,0x4a,0x7f]
+          vpbroadcastw 254(%rdx), %ymm25
+
+// CHECK: vpbroadcastw 256(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x79,0x8a,0x00,0x01,0x00,0x00]
+          vpbroadcastw 256(%rdx), %ymm25
+
+// CHECK: vpbroadcastw -256(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x79,0x4a,0x80]
+          vpbroadcastw -256(%rdx), %ymm25
+
+// CHECK: vpbroadcastw -258(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x79,0x8a,0xfe,0xfe,0xff,0xff]
+          vpbroadcastw -258(%rdx), %ymm25
+
+// CHECK: vpbroadcastw %eax, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x7b,0xe8]
+          vpbroadcastw %eax, %xmm29
+
+// CHECK: vpbroadcastw %eax, %xmm29 {%k1}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x09,0x7b,0xe8]
+          vpbroadcastw %eax, %xmm29 {%k1}
+
+// CHECK: vpbroadcastw %eax, %xmm29 {%k1} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x89,0x7b,0xe8]
+          vpbroadcastw %eax, %xmm29 {%k1} {z}
+
+// CHECK: vpbroadcastw %eax, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x7b,0xe0]
+          vpbroadcastw %eax, %ymm28
+
+// CHECK: vpbroadcastw %eax, %ymm28 {%k4}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x2c,0x7b,0xe0]
+          vpbroadcastw %eax, %ymm28 {%k4}
+
+// CHECK: vpbroadcastw %eax, %ymm28 {%k4} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0xac,0x7b,0xe0]
+          vpbroadcastw %eax, %ymm28 {%k4} {z}
+
index b6bc85bcb5de0b9cbfd64d22ba071f04602e7abf..972b8a4007acc6f3cbe83f4e954c7fc134333206 100644 (file)
@@ -21563,3 +21563,251 @@ vaddpd  {rz-sae}, %zmm2, %zmm1, %zmm1
 // CHECK:  encoding: [0x62,0x63,0xc5,0x30,0x25,0x92,0xf8,0xfb,0xff,0xff,0x7b]
           vpternlogq $0x7b, -1032(%rdx){1to4}, %ymm23, %ymm26
 
+// CHECK: vpbroadcastd (%rcx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x58,0x29]
+          vpbroadcastd (%rcx), %xmm29
+
+// CHECK: vpbroadcastd (%rcx), %xmm29 {%k1}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x09,0x58,0x29]
+          vpbroadcastd (%rcx), %xmm29 {%k1}
+
+// CHECK: vpbroadcastd (%rcx), %xmm29 {%k1} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x89,0x58,0x29]
+          vpbroadcastd (%rcx), %xmm29 {%k1} {z}
+
+// CHECK: vpbroadcastd 291(%rax,%r14,8), %xmm29
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x58,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpbroadcastd 291(%rax,%r14,8), %xmm29
+
+// CHECK: vpbroadcastd 508(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x58,0x6a,0x7f]
+          vpbroadcastd 508(%rdx), %xmm29
+
+// CHECK: vpbroadcastd 512(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x58,0xaa,0x00,0x02,0x00,0x00]
+          vpbroadcastd 512(%rdx), %xmm29
+
+// CHECK: vpbroadcastd -512(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x58,0x6a,0x80]
+          vpbroadcastd -512(%rdx), %xmm29
+
+// CHECK: vpbroadcastd -516(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x58,0xaa,0xfc,0xfd,0xff,0xff]
+          vpbroadcastd -516(%rdx), %xmm29
+
+// CHECK: vpbroadcastd (%rcx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x58,0x21]
+          vpbroadcastd (%rcx), %ymm28
+
+// CHECK: vpbroadcastd (%rcx), %ymm28 {%k2}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x2a,0x58,0x21]
+          vpbroadcastd (%rcx), %ymm28 {%k2}
+
+// CHECK: vpbroadcastd (%rcx), %ymm28 {%k2} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0xaa,0x58,0x21]
+          vpbroadcastd (%rcx), %ymm28 {%k2} {z}
+
+// CHECK: vpbroadcastd 291(%rax,%r14,8), %ymm28
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x58,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpbroadcastd 291(%rax,%r14,8), %ymm28
+
+// CHECK: vpbroadcastd 508(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x58,0x62,0x7f]
+          vpbroadcastd 508(%rdx), %ymm28
+
+// CHECK: vpbroadcastd 512(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x58,0xa2,0x00,0x02,0x00,0x00]
+          vpbroadcastd 512(%rdx), %ymm28
+
+// CHECK: vpbroadcastd -512(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x58,0x62,0x80]
+          vpbroadcastd -512(%rdx), %ymm28
+
+// CHECK: vpbroadcastd -516(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x58,0xa2,0xfc,0xfd,0xff,0xff]
+          vpbroadcastd -516(%rdx), %ymm28
+
+// CHECK: vpbroadcastd %xmm18, %xmm29
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x58,0xea]
+          vpbroadcastd %xmm18, %xmm29
+
+// CHECK: vpbroadcastd %xmm18, %xmm29 {%k2}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x0a,0x58,0xea]
+          vpbroadcastd %xmm18, %xmm29 {%k2}
+
+// CHECK: vpbroadcastd %xmm18, %xmm29 {%k2} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x8a,0x58,0xea]
+          vpbroadcastd %xmm18, %xmm29 {%k2} {z}
+
+// CHECK: vpbroadcastd %xmm26, %ymm17
+// CHECK:  encoding: [0x62,0x82,0x7d,0x28,0x58,0xca]
+          vpbroadcastd %xmm26, %ymm17
+
+// CHECK: vpbroadcastd %xmm26, %ymm17 {%k3}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x2b,0x58,0xca]
+          vpbroadcastd %xmm26, %ymm17 {%k3}
+
+// CHECK: vpbroadcastd %xmm26, %ymm17 {%k3} {z}
+// CHECK:  encoding: [0x62,0x82,0x7d,0xab,0x58,0xca]
+          vpbroadcastd %xmm26, %ymm17 {%k3} {z}
+
+// CHECK: vpbroadcastd %eax, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x7c,0xf0]
+          vpbroadcastd %eax, %xmm22
+
+// CHECK: vpbroadcastd %eax, %xmm22 {%k5}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x0d,0x7c,0xf0]
+          vpbroadcastd %eax, %xmm22 {%k5}
+
+// CHECK: vpbroadcastd %eax, %xmm22 {%k5} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x8d,0x7c,0xf0]
+          vpbroadcastd %eax, %xmm22 {%k5} {z}
+
+// CHECK: vpbroadcastd %ebp, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x7c,0xf5]
+          vpbroadcastd %ebp, %xmm22
+
+// CHECK: vpbroadcastd %r13d, %xmm22
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x08,0x7c,0xf5]
+          vpbroadcastd %r13d, %xmm22
+
+// CHECK: vpbroadcastd %eax, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x7c,0xc8]
+          vpbroadcastd %eax, %ymm25
+
+// CHECK: vpbroadcastd %eax, %ymm25 {%k5}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x2d,0x7c,0xc8]
+          vpbroadcastd %eax, %ymm25 {%k5}
+
+// CHECK: vpbroadcastd %eax, %ymm25 {%k5} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0xad,0x7c,0xc8]
+          vpbroadcastd %eax, %ymm25 {%k5} {z}
+
+// CHECK: vpbroadcastd %ebp, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x7c,0xcd]
+          vpbroadcastd %ebp, %ymm25
+
+// CHECK: vpbroadcastd %r13d, %ymm25
+// CHECK:  encoding: [0x62,0x42,0x7d,0x28,0x7c,0xcd]
+          vpbroadcastd %r13d, %ymm25
+
+// CHECK: vpbroadcastq (%rcx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x08,0x59,0x31]
+          vpbroadcastq (%rcx), %xmm30
+
+// CHECK: vpbroadcastq (%rcx), %xmm30 {%k7}
+// CHECK:  encoding: [0x62,0x62,0xfd,0x0f,0x59,0x31]
+          vpbroadcastq (%rcx), %xmm30 {%k7}
+
+// CHECK: vpbroadcastq (%rcx), %xmm30 {%k7} {z}
+// CHECK:  encoding: [0x62,0x62,0xfd,0x8f,0x59,0x31]
+          vpbroadcastq (%rcx), %xmm30 {%k7} {z}
+
+// CHECK: vpbroadcastq 291(%rax,%r14,8), %xmm30
+// CHECK:  encoding: [0x62,0x22,0xfd,0x08,0x59,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpbroadcastq 291(%rax,%r14,8), %xmm30
+
+// CHECK: vpbroadcastq 1016(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x08,0x59,0x72,0x7f]
+          vpbroadcastq 1016(%rdx), %xmm30
+
+// CHECK: vpbroadcastq 1024(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x08,0x59,0xb2,0x00,0x04,0x00,0x00]
+          vpbroadcastq 1024(%rdx), %xmm30
+
+// CHECK: vpbroadcastq -1024(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x08,0x59,0x72,0x80]
+          vpbroadcastq -1024(%rdx), %xmm30
+
+// CHECK: vpbroadcastq -1032(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x08,0x59,0xb2,0xf8,0xfb,0xff,0xff]
+          vpbroadcastq -1032(%rdx), %xmm30
+
+// CHECK: vpbroadcastq (%rcx), %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x59,0x19]
+          vpbroadcastq (%rcx), %ymm19
+
+// CHECK: vpbroadcastq (%rcx), %ymm19 {%k7}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x2f,0x59,0x19]
+          vpbroadcastq (%rcx), %ymm19 {%k7}
+
+// CHECK: vpbroadcastq (%rcx), %ymm19 {%k7} {z}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0xaf,0x59,0x19]
+          vpbroadcastq (%rcx), %ymm19 {%k7} {z}
+
+// CHECK: vpbroadcastq 291(%rax,%r14,8), %ymm19
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x59,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpbroadcastq 291(%rax,%r14,8), %ymm19
+
+// CHECK: vpbroadcastq 1016(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x59,0x5a,0x7f]
+          vpbroadcastq 1016(%rdx), %ymm19
+
+// CHECK: vpbroadcastq 1024(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x59,0x9a,0x00,0x04,0x00,0x00]
+          vpbroadcastq 1024(%rdx), %ymm19
+
+// CHECK: vpbroadcastq -1024(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x59,0x5a,0x80]
+          vpbroadcastq -1024(%rdx), %ymm19
+
+// CHECK: vpbroadcastq -1032(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x59,0x9a,0xf8,0xfb,0xff,0xff]
+          vpbroadcastq -1032(%rdx), %ymm19
+
+// CHECK: vpbroadcastq %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0x82,0xfd,0x08,0x59,0xd8]
+          vpbroadcastq %xmm24, %xmm19
+
+// CHECK: vpbroadcastq %xmm24, %xmm19 {%k6}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x0e,0x59,0xd8]
+          vpbroadcastq %xmm24, %xmm19 {%k6}
+
+// CHECK: vpbroadcastq %xmm24, %xmm19 {%k6} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x8e,0x59,0xd8]
+          vpbroadcastq %xmm24, %xmm19 {%k6} {z}
+
+// CHECK: vpbroadcastq %xmm26, %ymm19
+// CHECK:  encoding: [0x62,0x82,0xfd,0x28,0x59,0xda]
+          vpbroadcastq %xmm26, %ymm19
+
+// CHECK: vpbroadcastq %xmm26, %ymm19 {%k6}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x2e,0x59,0xda]
+          vpbroadcastq %xmm26, %ymm19 {%k6}
+
+// CHECK: vpbroadcastq %xmm26, %ymm19 {%k6} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0xae,0x59,0xda]
+          vpbroadcastq %xmm26, %ymm19 {%k6} {z}
+
+// CHECK: vpbroadcastq %rax, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x7c,0xf0]
+          vpbroadcastq %rax, %xmm22
+
+// CHECK: vpbroadcastq %rax, %xmm22 {%k2}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x0a,0x7c,0xf0]
+          vpbroadcastq %rax, %xmm22 {%k2}
+
+// CHECK: vpbroadcastq %rax, %xmm22 {%k2} {z}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x8a,0x7c,0xf0]
+          vpbroadcastq %rax, %xmm22 {%k2} {z}
+
+// CHECK: vpbroadcastq %r8, %xmm22
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x08,0x7c,0xf0]
+          vpbroadcastq %r8, %xmm22
+
+// CHECK: vpbroadcastq %rax, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x7c,0xd8]
+          vpbroadcastq %rax, %ymm19
+
+// CHECK: vpbroadcastq %rax, %ymm19 {%k5}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x2d,0x7c,0xd8]
+          vpbroadcastq %rax, %ymm19 {%k5}
+
+// CHECK: vpbroadcastq %rax, %ymm19 {%k5} {z}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0xad,0x7c,0xd8]
+          vpbroadcastq %rax, %ymm19 {%k5} {z}
+
+// CHECK: vpbroadcastq %r8, %ymm19
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x28,0x7c,0xd8]
+          vpbroadcastq %r8, %ymm19
+