[AVX512] Added VBROADCAST{SS/SD} encoding for VL subset.
author Robert Khasanov <rob.khasanov@gmail.com>
Thu, 30 Oct 2014 14:21:47 +0000 (14:21 +0000)
committer Robert Khasanov <rob.khasanov@gmail.com>
Thu, 30 Oct 2014 14:21:47 +0000 (14:21 +0000)
Refactored the avx512_fp_broadcast multiclass to use AVX512_maskable.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220908 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86InstrAVX512.td
test/MC/X86/avx512-encodings.s
test/MC/X86/x86-64-avx512f_vl.s

index 61e6bc502c17d4cab81849e6a922e2caba467dca..3ff37d4537673e60dfb6307e2766191ff8b3fde1 100644 (file)
@@ -129,6 +129,10 @@ def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                              v4i32x_info>;
 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                              v2i64x_info>;
+def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
+                                             v4f32x_info>;
+def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
+                                             v2f64x_info>;
 
 // This multiclass generates the masking variants from the non-masking
 // variant.  It only provides the assembly pieces for the masking variants.
@@ -573,36 +577,57 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
 //===---------------------------------------------------------------------===//
 // AVX-512 BROADCAST
 //---
-multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr, 
-                         RegisterClass DestRC,
-                         RegisterClass SrcRC, X86MemOperand x86memop> {
-  def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
-         !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
-         []>, EVEX;
-  def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
-        !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),[]>, EVEX;
+multiclass avx512_fp_broadcast<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
+                              ValueType svt, X86VectorVTInfo _> {
+  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                   (ins SrcRC:$src), "vbroadcast"## !subst("p", "s", _.Suffix),
+                   "$src", "$src", (_.VT (OpNode (svt SrcRC:$src)))>,
+                   T8PD, EVEX;
+
+  let mayLoad = 1 in {
+    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                     (ins _.ScalarMemOp:$src),
+                     "vbroadcast"##!subst("p", "s", _.Suffix), "$src", "$src",
+                     (_.VT (OpNode (_.ScalarLdFrag addr:$src)))>,
+                     T8PD, EVEX;
+  }
 }
+
+multiclass avx512_fp_broadcast_vl<bits<8> opc, SDNode OpNode,
+                                  AVX512VLVectorVTInfo _> {
+  defm Z  : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info512>,
+                             EVEX_V512;
+
+  let Predicates = [HasVLX] in {
+    defm Z256  : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info256>,
+                                  EVEX_V256;
+  }
+}
+
 let ExeDomain = SSEPackedSingle in {
-  defm VBROADCASTSSZ  : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
-                                       VR128X, f32mem>,
-                                       EVEX_V512, EVEX_CD8<32, CD8VT1>;
+  defm VBROADCASTSS  : avx512_fp_broadcast_vl<0x18, X86VBroadcast,
+                              avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>;
+   let Predicates = [HasVLX] in {
+     defm VBROADCASTSSZ128  : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X,
+                                     v4f32, v4f32x_info>, EVEX_V128,
+                                     EVEX_CD8<32, CD8VT1>;
+   }
 }
 
 let ExeDomain = SSEPackedDouble in {
-  defm VBROADCASTSDZ  : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
-                                       VR128X, f64mem>,
-                                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+  defm VBROADCASTSD  : avx512_fp_broadcast_vl<0x19, X86VBroadcast,
+                              avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>;
 }
 
 def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
-          (VBROADCASTSSZrm addr:$src)>;
+          (VBROADCASTSSZm addr:$src)>;
 def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
-          (VBROADCASTSDZrm addr:$src)>;
+          (VBROADCASTSDZm addr:$src)>;
 
 def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
-          (VBROADCASTSSZrm addr:$src)>;
+          (VBROADCASTSSZm addr:$src)>;
 def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
-          (VBROADCASTSDZrm addr:$src)>;
+          (VBROADCASTSDZm addr:$src)>;
 
 multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
                           RegisterClass SrcRC, RegisterClass KRC> {
@@ -711,14 +736,14 @@ def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
           (VPBROADCASTQZrr VR128X:$src)>;
 
 def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
-          (VBROADCASTSSZrr VR128X:$src)>;
+          (VBROADCASTSSZr VR128X:$src)>;
 def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
-          (VBROADCASTSDZrr VR128X:$src)>;
+          (VBROADCASTSDZr VR128X:$src)>;
 
 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
-          (VBROADCASTSSZrr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
+          (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
 def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
-          (VBROADCASTSDZrr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
+          (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
 
 def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))),
           (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
@@ -726,16 +751,16 @@ def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))),
           (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
 
 def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
-          (VBROADCASTSSZrr VR128X:$src)>;
+          (VBROADCASTSSZr VR128X:$src)>;
 def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
-          (VBROADCASTSDZrr VR128X:$src)>;
+          (VBROADCASTSDZr VR128X:$src)>;
     
 // Provide fallback in case the load node that is used in the patterns above
 // is used by additional users, which prevents the pattern selection.
 def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
-          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
+          (VBROADCASTSSZr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
 def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
-          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+          (VBROADCASTSDZr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
 
 
 let Predicates = [HasAVX512] in {
index 231226f347ea9b5c8acc06660c0663a4a314de6e..c734da8fddf08faf28a85aafad345f1be3553aee 100644 (file)
 // CHECK:  encoding: [0x62,0xe1,0x14,0x58,0x58,0x92,0xfc,0xfd,0xff,0xff]
           vaddps -516(%rdx){1to16}, %zmm13, %zmm18
 
+// CHECK: vbroadcastsd (%rcx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x19,0x31]
+          vbroadcastsd (%rcx), %zmm30
+
+// CHECK: vbroadcastsd (%rcx), %zmm30 {%k4}
+// CHECK:  encoding: [0x62,0x62,0xfd,0x4c,0x19,0x31]
+          vbroadcastsd (%rcx), %zmm30 {%k4}
+
+// CHECK: vbroadcastsd (%rcx), %zmm30 {%k4} {z}
+// CHECK:  encoding: [0x62,0x62,0xfd,0xcc,0x19,0x31]
+          vbroadcastsd (%rcx), %zmm30 {%k4} {z}
+
+// CHECK: vbroadcastsd 291(%rax,%r14,8), %zmm30
+// CHECK:  encoding: [0x62,0x22,0xfd,0x48,0x19,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcastsd 291(%rax,%r14,8), %zmm30
+
+// CHECK: vbroadcastsd 1016(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x19,0x72,0x7f]
+          vbroadcastsd 1016(%rdx), %zmm30
+
+// CHECK: vbroadcastsd 1024(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x19,0xb2,0x00,0x04,0x00,0x00]
+          vbroadcastsd 1024(%rdx), %zmm30
+
+// CHECK: vbroadcastsd -1024(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x19,0x72,0x80]
+          vbroadcastsd -1024(%rdx), %zmm30
+
+// CHECK: vbroadcastsd -1032(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x19,0xb2,0xf8,0xfb,0xff,0xff]
+          vbroadcastsd -1032(%rdx), %zmm30
+
+// CHECK: vbroadcastsd %xmm22, %zmm21
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x48,0x19,0xee]
+          vbroadcastsd %xmm22, %zmm21
+
+// CHECK: vbroadcastsd %xmm22, %zmm21 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x4f,0x19,0xee]
+          vbroadcastsd %xmm22, %zmm21 {%k7}
+
+// CHECK: vbroadcastsd %xmm22, %zmm21 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0xcf,0x19,0xee]
+          vbroadcastsd %xmm22, %zmm21 {%k7} {z}
+
+// CHECK: vbroadcastss (%rcx), %zmm3
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x18,0x19]
+          vbroadcastss (%rcx), %zmm3
+
+// CHECK: vbroadcastss (%rcx), %zmm3 {%k4}
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x4c,0x18,0x19]
+          vbroadcastss (%rcx), %zmm3 {%k4}
+
+// CHECK: vbroadcastss (%rcx), %zmm3 {%k4} {z}
+// CHECK:  encoding: [0x62,0xf2,0x7d,0xcc,0x18,0x19]
+          vbroadcastss (%rcx), %zmm3 {%k4} {z}
+
+// CHECK: vbroadcastss 291(%rax,%r14,8), %zmm3
+// CHECK:  encoding: [0x62,0xb2,0x7d,0x48,0x18,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcastss 291(%rax,%r14,8), %zmm3
+
+// CHECK: vbroadcastss 508(%rdx), %zmm3
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x18,0x5a,0x7f]
+          vbroadcastss 508(%rdx), %zmm3
+
+// CHECK: vbroadcastss 512(%rdx), %zmm3
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x18,0x9a,0x00,0x02,0x00,0x00]
+          vbroadcastss 512(%rdx), %zmm3
+
+// CHECK: vbroadcastss -512(%rdx), %zmm3
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x18,0x5a,0x80]
+          vbroadcastss -512(%rdx), %zmm3
+
+// CHECK: vbroadcastss -516(%rdx), %zmm3
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x18,0x9a,0xfc,0xfd,0xff,0xff]
+          vbroadcastss -516(%rdx), %zmm3
+
+// CHECK: vbroadcastss %xmm18, %zmm18
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x48,0x18,0xd2]
+          vbroadcastss %xmm18, %zmm18
+
+// CHECK: vbroadcastss %xmm18, %zmm18 {%k2}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x4a,0x18,0xd2]
+          vbroadcastss %xmm18, %zmm18 {%k2}
+
+// CHECK: vbroadcastss %xmm18, %zmm18 {%k2} {z}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0xca,0x18,0xd2]
+          vbroadcastss %xmm18, %zmm18 {%k2} {z}
+
 // CHECK: vdivpd %zmm11, %zmm6, %zmm18
 // CHECK:  encoding: [0x62,0xc1,0xcd,0x48,0x5e,0xd3]
           vdivpd %zmm11, %zmm6, %zmm18
index a0ba3b365b218e5979f2cf0e56709733b457b649..973a553a8abb37bfa1f635185b4acf286f1cd143 100644 (file)
 // CHECK:  encoding: [0x62,0x61,0x2c,0x30,0x58,0x8a,0xfc,0xfd,0xff,0xff]
           vaddps -516(%rdx){1to8}, %ymm26, %ymm25
 
+// CHECK: vbroadcastsd (%rcx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x19,0x31]
+          vbroadcastsd (%rcx), %ymm22
+
+// CHECK: vbroadcastsd (%rcx), %ymm22 {%k5}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x2d,0x19,0x31]
+          vbroadcastsd (%rcx), %ymm22 {%k5}
+
+// CHECK: vbroadcastsd (%rcx), %ymm22 {%k5} {z}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0xad,0x19,0x31]
+          vbroadcastsd (%rcx), %ymm22 {%k5} {z}
+
+// CHECK: vbroadcastsd 291(%rax,%r14,8), %ymm22
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x19,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcastsd 291(%rax,%r14,8), %ymm22
+
+// CHECK: vbroadcastsd 1016(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x19,0x72,0x7f]
+          vbroadcastsd 1016(%rdx), %ymm22
+
+// CHECK: vbroadcastsd 1024(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x19,0xb2,0x00,0x04,0x00,0x00]
+          vbroadcastsd 1024(%rdx), %ymm22
+
+// CHECK: vbroadcastsd -1024(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x19,0x72,0x80]
+          vbroadcastsd -1024(%rdx), %ymm22
+
+// CHECK: vbroadcastsd -1032(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x19,0xb2,0xf8,0xfb,0xff,0xff]
+          vbroadcastsd -1032(%rdx), %ymm22
+
+// CHECK: vbroadcastsd %xmm17, %ymm19
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x19,0xd9]
+          vbroadcastsd %xmm17, %ymm19
+
+// CHECK: vbroadcastsd %xmm17, %ymm19 {%k6}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x2e,0x19,0xd9]
+          vbroadcastsd %xmm17, %ymm19 {%k6}
+
+// CHECK: vbroadcastsd %xmm17, %ymm19 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0xae,0x19,0xd9]
+          vbroadcastsd %xmm17, %ymm19 {%k6} {z}
+
+// CHECK: vbroadcastss (%rcx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x18,0x29]
+          vbroadcastss (%rcx), %xmm21
+
+// CHECK: vbroadcastss (%rcx), %xmm21 {%k2}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x0a,0x18,0x29]
+          vbroadcastss (%rcx), %xmm21 {%k2}
+
+// CHECK: vbroadcastss (%rcx), %xmm21 {%k2} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x8a,0x18,0x29]
+          vbroadcastss (%rcx), %xmm21 {%k2} {z}
+
+// CHECK: vbroadcastss 291(%rax,%r14,8), %xmm21
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x18,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcastss 291(%rax,%r14,8), %xmm21
+
+// CHECK: vbroadcastss 508(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x18,0x6a,0x7f]
+          vbroadcastss 508(%rdx), %xmm21
+
+// CHECK: vbroadcastss 512(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x18,0xaa,0x00,0x02,0x00,0x00]
+          vbroadcastss 512(%rdx), %xmm21
+
+// CHECK: vbroadcastss -512(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x18,0x6a,0x80]
+          vbroadcastss -512(%rdx), %xmm21
+
+// CHECK: vbroadcastss -516(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x18,0xaa,0xfc,0xfd,0xff,0xff]
+          vbroadcastss -516(%rdx), %xmm21
+
+// CHECK: vbroadcastss (%rcx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x18,0x31]
+          vbroadcastss (%rcx), %ymm30
+
+// CHECK: vbroadcastss (%rcx), %ymm30 {%k1}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x29,0x18,0x31]
+          vbroadcastss (%rcx), %ymm30 {%k1}
+
+// CHECK: vbroadcastss (%rcx), %ymm30 {%k1} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0xa9,0x18,0x31]
+          vbroadcastss (%rcx), %ymm30 {%k1} {z}
+
+// CHECK: vbroadcastss 291(%rax,%r14,8), %ymm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x18,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcastss 291(%rax,%r14,8), %ymm30
+
+// CHECK: vbroadcastss 508(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x18,0x72,0x7f]
+          vbroadcastss 508(%rdx), %ymm30
+
+// CHECK: vbroadcastss 512(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x18,0xb2,0x00,0x02,0x00,0x00]
+          vbroadcastss 512(%rdx), %ymm30
+
+// CHECK: vbroadcastss -512(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x18,0x72,0x80]
+          vbroadcastss -512(%rdx), %ymm30
+
+// CHECK: vbroadcastss -516(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x18,0xb2,0xfc,0xfd,0xff,0xff]
+          vbroadcastss -516(%rdx), %ymm30
+
+// CHECK: vbroadcastss %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x02,0x7d,0x08,0x18,0xc0]
+          vbroadcastss %xmm24, %xmm24
+
+// CHECK: vbroadcastss %xmm24, %xmm24 {%k2}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x0a,0x18,0xc0]
+          vbroadcastss %xmm24, %xmm24 {%k2}
+
+// CHECK: vbroadcastss %xmm24, %xmm24 {%k2} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x8a,0x18,0xc0]
+          vbroadcastss %xmm24, %xmm24 {%k2} {z}
+
+// CHECK: vbroadcastss %xmm28, %ymm24
+// CHECK:  encoding: [0x62,0x02,0x7d,0x28,0x18,0xc4]
+          vbroadcastss %xmm28, %ymm24
+
+// CHECK: vbroadcastss %xmm28, %ymm24 {%k6}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x2e,0x18,0xc4]
+          vbroadcastss %xmm28, %ymm24 {%k6}
+
+// CHECK: vbroadcastss %xmm28, %ymm24 {%k6} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0xae,0x18,0xc4]
+          vbroadcastss %xmm28, %ymm24 {%k6} {z}
+
 // CHECK: vdivpd %xmm27, %xmm18, %xmm19
 // CHECK:  encoding: [0x62,0x81,0xed,0x00,0x5e,0xdb]
           vdivpd %xmm27, %xmm18, %xmm19