From: Robert Khasanov Date: Thu, 30 Oct 2014 14:21:47 +0000 (+0000) Subject: [AVX512] Added VBROADCAST{SS/SD} encoding for VL subset. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=7d18d46ef2613ff317cd47bbab5f8388d3451f08;p=oota-llvm.git [AVX512] Added VBROADCAST{SS/SD} encoding for VL subset. Refactored through AVX512_maskable git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220908 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 61e6bc502c1..3ff37d45376 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -129,6 +129,10 @@ def avx512vl_i32_info : AVX512VLVectorVTInfo; def avx512vl_i64_info : AVX512VLVectorVTInfo; +def avx512vl_f32_info : AVX512VLVectorVTInfo; +def avx512vl_f64_info : AVX512VLVectorVTInfo; // This multiclass generates the masking variants from the non-masking // variant. It only provides the assembly pieces for the masking variants. @@ -573,36 +577,57 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs), //===---------------------------------------------------------------------===// // AVX-512 BROADCAST //--- -multiclass avx512_fp_broadcast opc, string OpcodeStr, - RegisterClass DestRC, - RegisterClass SrcRC, X86MemOperand x86memop> { - def rr : AVX5128I, EVEX; - def rm : AVX5128I, EVEX; +multiclass avx512_fp_broadcast opc, SDNode OpNode, RegisterClass SrcRC, + ValueType svt, X86VectorVTInfo _> { + defm r : AVX512_maskable, + T8PD, EVEX; + + let mayLoad = 1 in { + defm m : AVX512_maskable, + T8PD, EVEX; + } } + +multiclass avx512_fp_broadcast_vl opc, SDNode OpNode, + AVX512VLVectorVTInfo _> { + defm Z : avx512_fp_broadcast, + EVEX_V512; + + let Predicates = [HasVLX] in { + defm Z256 : avx512_fp_broadcast, + EVEX_V256; + } +} + let ExeDomain = SSEPackedSingle in { - defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512, - VR128X, f32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; + defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast, + avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>; + let Predicates = [HasVLX] in { + defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X, + v4f32, v4f32x_info>, EVEX_V128, + EVEX_CD8<32, CD8VT1>; + } } let ExeDomain = SSEPackedDouble in { - defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512, - VR128X, f64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; + defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast, + avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>; } def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))), - (VBROADCASTSSZrm addr:$src)>; + (VBROADCASTSSZm addr:$src)>; def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))), - (VBROADCASTSDZrm addr:$src)>; + (VBROADCASTSDZm addr:$src)>; def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src), - (VBROADCASTSSZrm addr:$src)>; + (VBROADCASTSSZm addr:$src)>; def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src), - (VBROADCASTSDZrm addr:$src)>; + (VBROADCASTSDZm addr:$src)>; multiclass avx512_int_broadcast_reg opc, string OpcodeStr, RegisterClass SrcRC, RegisterClass KRC> { @@ -711,14 +736,14 @@ def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))), (VPBROADCASTQZrr VR128X:$src)>; def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))), - (VBROADCASTSSZrr VR128X:$src)>; + (VBROADCASTSSZr VR128X:$src)>; def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))), - (VBROADCASTSDZrr VR128X:$src)>; + (VBROADCASTSDZr VR128X:$src)>; def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), - (VBROADCASTSSZrr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; + (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))), - (VBROADCASTSDZrr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>; + (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>; def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))), (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>; @@ -726,16 +751,16 @@ def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))), (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>; def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))), - (VBROADCASTSSZrr VR128X:$src)>; + (VBROADCASTSSZr VR128X:$src)>; def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))), - (VBROADCASTSDZrr VR128X:$src)>; + (VBROADCASTSDZr VR128X:$src)>; // Provide fallback in case the load node that is used in the patterns above // is used by additional users, which prevents the pattern selection. def : Pat<(v16f32 (X86VBroadcast FR32X:$src)), - (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>; + (VBROADCASTSSZr (COPY_TO_REGCLASS FR32X:$src, VR128X))>; def : Pat<(v8f64 (X86VBroadcast FR64X:$src)), - (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>; + (VBROADCASTSDZr (COPY_TO_REGCLASS FR64X:$src, VR128X))>; let Predicates = [HasAVX512] in { diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index 231226f347e..c734da8fddf 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -113,6 +113,94 @@ // CHECK: encoding: [0x62,0xe1,0x14,0x58,0x58,0x92,0xfc,0xfd,0xff,0xff] vaddps -516(%rdx){1to16}, %zmm13, %zmm18 +// CHECK: vbroadcastsd (%rcx), %zmm30 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0x31] + vbroadcastsd (%rcx), %zmm30 + +// CHECK: vbroadcastsd (%rcx), %zmm30 {%k4} +// CHECK: encoding: [0x62,0x62,0xfd,0x4c,0x19,0x31] + vbroadcastsd (%rcx), %zmm30 {%k4} + +// CHECK: vbroadcastsd (%rcx), %zmm30 {%k4} {z} +// CHECK: encoding: [0x62,0x62,0xfd,0xcc,0x19,0x31] + vbroadcastsd (%rcx), %zmm30 {%k4} {z} + +// CHECK: vbroadcastsd 291(%rax,%r14,8), %zmm30 +// CHECK: encoding: [0x62,0x22,0xfd,0x48,0x19,0xb4,0xf0,0x23,0x01,0x00,0x00] + vbroadcastsd 291(%rax,%r14,8), %zmm30 + +// CHECK: vbroadcastsd 1016(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0x72,0x7f] + vbroadcastsd 1016(%rdx), %zmm30 + +// CHECK: vbroadcastsd 1024(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0xb2,0x00,0x04,0x00,0x00] + vbroadcastsd 1024(%rdx), %zmm30 + +// CHECK: vbroadcastsd -1024(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0x72,0x80] + vbroadcastsd -1024(%rdx), %zmm30 + +// CHECK: vbroadcastsd -1032(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0xb2,0xf8,0xfb,0xff,0xff] + vbroadcastsd -1032(%rdx), %zmm30 + +// CHECK: vbroadcastsd %xmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x19,0xee] + vbroadcastsd %xmm22, %zmm21 + +// CHECK: vbroadcastsd %xmm22, %zmm21 {%k7} +// CHECK: encoding: [0x62,0xa2,0xfd,0x4f,0x19,0xee] + vbroadcastsd %xmm22, %zmm21 {%k7} + +// CHECK: vbroadcastsd %xmm22, %zmm21 {%k7} {z} +// CHECK: encoding: [0x62,0xa2,0xfd,0xcf,0x19,0xee] + vbroadcastsd %xmm22, %zmm21 {%k7} {z} + +// CHECK: vbroadcastss (%rcx), %zmm3 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x19] + vbroadcastss (%rcx), %zmm3 + +// CHECK: vbroadcastss (%rcx), %zmm3 {%k4} +// CHECK: encoding: [0x62,0xf2,0x7d,0x4c,0x18,0x19] + vbroadcastss (%rcx), %zmm3 {%k4} + +// CHECK: vbroadcastss (%rcx), %zmm3 {%k4} {z} +// CHECK: encoding: [0x62,0xf2,0x7d,0xcc,0x18,0x19] + vbroadcastss (%rcx), %zmm3 {%k4} {z} + +// CHECK: vbroadcastss 291(%rax,%r14,8), %zmm3 +// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x18,0x9c,0xf0,0x23,0x01,0x00,0x00] + vbroadcastss 291(%rax,%r14,8), %zmm3 + +// CHECK: vbroadcastss 508(%rdx), %zmm3 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x5a,0x7f] + vbroadcastss 508(%rdx), %zmm3 + +// CHECK: vbroadcastss 512(%rdx), %zmm3 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x9a,0x00,0x02,0x00,0x00] + vbroadcastss 512(%rdx), %zmm3 + +// CHECK: vbroadcastss -512(%rdx), %zmm3 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x5a,0x80] + vbroadcastss -512(%rdx), %zmm3 + +// CHECK: vbroadcastss -516(%rdx), %zmm3 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x9a,0xfc,0xfd,0xff,0xff] + vbroadcastss -516(%rdx), %zmm3 + +// CHECK: vbroadcastss %xmm18, %zmm18 +// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x18,0xd2] + vbroadcastss %xmm18, %zmm18 + +// CHECK: vbroadcastss %xmm18, %zmm18 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x18,0xd2] + vbroadcastss %xmm18, %zmm18 {%k2} + +// CHECK: vbroadcastss %xmm18, %zmm18 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0x7d,0xca,0x18,0xd2] + vbroadcastss %xmm18, %zmm18 {%k2} {z} + // CHECK: vdivpd %zmm11, %zmm6, %zmm18 // CHECK: encoding: [0x62,0xc1,0xcd,0x48,0x5e,0xd3] vdivpd %zmm11, %zmm6, %zmm18 diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s index a0ba3b365b2..973a553a8ab 100644 --- a/test/MC/X86/x86-64-avx512f_vl.s +++ b/test/MC/X86/x86-64-avx512f_vl.s @@ -224,6 +224,138 @@ // CHECK: encoding: [0x62,0x61,0x2c,0x30,0x58,0x8a,0xfc,0xfd,0xff,0xff] vaddps -516(%rdx){1to8}, %ymm26, %ymm25 +// CHECK: vbroadcastsd (%rcx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0x31] + vbroadcastsd (%rcx), %ymm22 + +// CHECK: vbroadcastsd (%rcx), %ymm22 {%k5} +// CHECK: encoding: [0x62,0xe2,0xfd,0x2d,0x19,0x31] + vbroadcastsd (%rcx), %ymm22 {%k5} + +// CHECK: vbroadcastsd (%rcx), %ymm22 {%k5} {z} +// CHECK: encoding: [0x62,0xe2,0xfd,0xad,0x19,0x31] + vbroadcastsd (%rcx), %ymm22 {%k5} {z} + +// CHECK: vbroadcastsd 291(%rax,%r14,8), %ymm22 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x19,0xb4,0xf0,0x23,0x01,0x00,0x00] + vbroadcastsd 291(%rax,%r14,8), %ymm22 + +// CHECK: vbroadcastsd 1016(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0x72,0x7f] + vbroadcastsd 1016(%rdx), %ymm22 + +// CHECK: vbroadcastsd 1024(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0xb2,0x00,0x04,0x00,0x00] + vbroadcastsd 1024(%rdx), %ymm22 + +// CHECK: vbroadcastsd -1024(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0x72,0x80] + vbroadcastsd -1024(%rdx), %ymm22 + +// CHECK: vbroadcastsd -1032(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0xb2,0xf8,0xfb,0xff,0xff] + vbroadcastsd -1032(%rdx), %ymm22 + +// CHECK: vbroadcastsd %xmm17, %ymm19 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x19,0xd9] + vbroadcastsd %xmm17, %ymm19 + +// CHECK: vbroadcastsd %xmm17, %ymm19 {%k6} +// CHECK: encoding: [0x62,0xa2,0xfd,0x2e,0x19,0xd9] + vbroadcastsd %xmm17, %ymm19 {%k6} + +// CHECK: vbroadcastsd %xmm17, %ymm19 {%k6} {z} +// CHECK: encoding: [0x62,0xa2,0xfd,0xae,0x19,0xd9] + vbroadcastsd %xmm17, %ymm19 {%k6} {z} + +// CHECK: vbroadcastss (%rcx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0x29] + vbroadcastss (%rcx), %xmm21 + +// CHECK: vbroadcastss (%rcx), %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x18,0x29] + vbroadcastss (%rcx), %xmm21 {%k2} + +// CHECK: vbroadcastss (%rcx), %xmm21 {%k2} {z} +// CHECK: encoding: [0x62,0xe2,0x7d,0x8a,0x18,0x29] + vbroadcastss (%rcx), %xmm21 {%k2} {z} + +// CHECK: vbroadcastss 291(%rax,%r14,8), %xmm21 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x18,0xac,0xf0,0x23,0x01,0x00,0x00] + vbroadcastss 291(%rax,%r14,8), %xmm21 + +// CHECK: vbroadcastss 508(%rdx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0x6a,0x7f] + vbroadcastss 508(%rdx), %xmm21 + +// CHECK: vbroadcastss 512(%rdx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0xaa,0x00,0x02,0x00,0x00] + vbroadcastss 512(%rdx), %xmm21 + +// CHECK: vbroadcastss -512(%rdx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0x6a,0x80] + vbroadcastss -512(%rdx), %xmm21 + +// CHECK: vbroadcastss -516(%rdx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0xaa,0xfc,0xfd,0xff,0xff] + vbroadcastss -516(%rdx), %xmm21 + +// CHECK: vbroadcastss (%rcx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0x31] + vbroadcastss (%rcx), %ymm30 + +// CHECK: vbroadcastss (%rcx), %ymm30 {%k1} +// CHECK: encoding: [0x62,0x62,0x7d,0x29,0x18,0x31] + vbroadcastss (%rcx), %ymm30 {%k1} + +// CHECK: vbroadcastss (%rcx), %ymm30 {%k1} {z} +// CHECK: encoding: [0x62,0x62,0x7d,0xa9,0x18,0x31] + vbroadcastss (%rcx), %ymm30 {%k1} {z} + +// CHECK: vbroadcastss 291(%rax,%r14,8), %ymm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x18,0xb4,0xf0,0x23,0x01,0x00,0x00] + vbroadcastss 291(%rax,%r14,8), %ymm30 + +// CHECK: vbroadcastss 508(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0x72,0x7f] + vbroadcastss 508(%rdx), %ymm30 + +// CHECK: vbroadcastss 512(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0xb2,0x00,0x02,0x00,0x00] + vbroadcastss 512(%rdx), %ymm30 + +// CHECK: vbroadcastss -512(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0x72,0x80] + vbroadcastss -512(%rdx), %ymm30 + +// CHECK: vbroadcastss -516(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0xb2,0xfc,0xfd,0xff,0xff] + vbroadcastss -516(%rdx), %ymm30 + +// CHECK: vbroadcastss %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x18,0xc0] + vbroadcastss %xmm24, %xmm24 + +// CHECK: vbroadcastss %xmm24, %xmm24 {%k2} +// CHECK: encoding: [0x62,0x02,0x7d,0x0a,0x18,0xc0] + vbroadcastss %xmm24, %xmm24 {%k2} + +// CHECK: vbroadcastss %xmm24, %xmm24 {%k2} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0x8a,0x18,0xc0] + vbroadcastss %xmm24, %xmm24 {%k2} {z} + +// CHECK: vbroadcastss %xmm28, %ymm24 +// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x18,0xc4] + vbroadcastss %xmm28, %ymm24 + +// CHECK: vbroadcastss %xmm28, %ymm24 {%k6} +// CHECK: encoding: [0x62,0x02,0x7d,0x2e,0x18,0xc4] + vbroadcastss %xmm28, %ymm24 {%k6} + +// CHECK: vbroadcastss %xmm28, %ymm24 {%k6} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0xae,0x18,0xc4] + vbroadcastss %xmm28, %ymm24 {%k6} {z} + // CHECK: vdivpd %xmm27, %xmm18, %xmm19 // CHECK: encoding: [0x62,0x81,0xed,0x00,0x5e,0xdb] vdivpd %xmm27, %xmm18, %xmm19