From: Robert Khasanov Date: Tue, 28 Oct 2014 16:37:13 +0000 (+0000) Subject: [AVX-512] Expanded rsqrt/rcp instructions to VL subset. X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=59cb03d3298bc830dfda5b4ca4981d179c31b0fe [AVX-512] Expanded rsqrt/rcp instructions to VL subset. Refactored multiclass through AVX512_maskable git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220783 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 3dbc3d2abd8..04dc681432a 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -104,11 +104,15 @@ def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">; def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">; def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">; def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">; +def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">; +def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">; def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">; def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">; def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">; def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">; +def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">; +def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">; class AVX512VLVectorVTInfo { @@ -4116,26 +4120,49 @@ def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1), /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, - RegisterClass RC, X86MemOperand x86memop, - PatFrag mem_frag, ValueType OpVt> { - def r : AVX5128I, - EVEX; - def m : AVX5128I, - EVEX; -} -defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem, - memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem, - memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; -defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem, - memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem, - memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + X86VectorVTInfo _> { + defm r: AVX512_maskable, EVEX, T8PD; + let mayLoad = 1 in { + defm m: AVX512_maskable, EVEX, T8PD; + defm mb: AVX512_maskable, + EVEX, T8PD, EVEX_B; + } +} + +multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode> { + defm PSZ : avx512_fp14_p, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp14_p, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + + // Define only if AVX512VL feature is present. + let Predicates = [HasVLX] in { + defm PSZ128 : avx512_fp14_p, + EVEX_V128, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp14_p, + EVEX_V256, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp14_p, + EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp14_p, + EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; + } +} + +defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>; +defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>; def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src), (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))), diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index b60b72fd6bb..64e615377b1 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -3193,6 +3193,230 @@ // CHECK: encoding: [0x62,0xf1,0xcd,0x50,0xef,0xba,0xf8,0xfb,0xff,0xff] vpxorq -1032(%rdx){1to8}, %zmm22, %zmm7 +// CHECK: vrcp14pd %zmm4, %zmm13 +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x4c,0xec] + vrcp14pd %zmm4, %zmm13 + +// CHECK: vrcp14pd %zmm4, %zmm13 {%k5} +// CHECK: encoding: [0x62,0x72,0xfd,0x4d,0x4c,0xec] + vrcp14pd %zmm4, %zmm13 {%k5} + +// CHECK: vrcp14pd %zmm4, %zmm13 {%k5} {z} +// CHECK: encoding: [0x62,0x72,0xfd,0xcd,0x4c,0xec] + vrcp14pd %zmm4, %zmm13 {%k5} {z} + +// CHECK: vrcp14pd (%rcx), %zmm13 +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x4c,0x29] + vrcp14pd (%rcx), %zmm13 + +// CHECK: vrcp14pd 291(%rax,%r14,8), %zmm13 +// CHECK: encoding: [0x62,0x32,0xfd,0x48,0x4c,0xac,0xf0,0x23,0x01,0x00,0x00] + vrcp14pd 291(%rax,%r14,8), %zmm13 + +// CHECK: vrcp14pd (%rcx){1to8}, %zmm13 +// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x4c,0x29] + vrcp14pd (%rcx){1to8}, %zmm13 + +// CHECK: vrcp14pd 8128(%rdx), %zmm13 +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x4c,0x6a,0x7f] + vrcp14pd 8128(%rdx), %zmm13 + +// CHECK: vrcp14pd 8192(%rdx), %zmm13 +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x4c,0xaa,0x00,0x20,0x00,0x00] + vrcp14pd 8192(%rdx), %zmm13 + +// CHECK: vrcp14pd -8192(%rdx), %zmm13 +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x4c,0x6a,0x80] + vrcp14pd -8192(%rdx), %zmm13 + +// CHECK: vrcp14pd -8256(%rdx), %zmm13 +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x4c,0xaa,0xc0,0xdf,0xff,0xff] + vrcp14pd -8256(%rdx), %zmm13 + +// CHECK: vrcp14pd 1016(%rdx){1to8}, %zmm13 +// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x4c,0x6a,0x7f] + vrcp14pd 1016(%rdx){1to8}, %zmm13 + +// CHECK: vrcp14pd 1024(%rdx){1to8}, %zmm13 +// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x4c,0xaa,0x00,0x04,0x00,0x00] + vrcp14pd 1024(%rdx){1to8}, %zmm13 + +// CHECK: vrcp14pd -1024(%rdx){1to8}, %zmm13 +// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x4c,0x6a,0x80] + vrcp14pd -1024(%rdx){1to8}, %zmm13 + +// CHECK: vrcp14pd -1032(%rdx){1to8}, %zmm13 +// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x4c,0xaa,0xf8,0xfb,0xff,0xff] + vrcp14pd -1032(%rdx){1to8}, %zmm13 + +// CHECK: vrcp14ps %zmm25, %zmm10 +// CHECK: encoding: [0x62,0x12,0x7d,0x48,0x4c,0xd1] + vrcp14ps %zmm25, %zmm10 + +// CHECK: vrcp14ps %zmm25, %zmm10 {%k1} +// CHECK: encoding: [0x62,0x12,0x7d,0x49,0x4c,0xd1] + vrcp14ps %zmm25, %zmm10 {%k1} + +// CHECK: vrcp14ps %zmm25, %zmm10 {%k1} {z} +// CHECK: encoding: [0x62,0x12,0x7d,0xc9,0x4c,0xd1] + vrcp14ps %zmm25, %zmm10 {%k1} {z} + +// CHECK: vrcp14ps (%rcx), %zmm10 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x4c,0x11] + vrcp14ps (%rcx), %zmm10 + +// CHECK: vrcp14ps 291(%rax,%r14,8), %zmm10 +// CHECK: encoding: [0x62,0x32,0x7d,0x48,0x4c,0x94,0xf0,0x23,0x01,0x00,0x00] + vrcp14ps 291(%rax,%r14,8), %zmm10 + +// CHECK: vrcp14ps (%rcx){1to16}, %zmm10 +// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x4c,0x11] + vrcp14ps (%rcx){1to16}, %zmm10 + +// CHECK: vrcp14ps 8128(%rdx), %zmm10 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x4c,0x52,0x7f] + vrcp14ps 8128(%rdx), %zmm10 + +// CHECK: vrcp14ps 8192(%rdx), %zmm10 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x4c,0x92,0x00,0x20,0x00,0x00] + vrcp14ps 8192(%rdx), %zmm10 + +// CHECK: vrcp14ps -8192(%rdx), %zmm10 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x4c,0x52,0x80] + vrcp14ps -8192(%rdx), %zmm10 + +// CHECK: vrcp14ps -8256(%rdx), %zmm10 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x4c,0x92,0xc0,0xdf,0xff,0xff] + vrcp14ps -8256(%rdx), %zmm10 + +// CHECK: vrcp14ps 508(%rdx){1to16}, %zmm10 +// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x4c,0x52,0x7f] + vrcp14ps 508(%rdx){1to16}, %zmm10 + +// CHECK: vrcp14ps 512(%rdx){1to16}, %zmm10 +// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x4c,0x92,0x00,0x02,0x00,0x00] + vrcp14ps 512(%rdx){1to16}, %zmm10 + +// CHECK: vrcp14ps -512(%rdx){1to16}, %zmm10 +// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x4c,0x52,0x80] + vrcp14ps -512(%rdx){1to16}, %zmm10 + +// CHECK: vrcp14ps -516(%rdx){1to16}, %zmm10 +// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x4c,0x92,0xfc,0xfd,0xff,0xff] + vrcp14ps -516(%rdx){1to16}, %zmm10 + +// CHECK: vrsqrt14pd %zmm14, %zmm19 +// CHECK: encoding: [0x62,0xc2,0xfd,0x48,0x4e,0xde] + vrsqrt14pd %zmm14, %zmm19 + +// CHECK: vrsqrt14pd %zmm14, %zmm19 {%k1} +// CHECK: encoding: [0x62,0xc2,0xfd,0x49,0x4e,0xde] + vrsqrt14pd %zmm14, %zmm19 {%k1} + +// CHECK: vrsqrt14pd %zmm14, %zmm19 {%k1} {z} +// CHECK: encoding: [0x62,0xc2,0xfd,0xc9,0x4e,0xde] + vrsqrt14pd %zmm14, %zmm19 {%k1} {z} + +// CHECK: vrsqrt14pd (%rcx), %zmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x19] + vrsqrt14pd (%rcx), %zmm19 + +// CHECK: vrsqrt14pd 291(%rax,%r14,8), %zmm19 +// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x4e,0x9c,0xf0,0x23,0x01,0x00,0x00] + vrsqrt14pd 291(%rax,%r14,8), %zmm19 + +// CHECK: vrsqrt14pd (%rcx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x19] + vrsqrt14pd (%rcx){1to8}, %zmm19 + +// CHECK: vrsqrt14pd 8128(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x5a,0x7f] + vrsqrt14pd 8128(%rdx), %zmm19 + +// CHECK: vrsqrt14pd 8192(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x9a,0x00,0x20,0x00,0x00] + vrsqrt14pd 8192(%rdx), %zmm19 + +// CHECK: vrsqrt14pd -8192(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x5a,0x80] + vrsqrt14pd -8192(%rdx), %zmm19 + +// CHECK: vrsqrt14pd -8256(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x9a,0xc0,0xdf,0xff,0xff] + vrsqrt14pd -8256(%rdx), %zmm19 + +// CHECK: vrsqrt14pd 1016(%rdx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x5a,0x7f] + vrsqrt14pd 1016(%rdx){1to8}, %zmm19 + +// CHECK: vrsqrt14pd 1024(%rdx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x9a,0x00,0x04,0x00,0x00] + vrsqrt14pd 1024(%rdx){1to8}, %zmm19 + +// CHECK: vrsqrt14pd -1024(%rdx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x5a,0x80] + vrsqrt14pd -1024(%rdx){1to8}, %zmm19 + +// CHECK: vrsqrt14pd -1032(%rdx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x9a,0xf8,0xfb,0xff,0xff] + vrsqrt14pd -1032(%rdx){1to8}, %zmm19 + +// CHECK: vrsqrt14ps %zmm9, %zmm16 +// CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x4e,0xc1] + vrsqrt14ps %zmm9, %zmm16 + +// CHECK: vrsqrt14ps %zmm9, %zmm16 {%k5} +// CHECK: encoding: [0x62,0xc2,0x7d,0x4d,0x4e,0xc1] + vrsqrt14ps %zmm9, %zmm16 {%k5} + +// CHECK: vrsqrt14ps %zmm9, %zmm16 {%k5} {z} +// CHECK: encoding: [0x62,0xc2,0x7d,0xcd,0x4e,0xc1] + vrsqrt14ps %zmm9, %zmm16 {%k5} {z} + +// CHECK: vrsqrt14ps (%rcx), %zmm16 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x01] + vrsqrt14ps (%rcx), %zmm16 + +// CHECK: vrsqrt14ps 291(%rax,%r14,8), %zmm16 +// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x4e,0x84,0xf0,0x23,0x01,0x00,0x00] + vrsqrt14ps 291(%rax,%r14,8), %zmm16 + +// CHECK: vrsqrt14ps (%rcx){1to16}, %zmm16 +// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x01] + vrsqrt14ps (%rcx){1to16}, %zmm16 + +// CHECK: vrsqrt14ps 8128(%rdx), %zmm16 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x42,0x7f] + vrsqrt14ps 8128(%rdx), %zmm16 + +// CHECK: vrsqrt14ps 8192(%rdx), %zmm16 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x82,0x00,0x20,0x00,0x00] + vrsqrt14ps 8192(%rdx), %zmm16 + +// CHECK: vrsqrt14ps -8192(%rdx), %zmm16 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x42,0x80] + vrsqrt14ps -8192(%rdx), %zmm16 + +// CHECK: vrsqrt14ps -8256(%rdx), %zmm16 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x82,0xc0,0xdf,0xff,0xff] + vrsqrt14ps -8256(%rdx), %zmm16 + +// CHECK: vrsqrt14ps 508(%rdx){1to16}, %zmm16 +// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x42,0x7f] + vrsqrt14ps 508(%rdx){1to16}, %zmm16 + +// CHECK: vrsqrt14ps 512(%rdx){1to16}, %zmm16 +// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x82,0x00,0x02,0x00,0x00] + vrsqrt14ps 512(%rdx){1to16}, %zmm16 + +// CHECK: vrsqrt14ps -512(%rdx){1to16}, %zmm16 +// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x42,0x80] + vrsqrt14ps -512(%rdx){1to16}, %zmm16 + +// CHECK: vrsqrt14ps -516(%rdx){1to16}, %zmm16 +// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x82,0xfc,0xfd,0xff,0xff] + vrsqrt14ps -516(%rdx){1to16}, %zmm16 + // CHECK: vsubpd %zmm9, %zmm12, %zmm9 // CHECK: encoding: [0x62,0x51,0x9d,0x48,0x5c,0xc9] vsubpd %zmm9, %zmm12, %zmm9 diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s index b467e11f044..039b4afab55 100644 --- a/test/MC/X86/x86-64-avx512f_vl.s +++ b/test/MC/X86/x86-64-avx512f_vl.s @@ -3984,6 +3984,454 @@ // CHECK: encoding: [0x62,0xe1,0xed,0x30,0xef,0xaa,0xf8,0xfb,0xff,0xff] vpxorq -1032(%rdx){1to4}, %ymm18, %ymm21 +// CHECK: vrcp14pd %xmm29, %xmm18 +// CHECK: encoding: [0x62,0x82,0xfd,0x08,0x4c,0xd5] + vrcp14pd %xmm29, %xmm18 + +// CHECK: vrcp14pd %xmm29, %xmm18 {%k4} +// CHECK: encoding: [0x62,0x82,0xfd,0x0c,0x4c,0xd5] + vrcp14pd %xmm29, %xmm18 {%k4} + +// CHECK: vrcp14pd %xmm29, %xmm18 {%k4} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0x8c,0x4c,0xd5] + vrcp14pd %xmm29, %xmm18 {%k4} {z} + +// CHECK: vrcp14pd (%rcx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x11] + vrcp14pd (%rcx), %xmm18 + +// CHECK: vrcp14pd 291(%rax,%r14,8), %xmm18 +// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x4c,0x94,0xf0,0x23,0x01,0x00,0x00] + vrcp14pd 291(%rax,%r14,8), %xmm18 + +// CHECK: vrcp14pd (%rcx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x11] + vrcp14pd (%rcx){1to2}, %xmm18 + +// CHECK: vrcp14pd 2032(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x52,0x7f] + vrcp14pd 2032(%rdx), %xmm18 + +// CHECK: vrcp14pd 2048(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x92,0x00,0x08,0x00,0x00] + vrcp14pd 2048(%rdx), %xmm18 + +// CHECK: vrcp14pd -2048(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x52,0x80] + vrcp14pd -2048(%rdx), %xmm18 + +// CHECK: vrcp14pd -2064(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x92,0xf0,0xf7,0xff,0xff] + vrcp14pd -2064(%rdx), %xmm18 + +// CHECK: vrcp14pd 1016(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x52,0x7f] + vrcp14pd 1016(%rdx){1to2}, %xmm18 + +// CHECK: vrcp14pd 1024(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x92,0x00,0x04,0x00,0x00] + vrcp14pd 1024(%rdx){1to2}, %xmm18 + +// CHECK: vrcp14pd -1024(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x52,0x80] + vrcp14pd -1024(%rdx){1to2}, %xmm18 + +// CHECK: vrcp14pd -1032(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x92,0xf8,0xfb,0xff,0xff] + vrcp14pd -1032(%rdx){1to2}, %xmm18 + +// CHECK: vrcp14pd %ymm29, %ymm17 +// CHECK: encoding: [0x62,0x82,0xfd,0x28,0x4c,0xcd] + vrcp14pd %ymm29, %ymm17 + +// CHECK: vrcp14pd %ymm29, %ymm17 {%k4} +// CHECK: encoding: [0x62,0x82,0xfd,0x2c,0x4c,0xcd] + vrcp14pd %ymm29, %ymm17 {%k4} + +// CHECK: vrcp14pd %ymm29, %ymm17 {%k4} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0xac,0x4c,0xcd] + vrcp14pd %ymm29, %ymm17 {%k4} {z} + +// CHECK: vrcp14pd (%rcx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x09] + vrcp14pd (%rcx), %ymm17 + +// CHECK: vrcp14pd 291(%rax,%r14,8), %ymm17 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x4c,0x8c,0xf0,0x23,0x01,0x00,0x00] + vrcp14pd 291(%rax,%r14,8), %ymm17 + +// CHECK: vrcp14pd (%rcx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x09] + vrcp14pd (%rcx){1to4}, %ymm17 + +// CHECK: vrcp14pd 4064(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x4a,0x7f] + vrcp14pd 4064(%rdx), %ymm17 + +// CHECK: vrcp14pd 4096(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x8a,0x00,0x10,0x00,0x00] + vrcp14pd 4096(%rdx), %ymm17 + +// CHECK: vrcp14pd -4096(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x4a,0x80] + vrcp14pd -4096(%rdx), %ymm17 + +// CHECK: vrcp14pd -4128(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x8a,0xe0,0xef,0xff,0xff] + vrcp14pd -4128(%rdx), %ymm17 + +// CHECK: vrcp14pd 1016(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x4a,0x7f] + vrcp14pd 1016(%rdx){1to4}, %ymm17 + +// CHECK: vrcp14pd 1024(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x8a,0x00,0x04,0x00,0x00] + vrcp14pd 1024(%rdx){1to4}, %ymm17 + +// CHECK: vrcp14pd -1024(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x4a,0x80] + vrcp14pd -1024(%rdx){1to4}, %ymm17 + +// CHECK: vrcp14pd -1032(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x8a,0xf8,0xfb,0xff,0xff] + vrcp14pd -1032(%rdx){1to4}, %ymm17 + +// CHECK: vrcp14ps %xmm28, %xmm27 +// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x4c,0xdc] + vrcp14ps %xmm28, %xmm27 + +// CHECK: vrcp14ps %xmm28, %xmm27 {%k4} +// CHECK: encoding: [0x62,0x02,0x7d,0x0c,0x4c,0xdc] + vrcp14ps %xmm28, %xmm27 {%k4} + +// CHECK: vrcp14ps %xmm28, %xmm27 {%k4} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0x8c,0x4c,0xdc] + vrcp14ps %xmm28, %xmm27 {%k4} {z} + +// CHECK: vrcp14ps (%rcx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x4c,0x19] + vrcp14ps (%rcx), %xmm27 + +// CHECK: vrcp14ps 291(%rax,%r14,8), %xmm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x4c,0x9c,0xf0,0x23,0x01,0x00,0x00] + vrcp14ps 291(%rax,%r14,8), %xmm27 + +// CHECK: vrcp14ps (%rcx){1to4}, %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x4c,0x19] + vrcp14ps (%rcx){1to4}, %xmm27 + +// CHECK: vrcp14ps 2032(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x4c,0x5a,0x7f] + vrcp14ps 2032(%rdx), %xmm27 + +// CHECK: vrcp14ps 2048(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x4c,0x9a,0x00,0x08,0x00,0x00] + vrcp14ps 2048(%rdx), %xmm27 + +// CHECK: vrcp14ps -2048(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x4c,0x5a,0x80] + vrcp14ps -2048(%rdx), %xmm27 + +// CHECK: vrcp14ps -2064(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x4c,0x9a,0xf0,0xf7,0xff,0xff] + vrcp14ps -2064(%rdx), %xmm27 + +// CHECK: vrcp14ps 508(%rdx){1to4}, %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x4c,0x5a,0x7f] + vrcp14ps 508(%rdx){1to4}, %xmm27 + +// CHECK: vrcp14ps 512(%rdx){1to4}, %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x4c,0x9a,0x00,0x02,0x00,0x00] + vrcp14ps 512(%rdx){1to4}, %xmm27 + +// CHECK: vrcp14ps -512(%rdx){1to4}, %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x4c,0x5a,0x80] + vrcp14ps -512(%rdx){1to4}, %xmm27 + +// CHECK: vrcp14ps -516(%rdx){1to4}, %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x4c,0x9a,0xfc,0xfd,0xff,0xff] + vrcp14ps -516(%rdx){1to4}, %xmm27 + +// CHECK: vrcp14ps %ymm21, %ymm29 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x4c,0xed] + vrcp14ps %ymm21, %ymm29 + +// CHECK: vrcp14ps %ymm21, %ymm29 {%k7} +// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x4c,0xed] + vrcp14ps %ymm21, %ymm29 {%k7} + +// CHECK: vrcp14ps %ymm21, %ymm29 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x4c,0xed] + vrcp14ps %ymm21, %ymm29 {%k7} {z} + +// CHECK: vrcp14ps (%rcx), %ymm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4c,0x29] + vrcp14ps (%rcx), %ymm29 + +// CHECK: vrcp14ps 291(%rax,%r14,8), %ymm29 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x4c,0xac,0xf0,0x23,0x01,0x00,0x00] + vrcp14ps 291(%rax,%r14,8), %ymm29 + +// CHECK: vrcp14ps (%rcx){1to8}, %ymm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4c,0x29] + vrcp14ps (%rcx){1to8}, %ymm29 + +// CHECK: vrcp14ps 4064(%rdx), %ymm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4c,0x6a,0x7f] + vrcp14ps 4064(%rdx), %ymm29 + +// CHECK: vrcp14ps 4096(%rdx), %ymm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4c,0xaa,0x00,0x10,0x00,0x00] + vrcp14ps 4096(%rdx), %ymm29 + +// CHECK: vrcp14ps -4096(%rdx), %ymm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4c,0x6a,0x80] + vrcp14ps -4096(%rdx), %ymm29 + +// CHECK: vrcp14ps -4128(%rdx), %ymm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4c,0xaa,0xe0,0xef,0xff,0xff] + vrcp14ps -4128(%rdx), %ymm29 + +// CHECK: vrcp14ps 508(%rdx){1to8}, %ymm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4c,0x6a,0x7f] + vrcp14ps 508(%rdx){1to8}, %ymm29 + +// CHECK: vrcp14ps 512(%rdx){1to8}, %ymm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4c,0xaa,0x00,0x02,0x00,0x00] + vrcp14ps 512(%rdx){1to8}, %ymm29 + +// CHECK: vrcp14ps -512(%rdx){1to8}, %ymm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4c,0x6a,0x80] + vrcp14ps -512(%rdx){1to8}, %ymm29 + +// CHECK: vrcp14ps -516(%rdx){1to8}, %ymm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4c,0xaa,0xfc,0xfd,0xff,0xff] + vrcp14ps -516(%rdx){1to8}, %ymm29 + +// CHECK: vrsqrt14pd %xmm28, %xmm21 +// CHECK: encoding: [0x62,0x82,0xfd,0x08,0x4e,0xec] + vrsqrt14pd %xmm28, %xmm21 + +// CHECK: vrsqrt14pd %xmm28, %xmm21 {%k1} +// CHECK: encoding: [0x62,0x82,0xfd,0x09,0x4e,0xec] + vrsqrt14pd %xmm28, %xmm21 {%k1} + +// CHECK: vrsqrt14pd %xmm28, %xmm21 {%k1} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0x89,0x4e,0xec] + vrsqrt14pd %xmm28, %xmm21 {%k1} {z} + +// CHECK: vrsqrt14pd (%rcx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4e,0x29] + vrsqrt14pd (%rcx), %xmm21 + +// CHECK: vrsqrt14pd 291(%rax,%r14,8), %xmm21 +// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x4e,0xac,0xf0,0x23,0x01,0x00,0x00] + vrsqrt14pd 291(%rax,%r14,8), %xmm21 + +// CHECK: vrsqrt14pd (%rcx){1to2}, %xmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4e,0x29] + vrsqrt14pd (%rcx){1to2}, %xmm21 + +// CHECK: vrsqrt14pd 2032(%rdx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4e,0x6a,0x7f] + vrsqrt14pd 2032(%rdx), %xmm21 + +// CHECK: vrsqrt14pd 2048(%rdx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4e,0xaa,0x00,0x08,0x00,0x00] + vrsqrt14pd 2048(%rdx), %xmm21 + +// CHECK: vrsqrt14pd -2048(%rdx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4e,0x6a,0x80] + vrsqrt14pd -2048(%rdx), %xmm21 + +// CHECK: vrsqrt14pd -2064(%rdx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4e,0xaa,0xf0,0xf7,0xff,0xff] + vrsqrt14pd -2064(%rdx), %xmm21 + +// CHECK: vrsqrt14pd 1016(%rdx){1to2}, %xmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4e,0x6a,0x7f] + vrsqrt14pd 1016(%rdx){1to2}, %xmm21 + +// CHECK: vrsqrt14pd 1024(%rdx){1to2}, %xmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4e,0xaa,0x00,0x04,0x00,0x00] + vrsqrt14pd 1024(%rdx){1to2}, %xmm21 + +// CHECK: vrsqrt14pd -1024(%rdx){1to2}, %xmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4e,0x6a,0x80] + vrsqrt14pd -1024(%rdx){1to2}, %xmm21 + +// CHECK: vrsqrt14pd -1032(%rdx){1to2}, %xmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4e,0xaa,0xf8,0xfb,0xff,0xff] + vrsqrt14pd -1032(%rdx){1to2}, %xmm21 + +// CHECK: vrsqrt14pd %ymm19, %ymm18 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x4e,0xd3] + vrsqrt14pd %ymm19, %ymm18 + +// CHECK: vrsqrt14pd %ymm19, %ymm18 {%k4} +// CHECK: encoding: [0x62,0xa2,0xfd,0x2c,0x4e,0xd3] + vrsqrt14pd %ymm19, %ymm18 {%k4} + +// CHECK: vrsqrt14pd %ymm19, %ymm18 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0xfd,0xac,0x4e,0xd3] + vrsqrt14pd %ymm19, %ymm18 {%k4} {z} + +// CHECK: vrsqrt14pd (%rcx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x11] + vrsqrt14pd (%rcx), %ymm18 + +// CHECK: vrsqrt14pd 291(%rax,%r14,8), %ymm18 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x4e,0x94,0xf0,0x23,0x01,0x00,0x00] + vrsqrt14pd 291(%rax,%r14,8), %ymm18 + +// CHECK: vrsqrt14pd (%rcx){1to4}, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x11] + vrsqrt14pd (%rcx){1to4}, %ymm18 + +// CHECK: vrsqrt14pd 4064(%rdx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x52,0x7f] + vrsqrt14pd 4064(%rdx), %ymm18 + +// CHECK: vrsqrt14pd 4096(%rdx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x92,0x00,0x10,0x00,0x00] + vrsqrt14pd 4096(%rdx), %ymm18 + +// CHECK: vrsqrt14pd -4096(%rdx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x52,0x80] + vrsqrt14pd -4096(%rdx), %ymm18 + +// CHECK: vrsqrt14pd -4128(%rdx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x92,0xe0,0xef,0xff,0xff] + vrsqrt14pd -4128(%rdx), %ymm18 + +// CHECK: vrsqrt14pd 1016(%rdx){1to4}, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x52,0x7f] + vrsqrt14pd 1016(%rdx){1to4}, %ymm18 + +// CHECK: vrsqrt14pd 1024(%rdx){1to4}, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x92,0x00,0x04,0x00,0x00] + vrsqrt14pd 1024(%rdx){1to4}, %ymm18 + +// CHECK: vrsqrt14pd -1024(%rdx){1to4}, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x52,0x80] + vrsqrt14pd -1024(%rdx){1to4}, %ymm18 + +// CHECK: vrsqrt14pd -1032(%rdx){1to4}, %ymm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x92,0xf8,0xfb,0xff,0xff] + vrsqrt14pd -1032(%rdx){1to4}, %ymm18 + +// CHECK: vrsqrt14ps %xmm20, %xmm19 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x4e,0xdc] + vrsqrt14ps %xmm20, %xmm19 + +// CHECK: vrsqrt14ps %xmm20, %xmm19 {%k7} +// CHECK: encoding: [0x62,0xa2,0x7d,0x0f,0x4e,0xdc] + vrsqrt14ps %xmm20, %xmm19 {%k7} + +// CHECK: vrsqrt14ps %xmm20, %xmm19 {%k7} {z} +// CHECK: encoding: [0x62,0xa2,0x7d,0x8f,0x4e,0xdc] + vrsqrt14ps %xmm20, %xmm19 {%k7} {z} + +// CHECK: vrsqrt14ps (%rcx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x19] + vrsqrt14ps (%rcx), %xmm19 + +// CHECK: vrsqrt14ps 291(%rax,%r14,8), %xmm19 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x4e,0x9c,0xf0,0x23,0x01,0x00,0x00] + vrsqrt14ps 291(%rax,%r14,8), %xmm19 + +// CHECK: vrsqrt14ps (%rcx){1to4}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x19] + vrsqrt14ps (%rcx){1to4}, %xmm19 + +// CHECK: vrsqrt14ps 2032(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x5a,0x7f] + vrsqrt14ps 2032(%rdx), %xmm19 + +// CHECK: vrsqrt14ps 2048(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x9a,0x00,0x08,0x00,0x00] + vrsqrt14ps 2048(%rdx), %xmm19 + +// CHECK: vrsqrt14ps -2048(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x5a,0x80] + vrsqrt14ps -2048(%rdx), %xmm19 + +// CHECK: vrsqrt14ps -2064(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x9a,0xf0,0xf7,0xff,0xff] + vrsqrt14ps -2064(%rdx), %xmm19 + +// CHECK: vrsqrt14ps 508(%rdx){1to4}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x5a,0x7f] + vrsqrt14ps 508(%rdx){1to4}, %xmm19 + +// CHECK: vrsqrt14ps 512(%rdx){1to4}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x9a,0x00,0x02,0x00,0x00] + vrsqrt14ps 512(%rdx){1to4}, %xmm19 + +// CHECK: vrsqrt14ps -512(%rdx){1to4}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x5a,0x80] + vrsqrt14ps -512(%rdx){1to4}, %xmm19 + +// CHECK: vrsqrt14ps -516(%rdx){1to4}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x9a,0xfc,0xfd,0xff,0xff] + vrsqrt14ps -516(%rdx){1to4}, %xmm19 + +// CHECK: vrsqrt14ps %ymm18, %ymm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x4e,0xda] + vrsqrt14ps %ymm18, %ymm27 + +// CHECK: vrsqrt14ps %ymm18, %ymm27 {%k7} +// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x4e,0xda] + vrsqrt14ps %ymm18, %ymm27 {%k7} + +// CHECK: vrsqrt14ps %ymm18, %ymm27 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x4e,0xda] + vrsqrt14ps %ymm18, %ymm27 {%k7} {z} + +// CHECK: vrsqrt14ps (%rcx), %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4e,0x19] + vrsqrt14ps (%rcx), %ymm27 + +// CHECK: vrsqrt14ps 291(%rax,%r14,8), %ymm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x4e,0x9c,0xf0,0x23,0x01,0x00,0x00] + vrsqrt14ps 291(%rax,%r14,8), %ymm27 + +// CHECK: vrsqrt14ps (%rcx){1to8}, %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4e,0x19] + vrsqrt14ps (%rcx){1to8}, %ymm27 + +// CHECK: vrsqrt14ps 4064(%rdx), %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4e,0x5a,0x7f] + vrsqrt14ps 4064(%rdx), %ymm27 + +// CHECK: vrsqrt14ps 4096(%rdx), %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4e,0x9a,0x00,0x10,0x00,0x00] + vrsqrt14ps 4096(%rdx), %ymm27 + +// CHECK: vrsqrt14ps -4096(%rdx), %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4e,0x5a,0x80] + vrsqrt14ps -4096(%rdx), %ymm27 + +// CHECK: vrsqrt14ps -4128(%rdx), %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4e,0x9a,0xe0,0xef,0xff,0xff] + vrsqrt14ps -4128(%rdx), %ymm27 + +// CHECK: vrsqrt14ps 508(%rdx){1to8}, %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4e,0x5a,0x7f] + vrsqrt14ps 508(%rdx){1to8}, %ymm27 + +// CHECK: vrsqrt14ps 512(%rdx){1to8}, %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4e,0x9a,0x00,0x02,0x00,0x00] + vrsqrt14ps 512(%rdx){1to8}, %ymm27 + +// CHECK: vrsqrt14ps -512(%rdx){1to8}, %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4e,0x5a,0x80] + vrsqrt14ps -512(%rdx){1to8}, %ymm27 + +// CHECK: vrsqrt14ps -516(%rdx){1to8}, %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4e,0x9a,0xfc,0xfd,0xff,0xff] + vrsqrt14ps -516(%rdx){1to8}, %ymm27 + // CHECK: vmovapd %xmm22, (%rcx) // CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x29,0x31] vmovapd %xmm22, (%rcx)