1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=SKX
; Unmasked <16 x float> multiply-add written as separate fmul/fadd:
;   %res = (%a0 * %a1) + %a2
; The expectation below is a single vfmadd213ps on zmm registers; the llc
; invocations at the top of the file pass -fp-contract=fast.
4 ; CHECK-LABEL: test_x86_fmadd_ps_z
5 ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0
7 define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
8 %x = fmul <16 x float> %a0, %a1
9 %res = fadd <16 x float> %x, %a2
; Unmasked <16 x float> multiply-subtract:
;   %res = (%a0 * %a1) - %a2
; The expectation below is a single vfmsub213ps on zmm registers.
13 ; CHECK-LABEL: test_x86_fmsub_ps_z
14 ; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0
16 define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
17 %x = fmul <16 x float> %a0, %a1
18 %res = fsub <16 x float> %x, %a2
; Unmasked <16 x float> negated-multiply-add; the product is the subtrahend:
;   %res = %a2 - (%a0 * %a1)
; The expectation below is a single vfnmadd213ps on zmm registers.
22 ; CHECK-LABEL: test_x86_fnmadd_ps_z
23 ; CHECK: vfnmadd213ps %zmm2, %zmm1, %zmm0
25 define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
26 %x = fmul <16 x float> %a0, %a1
27 %res = fsub <16 x float> %a2, %x
; Unmasked <16 x float> negated-multiply-subtract:
;   %y   = (-0.0 splat) - (%a0 * %a1)     ; negation spelled as fsub from -0.0
;   %res = %y - %a2
; The expectation below is a single vfnmsub213ps on zmm registers.
31 ; CHECK-LABEL: test_x86_fnmsub_ps_z
32 ; CHECK: vfnmsub213ps %zmm2, %zmm1, %zmm0
34 define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
35 %x = fmul <16 x float> %a0, %a1
; Subtracting the product from an all -0.0 vector negates it.
36 %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
37 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
38 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
39 float -0.000000e+00>, %x
40 %res = fsub <16 x float> %y, %a2
; Unmasked <8 x double> multiply-add:
;   %res = (%a0 * %a1) + %a2
; The expectation below is a single vfmadd213pd on zmm registers.
44 ; CHECK-LABEL: test_x86_fmadd_pd_z
45 ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0
47 define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
48 %x = fmul <8 x double> %a0, %a1
49 %res = fadd <8 x double> %x, %a2
; Unmasked <8 x double> multiply-subtract:
;   %res = (%a0 * %a1) - %a2
; The expectation below is a single vfmsub213pd on zmm registers.
53 ; CHECK-LABEL: test_x86_fmsub_pd_z
54 ; CHECK: vfmsub213pd %zmm2, %zmm1, %zmm0
56 define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
57 %x = fmul <8 x double> %a0, %a1
58 %res = fsub <8 x double> %x, %a2
; Scalar double multiply-subtract:
;   %res = (%a0 * %a1) - %a2
; NOTE(review): no FileCheck expectation is visible for this function in this
; view, so it appears to exercise only successful compilation. Confirm whether
; an expectation on the fused scalar instruction should be added.
62 define double @test_x86_fmsub_sd_z(double %a0, double %a1, double %a2) {
63 %x = fmul double %a0, %a1
64 %res = fsub double %x, %a2
; Multiply-add where one multiplicand is a constant splat:
;   %b2 = (%a1 * splat(0x3FB99999A0000000)) + %a2
; The expectations below are a vfmadd231ps whose memory operand is a
; rip-relative {1to16} broadcast, followed by vmovaps %zmm1, %zmm0.
68 define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
69 ; CHECK-LABEL: test231_br:
71 ; CHECK-NEXT: vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1
72 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
74 %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
75 %b2 = fadd <16 x float> %b1, %a2
; Multiply-add where the addend is a constant splat:
;   %b2 = (%a1 * %a2) + splat(0x3FB99999A0000000)
; The expectation below is a vfmadd213ps whose memory operand is a
; rip-relative {1to16} broadcast.
79 define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
80 ; CHECK-LABEL: test213_br:
82 ; CHECK-NEXT: vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
84 %b1 = fmul <16 x float> %a1, %a2
85 %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
; Masked multiply-add with a loaded multiplicand and %a0 as the pass-through:
;   %a2  = load from %a2_ptrt (align 1)
;   %y   = (%a0 * %a2) + %a1
;   %res = %mask ? %y : %a0
; Default-prefix expectations: the <16 x i1> mask is turned into %k1 via
; vpmovsxbd / vpandd / vptestmd, then a {%k1}-masked vfmadd132ps takes (%rdi)
; as its memory operand. The -mcpu=skx expectations build %k1 with vpmovb2m.
90 define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
91 ; CHECK-LABEL: test_x86_fmadd132_ps:
93 ; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
94 ; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
95 ; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
96 ; CHECK-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
99 ; SKX-LABEL: test_x86_fmadd132_ps:
101 ; SKX-NEXT: vpmovb2m %xmm2, %k1
102 ; SKX-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
104 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
105 %x = fmul <16 x float> %a0, %a2
106 %y = fadd <16 x float> %x, %a1
; Unselected lanes keep %a0, which is also one of the multiplicands.
107 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
108 ret <16 x float> %res
; Masked multiply-add with a loaded multiplicand and the addend %a1 as the
; pass-through:
;   %a2  = load from %a2_ptrt (align 1)
;   %y   = (%a0 * %a2) + %a1
;   %res = %mask ? %y : %a1
; Default-prefix expectations: mask materialized into %k1 via
; vpmovsxbd / vpandd / vptestmd, then a {%k1}-masked vfmadd231ps with (%rdi)
; as its memory operand, then vmovaps %zmm1, %zmm0. The -mcpu=skx
; expectations differ only in building %k1 with vpmovb2m.
112 define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
113 ; CHECK-LABEL: test_x86_fmadd231_ps:
115 ; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
116 ; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
117 ; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
118 ; CHECK-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
119 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
122 ; SKX-LABEL: test_x86_fmadd231_ps:
124 ; SKX-NEXT: vpmovb2m %xmm2, %k1
125 ; SKX-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
126 ; SKX-NEXT: vmovaps %zmm1, %zmm0
128 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
129 %x = fmul <16 x float> %a0, %a2
130 %y = fadd <16 x float> %x, %a1
; Unselected lanes keep %a1, the addend.
131 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
132 ret <16 x float> %res
; Masked multiply-add with a loaded addend and the multiplicand %a1 as the
; pass-through:
;   %a2  = load from %a2_ptrt (align 1)
;   %y   = (%a1 * %a0) + %a2
;   %res = %mask ? %y : %a1
; Default-prefix expectations: mask materialized into %k1 via
; vpmovsxbd / vpandd / vptestmd, then a {%k1}-masked vfmadd213ps with (%rdi)
; as its memory operand, then vmovaps %zmm1, %zmm0. The -mcpu=skx
; expectations differ only in building %k1 with vpmovb2m.
136 define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
137 ; CHECK-LABEL: test_x86_fmadd213_ps:
139 ; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
140 ; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
141 ; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
142 ; CHECK-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
143 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
146 ; SKX-LABEL: test_x86_fmadd213_ps:
148 ; SKX-NEXT: vpmovb2m %xmm2, %k1
149 ; SKX-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
150 ; SKX-NEXT: vmovaps %zmm1, %zmm0
152 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
153 %x = fmul <16 x float> %a1, %a0
154 %y = fadd <16 x float> %x, %a2
; Unselected lanes keep %a1, one of the multiplicands.
155 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
156 ret <16 x float> %res