1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=CHECK_FMA
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=CHECK_FMA
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=CHECK_FMA4
6 ; Patterns (+ fneg variants): add(mul(x,y),z), sub(mul(x,y),z)
; <4 x float>: (%a0 * %a1) + %a2 written as a separate fmul and fadd.
; Expected to be selected as one vfmadd213ps (FMA3 checks) or one
; vfmaddps (FMA4 checks) on xmm registers.
9 define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
10 ; CHECK_FMA-LABEL: test_x86_fmadd_ps:
12 ; CHECK_FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
13 ; CHECK_FMA-NEXT: retq
15 ; CHECK_FMA4-LABEL: test_x86_fmadd_ps:
17 ; CHECK_FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
18 ; CHECK_FMA4-NEXT: retq
19 %x = fmul <4 x float> %a0, %a1
20 %res = fadd <4 x float> %x, %a2
; <4 x float>: (%a0 * %a1) - %a2 written as a separate fmul and fsub.
; Expected to be selected as one vfmsub213ps (FMA3 checks) or one
; vfmsubps (FMA4 checks) on xmm registers.
24 define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
25 ; CHECK_FMA-LABEL: test_x86_fmsub_ps:
27 ; CHECK_FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
28 ; CHECK_FMA-NEXT: retq
30 ; CHECK_FMA4-LABEL: test_x86_fmsub_ps:
32 ; CHECK_FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
33 ; CHECK_FMA4-NEXT: retq
34 %x = fmul <4 x float> %a0, %a1
35 %res = fsub <4 x float> %x, %a2
; <4 x float>: %a2 - (%a0 * %a1), i.e. the product is the subtrahend.
; Expected to be selected as one vfnmadd213ps (FMA3 checks) or one
; vfnmaddps (FMA4 checks) on xmm registers.
39 define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
40 ; CHECK_FMA-LABEL: test_x86_fnmadd_ps:
42 ; CHECK_FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
43 ; CHECK_FMA-NEXT: retq
45 ; CHECK_FMA4-LABEL: test_x86_fnmadd_ps:
47 ; CHECK_FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
48 ; CHECK_FMA4-NEXT: retq
49 %x = fmul <4 x float> %a0, %a1
50 %res = fsub <4 x float> %a2, %x
; <4 x float>: (-0.0 - (%a0 * %a1)) - %a2. The product is negated by
; subtracting it from a -0.0 splat, then %a2 is subtracted.
; Expected to be selected as one vfnmsub213ps (FMA3 checks) or one
; vfnmsubps (FMA4 checks) on xmm registers.
54 define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
55 ; CHECK_FMA-LABEL: test_x86_fnmsub_ps:
57 ; CHECK_FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
58 ; CHECK_FMA-NEXT: retq
60 ; CHECK_FMA4-LABEL: test_x86_fnmsub_ps:
62 ; CHECK_FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
63 ; CHECK_FMA4-NEXT: retq
64 %x = fmul <4 x float> %a0, %a1
65 %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
66 %res = fsub <4 x float> %y, %a2
; <8 x float> variant of (%a0 * %a1) + %a2.
; Expected to be selected as one vfmadd213ps (FMA3 checks) or one
; vfmaddps (FMA4 checks) on ymm registers.
70 define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
71 ; CHECK_FMA-LABEL: test_x86_fmadd_ps_y:
73 ; CHECK_FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
74 ; CHECK_FMA-NEXT: retq
76 ; CHECK_FMA4-LABEL: test_x86_fmadd_ps_y:
78 ; CHECK_FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
79 ; CHECK_FMA4-NEXT: retq
80 %x = fmul <8 x float> %a0, %a1
81 %res = fadd <8 x float> %x, %a2
; <8 x float> variant of (%a0 * %a1) - %a2.
; Expected to be selected as one vfmsub213ps (FMA3 checks) or one
; vfmsubps (FMA4 checks) on ymm registers.
85 define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
86 ; CHECK_FMA-LABEL: test_x86_fmsub_ps_y:
88 ; CHECK_FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
89 ; CHECK_FMA-NEXT: retq
91 ; CHECK_FMA4-LABEL: test_x86_fmsub_ps_y:
93 ; CHECK_FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
94 ; CHECK_FMA4-NEXT: retq
95 %x = fmul <8 x float> %a0, %a1
96 %res = fsub <8 x float> %x, %a2
; <8 x float> variant of %a2 - (%a0 * %a1).
; Expected to be selected as one vfnmadd213ps (FMA3 checks) or one
; vfnmaddps (FMA4 checks) on ymm registers.
100 define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
101 ; CHECK_FMA-LABEL: test_x86_fnmadd_ps_y:
103 ; CHECK_FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
104 ; CHECK_FMA-NEXT: retq
106 ; CHECK_FMA4-LABEL: test_x86_fnmadd_ps_y:
107 ; CHECK_FMA4: # BB#0:
108 ; CHECK_FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
109 ; CHECK_FMA4-NEXT: retq
110 %x = fmul <8 x float> %a0, %a1
111 %res = fsub <8 x float> %a2, %x
; <8 x float> variant of (-0.0 - (%a0 * %a1)) - %a2; the product is
; negated by subtracting it from a -0.0 splat before %a2 is subtracted.
; Expected to be selected as one vfnmsub213ps (FMA3 checks) or one
; vfnmsubps (FMA4 checks) on ymm registers.
115 define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
116 ; CHECK_FMA-LABEL: test_x86_fnmsub_ps_y:
118 ; CHECK_FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
119 ; CHECK_FMA-NEXT: retq
121 ; CHECK_FMA4-LABEL: test_x86_fnmsub_ps_y:
122 ; CHECK_FMA4: # BB#0:
123 ; CHECK_FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
124 ; CHECK_FMA4-NEXT: retq
125 %x = fmul <8 x float> %a0, %a1
126 %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
127 %res = fsub <8 x float> %y, %a2
; <4 x double>: (%a0 * %a1) + %a2, returned as %res.
; Expected to be selected as one vfmadd213pd (FMA3 checks) or one
; vfmaddpd (FMA4 checks) on ymm registers.
131 define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
132 ; CHECK_FMA-LABEL: test_x86_fmadd_pd_y:
134 ; CHECK_FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
135 ; CHECK_FMA-NEXT: retq
137 ; CHECK_FMA4-LABEL: test_x86_fmadd_pd_y:
138 ; CHECK_FMA4: # BB#0:
139 ; CHECK_FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
140 ; CHECK_FMA4-NEXT: retq
141 %x = fmul <4 x double> %a0, %a1
142 %res = fadd <4 x double> %x, %a2
143 ret <4 x double> %res
; <4 x double>: (%a0 * %a1) - %a2, returned as %res.
; Expected to be selected as one vfmsub213pd (FMA3 checks) or one
; vfmsubpd (FMA4 checks) on ymm registers.
146 define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
147 ; CHECK_FMA-LABEL: test_x86_fmsub_pd_y:
149 ; CHECK_FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
150 ; CHECK_FMA-NEXT: retq
152 ; CHECK_FMA4-LABEL: test_x86_fmsub_pd_y:
153 ; CHECK_FMA4: # BB#0:
154 ; CHECK_FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
155 ; CHECK_FMA4-NEXT: retq
156 %x = fmul <4 x double> %a0, %a1
157 %res = fsub <4 x double> %x, %a2
158 ret <4 x double> %res
; <2 x double>: (%a0 * %a1) - %a2, returned as %res.
; Expected to be selected as one vfmsub213pd (FMA3 checks) or one
; vfmsubpd (FMA4 checks) on xmm registers.
161 define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
162 ; CHECK_FMA-LABEL: test_x86_fmsub_pd:
164 ; CHECK_FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
165 ; CHECK_FMA-NEXT: retq
167 ; CHECK_FMA4-LABEL: test_x86_fmsub_pd:
168 ; CHECK_FMA4: # BB#0:
169 ; CHECK_FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
170 ; CHECK_FMA4-NEXT: retq
171 %x = fmul <2 x double> %a0, %a1
172 %res = fsub <2 x double> %x, %a2
173 ret <2 x double> %res
; Scalar float: %a2 - (%a0 * %a1).
; Expected to be selected as one vfnmadd213ss (FMA3 checks) or one
; vfnmaddss (FMA4 checks).
176 define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) {
177 ; CHECK_FMA-LABEL: test_x86_fnmadd_ss:
179 ; CHECK_FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
180 ; CHECK_FMA-NEXT: retq
182 ; CHECK_FMA4-LABEL: test_x86_fnmadd_ss:
183 ; CHECK_FMA4: # BB#0:
184 ; CHECK_FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
185 ; CHECK_FMA4-NEXT: retq
186 %x = fmul float %a0, %a1
187 %res = fsub float %a2, %x
; Scalar double: %a2 - (%a0 * %a1).
; Expected to be selected as one vfnmadd213sd (FMA3 checks) or one
; vfnmaddsd (FMA4 checks).
191 define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) {
192 ; CHECK_FMA-LABEL: test_x86_fnmadd_sd:
194 ; CHECK_FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
195 ; CHECK_FMA-NEXT: retq
197 ; CHECK_FMA4-LABEL: test_x86_fnmadd_sd:
198 ; CHECK_FMA4: # BB#0:
199 ; CHECK_FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
200 ; CHECK_FMA4-NEXT: retq
201 %x = fmul double %a0, %a1
202 %res = fsub double %a2, %x
; Scalar double: (%a0 * %a1) - %a2.
; Expected to be selected as one vfmsub213sd (FMA3 checks) or one
; vfmsubsd (FMA4 checks).
206 define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) {
207 ; CHECK_FMA-LABEL: test_x86_fmsub_sd:
209 ; CHECK_FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
210 ; CHECK_FMA-NEXT: retq
212 ; CHECK_FMA4-LABEL: test_x86_fmsub_sd:
213 ; CHECK_FMA4: # BB#0:
214 ; CHECK_FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
215 ; CHECK_FMA4-NEXT: retq
216 %x = fmul double %a0, %a1
217 %res = fsub double %x, %a2
; Scalar float: ((-0.0 - %a0) * %a1) - %a2. Here the negation is applied
; to one multiplicand (%a0) before the fmul, not to the product.
; Expected to be selected as one vfnmsub213ss (FMA3 checks) or one
; vfnmsubss (FMA4 checks).
221 define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) {
222 ; CHECK_FMA-LABEL: test_x86_fnmsub_ss:
224 ; CHECK_FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
225 ; CHECK_FMA-NEXT: retq
227 ; CHECK_FMA4-LABEL: test_x86_fnmsub_ss:
228 ; CHECK_FMA4: # BB#0:
229 ; CHECK_FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
230 ; CHECK_FMA4-NEXT: retq
231 %x = fsub float -0.000000e+00, %a0
232 %y = fmul float %x, %a1
233 %res = fsub float %y, %a2
; <4 x float>: (load(%a0) * %a1) + %a2, where the first multiplicand is
; loaded through the pointer argument %a0.
; Expected to be selected as one fused multiply-add that takes (%rdi) as
; a memory operand: vfmadd132ps (FMA3 checks) or vfmaddps (FMA4 checks),
; with no separate load instruction in between.
237 define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
238 ; CHECK_FMA-LABEL: test_x86_fmadd_ps_load:
240 ; CHECK_FMA-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0
241 ; CHECK_FMA-NEXT: retq
243 ; CHECK_FMA4-LABEL: test_x86_fmadd_ps_load:
244 ; CHECK_FMA4: # BB#0:
245 ; CHECK_FMA4-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
246 ; CHECK_FMA4-NEXT: retq
247 %x = load <4 x float>, <4 x float>* %a0
248 %y = fmul <4 x float> %x, %a1
249 %res = fadd <4 x float> %y, %a2
; <4 x float>: (load(%a0) * %a1) - %a2, where the first multiplicand is
; loaded through the pointer argument %a0.
; Expected to be selected as one fused multiply-subtract that takes
; (%rdi) as a memory operand: vfmsub132ps (FMA3 checks) or vfmsubps
; (FMA4 checks), with no separate load instruction in between.
253 define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
254 ; CHECK_FMA-LABEL: test_x86_fmsub_ps_load:
256 ; CHECK_FMA-NEXT: vfmsub132ps (%rdi), %xmm1, %xmm0
257 ; CHECK_FMA-NEXT: retq
259 ; CHECK_FMA4-LABEL: test_x86_fmsub_ps_load:
260 ; CHECK_FMA4: # BB#0:
261 ; CHECK_FMA4-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
262 ; CHECK_FMA4-NEXT: retq
263 %x = load <4 x float>, <4 x float>* %a0
264 %y = fmul <4 x float> %x, %a1
265 %res = fsub <4 x float> %y, %a2
270 ; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
; <4 x float>: (%x + 1.0) * %y.
; Expected to fold into one vfmadd213ps (FMA3 checks) or one vfmaddps
; (FMA4 checks) in which %xmm1 appears twice, with no separate add and
; no constant load.
273 define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
274 ; CHECK_FMA-LABEL: test_v4f32_mul_add_x_one_y:
276 ; CHECK_FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
277 ; CHECK_FMA-NEXT: retq
279 ; CHECK_FMA4-LABEL: test_v4f32_mul_add_x_one_y:
280 ; CHECK_FMA4: # BB#0:
281 ; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
282 ; CHECK_FMA4-NEXT: retq
283 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
284 %m = fmul <4 x float> %a, %y
; <4 x float>: %y * (%x + 1.0), the same pattern with the fmul operands
; swapped.
; Expected to fold into one vfmadd213ps (FMA3 checks) or one vfmaddps
; (FMA4 checks) in which %xmm1 appears twice.
288 define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
289 ; CHECK_FMA-LABEL: test_v4f32_mul_y_add_x_one:
291 ; CHECK_FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
292 ; CHECK_FMA-NEXT: retq
294 ; CHECK_FMA4-LABEL: test_v4f32_mul_y_add_x_one:
295 ; CHECK_FMA4: # BB#0:
296 ; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
297 ; CHECK_FMA4-NEXT: retq
298 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
299 %m = fmul <4 x float> %y, %a
; <4 x float>: (%x + -1.0) * %y.
; Expected to fold into one vfmsub213ps (FMA3 checks) or one vfmsubps
; (FMA4 checks) in which %xmm1 appears twice.
303 define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
304 ; CHECK_FMA-LABEL: test_v4f32_mul_add_x_negone_y:
306 ; CHECK_FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
307 ; CHECK_FMA-NEXT: retq
309 ; CHECK_FMA4-LABEL: test_v4f32_mul_add_x_negone_y:
310 ; CHECK_FMA4: # BB#0:
311 ; CHECK_FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
312 ; CHECK_FMA4-NEXT: retq
313 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
314 %m = fmul <4 x float> %a, %y
; <4 x float>: %y * (%x + -1.0), the same pattern with the fmul operands
; swapped.
; Expected to fold into one vfmsub213ps (FMA3 checks) or one vfmsubps
; (FMA4 checks) in which %xmm1 appears twice.
318 define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
319 ; CHECK_FMA-LABEL: test_v4f32_mul_y_add_x_negone:
321 ; CHECK_FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
322 ; CHECK_FMA-NEXT: retq
324 ; CHECK_FMA4-LABEL: test_v4f32_mul_y_add_x_negone:
325 ; CHECK_FMA4: # BB#0:
326 ; CHECK_FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
327 ; CHECK_FMA4-NEXT: retq
328 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
329 %m = fmul <4 x float> %y, %a
; <4 x float>: (1.0 - %x) * %y.
; Expected to fold into one vfnmadd213ps (FMA3 checks) or one vfnmaddps
; (FMA4 checks) in which %xmm1 appears twice.
333 define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
334 ; CHECK_FMA-LABEL: test_v4f32_mul_sub_one_x_y:
336 ; CHECK_FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
337 ; CHECK_FMA-NEXT: retq
339 ; CHECK_FMA4-LABEL: test_v4f32_mul_sub_one_x_y:
340 ; CHECK_FMA4: # BB#0:
341 ; CHECK_FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
342 ; CHECK_FMA4-NEXT: retq
343 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
344 %m = fmul <4 x float> %s, %y
; <4 x float>: %y * (1.0 - %x), the same pattern with the fmul operands
; swapped.
; Expected to fold into one vfnmadd213ps (FMA3 checks) or one vfnmaddps
; (FMA4 checks) in which %xmm1 appears twice.
348 define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
349 ; CHECK_FMA-LABEL: test_v4f32_mul_y_sub_one_x:
351 ; CHECK_FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
352 ; CHECK_FMA-NEXT: retq
354 ; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_one_x:
355 ; CHECK_FMA4: # BB#0:
356 ; CHECK_FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
357 ; CHECK_FMA4-NEXT: retq
358 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
359 %m = fmul <4 x float> %y, %s
; <4 x float>: (-1.0 - %x) * %y.
; Expected to fold into one vfnmsub213ps (FMA3 checks) or one vfnmsubps
; (FMA4 checks) in which %xmm1 appears twice.
363 define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
364 ; CHECK_FMA-LABEL: test_v4f32_mul_sub_negone_x_y:
366 ; CHECK_FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
367 ; CHECK_FMA-NEXT: retq
369 ; CHECK_FMA4-LABEL: test_v4f32_mul_sub_negone_x_y:
370 ; CHECK_FMA4: # BB#0:
371 ; CHECK_FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
372 ; CHECK_FMA4-NEXT: retq
373 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
374 %m = fmul <4 x float> %s, %y
; <4 x float>: %y * (-1.0 - %x), the same pattern with the fmul operands
; swapped.
; Expected to fold into one vfnmsub213ps (FMA3 checks) or one vfnmsubps
; (FMA4 checks) in which %xmm1 appears twice.
378 define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
379 ; CHECK_FMA-LABEL: test_v4f32_mul_y_sub_negone_x:
381 ; CHECK_FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
382 ; CHECK_FMA-NEXT: retq
384 ; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_negone_x:
385 ; CHECK_FMA4: # BB#0:
386 ; CHECK_FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
387 ; CHECK_FMA4-NEXT: retq
388 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
389 %m = fmul <4 x float> %y, %s
; <4 x float>: (%x - 1.0) * %y.
; Expected to fold into one vfmsub213ps (FMA3 checks) or one vfmsubps
; (FMA4 checks) in which %xmm1 appears twice.
393 define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
394 ; CHECK_FMA-LABEL: test_v4f32_mul_sub_x_one_y:
396 ; CHECK_FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
397 ; CHECK_FMA-NEXT: retq
399 ; CHECK_FMA4-LABEL: test_v4f32_mul_sub_x_one_y:
400 ; CHECK_FMA4: # BB#0:
401 ; CHECK_FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
402 ; CHECK_FMA4-NEXT: retq
403 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
404 %m = fmul <4 x float> %s, %y
; <4 x float>: %y * (%x - 1.0), the same pattern with the fmul operands
; swapped.
; Expected to fold into one vfmsub213ps (FMA3 checks) or one vfmsubps
; (FMA4 checks) in which %xmm1 appears twice.
408 define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
409 ; CHECK_FMA-LABEL: test_v4f32_mul_y_sub_x_one:
411 ; CHECK_FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
412 ; CHECK_FMA-NEXT: retq
414 ; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_x_one:
415 ; CHECK_FMA4: # BB#0:
416 ; CHECK_FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
417 ; CHECK_FMA4-NEXT: retq
418 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
419 %m = fmul <4 x float> %y, %s
; <4 x float>: (%x - -1.0) * %y.
; Expected to fold into one vfmadd213ps (FMA3 checks) or one vfmaddps
; (FMA4 checks) in which %xmm1 appears twice.
423 define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
424 ; CHECK_FMA-LABEL: test_v4f32_mul_sub_x_negone_y:
426 ; CHECK_FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
427 ; CHECK_FMA-NEXT: retq
429 ; CHECK_FMA4-LABEL: test_v4f32_mul_sub_x_negone_y:
430 ; CHECK_FMA4: # BB#0:
431 ; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
432 ; CHECK_FMA4-NEXT: retq
433 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
434 %m = fmul <4 x float> %s, %y
; <4 x float>: %y * (%x - -1.0), the same pattern with the fmul operands
; swapped.
; Expected to fold into one vfmadd213ps (FMA3 checks) or one vfmaddps
; (FMA4 checks) in which %xmm1 appears twice.
438 define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
439 ; CHECK_FMA-LABEL: test_v4f32_mul_y_sub_x_negone:
441 ; CHECK_FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
442 ; CHECK_FMA-NEXT: retq
444 ; CHECK_FMA4-LABEL: test_v4f32_mul_y_sub_x_negone:
445 ; CHECK_FMA4: # BB#0:
446 ; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
447 ; CHECK_FMA4-NEXT: retq
448 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
449 %m = fmul <4 x float> %y, %s
454 ; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
; Scalar float interpolation: (%x * %t) + (%y * (1.0 - %t)).
; Expected to be selected as exactly two fused instructions: a
; vfnmadd213ss followed by a vfmadd213ss (FMA3 checks), or a vfnmaddss
; followed by a vfmaddss (FMA4 checks).
457 define float @test_f32_interp(float %x, float %y, float %t) {
458 ; CHECK_FMA-LABEL: test_f32_interp:
460 ; CHECK_FMA-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1
461 ; CHECK_FMA-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0
462 ; CHECK_FMA-NEXT: retq
464 ; CHECK_FMA4-LABEL: test_f32_interp:
465 ; CHECK_FMA4: # BB#0:
466 ; CHECK_FMA4-NEXT: vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1
467 ; CHECK_FMA4-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
468 ; CHECK_FMA4-NEXT: retq
469 %t1 = fsub float 1.0, %t
470 %tx = fmul float %x, %t
471 %ty = fmul float %y, %t1
472 %r = fadd float %tx, %ty
; <4 x float> interpolation: (%x * %t) + (%y * (1.0 - %t)).
; Expected to be selected as exactly two fused instructions on xmm
; registers: vfnmadd213ps then vfmadd213ps (FMA3 checks), or vfnmaddps
; then vfmaddps (FMA4 checks).
476 define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
477 ; CHECK_FMA-LABEL: test_v4f32_interp:
479 ; CHECK_FMA-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1
480 ; CHECK_FMA-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0
481 ; CHECK_FMA-NEXT: retq
483 ; CHECK_FMA4-LABEL: test_v4f32_interp:
484 ; CHECK_FMA4: # BB#0:
485 ; CHECK_FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1
486 ; CHECK_FMA4-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
487 ; CHECK_FMA4-NEXT: retq
488 %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
489 %tx = fmul <4 x float> %x, %t
490 %ty = fmul <4 x float> %y, %t1
491 %r = fadd <4 x float> %tx, %ty
; <8 x float> interpolation: (%x * %t) + (%y * (1.0 - %t)).
; Expected to be selected as exactly two fused instructions on ymm
; registers: vfnmadd213ps then vfmadd213ps (FMA3 checks), or vfnmaddps
; then vfmaddps (FMA4 checks).
495 define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
496 ; CHECK_FMA-LABEL: test_v8f32_interp:
498 ; CHECK_FMA-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1
499 ; CHECK_FMA-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0
500 ; CHECK_FMA-NEXT: retq
502 ; CHECK_FMA4-LABEL: test_v8f32_interp:
503 ; CHECK_FMA4: # BB#0:
504 ; CHECK_FMA4-NEXT: vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1
505 ; CHECK_FMA4-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
506 ; CHECK_FMA4-NEXT: retq
507 %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
508 %tx = fmul <8 x float> %x, %t
509 %ty = fmul <8 x float> %y, %t1
510 %r = fadd <8 x float> %tx, %ty
; Scalar double interpolation: (%x * %t) + (%y * (1.0 - %t)).
; Expected to be selected as exactly two fused instructions:
; vfnmadd213sd then vfmadd213sd (FMA3 checks), or vfnmaddsd then
; vfmaddsd (FMA4 checks).
514 define double @test_f64_interp(double %x, double %y, double %t) {
515 ; CHECK_FMA-LABEL: test_f64_interp:
517 ; CHECK_FMA-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1
518 ; CHECK_FMA-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0
519 ; CHECK_FMA-NEXT: retq
521 ; CHECK_FMA4-LABEL: test_f64_interp:
522 ; CHECK_FMA4: # BB#0:
523 ; CHECK_FMA4-NEXT: vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1
524 ; CHECK_FMA4-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
525 ; CHECK_FMA4-NEXT: retq
526 %t1 = fsub double 1.0, %t
527 %tx = fmul double %x, %t
528 %ty = fmul double %y, %t1
529 %r = fadd double %tx, %ty
; <2 x double> interpolation: (%x * %t) + (%y * (1.0 - %t)).
; Expected to be selected as exactly two fused instructions on xmm
; registers: vfnmadd213pd then vfmadd213pd (FMA3 checks), or vfnmaddpd
; then vfmaddpd (FMA4 checks).
533 define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
534 ; CHECK_FMA-LABEL: test_v2f64_interp:
536 ; CHECK_FMA-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1
537 ; CHECK_FMA-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0
538 ; CHECK_FMA-NEXT: retq
540 ; CHECK_FMA4-LABEL: test_v2f64_interp:
541 ; CHECK_FMA4: # BB#0:
542 ; CHECK_FMA4-NEXT: vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1
543 ; CHECK_FMA4-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
544 ; CHECK_FMA4-NEXT: retq
545 %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
546 %tx = fmul <2 x double> %x, %t
547 %ty = fmul <2 x double> %y, %t1
548 %r = fadd <2 x double> %tx, %ty
; <4 x double> interpolation: (%x * %t) + (%y * (1.0 - %t)).
; Expected to be selected as exactly two fused instructions on ymm
; registers: vfnmadd213pd then vfmadd213pd (FMA3 checks), or vfnmaddpd
; then vfmaddpd (FMA4 checks).
552 define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
553 ; CHECK_FMA-LABEL: test_v4f64_interp:
555 ; CHECK_FMA-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1
556 ; CHECK_FMA-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0
557 ; CHECK_FMA-NEXT: retq
559 ; CHECK_FMA4-LABEL: test_v4f64_interp:
560 ; CHECK_FMA4: # BB#0:
561 ; CHECK_FMA4-NEXT: vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1
562 ; CHECK_FMA4-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
563 ; CHECK_FMA4-NEXT: retq
564 %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
565 %tx = fmul <4 x double> %x, %t
566 %ty = fmul <4 x double> %y, %t1
567 %r = fadd <4 x double> %tx, %ty
571 ; (fneg (fma x, y, z)) -> (fma x, -y, -z)
; <4 x float>: -0.0 - ((%a0 * %a1) + %a2), i.e. a negated multiply-add.
; The function carries attribute group #0.
; Expected to be selected as one vfnmsub213ps (FMA3 checks) or one
; vfnmsubps (FMA4 checks), with no separate negation instruction.
573 define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
574 ; CHECK_FMA-LABEL: test_v4f32_fneg_fmadd:
576 ; CHECK_FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
577 ; CHECK_FMA-NEXT: retq
579 ; CHECK_FMA4-LABEL: test_v4f32_fneg_fmadd:
580 ; CHECK_FMA4: # BB#0:
581 ; CHECK_FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
582 ; CHECK_FMA4-NEXT: retq
583 %mul = fmul <4 x float> %a0, %a1
584 %add = fadd <4 x float> %mul, %a2
585 %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
; <4 x double>: -0.0 - ((%a0 * %a1) - %a2), i.e. a negated
; multiply-subtract, returned as %neg. The function carries attribute
; group #0.
; Expected to be selected as one vfnmadd213pd (FMA3 checks) or one
; vfnmaddpd (FMA4 checks), with no separate negation instruction.
589 define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
590 ; CHECK_FMA-LABEL: test_v4f64_fneg_fmsub:
592 ; CHECK_FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
593 ; CHECK_FMA-NEXT: retq
595 ; CHECK_FMA4-LABEL: test_v4f64_fneg_fmsub:
596 ; CHECK_FMA4: # BB#0:
597 ; CHECK_FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
598 ; CHECK_FMA4-NEXT: retq
599 %mul = fmul <4 x double> %a0, %a1
600 %sub = fsub <4 x double> %mul, %a2
601 %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
602 ret <4 x double> %neg
; <4 x float>: -0.0 - ((-0.0 - (%a0 * %a1)) + %a2), i.e. the negation of
; a negated-product add, returned as %neg1. The function carries
; attribute group #0.
; Expected to be selected as one vfmsub213ps (FMA3 checks) or one
; vfmsubps (FMA4 checks), with no separate negation instructions.
605 define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
606 ; CHECK_FMA-LABEL: test_v4f32_fneg_fnmadd:
608 ; CHECK_FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
609 ; CHECK_FMA-NEXT: retq
611 ; CHECK_FMA4-LABEL: test_v4f32_fneg_fnmadd:
612 ; CHECK_FMA4: # BB#0:
613 ; CHECK_FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
614 ; CHECK_FMA4-NEXT: retq
615 %mul = fmul <4 x float> %a0, %a1
616 %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
617 %add = fadd <4 x float> %neg0, %a2
618 %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
619 ret <4 x float> %neg1
; <4 x double>: -0.0 - ((-0.0 - (%a0 * %a1)) - %a2), i.e. the negation
; of a negated-product subtract, returned as %neg1. The function carries
; attribute group #0.
; Expected to be selected as one vfmadd213pd (FMA3 checks) or one
; vfmaddpd (FMA4 checks), with no separate negation instructions.
622 define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
623 ; CHECK_FMA-LABEL: test_v4f64_fneg_fnmsub:
625 ; CHECK_FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
626 ; CHECK_FMA-NEXT: retq
628 ; CHECK_FMA4-LABEL: test_v4f64_fneg_fnmsub:
629 ; CHECK_FMA4: # BB#0:
630 ; CHECK_FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
631 ; CHECK_FMA4-NEXT: retq
632 %mul = fmul <4 x double> %a0, %a1
633 %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
634 %sub = fsub <4 x double> %neg0, %a2
635 %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
636 ret <4 x double> %neg1
639 ; (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
; <4 x float>: (%x * <1,2,3,4>) + (%x * <4,3,2,1>), two products of the
; same %x with constant vectors, summed. The function carries attribute
; group #0.
; Checked under the prefix shared by every run configuration: the
; expected output is a vmulps of %xmm0 with a RIP-relative memory operand.
641 define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
642 ; ALL-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
644 ; ALL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
646 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
647 %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
648 %a = fadd <4 x float> %m0, %m1
652 ; (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
; <4 x float>: ((%x * <1,2,3,4>) * <4,3,2,1>) + %y, two chained constant
; multiplies followed by an add. The function carries attribute group #0.
; Expected to be selected as one fused multiply-add with a RIP-relative
; memory operand: vfmadd132ps (FMA3 checks) or vfmaddps (FMA4 checks).
654 define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 {
655 ; CHECK_FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
657 ; CHECK_FMA-NEXT: vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
658 ; CHECK_FMA-NEXT: retq
660 ; CHECK_FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
661 ; CHECK_FMA4: # BB#0:
662 ; CHECK_FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
663 ; CHECK_FMA4-NEXT: retq
664 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
665 %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
666 %a = fadd <4 x float> %m1, %y
; Attribute group #0 sets "unsafe-fp-math"="true"; it applies to every
; function in this file whose definition is tagged with #0.
670 attributes #0 = { "unsafe-fp-math"="true" }