1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA4
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=FMA4
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=AVX512
8 ; Pattern: (fadd (fmul x, y), z) -> (fmadd x, y, z)
; Computes (%a0 * %a1) + %a2 on <16 x float>.
; The FMA run expects two ymm vfmadd213ps, the FMA4 runs two ymm vfmaddps, and
; the AVX512 run a single zmm vfmadd213ps.
11 define <16 x float> @test_16f32_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
12 ; FMA-LABEL: test_16f32_fmadd:
14 ; FMA-NEXT: vfmadd213ps %ymm4, %ymm2, %ymm0
15 ; FMA-NEXT: vfmadd213ps %ymm5, %ymm3, %ymm1
18 ; FMA4-LABEL: test_16f32_fmadd:
20 ; FMA4-NEXT: vfmaddps %ymm4, %ymm2, %ymm0, %ymm0
21 ; FMA4-NEXT: vfmaddps %ymm5, %ymm3, %ymm1, %ymm1
24 ; AVX512-LABEL: test_16f32_fmadd:
26 ; AVX512-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0
28 %x = fmul <16 x float> %a0, %a1
29 %res = fadd <16 x float> %x, %a2
; Computes (%a0 * %a1) + %a2 on <8 x double>.
; The FMA run expects two ymm vfmadd213pd, the FMA4 runs two ymm vfmaddpd, and
; the AVX512 run a single zmm vfmadd213pd.
33 define <8 x double> @test_8f64_fmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
34 ; FMA-LABEL: test_8f64_fmadd:
36 ; FMA-NEXT: vfmadd213pd %ymm4, %ymm2, %ymm0
37 ; FMA-NEXT: vfmadd213pd %ymm5, %ymm3, %ymm1
40 ; FMA4-LABEL: test_8f64_fmadd:
42 ; FMA4-NEXT: vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
43 ; FMA4-NEXT: vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
46 ; AVX512-LABEL: test_8f64_fmadd:
48 ; AVX512-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0
50 %x = fmul <8 x double> %a0, %a1
51 %res = fadd <8 x double> %x, %a2
56 ; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
; Computes (%a0 * %a1) - %a2 on <16 x float>.
; The FMA run expects two ymm vfmsub213ps, the FMA4 runs two ymm vfmsubps, and
; the AVX512 run a single zmm vfmsub213ps.
59 define <16 x float> @test_16f32_fmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
60 ; FMA-LABEL: test_16f32_fmsub:
62 ; FMA-NEXT: vfmsub213ps %ymm4, %ymm2, %ymm0
63 ; FMA-NEXT: vfmsub213ps %ymm5, %ymm3, %ymm1
66 ; FMA4-LABEL: test_16f32_fmsub:
68 ; FMA4-NEXT: vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
69 ; FMA4-NEXT: vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
72 ; AVX512-LABEL: test_16f32_fmsub:
74 ; AVX512-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0
76 %x = fmul <16 x float> %a0, %a1
77 %res = fsub <16 x float> %x, %a2
; Computes (%a0 * %a1) - %a2 on <8 x double>.
; The FMA run expects two ymm vfmsub213pd, the FMA4 runs two ymm vfmsubpd, and
; the AVX512 run a single zmm vfmsub213pd.
81 define <8 x double> @test_8f64_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
82 ; FMA-LABEL: test_8f64_fmsub:
84 ; FMA-NEXT: vfmsub213pd %ymm4, %ymm2, %ymm0
85 ; FMA-NEXT: vfmsub213pd %ymm5, %ymm3, %ymm1
88 ; FMA4-LABEL: test_8f64_fmsub:
90 ; FMA4-NEXT: vfmsubpd %ymm4, %ymm2, %ymm0, %ymm0
91 ; FMA4-NEXT: vfmsubpd %ymm5, %ymm3, %ymm1, %ymm1
94 ; AVX512-LABEL: test_8f64_fmsub:
96 ; AVX512-NEXT: vfmsub213pd %zmm2, %zmm1, %zmm0
98 %x = fmul <8 x double> %a0, %a1
99 %res = fsub <8 x double> %x, %a2
100 ret <8 x double> %res
104 ; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
; Computes %a2 - (%a0 * %a1) on <16 x float>.
; The FMA run expects two ymm vfnmadd213ps, the FMA4 runs two ymm vfnmaddps,
; and the AVX512 run a single zmm vfnmadd213ps.
107 define <16 x float> @test_16f32_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
108 ; FMA-LABEL: test_16f32_fnmadd:
110 ; FMA-NEXT: vfnmadd213ps %ymm4, %ymm2, %ymm0
111 ; FMA-NEXT: vfnmadd213ps %ymm5, %ymm3, %ymm1
114 ; FMA4-LABEL: test_16f32_fnmadd:
116 ; FMA4-NEXT: vfnmaddps %ymm4, %ymm2, %ymm0, %ymm0
117 ; FMA4-NEXT: vfnmaddps %ymm5, %ymm3, %ymm1, %ymm1
120 ; AVX512-LABEL: test_16f32_fnmadd:
122 ; AVX512-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0
124 %x = fmul <16 x float> %a0, %a1
125 %res = fsub <16 x float> %a2, %x
126 ret <16 x float> %res
; Computes %a2 - (%a0 * %a1) on <8 x double>.
; The FMA run expects two ymm vfnmadd213pd, the FMA4 runs two ymm vfnmaddpd,
; and the AVX512 run a single zmm vfnmadd213pd.
129 define <8 x double> @test_8f64_fnmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
130 ; FMA-LABEL: test_8f64_fnmadd:
132 ; FMA-NEXT: vfnmadd213pd %ymm4, %ymm2, %ymm0
133 ; FMA-NEXT: vfnmadd213pd %ymm5, %ymm3, %ymm1
136 ; FMA4-LABEL: test_8f64_fnmadd:
138 ; FMA4-NEXT: vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
139 ; FMA4-NEXT: vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
142 ; AVX512-LABEL: test_8f64_fnmadd:
144 ; AVX512-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm0
146 %x = fmul <8 x double> %a0, %a1
147 %res = fsub <8 x double> %a2, %x
148 ret <8 x double> %res
152 ; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
; Computes (-0.0 - (%a0 * %a1)) - %a2 on <16 x float>; the negation of the
; product is spelled as an fsub from a -0.0 splat.
; The FMA run expects two ymm vfnmsub213ps, the FMA4 runs two ymm vfnmsubps,
; and the AVX512 run a single zmm vfnmsub213ps.
155 define <16 x float> @test_16f32_fnmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
156 ; FMA-LABEL: test_16f32_fnmsub:
158 ; FMA-NEXT: vfnmsub213ps %ymm4, %ymm2, %ymm0
159 ; FMA-NEXT: vfnmsub213ps %ymm5, %ymm3, %ymm1
162 ; FMA4-LABEL: test_16f32_fnmsub:
164 ; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
165 ; FMA4-NEXT: vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
168 ; AVX512-LABEL: test_16f32_fnmsub:
170 ; AVX512-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
172 %x = fmul <16 x float> %a0, %a1
173 %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
174 %res = fsub <16 x float> %y, %a2
175 ret <16 x float> %res
; Computes (-0.0 - (%a0 * %a1)) - %a2 on <8 x double>; the negation of the
; product is spelled as an fsub from a -0.0 splat.
; The FMA run expects two ymm vfnmsub213pd, the FMA4 runs two ymm vfnmsubpd,
; and the AVX512 run a single zmm vfnmsub213pd.
178 define <8 x double> @test_8f64_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
179 ; FMA-LABEL: test_8f64_fnmsub:
181 ; FMA-NEXT: vfnmsub213pd %ymm4, %ymm2, %ymm0
182 ; FMA-NEXT: vfnmsub213pd %ymm5, %ymm3, %ymm1
185 ; FMA4-LABEL: test_8f64_fnmsub:
187 ; FMA4-NEXT: vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
188 ; FMA4-NEXT: vfnmsubpd %ymm5, %ymm3, %ymm1, %ymm1
191 ; AVX512-LABEL: test_8f64_fnmsub:
193 ; AVX512-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0
195 %x = fmul <8 x double> %a0, %a1
196 %y = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
197 %res = fsub <8 x double> %y, %a2
198 ret <8 x double> %res
202 ; Load Folding Patterns
; Loads a <16 x float> through pointer %a0, multiplies it by %a1 and adds %a2.
; The FMA run expects vfmadd132ps and the FMA4 runs vfmaddps, each taking the
; loaded value directly as a memory operand at (%rdi) and 32(%rdi).
; The AVX512 run expects a separate vmovaps load into zmm2, then vfmadd213ps,
; then a vmovaps copy of the result into zmm0.
205 define <16 x float> @test_16f32_fmadd_load(<16 x float>* %a0, <16 x float> %a1, <16 x float> %a2) {
206 ; FMA-LABEL: test_16f32_fmadd_load:
208 ; FMA-NEXT: vfmadd132ps (%rdi), %ymm2, %ymm0
209 ; FMA-NEXT: vfmadd132ps 32(%rdi), %ymm3, %ymm1
212 ; FMA4-LABEL: test_16f32_fmadd_load:
214 ; FMA4-NEXT: vfmaddps %ymm2, (%rdi), %ymm0, %ymm0
215 ; FMA4-NEXT: vfmaddps %ymm3, 32(%rdi), %ymm1, %ymm1
218 ; AVX512-LABEL: test_16f32_fmadd_load:
220 ; AVX512-NEXT: vmovaps (%rdi), %zmm2
221 ; AVX512-NEXT: vfmadd213ps %zmm1, %zmm0, %zmm2
222 ; AVX512-NEXT: vmovaps %zmm2, %zmm0
224 %x = load <16 x float>, <16 x float>* %a0
225 %y = fmul <16 x float> %x, %a1
226 %res = fadd <16 x float> %y, %a2
227 ret <16 x float> %res
; Loads an <8 x double> through pointer %a0, multiplies it by %a1 and
; subtracts %a2.
; The FMA run expects vfmsub132pd and the FMA4 runs vfmsubpd, each taking the
; loaded value directly as a memory operand at (%rdi) and 32(%rdi).
; The AVX512 run expects a separate vmovapd load into zmm2, then vfmsub213pd,
; then a vmovaps copy of the result into zmm0.
230 define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <8 x double> %a2) {
231 ; FMA-LABEL: test_8f64_fmsub_load:
233 ; FMA-NEXT: vfmsub132pd (%rdi), %ymm2, %ymm0
234 ; FMA-NEXT: vfmsub132pd 32(%rdi), %ymm3, %ymm1
237 ; FMA4-LABEL: test_8f64_fmsub_load:
239 ; FMA4-NEXT: vfmsubpd %ymm2, (%rdi), %ymm0, %ymm0
240 ; FMA4-NEXT: vfmsubpd %ymm3, 32(%rdi), %ymm1, %ymm1
243 ; AVX512-LABEL: test_8f64_fmsub_load:
245 ; AVX512-NEXT: vmovapd (%rdi), %zmm2
246 ; AVX512-NEXT: vfmsub213pd %zmm1, %zmm0, %zmm2
247 ; AVX512-NEXT: vmovaps %zmm2, %zmm0
249 %x = load <8 x double>, <8 x double>* %a0
250 %y = fmul <8 x double> %x, %a1
251 %res = fsub <8 x double> %y, %a2
252 ret <8 x double> %res
256 ; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
; Computes (%x + 1.0) * %y on <16 x float>.
; Every run expects an fmadd form (vfmadd213ps / vfmaddps) in which one
; register is used for two of the source operands.
259 define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
260 ; FMA-LABEL: test_v16f32_mul_add_x_one_y:
262 ; FMA-NEXT: vfmadd213ps %ymm2, %ymm2, %ymm0
263 ; FMA-NEXT: vfmadd213ps %ymm3, %ymm3, %ymm1
266 ; FMA4-LABEL: test_v16f32_mul_add_x_one_y:
268 ; FMA4-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
269 ; FMA4-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
272 ; AVX512-LABEL: test_v16f32_mul_add_x_one_y:
274 ; AVX512-NEXT: vfmadd213ps %zmm1, %zmm1, %zmm0
276 %a = fadd <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
277 %m = fmul <16 x float> %a, %y
; Computes %y * (%x + 1.0) on <8 x double> (commuted multiply operands).
; Every run expects an fmadd form (vfmadd213pd / vfmaddpd) in which one
; register is used for two of the source operands.
281 define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
282 ; FMA-LABEL: test_v8f64_mul_y_add_x_one:
284 ; FMA-NEXT: vfmadd213pd %ymm2, %ymm2, %ymm0
285 ; FMA-NEXT: vfmadd213pd %ymm3, %ymm3, %ymm1
288 ; FMA4-LABEL: test_v8f64_mul_y_add_x_one:
290 ; FMA4-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
291 ; FMA4-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
294 ; AVX512-LABEL: test_v8f64_mul_y_add_x_one:
296 ; AVX512-NEXT: vfmadd213pd %zmm1, %zmm1, %zmm0
298 %a = fadd <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
299 %m = fmul <8 x double> %y, %a
; Computes (%x + -1.0) * %y on <16 x float>.
; Every run expects an fmsub form (vfmsub213ps / vfmsubps) in which one
; register is used for two of the source operands.
303 define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
304 ; FMA-LABEL: test_v16f32_mul_add_x_negone_y:
306 ; FMA-NEXT: vfmsub213ps %ymm2, %ymm2, %ymm0
307 ; FMA-NEXT: vfmsub213ps %ymm3, %ymm3, %ymm1
310 ; FMA4-LABEL: test_v16f32_mul_add_x_negone_y:
312 ; FMA4-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
313 ; FMA4-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
316 ; AVX512-LABEL: test_v16f32_mul_add_x_negone_y:
318 ; AVX512-NEXT: vfmsub213ps %zmm1, %zmm1, %zmm0
320 %a = fadd <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
321 %m = fmul <16 x float> %a, %y
; Computes %y * (%x + -1.0) on <8 x double> (commuted multiply operands).
; Every run expects an fmsub form (vfmsub213pd / vfmsubpd) in which one
; register is used for two of the source operands.
325 define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
326 ; FMA-LABEL: test_v8f64_mul_y_add_x_negone:
328 ; FMA-NEXT: vfmsub213pd %ymm2, %ymm2, %ymm0
329 ; FMA-NEXT: vfmsub213pd %ymm3, %ymm3, %ymm1
332 ; FMA4-LABEL: test_v8f64_mul_y_add_x_negone:
334 ; FMA4-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
335 ; FMA4-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
338 ; AVX512-LABEL: test_v8f64_mul_y_add_x_negone:
340 ; AVX512-NEXT: vfmsub213pd %zmm1, %zmm1, %zmm0
342 %a = fadd <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
343 %m = fmul <8 x double> %y, %a
; Computes (1.0 - %x) * %y on <16 x float>.
; Every run expects an fnmadd form (vfnmadd213ps / vfnmaddps) in which one
; register is used for two of the source operands.
347 define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
348 ; FMA-LABEL: test_v16f32_mul_sub_one_x_y:
350 ; FMA-NEXT: vfnmadd213ps %ymm2, %ymm2, %ymm0
351 ; FMA-NEXT: vfnmadd213ps %ymm3, %ymm3, %ymm1
354 ; FMA4-LABEL: test_v16f32_mul_sub_one_x_y:
356 ; FMA4-NEXT: vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0
357 ; FMA4-NEXT: vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1
360 ; AVX512-LABEL: test_v16f32_mul_sub_one_x_y:
362 ; AVX512-NEXT: vfnmadd213ps %zmm1, %zmm1, %zmm0
364 %s = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
365 %m = fmul <16 x float> %s, %y
; Computes %y * (1.0 - %x) on <8 x double> (commuted multiply operands).
; Every run expects an fnmadd form (vfnmadd213pd / vfnmaddpd) in which one
; register is used for two of the source operands.
369 define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
370 ; FMA-LABEL: test_v8f64_mul_y_sub_one_x:
372 ; FMA-NEXT: vfnmadd213pd %ymm2, %ymm2, %ymm0
373 ; FMA-NEXT: vfnmadd213pd %ymm3, %ymm3, %ymm1
376 ; FMA4-LABEL: test_v8f64_mul_y_sub_one_x:
378 ; FMA4-NEXT: vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0
379 ; FMA4-NEXT: vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1
382 ; AVX512-LABEL: test_v8f64_mul_y_sub_one_x:
384 ; AVX512-NEXT: vfnmadd213pd %zmm1, %zmm1, %zmm0
386 %s = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x
387 %m = fmul <8 x double> %y, %s
; Computes (-1.0 - %x) * %y on <16 x float>.
; Every run expects an fnmsub form (vfnmsub213ps / vfnmsubps) in which one
; register is used for two of the source operands.
391 define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
392 ; FMA-LABEL: test_v16f32_mul_sub_negone_x_y:
394 ; FMA-NEXT: vfnmsub213ps %ymm2, %ymm2, %ymm0
395 ; FMA-NEXT: vfnmsub213ps %ymm3, %ymm3, %ymm1
398 ; FMA4-LABEL: test_v16f32_mul_sub_negone_x_y:
400 ; FMA4-NEXT: vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0
401 ; FMA4-NEXT: vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1
404 ; AVX512-LABEL: test_v16f32_mul_sub_negone_x_y:
406 ; AVX512-NEXT: vfnmsub213ps %zmm1, %zmm1, %zmm0
408 %s = fsub <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0,float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x
409 %m = fmul <16 x float> %s, %y
; Computes %y * (-1.0 - %x) on <8 x double> (commuted multiply operands).
; Every run expects an fnmsub form (vfnmsub213pd / vfnmsubpd) in which one
; register is used for two of the source operands.
413 define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
414 ; FMA-LABEL: test_v8f64_mul_y_sub_negone_x:
416 ; FMA-NEXT: vfnmsub213pd %ymm2, %ymm2, %ymm0
417 ; FMA-NEXT: vfnmsub213pd %ymm3, %ymm3, %ymm1
420 ; FMA4-LABEL: test_v8f64_mul_y_sub_negone_x:
422 ; FMA4-NEXT: vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0
423 ; FMA4-NEXT: vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1
426 ; AVX512-LABEL: test_v8f64_mul_y_sub_negone_x:
428 ; AVX512-NEXT: vfnmsub213pd %zmm1, %zmm1, %zmm0
430 %s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
431 %m = fmul <8 x double> %y, %s
; Computes (%x - 1.0) * %y on <16 x float>.
; Every run expects an fmsub form (vfmsub213ps / vfmsubps) in which one
; register is used for two of the source operands.
435 define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
436 ; FMA-LABEL: test_v16f32_mul_sub_x_one_y:
438 ; FMA-NEXT: vfmsub213ps %ymm2, %ymm2, %ymm0
439 ; FMA-NEXT: vfmsub213ps %ymm3, %ymm3, %ymm1
442 ; FMA4-LABEL: test_v16f32_mul_sub_x_one_y:
444 ; FMA4-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
445 ; FMA4-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
448 ; AVX512-LABEL: test_v16f32_mul_sub_x_one_y:
450 ; AVX512-NEXT: vfmsub213ps %zmm1, %zmm1, %zmm0
452 %s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
453 %m = fmul <16 x float> %s, %y
; Computes %y * (%x - 1.0) on <8 x double> (commuted multiply operands).
; Every run expects an fmsub form (vfmsub213pd / vfmsubpd) in which one
; register is used for two of the source operands.
457 define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
458 ; FMA-LABEL: test_v8f64_mul_y_sub_x_one:
460 ; FMA-NEXT: vfmsub213pd %ymm2, %ymm2, %ymm0
461 ; FMA-NEXT: vfmsub213pd %ymm3, %ymm3, %ymm1
464 ; FMA4-LABEL: test_v8f64_mul_y_sub_x_one:
466 ; FMA4-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
467 ; FMA4-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
470 ; AVX512-LABEL: test_v8f64_mul_y_sub_x_one:
472 ; AVX512-NEXT: vfmsub213pd %zmm1, %zmm1, %zmm0
474 %s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
475 %m = fmul <8 x double> %y, %s
; Computes (%x - -1.0) * %y on <16 x float>.
; Every run expects an fmadd form (vfmadd213ps / vfmaddps) in which one
; register is used for two of the source operands.
479 define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
480 ; FMA-LABEL: test_v16f32_mul_sub_x_negone_y:
482 ; FMA-NEXT: vfmadd213ps %ymm2, %ymm2, %ymm0
483 ; FMA-NEXT: vfmadd213ps %ymm3, %ymm3, %ymm1
486 ; FMA4-LABEL: test_v16f32_mul_sub_x_negone_y:
488 ; FMA4-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
489 ; FMA4-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
492 ; AVX512-LABEL: test_v16f32_mul_sub_x_negone_y:
494 ; AVX512-NEXT: vfmadd213ps %zmm1, %zmm1, %zmm0
496 %s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
497 %m = fmul <16 x float> %s, %y
; Computes %y * (%x - -1.0) on <8 x double> (commuted multiply operands).
; Every run expects an fmadd form (vfmadd213pd / vfmaddpd) in which one
; register is used for two of the source operands.
501 define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
502 ; FMA-LABEL: test_v8f64_mul_y_sub_x_negone:
504 ; FMA-NEXT: vfmadd213pd %ymm2, %ymm2, %ymm0
505 ; FMA-NEXT: vfmadd213pd %ymm3, %ymm3, %ymm1
508 ; FMA4-LABEL: test_v8f64_mul_y_sub_x_negone:
510 ; FMA4-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
511 ; FMA4-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
514 ; AVX512-LABEL: test_v8f64_mul_y_sub_x_negone:
516 ; AVX512-NEXT: vfmadd213pd %zmm1, %zmm1, %zmm0
518 %s = fsub <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
519 %m = fmul <8 x double> %y, %s
524 ; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
; Computes (%x * %t) + (%y * (1.0 - %t)) on <16 x float>.
; The FMA and FMA4 runs expect two ymm fnmadd instructions followed by two ymm
; fmadd instructions. The AVX512 run expects a vmovaps register copy, one zmm
; vfnmadd213ps and one zmm vfmadd213ps.
527 define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
528 ; FMA-LABEL: test_v16f32_interp:
530 ; FMA-NEXT: vfnmadd213ps %ymm3, %ymm5, %ymm3
531 ; FMA-NEXT: vfnmadd213ps %ymm2, %ymm4, %ymm2
532 ; FMA-NEXT: vfmadd213ps %ymm2, %ymm4, %ymm0
533 ; FMA-NEXT: vfmadd213ps %ymm3, %ymm5, %ymm1
536 ; FMA4-LABEL: test_v16f32_interp:
538 ; FMA4-NEXT: vfnmaddps %ymm3, %ymm3, %ymm5, %ymm3
539 ; FMA4-NEXT: vfnmaddps %ymm2, %ymm2, %ymm4, %ymm2
540 ; FMA4-NEXT: vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
541 ; FMA4-NEXT: vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
544 ; AVX512-LABEL: test_v16f32_interp:
546 ; AVX512-NEXT: vmovaps %zmm2, %zmm3
547 ; AVX512-NEXT: vfnmadd213ps %zmm1, %zmm1, %zmm3
548 ; AVX512-NEXT: vfmadd213ps %zmm3, %zmm2, %zmm0
550 %t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
551 %tx = fmul <16 x float> %x, %t
552 %ty = fmul <16 x float> %y, %t1
553 %r = fadd <16 x float> %tx, %ty
; Computes (%x * %t) + (%y * (1.0 - %t)) on <8 x double>.
; The FMA and FMA4 runs expect two ymm fnmadd instructions followed by two ymm
; fmadd instructions. The AVX512 run expects a vmovaps register copy, one zmm
; vfnmadd213pd and one zmm vfmadd213pd.
557 define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
558 ; FMA-LABEL: test_v8f64_interp:
560 ; FMA-NEXT: vfnmadd213pd %ymm3, %ymm5, %ymm3
561 ; FMA-NEXT: vfnmadd213pd %ymm2, %ymm4, %ymm2
562 ; FMA-NEXT: vfmadd213pd %ymm2, %ymm4, %ymm0
563 ; FMA-NEXT: vfmadd213pd %ymm3, %ymm5, %ymm1
566 ; FMA4-LABEL: test_v8f64_interp:
568 ; FMA4-NEXT: vfnmaddpd %ymm3, %ymm3, %ymm5, %ymm3
569 ; FMA4-NEXT: vfnmaddpd %ymm2, %ymm2, %ymm4, %ymm2
570 ; FMA4-NEXT: vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
571 ; FMA4-NEXT: vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
574 ; AVX512-LABEL: test_v8f64_interp:
576 ; AVX512-NEXT: vmovaps %zmm2, %zmm3
577 ; AVX512-NEXT: vfnmadd213pd %zmm1, %zmm1, %zmm3
578 ; AVX512-NEXT: vfmadd213pd %zmm3, %zmm2, %zmm0
580 %t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
581 %tx = fmul <8 x double> %x, %t
582 %ty = fmul <8 x double> %y, %t1
583 %r = fadd <8 x double> %tx, %ty
588 ; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
; Computes -0.0 - ((%a0 * %a1) + %a2) on <16 x float>, i.e. a negated
; multiply-add. The function carries attribute group #0.
; Every run expects the fnmsub form (vfnmsub213ps / vfnmsubps).
591 define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
592 ; FMA-LABEL: test_v16f32_fneg_fmadd:
594 ; FMA-NEXT: vfnmsub213ps %ymm4, %ymm2, %ymm0
595 ; FMA-NEXT: vfnmsub213ps %ymm5, %ymm3, %ymm1
598 ; FMA4-LABEL: test_v16f32_fneg_fmadd:
600 ; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
601 ; FMA4-NEXT: vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
604 ; AVX512-LABEL: test_v16f32_fneg_fmadd:
606 ; AVX512-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
608 %mul = fmul <16 x float> %a0, %a1
609 %add = fadd <16 x float> %mul, %a2
610 %neg = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
611 ret <16 x float> %neg
; Computes -0.0 - ((%a0 * %a1) - %a2) on <8 x double>, i.e. a negated
; multiply-subtract. The function carries attribute group #0.
; Every run expects the fnmadd form (vfnmadd213pd / vfnmaddpd).
614 define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
615 ; FMA-LABEL: test_v8f64_fneg_fmsub:
617 ; FMA-NEXT: vfnmadd213pd %ymm4, %ymm2, %ymm0
618 ; FMA-NEXT: vfnmadd213pd %ymm5, %ymm3, %ymm1
621 ; FMA4-LABEL: test_v8f64_fneg_fmsub:
623 ; FMA4-NEXT: vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
624 ; FMA4-NEXT: vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
627 ; AVX512-LABEL: test_v8f64_fneg_fmsub:
629 ; AVX512-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm0
631 %mul = fmul <8 x double> %a0, %a1
632 %sub = fsub <8 x double> %mul, %a2
633 %neg = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
634 ret <8 x double> %neg
; Computes -0.0 - ((-0.0 - (%a0 * %a1)) + %a2) on <16 x float>: the product
; is negated, %a2 is added, and the sum is negated again. The function carries
; attribute group #0.
; Every run expects the fmsub form (vfmsub213ps / vfmsubps).
637 define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
638 ; FMA-LABEL: test_v16f32_fneg_fnmadd:
640 ; FMA-NEXT: vfmsub213ps %ymm4, %ymm2, %ymm0
641 ; FMA-NEXT: vfmsub213ps %ymm5, %ymm3, %ymm1
644 ; FMA4-LABEL: test_v16f32_fneg_fnmadd:
646 ; FMA4-NEXT: vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
647 ; FMA4-NEXT: vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
650 ; AVX512-LABEL: test_v16f32_fneg_fnmadd:
652 ; AVX512-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0
654 %mul = fmul <16 x float> %a0, %a1
655 %neg0 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
656 %add = fadd <16 x float> %neg0, %a2
657 %neg1 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
658 ret <16 x float> %neg1
; Computes -0.0 - ((-0.0 - (%a0 * %a1)) - %a2) on <8 x double>: the product
; is negated, %a2 is subtracted, and the difference is negated again. The
; function carries attribute group #0.
; Every run expects the fmadd form (vfmadd213pd / vfmaddpd).
661 define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
662 ; FMA-LABEL: test_v8f64_fneg_fnmsub:
664 ; FMA-NEXT: vfmadd213pd %ymm4, %ymm2, %ymm0
665 ; FMA-NEXT: vfmadd213pd %ymm5, %ymm3, %ymm1
668 ; FMA4-LABEL: test_v8f64_fneg_fnmsub:
670 ; FMA4-NEXT: vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
671 ; FMA4-NEXT: vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
674 ; AVX512-LABEL: test_v8f64_fneg_fnmsub:
676 ; AVX512-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0
678 %mul = fmul <8 x double> %a0, %a1
679 %neg0 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
680 %sub = fsub <8 x double> %neg0, %a2
681 %neg1 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
682 ret <8 x double> %neg1
686 ; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
; Computes (%x * c1) + (%x * c2) where c1 and c2 are constant vectors. The
; function carries attribute group #0.
; Every run expects only vmulps against a RIP-relative memory operand (two ymm
; multiplies for FMA/FMA4, one zmm multiply for AVX512), with no fused
; multiply-add in the checked lines.
689 define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
690 ; FMA-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
692 ; FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
693 ; FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
696 ; FMA4-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
698 ; FMA4-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
699 ; FMA4-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
702 ; AVX512-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
704 ; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
706 %m0 = fmul <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
707 %m1 = fmul <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
708 %a = fadd <16 x float> %m0, %m1
713 ; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
; Computes ((%x * c1) * c2) + %y where c1 and c2 are constant vectors. The
; function carries attribute group #0.
; Every run expects a fused multiply-add against a RIP-relative memory operand
; and no separate multiply in the checked lines: vfmadd132ps for FMA, vfmaddps
; for FMA4, and vfmadd231ps followed by a vmovaps register copy for AVX512.
716 define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float> %y) #0 {
717 ; FMA-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
719 ; FMA-NEXT: vfmadd132ps {{.*}}(%rip), %ymm2, %ymm0
720 ; FMA-NEXT: vfmadd132ps {{.*}}(%rip), %ymm3, %ymm1
723 ; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
725 ; FMA4-NEXT: vfmaddps %ymm2, {{.*}}(%rip), %ymm0, %ymm0
726 ; FMA4-NEXT: vfmaddps %ymm3, {{.*}}(%rip), %ymm1, %ymm1
729 ; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
731 ; AVX512-NEXT: vfmadd231ps {{.*}}(%rip), %zmm0, %zmm1
732 ; AVX512-NEXT: vmovaps %zmm1, %zmm0
734 %m0 = fmul <16 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
735 %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
736 %a = fadd <16 x float> %m1, %y
; Attribute group #0 sets "unsafe-fp-math"="true". It is referenced by the
; fneg-of-fma tests and the two constant-combining tests, whose define lines
; end in #0.
740 attributes #0 = { "unsafe-fp-math"="true" }