1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX
; 512-bit single-precision fused multiply-add: (%a0 * %a1) + %a2.
; The separate fmul and fadd below are expected to be contracted into one
; vfmadd213ps on zmm registers; the same expectation applies to both llc
; configurations (shared check prefix).
5 define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
6 ; ALL-LABEL: test_x86_fmadd_ps_z:
8 ; ALL-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0
10 %x = fmul <16 x float> %a0, %a1
11 %res = fadd <16 x float> %x, %a2
; 512-bit single-precision fused multiply-subtract: (%a0 * %a1) - %a2.
; The product is the minuend of the fsub, so the expected instruction is
; vfmsub213ps with %a2 (zmm2) as the subtracted operand.
15 define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
16 ; ALL-LABEL: test_x86_fmsub_ps_z:
18 ; ALL-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0
20 %x = fmul <16 x float> %a0, %a1
21 %res = fsub <16 x float> %x, %a2
; 512-bit single-precision negated multiply-add: %a2 - (%a0 * %a1).
; Here the product is the subtrahend of the fsub (operands swapped relative to
; the fmsub test), so the expected instruction is vfnmadd213ps.
25 define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
26 ; ALL-LABEL: test_x86_fnmadd_ps_z:
28 ; ALL-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0
30 %x = fmul <16 x float> %a0, %a1
31 %res = fsub <16 x float> %a2, %x
; 512-bit single-precision negated multiply-subtract: -(%a0 * %a1) - %a2.
; The product is negated and then %a2 is subtracted from it; the whole
; three-instruction sequence is expected to become a single vfnmsub213ps.
35 define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
36 ; ALL-LABEL: test_x86_fnmsub_ps_z:
38 ; ALL-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
40 %x = fmul <16 x float> %a0, %a1
; Negation of %x, spelled as a subtraction from a vector of sixteen -0.0 lanes.
41 %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
42 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
43 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
44 float -0.000000e+00>, %x
45 %res = fsub <16 x float> %y, %a2
; 512-bit double-precision fused multiply-add: (%a0 * %a1) + %a2 on
; <8 x double>. Expected to select vfmadd213pd on zmm registers.
49 define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
50 ; ALL-LABEL: test_x86_fmadd_pd_z:
52 ; ALL-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0
54 %x = fmul <8 x double> %a0, %a1
55 %res = fadd <8 x double> %x, %a2
; 512-bit double-precision fused multiply-subtract: (%a0 * %a1) - %a2 on
; <8 x double>. Expected to select vfmsub213pd on zmm registers.
59 define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
60 ; ALL-LABEL: test_x86_fmsub_pd_z:
62 ; ALL-NEXT: vfmsub213pd %zmm2, %zmm1, %zmm0
64 %x = fmul <8 x double> %a0, %a1
65 %res = fsub <8 x double> %x, %a2
; Scalar double fused multiply-subtract with all operands in registers:
; (%a0 * %a1) - %a2. The expected code computes the result into xmm1 with
; vfmsub213sd and then copies it to the return register with a vmovaps.
; NOTE(review): the copy is checked as a full zmm move even though the value
; is a scalar; this is recorded compiler output (assertions are autogenerated),
; not a hand-written requirement - confirm before relying on it.
69 define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
70 ; ALL-LABEL: test_x86_fmsub_213:
72 ; ALL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
73 ; ALL-NEXT: vmovaps %zmm1, %zmm0
75 %x = fmul double %a0, %a1
76 %res = fsub double %x, %a2
; Scalar double fused multiply-subtract where the subtracted operand comes
; from memory: (%a0 * %a1) - load(%a2_ptr). The load is expected to be folded
; into the memory operand of vfmsub213sd.
; The two configurations are checked separately because their register
; assignment differs: the KNL expectation computes into xmm1 and then copies
; to the return register, while the SKX expectation computes directly into
; xmm0 with no copy.
80 define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
81 ; KNL-LABEL: test_x86_fmsub_213_m:
83 ; KNL-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1
84 ; KNL-NEXT: vmovaps %zmm1, %zmm0
87 ; SKX-LABEL: test_x86_fmsub_213_m:
89 ; SKX-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0
91 %a2 = load double , double *%a2_ptr
92 %x = fmul double %a0, %a1
93 %res = fsub double %x, %a2
; Scalar double fused multiply-subtract where one multiplicand comes from
; memory: (%a0 * load(%a2_ptr)) - %a1. Because the loaded value is a factor of
; the product rather than the subtracted term, the expected form is
; vfmsub231sd with the folded memory operand; the result lands in xmm1 (which
; held %a1) and is then copied to the return register.
97 define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
98 ; ALL-LABEL: test_x86_fmsub_231_m:
100 ; ALL-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1
101 ; ALL-NEXT: vmovaps %zmm1, %zmm0
103 %a2 = load double , double *%a2_ptr
104 %x = fmul double %a0, %a2
105 %res = fsub double %x, %a1
; Fused multiply-add where one multiplicand is a 16-lane splat of a single
; float constant: (%a1 * splat) + %a2. The constant is expected to be read
; through a rip-relative embedded broadcast ({1to16}) memory operand of
; vfmadd231ps, accumulating into zmm1 (which held %a2), followed by a copy to
; the return register.
109 define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
110 ; ALL-LABEL: test231_br:
112 ; ALL-NEXT: vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1
113 ; ALL-NEXT: vmovaps %zmm1, %zmm0
115 %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
116 %b2 = fadd <16 x float> %b1, %a2
; Fused multiply-add where the addend is a 16-lane splat of a single float
; constant: (%a1 * %a2) + splat. The constant is expected to be read through a
; rip-relative embedded broadcast ({1to16}) memory operand of vfmadd213ps,
; with the result produced directly in zmm0 (no trailing copy is checked).
120 define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
121 ; ALL-LABEL: test213_br:
123 ; ALL-NEXT: vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
125 %b1 = fmul <16 x float> %a1, %a2
126 %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
; Masked fused multiply-add, 132 form:
;   select(%mask, (%a0 * load(%a2_ptrt)) + %a1, %a0)
; The lanes not selected by %mask keep %a0, which is also a multiplicand, so
; the expected instruction is vfmadd132ps writing zmm0 under write-mask k1
; with the (align 1) load folded into its memory operand.
; The two configurations differ only in how the <16 x i1> argument is turned
; into k1: the KNL expectation sign-extends bytes to dwords, shifts the low
; bit into the sign position and uses vptestmd; the SKX expectation shifts
; within the byte vector and uses vpmovb2m.
131 define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
132 ; KNL-LABEL: test_x86_fmadd132_ps:
134 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
135 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
136 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
137 ; KNL-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
140 ; SKX-LABEL: test_x86_fmadd132_ps:
142 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
143 ; SKX-NEXT: vpmovb2m %xmm2, %k1
144 ; SKX-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
146 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
147 %x = fmul <16 x float> %a0, %a2
148 %y = fadd <16 x float> %x, %a1
; Pass-through value for unselected lanes is %a0 (a multiplicand).
149 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
150 ret <16 x float> %res
; Masked fused multiply-add, 231 form:
;   select(%mask, (%a0 * load(%a2_ptrt)) + %a1, %a1)
; The lanes not selected by %mask keep %a1, which is the addend, so the
; expected instruction is vfmadd231ps accumulating into zmm1 under write-mask
; k1 with the (align 1) load folded into its memory operand; the result is
; then copied from zmm1 to the return register.
; Mask materialization into k1 differs per configuration (vptestmd-based
; sequence in the KNL checks, vpmovb2m-based sequence in the SKX checks).
154 define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
155 ; KNL-LABEL: test_x86_fmadd231_ps:
157 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
158 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
159 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
160 ; KNL-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
161 ; KNL-NEXT: vmovaps %zmm1, %zmm0
164 ; SKX-LABEL: test_x86_fmadd231_ps:
166 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
167 ; SKX-NEXT: vpmovb2m %xmm2, %k1
168 ; SKX-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
169 ; SKX-NEXT: vmovaps %zmm1, %zmm0
171 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
172 %x = fmul <16 x float> %a0, %a2
173 %y = fadd <16 x float> %x, %a1
; Pass-through value for unselected lanes is %a1 (the addend).
174 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
175 ret <16 x float> %res
; Masked fused multiply-add, 213 form:
;   select(%mask, (%a1 * %a0) + load(%a2_ptrt), %a1)
; The loaded vector is the addend and the lanes not selected by %mask keep
; %a1, which is a multiplicand, so the expected instruction is vfmadd213ps
; writing zmm1 under write-mask k1 with the (align 1) load folded into its
; memory operand; the result is then copied from zmm1 to the return register.
; Mask materialization into k1 differs per configuration (vptestmd-based
; sequence in the KNL checks, vpmovb2m-based sequence in the SKX checks).
179 define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
180 ; KNL-LABEL: test_x86_fmadd213_ps:
182 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
183 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
184 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
185 ; KNL-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
186 ; KNL-NEXT: vmovaps %zmm1, %zmm0
189 ; SKX-LABEL: test_x86_fmadd213_ps:
191 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
192 ; SKX-NEXT: vpmovb2m %xmm2, %k1
193 ; SKX-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
194 ; SKX-NEXT: vmovaps %zmm1, %zmm0
196 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
197 %x = fmul <16 x float> %a1, %a0
198 %y = fadd <16 x float> %x, %a2
; Pass-through value for unselected lanes is %a1 (a multiplicand).
199 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
200 ret <16 x float> %res