1 ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s
2 ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma,+fma4 | FileCheck %s
3 ; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s
; Attribute group #0 is attached to the test functions below and marks them nounwind.
5 attributes #0 = { nounwind }
; Operand-order tests for @llvm.x86.fma.vfmadd.ss on <4 x float>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
7 declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; (%b, %a, %a): the patterns accept the two loads in either order, then require the
; register-only vfmadd213ss. NOTE(review): both load patterns use the same
; alternation, so they do not by themselves force the two loads to be different.
8 define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
9 ; CHECK-LABEL: test_x86_fmadd_baa_ss:
11 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
12 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
13 ; CHECK-NEXT: vfmadd213ss %xmm1, %xmm1, %xmm0
15 %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; (%a, %b, %a): load from (%rcx), then vfmadd132ss with (%rdx) as the memory operand.
19 define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
20 ; CHECK-LABEL: test_x86_fmadd_aba_ss:
22 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
23 ; CHECK-NEXT: vfmadd132ss (%rdx), %xmm0, %xmm0
25 %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; (%b, %b, %a): load from (%rdx), then vfmadd213ss with (%rcx) as the memory operand.
29 define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
30 ; CHECK-LABEL: test_x86_fmadd_bba_ss:
32 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
33 ; CHECK-NEXT: vfmadd213ss (%rcx), %xmm0, %xmm0
35 %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfmadd.ps on <4 x float>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
39 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfmadd132ps with (%rdx) as the memory operand.
40 define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
41 ; CHECK-LABEL: test_x86_fmadd_baa_ps:
43 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
44 ; CHECK-NEXT: vfmadd132ps (%rdx), %xmm0, %xmm0
46 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; (%a, %b, %a): load from (%rcx), then vfmadd231ps with (%rdx) as the memory operand.
50 define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
51 ; CHECK-LABEL: test_x86_fmadd_aba_ps:
53 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
54 ; CHECK-NEXT: vfmadd231ps (%rdx), %xmm0, %xmm0
56 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; (%b, %b, %a): load from (%rdx), then vfmadd213ps with (%rcx) as the memory operand.
60 define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
61 ; CHECK-LABEL: test_x86_fmadd_bba_ps:
63 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
64 ; CHECK-NEXT: vfmadd213ps (%rcx), %xmm0, %xmm0
66 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfmadd.ps.256 on <8 x float> (ymm registers).
; The baa/aba/bba suffix names the order in which %a and %b are passed to the call.
70 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfmadd132ps with (%rdx) as the memory operand.
71 define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
72 ; CHECK-LABEL: test_x86_fmadd_baa_ps_y:
74 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
75 ; CHECK-NEXT: vfmadd132ps (%rdx), %ymm0, %ymm0
77 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
; (%a, %b, %a): load from (%rcx), then vfmadd231ps with (%rdx) as the memory operand.
81 define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
82 ; CHECK-LABEL: test_x86_fmadd_aba_ps_y:
84 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
85 ; CHECK-NEXT: vfmadd231ps (%rdx), %ymm0, %ymm0
87 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
; (%b, %b, %a): load from (%rdx), then vfmadd213ps with (%rcx) as the memory operand.
91 define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
92 ; CHECK-LABEL: test_x86_fmadd_bba_ps_y:
94 ; CHECK-NEXT: vmovaps (%rdx), %ymm0
95 ; CHECK-NEXT: vfmadd213ps (%rcx), %ymm0, %ymm0
97 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfmadd.sd on <2 x double>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
101 declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; (%b, %a, %a): the patterns accept the two loads in either order, then require the
; register-only vfmadd213sd. NOTE(review): both load patterns use the same
; alternation, so they do not by themselves force the two loads to be different.
102 define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
103 ; CHECK-LABEL: test_x86_fmadd_baa_sd:
105 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
106 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
107 ; CHECK-NEXT: vfmadd213sd %xmm1, %xmm1, %xmm0
109 %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
110 ret <2 x double> %res
; (%a, %b, %a): load from (%rcx), then vfmadd132sd with (%rdx) as the memory operand.
113 define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
114 ; CHECK-LABEL: test_x86_fmadd_aba_sd:
116 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
117 ; CHECK-NEXT: vfmadd132sd (%rdx), %xmm0, %xmm0
119 %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
120 ret <2 x double> %res
; (%b, %b, %a): load from (%rdx), then vfmadd213sd with (%rcx) as the memory operand.
123 define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
124 ; CHECK-LABEL: test_x86_fmadd_bba_sd:
126 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
127 ; CHECK-NEXT: vfmadd213sd (%rcx), %xmm0, %xmm0
129 %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
130 ret <2 x double> %res
; Operand-order tests for @llvm.x86.fma.vfmadd.pd on <2 x double>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
133 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfmadd132pd with (%rdx) as the memory operand.
134 define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
135 ; CHECK-LABEL: test_x86_fmadd_baa_pd:
137 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
138 ; CHECK-NEXT: vfmadd132pd (%rdx), %xmm0, %xmm0
140 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
141 ret <2 x double> %res
; (%a, %b, %a): load from (%rcx), then vfmadd231pd with (%rdx) as the memory operand.
144 define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
145 ; CHECK-LABEL: test_x86_fmadd_aba_pd:
147 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
148 ; CHECK-NEXT: vfmadd231pd (%rdx), %xmm0, %xmm0
150 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
151 ret <2 x double> %res
; (%b, %b, %a): load from (%rdx), then vfmadd213pd with (%rcx) as the memory operand.
154 define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
155 ; CHECK-LABEL: test_x86_fmadd_bba_pd:
157 ; CHECK-NEXT: vmovapd (%rdx), %xmm0
158 ; CHECK-NEXT: vfmadd213pd (%rcx), %xmm0, %xmm0
160 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
161 ret <2 x double> %res
; Operand-order tests for @llvm.x86.fma.vfmadd.pd.256 on <4 x double> (ymm registers).
; The baa/aba/bba suffix names the order in which %a and %b are passed to the call.
164 declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfmadd132pd with (%rdx) as the memory operand.
165 define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
166 ; CHECK-LABEL: test_x86_fmadd_baa_pd_y:
168 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
169 ; CHECK-NEXT: vfmadd132pd (%rdx), %ymm0, %ymm0
171 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
172 ret <4 x double> %res
; (%a, %b, %a): load from (%rcx), then vfmadd231pd with (%rdx) as the memory operand.
175 define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
176 ; CHECK-LABEL: test_x86_fmadd_aba_pd_y:
178 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
179 ; CHECK-NEXT: vfmadd231pd (%rdx), %ymm0, %ymm0
181 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
182 ret <4 x double> %res
; (%b, %b, %a): load from (%rdx), then vfmadd213pd with (%rcx) as the memory operand.
185 define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
186 ; CHECK-LABEL: test_x86_fmadd_bba_pd_y:
188 ; CHECK-NEXT: vmovapd (%rdx), %ymm0
189 ; CHECK-NEXT: vfmadd213pd (%rcx), %ymm0, %ymm0
191 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
192 ret <4 x double> %res
; Operand-order tests for @llvm.x86.fma.vfnmadd.ss on <4 x float>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
196 declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; (%b, %a, %a): the patterns accept the two loads in either order, then require the
; register-only vfnmadd213ss. NOTE(review): both load patterns use the same
; alternation, so they do not by themselves force the two loads to be different.
197 define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
198 ; CHECK-LABEL: test_x86_fnmadd_baa_ss:
200 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
201 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
202 ; CHECK-NEXT: vfnmadd213ss %xmm1, %xmm1, %xmm0
204 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; (%a, %b, %a): load from (%rcx), then vfnmadd132ss with (%rdx) as the memory operand.
208 define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
209 ; CHECK-LABEL: test_x86_fnmadd_aba_ss:
211 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
212 ; CHECK-NEXT: vfnmadd132ss (%rdx), %xmm0, %xmm0
214 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; (%b, %b, %a): load from (%rdx), then vfnmadd213ss with (%rcx) as the memory operand.
218 define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
219 ; CHECK-LABEL: test_x86_fnmadd_bba_ss:
221 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
222 ; CHECK-NEXT: vfnmadd213ss (%rcx), %xmm0, %xmm0
224 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfnmadd.ps on <4 x float>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
228 declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfnmadd132ps with (%rdx) as the memory operand.
229 define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
230 ; CHECK-LABEL: test_x86_fnmadd_baa_ps:
232 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
233 ; CHECK-NEXT: vfnmadd132ps (%rdx), %xmm0, %xmm0
235 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; (%a, %b, %a): load from (%rcx), then vfnmadd231ps with (%rdx) as the memory operand.
239 define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
240 ; CHECK-LABEL: test_x86_fnmadd_aba_ps:
242 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
243 ; CHECK-NEXT: vfnmadd231ps (%rdx), %xmm0, %xmm0
245 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; (%b, %b, %a): load from (%rdx), then vfnmadd213ps with (%rcx) as the memory operand.
249 define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
250 ; CHECK-LABEL: test_x86_fnmadd_bba_ps:
252 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
253 ; CHECK-NEXT: vfnmadd213ps (%rcx), %xmm0, %xmm0
255 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfnmadd.ps.256 on <8 x float> (ymm registers).
; The baa/aba/bba suffix names the order in which %a and %b are passed to the call.
259 declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfnmadd132ps with (%rdx) as the memory operand.
260 define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
261 ; CHECK-LABEL: test_x86_fnmadd_baa_ps_y:
263 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
264 ; CHECK-NEXT: vfnmadd132ps (%rdx), %ymm0, %ymm0
266 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
; (%a, %b, %a): load from (%rcx), then vfnmadd231ps with (%rdx) as the memory operand.
270 define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
271 ; CHECK-LABEL: test_x86_fnmadd_aba_ps_y:
273 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
274 ; CHECK-NEXT: vfnmadd231ps (%rdx), %ymm0, %ymm0
276 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
; (%b, %b, %a): load from (%rdx), then vfnmadd213ps with (%rcx) as the memory operand.
280 define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
281 ; CHECK-LABEL: test_x86_fnmadd_bba_ps_y:
283 ; CHECK-NEXT: vmovaps (%rdx), %ymm0
284 ; CHECK-NEXT: vfnmadd213ps (%rcx), %ymm0, %ymm0
286 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfnmadd.sd on <2 x double>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
290 declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; (%b, %a, %a): the patterns accept the two loads in either order, then require the
; register-only vfnmadd213sd. NOTE(review): both load patterns use the same
; alternation, so they do not by themselves force the two loads to be different.
291 define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
292 ; CHECK-LABEL: test_x86_fnmadd_baa_sd:
294 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
295 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
296 ; CHECK-NEXT: vfnmadd213sd %xmm1, %xmm1, %xmm0
298 %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
299 ret <2 x double> %res
; (%a, %b, %a): load from (%rcx), then vfnmadd132sd with (%rdx) as the memory operand.
302 define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
303 ; CHECK-LABEL: test_x86_fnmadd_aba_sd:
305 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
306 ; CHECK-NEXT: vfnmadd132sd (%rdx), %xmm0, %xmm0
308 %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
309 ret <2 x double> %res
; (%b, %b, %a): load from (%rdx), then vfnmadd213sd with (%rcx) as the memory operand.
312 define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
313 ; CHECK-LABEL: test_x86_fnmadd_bba_sd:
315 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
316 ; CHECK-NEXT: vfnmadd213sd (%rcx), %xmm0, %xmm0
318 %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
319 ret <2 x double> %res
; Operand-order tests for @llvm.x86.fma.vfnmadd.pd on <2 x double>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
322 declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfnmadd132pd with (%rdx) as the memory operand.
323 define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
324 ; CHECK-LABEL: test_x86_fnmadd_baa_pd:
326 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
327 ; CHECK-NEXT: vfnmadd132pd (%rdx), %xmm0, %xmm0
329 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
330 ret <2 x double> %res
; (%a, %b, %a): load from (%rcx), then vfnmadd231pd with (%rdx) as the memory operand.
333 define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
334 ; CHECK-LABEL: test_x86_fnmadd_aba_pd:
336 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
337 ; CHECK-NEXT: vfnmadd231pd (%rdx), %xmm0, %xmm0
339 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
340 ret <2 x double> %res
; (%b, %b, %a): load from (%rdx), then vfnmadd213pd with (%rcx) as the memory operand.
343 define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
344 ; CHECK-LABEL: test_x86_fnmadd_bba_pd:
346 ; CHECK-NEXT: vmovapd (%rdx), %xmm0
347 ; CHECK-NEXT: vfnmadd213pd (%rcx), %xmm0, %xmm0
349 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
350 ret <2 x double> %res
; Operand-order tests for @llvm.x86.fma.vfnmadd.pd.256 on <4 x double> (ymm registers).
; The baa/aba/bba suffix names the order in which %a and %b are passed to the call.
353 declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfnmadd132pd with (%rdx) as the memory operand.
354 define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
355 ; CHECK-LABEL: test_x86_fnmadd_baa_pd_y:
357 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
358 ; CHECK-NEXT: vfnmadd132pd (%rdx), %ymm0, %ymm0
360 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
361 ret <4 x double> %res
; (%a, %b, %a): load from (%rcx), then vfnmadd231pd with (%rdx) as the memory operand.
364 define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
365 ; CHECK-LABEL: test_x86_fnmadd_aba_pd_y:
367 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
368 ; CHECK-NEXT: vfnmadd231pd (%rdx), %ymm0, %ymm0
370 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
371 ret <4 x double> %res
; (%b, %b, %a): load from (%rdx), then vfnmadd213pd with (%rcx) as the memory operand.
374 define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
375 ; CHECK-LABEL: test_x86_fnmadd_bba_pd_y:
377 ; CHECK-NEXT: vmovapd (%rdx), %ymm0
378 ; CHECK-NEXT: vfnmadd213pd (%rcx), %ymm0, %ymm0
380 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
381 ret <4 x double> %res
; Operand-order tests for @llvm.x86.fma.vfmsub.ss on <4 x float>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
385 declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; (%b, %a, %a): the patterns accept the two loads in either order, then require the
; register-only vfmsub213ss. NOTE(review): both load patterns use the same
; alternation, so they do not by themselves force the two loads to be different.
386 define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
387 ; CHECK-LABEL: test_x86_fmsub_baa_ss:
389 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
390 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
391 ; CHECK-NEXT: vfmsub213ss %xmm1, %xmm1, %xmm0
393 %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; (%a, %b, %a): load from (%rcx), then vfmsub132ss with (%rdx) as the memory operand.
397 define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
398 ; CHECK-LABEL: test_x86_fmsub_aba_ss:
400 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
401 ; CHECK-NEXT: vfmsub132ss (%rdx), %xmm0, %xmm0
403 %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; (%b, %b, %a): load from (%rdx), then vfmsub213ss with (%rcx) as the memory operand.
407 define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
408 ; CHECK-LABEL: test_x86_fmsub_bba_ss:
410 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
411 ; CHECK-NEXT: vfmsub213ss (%rcx), %xmm0, %xmm0
413 %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfmsub.ps on <4 x float>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
417 declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfmsub132ps with (%rdx) as the memory operand.
418 define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
419 ; CHECK-LABEL: test_x86_fmsub_baa_ps:
421 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
422 ; CHECK-NEXT: vfmsub132ps (%rdx), %xmm0, %xmm0
424 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; (%a, %b, %a): load from (%rcx), then vfmsub231ps with (%rdx) as the memory operand.
428 define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
429 ; CHECK-LABEL: test_x86_fmsub_aba_ps:
431 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
432 ; CHECK-NEXT: vfmsub231ps (%rdx), %xmm0, %xmm0
434 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; (%b, %b, %a): load from (%rdx), then vfmsub213ps with (%rcx) as the memory operand.
438 define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
439 ; CHECK-LABEL: test_x86_fmsub_bba_ps:
441 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
442 ; CHECK-NEXT: vfmsub213ps (%rcx), %xmm0, %xmm0
444 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfmsub.ps.256 on <8 x float> (ymm registers).
; The baa/aba/bba suffix names the order in which %a and %b are passed to the call.
448 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfmsub132ps with (%rdx) as the memory operand.
449 define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
450 ; CHECK-LABEL: test_x86_fmsub_baa_ps_y:
452 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
453 ; CHECK-NEXT: vfmsub132ps (%rdx), %ymm0, %ymm0
455 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
; (%a, %b, %a): load from (%rcx), then vfmsub231ps with (%rdx) as the memory operand.
459 define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
460 ; CHECK-LABEL: test_x86_fmsub_aba_ps_y:
462 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
463 ; CHECK-NEXT: vfmsub231ps (%rdx), %ymm0, %ymm0
465 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
; (%b, %b, %a): load from (%rdx), then vfmsub213ps with (%rcx) as the memory operand.
469 define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
470 ; CHECK-LABEL: test_x86_fmsub_bba_ps_y:
472 ; CHECK-NEXT: vmovaps (%rdx), %ymm0
473 ; CHECK-NEXT: vfmsub213ps (%rcx), %ymm0, %ymm0
475 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfmsub.sd on <2 x double>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
479 declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; (%b, %a, %a): the patterns accept the two loads in either order, then require the
; register-only vfmsub213sd. NOTE(review): both load patterns use the same
; alternation, so they do not by themselves force the two loads to be different.
480 define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
481 ; CHECK-LABEL: test_x86_fmsub_baa_sd:
483 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
484 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
485 ; CHECK-NEXT: vfmsub213sd %xmm1, %xmm1, %xmm0
487 %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
488 ret <2 x double> %res
; (%a, %b, %a): load from (%rcx), then vfmsub132sd with (%rdx) as the memory operand.
491 define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
492 ; CHECK-LABEL: test_x86_fmsub_aba_sd:
494 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
495 ; CHECK-NEXT: vfmsub132sd (%rdx), %xmm0, %xmm0
497 %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
498 ret <2 x double> %res
; (%b, %b, %a): load from (%rdx), then vfmsub213sd with (%rcx) as the memory operand.
501 define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
502 ; CHECK-LABEL: test_x86_fmsub_bba_sd:
504 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
505 ; CHECK-NEXT: vfmsub213sd (%rcx), %xmm0, %xmm0
507 %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
508 ret <2 x double> %res
; Operand-order tests for @llvm.x86.fma.vfmsub.pd on <2 x double>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
511 declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfmsub132pd with (%rdx) as the memory operand.
512 define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
513 ; CHECK-LABEL: test_x86_fmsub_baa_pd:
515 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
516 ; CHECK-NEXT: vfmsub132pd (%rdx), %xmm0, %xmm0
518 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
519 ret <2 x double> %res
; (%a, %b, %a): load from (%rcx), then vfmsub231pd with (%rdx) as the memory operand.
522 define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
523 ; CHECK-LABEL: test_x86_fmsub_aba_pd:
525 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
526 ; CHECK-NEXT: vfmsub231pd (%rdx), %xmm0, %xmm0
528 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
529 ret <2 x double> %res
; (%b, %b, %a): load from (%rdx), then vfmsub213pd with (%rcx) as the memory operand.
532 define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
533 ; CHECK-LABEL: test_x86_fmsub_bba_pd:
535 ; CHECK-NEXT: vmovapd (%rdx), %xmm0
536 ; CHECK-NEXT: vfmsub213pd (%rcx), %xmm0, %xmm0
538 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
539 ret <2 x double> %res
; Operand-order tests for @llvm.x86.fma.vfmsub.pd.256 on <4 x double> (ymm registers).
; The baa/aba/bba suffix names the order in which %a and %b are passed to the call.
542 declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfmsub132pd with (%rdx) as the memory operand.
543 define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
544 ; CHECK-LABEL: test_x86_fmsub_baa_pd_y:
546 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
547 ; CHECK-NEXT: vfmsub132pd (%rdx), %ymm0, %ymm0
549 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
550 ret <4 x double> %res
; (%a, %b, %a): load from (%rcx), then vfmsub231pd with (%rdx) as the memory operand.
553 define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
554 ; CHECK-LABEL: test_x86_fmsub_aba_pd_y:
556 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
557 ; CHECK-NEXT: vfmsub231pd (%rdx), %ymm0, %ymm0
559 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
560 ret <4 x double> %res
; (%b, %b, %a): load from (%rdx), then vfmsub213pd with (%rcx) as the memory operand.
563 define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
564 ; CHECK-LABEL: test_x86_fmsub_bba_pd_y:
566 ; CHECK-NEXT: vmovapd (%rdx), %ymm0
567 ; CHECK-NEXT: vfmsub213pd (%rcx), %ymm0, %ymm0
569 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
570 ret <4 x double> %res
; Operand-order tests for @llvm.x86.fma.vfnmsub.ss on <4 x float>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
574 declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; (%b, %a, %a): the patterns accept the two loads in either order, then require the
; register-only vfnmsub213ss. NOTE(review): both load patterns use the same
; alternation, so they do not by themselves force the two loads to be different.
575 define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
576 ; CHECK-LABEL: test_x86_fnmsub_baa_ss:
578 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
579 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
580 ; CHECK-NEXT: vfnmsub213ss %xmm1, %xmm1, %xmm0
582 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; (%a, %b, %a): load from (%rcx), then vfnmsub132ss with (%rdx) as the memory operand.
586 define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
587 ; CHECK-LABEL: test_x86_fnmsub_aba_ss:
589 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
590 ; CHECK-NEXT: vfnmsub132ss (%rdx), %xmm0, %xmm0
592 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; (%b, %b, %a): load from (%rdx), then vfnmsub213ss with (%rcx) as the memory operand.
596 define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
597 ; CHECK-LABEL: test_x86_fnmsub_bba_ss:
599 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
600 ; CHECK-NEXT: vfnmsub213ss (%rcx), %xmm0, %xmm0
602 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfnmsub.ps on <4 x float>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
606 declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfnmsub132ps with (%rdx) as the memory operand.
607 define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
608 ; CHECK-LABEL: test_x86_fnmsub_baa_ps:
610 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
611 ; CHECK-NEXT: vfnmsub132ps (%rdx), %xmm0, %xmm0
613 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; (%a, %b, %a): load from (%rcx), then vfnmsub231ps with (%rdx) as the memory operand.
617 define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
618 ; CHECK-LABEL: test_x86_fnmsub_aba_ps:
620 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
621 ; CHECK-NEXT: vfnmsub231ps (%rdx), %xmm0, %xmm0
623 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; (%b, %b, %a): load from (%rdx), then vfnmsub213ps with (%rcx) as the memory operand.
627 define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
628 ; CHECK-LABEL: test_x86_fnmsub_bba_ps:
630 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
631 ; CHECK-NEXT: vfnmsub213ps (%rcx), %xmm0, %xmm0
633 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfnmsub.ps.256 on <8 x float> (ymm registers).
; The baa/aba/bba suffix names the order in which %a and %b are passed to the call.
637 declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfnmsub132ps with (%rdx) as the memory operand.
638 define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
639 ; CHECK-LABEL: test_x86_fnmsub_baa_ps_y:
641 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
642 ; CHECK-NEXT: vfnmsub132ps (%rdx), %ymm0, %ymm0
644 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
; (%a, %b, %a): load from (%rcx), then vfnmsub231ps with (%rdx) as the memory operand.
648 define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
649 ; CHECK-LABEL: test_x86_fnmsub_aba_ps_y:
651 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
652 ; CHECK-NEXT: vfnmsub231ps (%rdx), %ymm0, %ymm0
654 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
; (%b, %b, %a): load from (%rdx), then vfnmsub213ps with (%rcx) as the memory operand.
658 define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
659 ; CHECK-LABEL: test_x86_fnmsub_bba_ps_y:
661 ; CHECK-NEXT: vmovaps (%rdx), %ymm0
662 ; CHECK-NEXT: vfnmsub213ps (%rcx), %ymm0, %ymm0
664 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Operand-order tests for @llvm.x86.fma.vfnmsub.sd on <2 x double>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
668 declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; (%b, %a, %a): the patterns accept the two loads in either order, then require the
; register-only vfnmsub213sd. NOTE(review): both load patterns use the same
; alternation, so they do not by themselves force the two loads to be different.
669 define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
670 ; CHECK-LABEL: test_x86_fnmsub_baa_sd:
672 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
673 ; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
674 ; CHECK-NEXT: vfnmsub213sd %xmm1, %xmm1, %xmm0
676 %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
677 ret <2 x double> %res
; (%a, %b, %a): load from (%rcx), then vfnmsub132sd with (%rdx) as the memory operand.
680 define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
681 ; CHECK-LABEL: test_x86_fnmsub_aba_sd:
683 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
684 ; CHECK-NEXT: vfnmsub132sd (%rdx), %xmm0, %xmm0
686 %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
687 ret <2 x double> %res
; (%b, %b, %a): load from (%rdx), then vfnmsub213sd with (%rcx) as the memory operand.
690 define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
691 ; CHECK-LABEL: test_x86_fnmsub_bba_sd:
693 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
694 ; CHECK-NEXT: vfnmsub213sd (%rcx), %xmm0, %xmm0
696 %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
697 ret <2 x double> %res
; Operand-order tests for @llvm.x86.fma.vfnmsub.pd on <2 x double>. The baa/aba/bba
; suffix of each test names the order in which %a and %b are passed to the call.
700 declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfnmsub132pd with (%rdx) as the memory operand.
701 define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
702 ; CHECK-LABEL: test_x86_fnmsub_baa_pd:
704 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
705 ; CHECK-NEXT: vfnmsub132pd (%rdx), %xmm0, %xmm0
707 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
708 ret <2 x double> %res
; (%a, %b, %a): load from (%rcx), then vfnmsub231pd with (%rdx) as the memory operand.
711 define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
712 ; CHECK-LABEL: test_x86_fnmsub_aba_pd:
714 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
715 ; CHECK-NEXT: vfnmsub231pd (%rdx), %xmm0, %xmm0
717 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
718 ret <2 x double> %res
; (%b, %b, %a): load from (%rdx), then vfnmsub213pd with (%rcx) as the memory operand.
721 define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
722 ; CHECK-LABEL: test_x86_fnmsub_bba_pd:
724 ; CHECK-NEXT: vmovapd (%rdx), %xmm0
725 ; CHECK-NEXT: vfnmsub213pd (%rcx), %xmm0, %xmm0
727 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
728 ret <2 x double> %res
; Operand-order tests for @llvm.x86.fma.vfnmsub.pd.256 on <4 x double> (ymm registers).
; The baa/aba/bba suffix names the order in which %a and %b are passed to the call.
731 declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; (%b, %a, %a): load from (%rcx), then vfnmsub132pd with (%rdx) as the memory operand.
732 define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
733 ; CHECK-LABEL: test_x86_fnmsub_baa_pd_y:
735 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
736 ; CHECK-NEXT: vfnmsub132pd (%rdx), %ymm0, %ymm0
738 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
739 ret <4 x double> %res
; (%a, %b, %a): load from (%rcx), then vfnmsub231pd with (%rdx) as the memory operand.
742 define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
743 ; CHECK-LABEL: test_x86_fnmsub_aba_pd_y:
745 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
746 ; CHECK-NEXT: vfnmsub231pd (%rdx), %ymm0, %ymm0
748 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
749 ret <4 x double> %res
; (%b, %b, %a): load from (%rdx), then vfnmsub213pd with (%rcx) as the memory operand.
752 define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
753 ; CHECK-LABEL: test_x86_fnmsub_bba_pd_y:
755 ; CHECK-NEXT: vmovapd (%rdx), %ymm0
756 ; CHECK-NEXT: vfnmsub213pd (%rcx), %ymm0, %ymm0
758 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
759 ret <4 x double> %res