1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s
3 ; CHECK-LABEL: fmaddsubpd_loop:
4 ; CHECK: vfmaddsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; Loop test for llvm.x86.fma.vfmaddsub.pd.256: the intrinsic result is fed
; back in as the third operand on the next trip through the loop.
; The FileCheck pattern preceding this function expects a vfmaddsub231pd on
; ymm registers in the generated code.
; NOTE(review): the block labels referenced below (%entry, %for.body,
; %for.inc, %for.end) and the closing brace are not visible in this excerpt.
5 define <4 x double> @fmaddsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; Loop-carried accumulator: %c when arriving from %entry, otherwise the
; intrinsic result %0 when arriving from %for.inc.
10 %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
; Induction variable: 0 from %entry, %inc from %for.inc.
11 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed compare; go to %for.body while %i.0 < %iter, else to %for.end.
12 %cmp = icmp slt i32 %i.0, %iter
13 br i1 %cmp, label %for.body, label %for.end
; %a and %b are passed unchanged; only the third operand varies.
19 %0 = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
; Advance the counter by one (marked nsw).
20 %inc = add nsw i32 %i.0, 1
; Return the current accumulator value.
24 ret <4 x double> %c.addr.0
27 ; CHECK-LABEL: fmsubaddpd_loop:
28 ; CHECK: vfmsubadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; Loop test for llvm.x86.fma.vfmsubadd.pd.256: the intrinsic result is fed
; back in as the third operand on the next trip through the loop.
; The FileCheck pattern preceding this function expects a vfmsubadd231pd on
; ymm registers in the generated code.
; NOTE(review): the block labels referenced below (%entry, %for.body,
; %for.inc, %for.end) and the closing brace are not visible in this excerpt.
29 define <4 x double> @fmsubaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; Loop-carried accumulator: %c when arriving from %entry, otherwise the
; intrinsic result %0 when arriving from %for.inc.
34 %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
; Induction variable: 0 from %entry, %inc from %for.inc.
35 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed compare; go to %for.body while %i.0 < %iter, else to %for.end.
36 %cmp = icmp slt i32 %i.0, %iter
37 br i1 %cmp, label %for.body, label %for.end
; %a and %b are passed unchanged; only the third operand varies.
43 %0 = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
; Advance the counter by one (marked nsw).
44 %inc = add nsw i32 %i.0, 1
; Return the current accumulator value.
48 ret <4 x double> %c.addr.0
51 ; CHECK-LABEL: fmaddpd_loop:
52 ; CHECK: vfmadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; Loop test for llvm.x86.fma.vfmadd.pd.256: the intrinsic result is fed back
; in as the third operand on the next trip through the loop.
; The FileCheck pattern preceding this function expects a vfmadd231pd on ymm
; registers in the generated code.
; NOTE(review): the block labels referenced below (%entry, %for.body,
; %for.inc, %for.end) and the closing brace are not visible in this excerpt.
53 define <4 x double> @fmaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; Loop-carried accumulator: %c when arriving from %entry, otherwise the
; intrinsic result %0 when arriving from %for.inc.
58 %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
; Induction variable: 0 from %entry, %inc from %for.inc.
59 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed compare; go to %for.body while %i.0 < %iter, else to %for.end.
60 %cmp = icmp slt i32 %i.0, %iter
61 br i1 %cmp, label %for.body, label %for.end
; %a and %b are passed unchanged; only the third operand varies.
67 %0 = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
; Advance the counter by one (marked nsw).
68 %inc = add nsw i32 %i.0, 1
; Return the current accumulator value.
72 ret <4 x double> %c.addr.0
75 ; CHECK-LABEL: fmsubpd_loop:
76 ; CHECK: vfmsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; Loop test for llvm.x86.fma.vfmsub.pd.256: the intrinsic result is fed back
; in as the third operand on the next trip through the loop.
; The FileCheck pattern preceding this function expects a vfmsub231pd on ymm
; registers in the generated code.
; NOTE(review): the block labels referenced below (%entry, %for.body,
; %for.inc, %for.end) and the closing brace are not visible in this excerpt.
77 define <4 x double> @fmsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; Loop-carried accumulator: %c when arriving from %entry, otherwise the
; intrinsic result %0 when arriving from %for.inc.
82 %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
; Induction variable: 0 from %entry, %inc from %for.inc.
83 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed compare; go to %for.body while %i.0 < %iter, else to %for.end.
84 %cmp = icmp slt i32 %i.0, %iter
85 br i1 %cmp, label %for.body, label %for.end
; %a and %b are passed unchanged; only the third operand varies.
91 %0 = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
; Advance the counter by one (marked nsw).
92 %inc = add nsw i32 %i.0, 1
; Return the current accumulator value.
96 ret <4 x double> %c.addr.0
; Declarations of the x86 FMA intrinsics called by the <4 x double> loop
; tests in this file. Each takes three <4 x double> operands and returns a
; <4 x double>.
99 declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
100 declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
101 declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
102 declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
105 ; CHECK-LABEL: fmaddsubps_loop:
106 ; CHECK: vfmaddsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; Loop test for llvm.x86.fma.vfmaddsub.ps.256: the intrinsic result is fed
; back in as the third operand on the next trip through the loop.
; The FileCheck pattern preceding this function expects a vfmaddsub231ps on
; ymm registers in the generated code.
; NOTE(review): the block labels referenced below (%entry, %for.body,
; %for.inc, %for.end) and the closing brace are not visible in this excerpt.
107 define <8 x float> @fmaddsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; Loop-carried accumulator: %c when arriving from %entry, otherwise the
; intrinsic result %0 when arriving from %for.inc.
112 %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
; Induction variable: 0 from %entry, %inc from %for.inc.
113 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed compare; go to %for.body while %i.0 < %iter, else to %for.end.
114 %cmp = icmp slt i32 %i.0, %iter
115 br i1 %cmp, label %for.body, label %for.end
; %a and %b are passed unchanged; only the third operand varies.
121 %0 = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
; Advance the counter by one (marked nsw).
122 %inc = add nsw i32 %i.0, 1
; Return the current accumulator value.
126 ret <8 x float> %c.addr.0
129 ; CHECK-LABEL: fmsubaddps_loop:
130 ; CHECK: vfmsubadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; Loop test for llvm.x86.fma.vfmsubadd.ps.256: the intrinsic result is fed
; back in as the third operand on the next trip through the loop.
; The FileCheck pattern preceding this function expects a vfmsubadd231ps on
; ymm registers in the generated code.
; NOTE(review): the block labels referenced below (%entry, %for.body,
; %for.inc, %for.end) and the closing brace are not visible in this excerpt.
131 define <8 x float> @fmsubaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; Loop-carried accumulator: %c when arriving from %entry, otherwise the
; intrinsic result %0 when arriving from %for.inc.
136 %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
; Induction variable: 0 from %entry, %inc from %for.inc.
137 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed compare; go to %for.body while %i.0 < %iter, else to %for.end.
138 %cmp = icmp slt i32 %i.0, %iter
139 br i1 %cmp, label %for.body, label %for.end
; %a and %b are passed unchanged; only the third operand varies.
145 %0 = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
; Advance the counter by one (marked nsw).
146 %inc = add nsw i32 %i.0, 1
; Return the current accumulator value.
150 ret <8 x float> %c.addr.0
153 ; CHECK-LABEL: fmaddps_loop:
154 ; CHECK: vfmadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; Loop test for llvm.x86.fma.vfmadd.ps.256: the intrinsic result is fed back
; in as the third operand on the next trip through the loop.
; The FileCheck pattern preceding this function expects a vfmadd231ps on ymm
; registers in the generated code.
; NOTE(review): the block labels referenced below (%entry, %for.body,
; %for.inc, %for.end) and the closing brace are not visible in this excerpt.
155 define <8 x float> @fmaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; Loop-carried accumulator: %c when arriving from %entry, otherwise the
; intrinsic result %0 when arriving from %for.inc.
160 %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
; Induction variable: 0 from %entry, %inc from %for.inc.
161 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed compare; go to %for.body while %i.0 < %iter, else to %for.end.
162 %cmp = icmp slt i32 %i.0, %iter
163 br i1 %cmp, label %for.body, label %for.end
; %a and %b are passed unchanged; only the third operand varies.
169 %0 = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
; Advance the counter by one (marked nsw).
170 %inc = add nsw i32 %i.0, 1
; Return the current accumulator value.
174 ret <8 x float> %c.addr.0
177 ; CHECK-LABEL: fmsubps_loop:
178 ; CHECK: vfmsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; Loop test for llvm.x86.fma.vfmsub.ps.256: the intrinsic result is fed back
; in as the third operand on the next trip through the loop.
; The FileCheck pattern preceding this function expects a vfmsub231ps on ymm
; registers in the generated code.
; NOTE(review): the block labels referenced below (%entry, %for.body,
; %for.inc, %for.end) and the closing brace are not visible in this excerpt.
179 define <8 x float> @fmsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; Loop-carried accumulator: %c when arriving from %entry, otherwise the
; intrinsic result %0 when arriving from %for.inc.
184 %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
; Induction variable: 0 from %entry, %inc from %for.inc.
185 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Signed compare; go to %for.body while %i.0 < %iter, else to %for.end.
186 %cmp = icmp slt i32 %i.0, %iter
187 br i1 %cmp, label %for.body, label %for.end
; %a and %b are passed unchanged; only the third operand varies.
193 %0 = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
; Advance the counter by one (marked nsw).
194 %inc = add nsw i32 %i.0, 1
; Return the current accumulator value.
198 ret <8 x float> %c.addr.0
; Declarations of the x86 FMA intrinsics called by the <8 x float> loop
; tests in this file. Each takes three <8 x float> operands and returns an
; <8 x float>.
201 declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
202 declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
203 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
204 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)