1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s
3 declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
4 declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
6 define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
7 ; CHECK-LABEL: test_x86_vfnmadd_ps_z
8 ; CHECK: vfnmadd213ps %zmm
9 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
12 declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
14 define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
15 ; CHECK-LABEL: test_mask_vfnmadd_ps
16 ; CHECK: vfnmadd213ps %zmm
17 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
21 define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
22 ; CHECK-LABEL: test_x86_vfnmadd_pd_z
23 ; CHECK: vfnmadd213pd %zmm
24 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
27 declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
29 define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
30 ; CHECK-LABEL: test_mask_vfnmadd_pd
31 ; CHECK: vfnmadd213pd %zmm
32 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
36 define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
37 ; CHECK-LABEL: test_x86_vfnmsubps_z
38 ; CHECK: vfnmsub213ps %zmm
39 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
42 declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
44 define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
45 ; CHECK-LABEL: test_mask_vfnmsub_ps
46 ; CHECK: vfnmsub213ps %zmm
47 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
51 define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
52 ; CHECK-LABEL: test_x86_vfnmsubpd_z
53 ; CHECK: vfnmsub213pd %zmm
54 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
57 declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
59 define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
60 ; CHECK-LABEL: test_mask_vfnmsub_pd
61 ; CHECK: vfnmsub213pd %zmm
62 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
66 define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
67 ; CHECK-LABEL: test_x86_vfmaddsubps_z
68 ; CHECK: vfmaddsub213ps %zmm
69 %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
73 define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
74 ; CHECK-LABEL: test_mask_fmaddsub_ps:
75 ; CHECK: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa6,0xc2]
76 %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
80 declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
82 define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
83 ; CHECK-LABEL: test_x86_vfmaddsubpd_z
84 ; CHECK: vfmaddsub213pd %zmm
85 %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
88 declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
90 define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
91 ; CHECK-LABEL: test_mask_vfmaddsub_pd
92 ; CHECK: vfmaddsub213pd %zmm
93 %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
97 define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
98 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
100 ; CHECK-NEXT: movzbl %dil, %eax
101 ; CHECK-NEXT: kmovw %eax, %k1
102 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
103 ; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1}
104 ; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
105 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
107 %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
108 %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
109 %res2 = fadd <8 x double> %res, %res1
110 ret <8 x double> %res2
113 declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
115 define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
116 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
118 ; CHECK-NEXT: movzbl %dil, %eax
119 ; CHECK-NEXT: kmovw %eax, %k1
120 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
121 ; CHECK-NEXT: vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1}
122 ; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
123 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
125 %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
126 %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
127 %res2 = fadd <8 x double> %res, %res1
128 ret <8 x double> %res2
131 declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
133 define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
134 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
136 ; CHECK-NEXT: movzbl %dil, %eax
137 ; CHECK-NEXT: kmovw %eax, %k1
138 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
139 ; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
140 ; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
141 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
143 %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
144 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
145 %res2 = fadd <8 x double> %res, %res1
146 ret <8 x double> %res2
149 define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
150 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
152 ; CHECK-NEXT: kmovw %edi, %k1
153 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
154 ; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1}
155 ; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
156 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
158 %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
159 %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
160 %res2 = fadd <16 x float> %res, %res1
161 ret <16 x float> %res2
164 declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
166 define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
167 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
169 ; CHECK-NEXT: kmovw %edi, %k1
170 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
171 ; CHECK-NEXT: vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1}
172 ; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
173 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
175 %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
176 %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
177 %res2 = fadd <16 x float> %res, %res1
178 ret <16 x float> %res2
181 declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
183 define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
184 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
186 ; CHECK-NEXT: kmovw %edi, %k1
187 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
188 ; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
189 ; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
190 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
192 %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
193 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
194 %res2 = fadd <16 x float> %res, %res1
195 ret <16 x float> %res2
198 declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
200 define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
201 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
203 ; CHECK-NEXT: movzbl %dil, %eax
204 ; CHECK-NEXT: kmovw %eax, %k1
205 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
206 ; CHECK-NEXT: vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1}
207 ; CHECK-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
208 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
210 %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
211 %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
212 %res2 = fadd <8 x double> %res, %res1
213 ret <8 x double> %res2
216 declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
218 define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
219 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
221 ; CHECK-NEXT: kmovw %edi, %k1
222 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
223 ; CHECK-NEXT: vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1}
224 ; CHECK-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
225 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
227 %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
228 %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
229 %res2 = fadd <16 x float> %res, %res1
230 ret <16 x float> %res2
233 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
234 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne
235 ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
236 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
237 ret <16 x float> %res
240 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
241 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn
242 ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
243 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
244 ret <16 x float> %res
247 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
248 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp
249 ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
250 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
251 ret <16 x float> %res
254 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
255 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz
256 ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
257 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
258 ret <16 x float> %res
261 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
262 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current
263 ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa8,0xc2]
264 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
265 ret <16 x float> %res
268 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
269 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne
270 ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
271 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
272 ret <16 x float> %res
275 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
276 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn
277 ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
278 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
279 ret <16 x float> %res
282 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
283 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp
284 ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
285 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
286 ret <16 x float> %res
289 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
290 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz
291 ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
292 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
293 ret <16 x float> %res
296 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
297 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current
298 ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
299 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
300 ret <16 x float> %res
303 declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
305 define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
306 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
308 ; CHECK-NEXT: movzbl %dil, %eax
309 ; CHECK-NEXT: kmovw %eax, %k1
310 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
311 ; CHECK-NEXT: vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
312 ; CHECK-NEXT: vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
313 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
315 %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
316 %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
317 %res2 = fadd <8 x double> %res, %res1
318 ret <8 x double> %res2
321 declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
323 define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
324 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
326 ; CHECK-NEXT: kmovw %edi, %k1
327 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
328 ; CHECK-NEXT: vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
329 ; CHECK-NEXT: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
330 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
332 %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
333 %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
334 %res2 = fadd <16 x float> %res, %res1
335 ret <16 x float> %res2
338 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
339 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne
340 ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
341 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
342 ret <8 x double> %res
345 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
346 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn
347 ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
348 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
349 ret <8 x double> %res
352 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
353 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp
354 ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
355 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
356 ret <8 x double> %res
359 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
360 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz
361 ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
362 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
363 ret <8 x double> %res
366 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
367 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current
368 ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
369 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
370 ret <8 x double> %res
373 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
374 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne
375 ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
376 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
377 ret <8 x double> %res
380 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
381 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn
382 ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
383 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
384 ret <8 x double> %res
387 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
388 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp
389 ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
390 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
391 ret <8 x double> %res
394 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
395 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz
396 ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
397 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
398 ret <8 x double> %res
401 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
402 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current
403 ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
404 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
405 ret <8 x double> %res
408 define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
409 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
411 ; CHECK-NEXT: movzbl %dil, %eax
412 ; CHECK-NEXT: kmovw %eax, %k1
413 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
414 ; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
415 ; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
416 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
418 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
419 %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
420 %res2 = fadd <8 x double> %res, %res1
421 ret <8 x double> %res2
424 declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
426 define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
427 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
429 ; CHECK-NEXT: movzbl %dil, %eax
430 ; CHECK-NEXT: kmovw %eax, %k1
431 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
432 ; CHECK-NEXT: vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1}
433 ; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
434 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
436 %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
437 %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
438 %res2 = fadd <8 x double> %res, %res1
439 ret <8 x double> %res2
442 declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
444 define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
445 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
447 ; CHECK-NEXT: movzbl %dil, %eax
448 ; CHECK-NEXT: kmovw %eax, %k1
449 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
450 ; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
451 ; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
452 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
454 %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
455 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
456 %res2 = fadd <8 x double> %res, %res1
457 ret <8 x double> %res2
460 define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
461 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
463 ; CHECK-NEXT: kmovw %edi, %k1
464 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
465 ; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
466 ; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
467 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
469 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
470 %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
471 %res2 = fadd <16 x float> %res, %res1
472 ret <16 x float> %res2
475 declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
477 define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
478 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
480 ; CHECK-NEXT: kmovw %edi, %k1
481 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
482 ; CHECK-NEXT: vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1}
483 ; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
484 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
486 %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
487 %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
488 %res2 = fadd <16 x float> %res, %res1
489 ret <16 x float> %res2
492 declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
494 define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
495 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
497 ; CHECK-NEXT: kmovw %edi, %k1
498 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
499 ; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
500 ; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
501 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
503 %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
504 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
505 %res2 = fadd <16 x float> %res, %res1
506 ret <16 x float> %res2
510 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
511 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne
512 ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
513 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
514 ret <8 x double> %res
517 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
518 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn
519 ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
520 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
521 ret <8 x double> %res
524 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
525 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp
526 ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
527 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
528 ret <8 x double> %res
531 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
532 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz
533 ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
534 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
535 ret <8 x double> %res
538 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
539 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current
540 ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xae,0xc2]
541 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
542 ret <8 x double> %res
545 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
546 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne
547 ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
548 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
549 ret <8 x double> %res
552 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
553 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn
554 ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
555 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
556 ret <8 x double> %res
559 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
560 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp
561 ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
562 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
563 ret <8 x double> %res
566 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
567 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz
568 ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
569 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
570 ret <8 x double> %res
573 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
574 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current
575 ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
576 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
577 ret <8 x double> %res
580 define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
581 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
583 ; CHECK-NEXT: movzbl %dil, %eax
584 ; CHECK-NEXT: kmovw %eax, %k1
585 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
586 ; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1}
587 ; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
588 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
590 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
591 %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
592 %res2 = fadd <8 x double> %res, %res1
593 ret <8 x double> %res2
596 declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
598 define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
599 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
601 ; CHECK-NEXT: movzbl %dil, %eax
602 ; CHECK-NEXT: kmovw %eax, %k1
603 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
604 ; CHECK-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
605 ; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
606 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
608 %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
609 %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
610 %res2 = fadd <8 x double> %res, %res1
611 ret <8 x double> %res2
614 define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
615 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
617 ; CHECK-NEXT: kmovw %edi, %k1
618 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
619 ; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm3 {%k1}
620 ; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
621 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
623 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
624 %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
625 %res2 = fadd <16 x float> %res, %res1
626 ret <16 x float> %res2
629 declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
631 define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
632 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
634 ; CHECK-NEXT: kmovw %edi, %k1
635 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
636 ; CHECK-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
637 ; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
638 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
640 %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
641 %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
642 %res2 = fadd <16 x float> %res, %res1
643 ret <16 x float> %res2
646 define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
647 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
649 ; CHECK-NEXT: movzbl %dil, %eax
650 ; CHECK-NEXT: kmovw %eax, %k1
651 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
652 ; CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
653 ; CHECK-NEXT: vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
654 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
656 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
657 %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
658 %res2 = fadd <8 x double> %res, %res1
659 ret <8 x double> %res2
662 define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
663 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
665 ; CHECK-NEXT: kmovw %edi, %k1
666 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
667 ; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
668 ; CHECK-NEXT: vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
669 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
671 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
672 %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
673 %res2 = fadd <16 x float> %res, %res1
674 ret <16 x float> %res2