1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
3 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
4 ; CHECK-LABEL: test_kortestz
7 define i32 @test_kortestz(i16 %a0, i16 %a1) {
8 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
12 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
13 ; CHECK-LABEL: test_kortestc
16 define i32 @test_kortestc(i16 %a0, i16 %a1) {
17 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
21 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
22 ; CHECK-LABEL: test_kand
25 define i16 @test_kand(i16 %a0, i16 %a1) {
26 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
27 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
31 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
32 ; CHECK-LABEL: test_knot
34 define i16 @test_knot(i16 %a0) {
35 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
39 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
41 ; CHECK-LABEL: unpckbw_test
44 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
45 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
49 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
50 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
51 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
54 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
56 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
57 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
58 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
61 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
63 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
65 define <8 x double> @test7(<8 x double> %a) {
66 ; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
67 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
71 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
73 define <16 x float> @test8(<16 x float> %a) {
74 ; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
75 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
79 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
80 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
81 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
84 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
86 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
87 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
88 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
91 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
93 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
94 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
95 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
98 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
100 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
101 ; CHECK-LABEL: test_sqrt_pd_512
103 %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
104 ret <8 x double> %res
106 declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
108 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
109 ; CHECK-LABEL: test_sqrt_ps_512
111 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
112 ret <16 x float> %res
114 define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
115 ; CHECK-LABEL: test_sqrt_round_ps_512
116 ; CHECK: vsqrtps {rz-sae}
117 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
118 ret <16 x float> %res
120 declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
122 define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
123 ; CHECK-LABEL: test_getexp_pd_512
125 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
126 ret <8 x double> %res
128 define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
129 ; CHECK-LABEL: test_getexp_round_pd_512
130 ; CHECK: vgetexppd {sae}
131 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
132 ret <8 x double> %res
134 declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
136 define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
137 ; CHECK-LABEL: test_getexp_ps_512
139 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
140 ret <16 x float> %res
143 define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
144 ; CHECK-LABEL: test_getexp_round_ps_512
145 ; CHECK: vgetexpps {sae}
146 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
147 ret <16 x float> %res
149 declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
151 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
152 ; CHECK: vsqrtss {{.*}}encoding: [0x62
153 %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
156 declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone
158 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
159 ; CHECK: vsqrtsd {{.*}}encoding: [0x62
160 %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
161 ret <2 x double> %res
163 declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
165 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
166 ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
167 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
170 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
172 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
173 ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
174 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
175 ret <2 x double> %res
177 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
179 define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
180 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
181 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
184 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
187 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
188 ; CHECK: vcvtss2si {{.*}}encoding: [0x62
189 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
192 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
195 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
196 ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
197 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
200 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
203 define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
204 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
205 %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
208 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
210 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
211 ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
212 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
215 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
217 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
218 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
219 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
220 ret <16 x float> %res
222 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
225 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
226 ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
227 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
231 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
233 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
234 ; CHECK: vbroadcastss
235 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
236 ret <16 x float> %res
238 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
240 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
241 ; CHECK: vbroadcastsd
242 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
243 ret <8 x double> %res
245 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
247 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
248 ; CHECK: vbroadcastss
249 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
250 ret <16 x float> %res
252 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
254 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
255 ; CHECK: vbroadcastsd
256 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
257 ret <8 x double> %res
259 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
261 define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
262 ; CHECK: vpbroadcastd
263 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
266 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
268 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
269 ; CHECK: vpbroadcastd
270 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
273 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
275 define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
276 ; CHECK: vpbroadcastq
277 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
280 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
282 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
283 ; CHECK: vpbroadcastq
284 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
287 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
289 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
290 ; CHECK: movw $-1, %ax
293 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
297 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
299 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
300 ; CHECK: movb $-1, %al
303 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
307 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
309 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
311 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
315 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
317 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
321 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
322 ; CHECK: movw $-1, %ax
325 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
329 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
331 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
332 ; CHECK: movb $-1, %al
335 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
339 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
342 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
344 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
348 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
350 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
354 define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
355 ; CHECK-LABEL: test_ctlz_d
357 %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
361 declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly
363 define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
364 ; CHECK-LABEL: test_ctlz_q
366 %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
370 declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
372 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
373 ; CHECK: vblendmps %zmm1, %zmm0
374 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
375 ret <16 x float> %res
378 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
380 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
381 ; CHECK: vblendmpd %zmm1, %zmm0
382 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
383 ret <8 x double> %res
386 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
387 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
388 ; CHECK: vblendmpd (%
389 %b = load <8 x double>, <8 x double>* %ptr
390 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
391 ret <8 x double> %res
393 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
395 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
397 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
400 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
402 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
404 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
407 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
409 define <8 x i32> @test_cvtpd2udq(<8 x double> %a) {
410 ;CHECK: vcvtpd2udq {ru-sae}{{.*}}encoding: [0x62,0xf1,0xfc,0x58,0x79,0xc0]
411 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %a, <8 x i32>zeroinitializer, i8 -1, i32 2)
414 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
416 define <16 x i32> @test_cvtps2udq(<16 x float> %a) {
417 ;CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0]
418 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32>zeroinitializer, i16 -1, i32 1)
421 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
423 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
424 ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
425 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
428 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
430 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
431 ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
432 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
435 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
438 define <16 x float> @test_cvtdq2ps(<16 x i32> %a) {
439 ;CHECK: vcvtdq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x5b,0xc0]
440 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
443 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
445 define <16 x float> @test_cvtudq2ps(<16 x i32> %a) {
446 ;CHECK: vcvtudq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7f,0x38,0x7a,0xc0]
447 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
450 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
452 define <8 x double> @test_cvtdq2pd(<8 x i32> %a) {
453 ;CHECK: vcvtdq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0]
454 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
457 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
459 define <8 x double> @test_cvtudq2pd(<8 x i32> %a) {
460 ;CHECK: vcvtudq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0]
461 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
464 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
467 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
469 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
470 <8 x double>zeroinitializer, i8 -1, i32 4)
471 ret <8 x double> %res
473 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
474 <8 x double>, i8, i32)
476 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
478 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
479 <8 x double>zeroinitializer, i8 -1, i32 4)
480 ret <8 x double> %res
482 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
483 <8 x double>, i8, i32)
485 define <8 x float> @test_cvtpd2ps(<8 x double> %a) {
486 ;CHECK: vcvtpd2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0xfd,0x38,0x5a,0xc0]
487 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %a, <8 x float>zeroinitializer, i8 -1, i32 1)
490 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
492 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
494 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_512
497 ; CHECK: vpabsd{{.*}}{%k1}
498 define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
499 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
500 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
501 %res2 = add <16 x i32> %res, %res1
505 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
507 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_512
510 ; CHECK: vpabsq{{.*}}{%k1}
511 define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
512 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
513 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
514 %res2 = add <8 x i64> %res, %res1
518 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
519 ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
520 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
523 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
525 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
526 ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
527 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
530 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
532 define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
533 ; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
534 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
538 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
540 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
541 ; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
542 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
546 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
548 define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
549 ; CHECK-LABEL: test_mask_store_aligned_ps:
551 ; CHECK-NEXT: kmovw %esi, %k1
552 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
554 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
558 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
560 define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
561 ; CHECK-LABEL: test_mask_store_aligned_pd:
563 ; CHECK-NEXT: kmovw %esi, %k1
564 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
566 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
570 declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
572 define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
573 ; CHECK-LABEL: test_maskz_load_aligned_ps:
575 ; CHECK-NEXT: kmovw %esi, %k1
576 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
578 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
579 ret <16 x float> %res
582 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
584 define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
585 ; CHECK-LABEL: test_maskz_load_aligned_pd:
587 ; CHECK-NEXT: kmovw %esi, %k1
588 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
590 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
591 ret <8 x double> %res
594 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
596 define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
597 ; CHECK-LABEL: test_load_aligned_ps:
599 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
601 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
602 ret <16 x float> %res
605 define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
606 ; CHECK-LABEL: test_load_aligned_pd:
608 ; CHECK-NEXT: vmovapd (%rdi), %zmm0
610 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
611 ret <8 x double> %res
614 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
616 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
617 ; CHECK-LABEL: test_valign_q:
618 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
619 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
623 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
624 ; CHECK-LABEL: test_mask_valign_q:
625 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
626 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
630 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)
632 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
633 ; CHECK-LABEL: test_maskz_valign_d:
634 ; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
635 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask)
639 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)
641 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
642 ; CHECK-LABEL: test_mask_store_ss
643 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
644 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
648 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
650 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
651 ; CHECK-LABEL: test_pcmpeq_d
652 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
653 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
657 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
658 ; CHECK-LABEL: test_mask_pcmpeq_d
659 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
660 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
664 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
666 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
667 ; CHECK-LABEL: test_pcmpeq_q
668 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
669 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
673 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
674 ; CHECK-LABEL: test_mask_pcmpeq_q
675 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
676 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
680 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
682 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
683 ; CHECK-LABEL: test_pcmpgt_d
684 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
685 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
689 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
690 ; CHECK-LABEL: test_mask_pcmpgt_d
691 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
692 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
696 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
698 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
699 ; CHECK-LABEL: test_pcmpgt_q
700 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
701 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
705 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
706 ; CHECK-LABEL: test_mask_pcmpgt_q
707 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
708 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
712 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
714 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
715 ; CHECK-LABEL: test_cmp_d_512
716 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
717 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
718 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
719 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
720 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
721 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
722 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
723 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
724 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
725 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
726 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
727 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
728 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
729 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
730 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
731 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
732 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
733 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
734 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
735 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
736 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
737 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
738 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
739 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
743 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
744 ; CHECK-LABEL: test_mask_cmp_d_512
745 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
746 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
747 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
748 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
749 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
750 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
751 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
752 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
753 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
754 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
755 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
756 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
757 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
758 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
759 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
760 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
761 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
762 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
763 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
764 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
765 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
766 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
767 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
768 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
772 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
774 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
775 ; CHECK-LABEL: test_ucmp_d_512
776 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
777 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
778 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
779 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
780 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
781 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
782 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
783 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
784 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
785 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
786 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
787 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
788 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
789 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
790 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
791 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
792 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
793 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
794 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
795 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
796 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
797 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
798 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
799 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
803 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
804 ; CHECK-LABEL: test_mask_ucmp_d_512
805 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
806 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
807 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
808 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
809 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
810 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
811 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
812 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
813 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
814 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
815 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
816 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
817 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
818 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
819 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
820 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
821 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
822 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
823 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
824 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
825 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
826 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
827 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
828 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
832 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
834 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
835 ; CHECK-LABEL: test_cmp_q_512
836 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
837 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
838 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
839 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
840 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
841 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
842 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
843 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
844 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
845 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
846 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
847 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
848 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
849 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
850 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
851 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
852 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
853 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
854 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
855 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
856 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
857 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
858 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
859 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
863 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
864 ; CHECK-LABEL: test_mask_cmp_q_512
865 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
866 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
867 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
868 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
869 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
870 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
871 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
872 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
873 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
874 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
875 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
876 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
877 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
878 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
879 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
880 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
881 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
882 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
883 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
884 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
885 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
886 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
887 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
888 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
892 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
894 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
895 ; CHECK-LABEL: test_ucmp_q_512
896 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
897 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
898 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
899 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
900 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
901 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
902 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
903 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
904 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
905 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
906 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
907 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
908 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
909 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
910 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
911 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
912 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
913 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
914 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
915 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
916 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
917 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
918 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
919 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
923 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
924 ; CHECK-LABEL: test_mask_ucmp_q_512
925 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
926 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
927 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
928 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
929 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
930 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
931 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
932 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
933 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
934 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
935 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
936 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
937 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
938 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
939 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
940 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
941 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
942 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
943 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
944 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
945 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
946 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
947 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
948 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
952 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
954 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
955 ; CHECK-LABEL: test_mask_vextractf32x4:
956 ; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
957 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
961 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
963 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
964 ; CHECK-LABEL: test_mask_vextracti64x4:
965 ; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
966 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
970 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
972 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
973 ; CHECK-LABEL: test_maskz_vextracti32x4:
974 ; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
975 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
979 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
981 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
982 ; CHECK-LABEL: test_vextractf64x4:
983 ; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
984 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
985 ret <4 x double> %res
988 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
990 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
991 ; CHECK-LABEL: test_x86_avx512_pslli_d
993 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
997 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
998 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
999 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
1000 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1004 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
1005 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
1006 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
1007 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1011 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1013 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
1014 ; CHECK-LABEL: test_x86_avx512_pslli_q
1016 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1020 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1021 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
1022 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
1023 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1027 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
1028 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
1029 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
1030 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1034 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1036 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1037 ; CHECK-LABEL: test_x86_avx512_psrli_d
1039 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1043 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1044 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
1045 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
1046 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1050 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1051 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
1052 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1053 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1057 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1059 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1060 ; CHECK-LABEL: test_x86_avx512_psrli_q
1062 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1066 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1067 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
1068 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
1069 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1073 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1074 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
1075 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1076 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1080 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1082 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1083 ; CHECK-LABEL: test_x86_avx512_psrai_d
1085 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1089 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1090 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
1091 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
1092 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1096 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1097 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
1098 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1099 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1103 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1105 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1106 ; CHECK-LABEL: test_x86_avx512_psrai_q
1108 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1112 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1113 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
1114 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
1115 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1119 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1120 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
1121 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1122 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1126 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1128 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
1129 ; CHECK-LABEL: test_x86_avx512_psll_d
1131 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1135 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1136 ; CHECK-LABEL: test_x86_avx512_mask_psll_d
1137 ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
1138 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1142 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1143 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
1144 ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
1145 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1149 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1151 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
1152 ; CHECK-LABEL: test_x86_avx512_psll_q
1154 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1158 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1159 ; CHECK-LABEL: test_x86_avx512_mask_psll_q
1160 ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
1161 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1165 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1166 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
1167 ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
1168 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1172 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1174 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
1175 ; CHECK-LABEL: test_x86_avx512_psrl_d
1177 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1181 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1182 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
1183 ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
1184 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1188 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1189 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
1190 ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
1191 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1195 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1197 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
1198 ; CHECK-LABEL: test_x86_avx512_psrl_q
1200 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1204 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1205 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
1206 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
1207 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1211 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1212 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
1213 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
1214 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1218 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1220 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
1221 ; CHECK-LABEL: test_x86_avx512_psra_d
1223 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1227 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1228 ; CHECK-LABEL: test_x86_avx512_mask_psra_d
1229 ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
1230 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1234 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1235 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
1236 ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
1237 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1241 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1243 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
1244 ; CHECK-LABEL: test_x86_avx512_psra_q
1246 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1250 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1251 ; CHECK-LABEL: test_x86_avx512_mask_psra_q
1252 ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
1253 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1257 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1258 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
1259 ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
1260 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1264 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1266 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
1267 ; CHECK-LABEL: test_x86_avx512_psllv_d
1269 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1273 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1274 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
1275 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
1276 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1280 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1281 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
1282 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1283 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1287 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1289 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
1290 ; CHECK-LABEL: test_x86_avx512_psllv_q
1292 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1296 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1297 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
1298 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
1299 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1303 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1304 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
1305 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1306 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1310 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1313 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
1314 ; CHECK-LABEL: test_x86_avx512_psrav_d
1316 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1320 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1321 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
1322 ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
1323 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1327 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1328 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
1329 ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
1330 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1334 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1336 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
1337 ; CHECK-LABEL: test_x86_avx512_psrav_q
1339 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1343 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1344 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
1345 ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
1346 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1350 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1351 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
1352 ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
1353 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1357 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1359 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
1360 ; CHECK-LABEL: test_x86_avx512_psrlv_d
1362 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1366 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1367 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
1368 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
1369 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1373 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1374 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
1375 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1376 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1380 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1382 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
1383 ; CHECK-LABEL: test_x86_avx512_psrlv_q
1385 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1389 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1390 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
1391 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
1392 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1396 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1397 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
1398 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1399 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1403 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1405 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
1406 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
1408 %b = load <8 x i64>, <8 x i64>* %ptr
1409 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1413 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1414 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1415 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
1417 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
1418 ; CHECK-LABEL: test_vsubps_rn
1419 ; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
1420 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1421 <16 x float> zeroinitializer, i16 -1, i32 0)
1422 ret <16 x float> %res
1425 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
1426 ; CHECK-LABEL: test_vsubps_rd
1427 ; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
1428 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1429 <16 x float> zeroinitializer, i16 -1, i32 1)
1430 ret <16 x float> %res
1433 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
1434 ; CHECK-LABEL: test_vsubps_ru
1435 ; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
1436 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1437 <16 x float> zeroinitializer, i16 -1, i32 2)
1438 ret <16 x float> %res
1441 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
1442 ; CHECK-LABEL: test_vsubps_rz
1443 ; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
1444 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1445 <16 x float> zeroinitializer, i16 -1, i32 3)
1446 ret <16 x float> %res
; Static rounding tests for vmulps (opcode 0x59), mirroring the vsubps group:
; i32 0/1/2/3 -> {rn-sae}/{rd-sae}/{ru-sae}/{rz-sae}, all-ones mask (i16 -1),
; zero passthru, so the unmasked instruction form is emitted.
1449 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
1450 ; CHECK-LABEL: test_vmulps_rn
1451 ; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
1452 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1453 <16 x float> zeroinitializer, i16 -1, i32 0)
1454 ret <16 x float> %res
; Round-down ({rd-sae}), i32 1.
1457 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
1458 ; CHECK-LABEL: test_vmulps_rd
1459 ; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
1460 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1461 <16 x float> zeroinitializer, i16 -1, i32 1)
1462 ret <16 x float> %res
; Round-up ({ru-sae}), i32 2.
1465 define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
1466 ; CHECK-LABEL: test_vmulps_ru
1467 ; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
1468 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1469 <16 x float> zeroinitializer, i16 -1, i32 2)
1470 ret <16 x float> %res
; Round-toward-zero ({rz-sae}), i32 3.
1473 define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
1474 ; CHECK-LABEL: test_vmulps_rz
1475 ; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
1476 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1477 <16 x float> zeroinitializer, i16 -1, i32 3)
1478 ret <16 x float> %res
; Masked + zeroing variants of the vmulps rounding tests: a real %mask is
; passed and the passthru operand is zeroinitializer, so the CHECK patterns
; expect the {%k1} {z} form (EVEX z bit set: fourth byte 0x99/0xb9/0xd9/0xf9
; instead of 0x18/0x38/0x58/0x78 in the unmasked group above).
1482 define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1483 ; CHECK-LABEL: test_vmulps_mask_rn
1484 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
1485 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1486 <16 x float> zeroinitializer, i16 %mask, i32 0)
1487 ret <16 x float> %res
; Round-down ({rd-sae}), i32 1, masked zeroing.
1490 define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1491 ; CHECK-LABEL: test_vmulps_mask_rd
1492 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
1493 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1494 <16 x float> zeroinitializer, i16 %mask, i32 1)
1495 ret <16 x float> %res
; Round-up ({ru-sae}), i32 2, masked zeroing.
1498 define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1499 ; CHECK-LABEL: test_vmulps_mask_ru
1500 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
1501 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1502 <16 x float> zeroinitializer, i16 %mask, i32 2)
1503 ret <16 x float> %res
; Round-toward-zero ({rz-sae}), i32 3, masked zeroing.
1506 define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1507 ; CHECK-LABEL: test_vmulps_mask_rz
1508 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
1509 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1510 <16 x float> zeroinitializer, i16 %mask, i32 3)
1511 ret <16 x float> %res
1514 ;; With Passthru value
; Same masked rounding tests as above but with a non-zero %passthru operand:
; merge-masking instead of zeroing, so the CHECK patterns expect {%k1} with
; no {z} (EVEX z bit clear: fourth byte 0x19/0x39/0x59/0x79) and the
; destination is %passthru's register (ModRM 0xd1 -> zmm2).
1515 define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1516 ; CHECK-LABEL: test_vmulps_mask_passthru_rn
1517 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
1518 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1519 <16 x float> %passthru, i16 %mask, i32 0)
1520 ret <16 x float> %res
; Round-down ({rd-sae}), i32 1, merge-masking.
1523 define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1524 ; CHECK-LABEL: test_vmulps_mask_passthru_rd
1525 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
1526 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1527 <16 x float> %passthru, i16 %mask, i32 1)
1528 ret <16 x float> %res
; Round-up ({ru-sae}), i32 2, merge-masking.
1531 define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1532 ; CHECK-LABEL: test_vmulps_mask_passthru_ru
1533 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
1534 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1535 <16 x float> %passthru, i16 %mask, i32 2)
1536 ret <16 x float> %res
; Round-toward-zero ({rz-sae}), i32 3, merge-masking.
1539 define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1540 ; CHECK-LABEL: test_vmulps_mask_passthru_rz
1541 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
1542 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1543 <16 x float> %passthru, i16 %mask, i32 3)
1544 ret <16 x float> %res
; Double-precision counterpart: vmulpd on <8 x double> with an i8 mask,
; zeroing form ({%k1} {z}), one test per static rounding mode (i32 0..3).
; EVEX W bit differs from the ps group (0xfd vs 0x7c third byte).
1548 define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1549 ; CHECK-LABEL: test_vmulpd_mask_rn
1550 ; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
1551 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1552 <8 x double> zeroinitializer, i8 %mask, i32 0)
1553 ret <8 x double> %res
; Round-down ({rd-sae}), i32 1.
1556 define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1557 ; CHECK-LABEL: test_vmulpd_mask_rd
1558 ; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
1559 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1560 <8 x double> zeroinitializer, i8 %mask, i32 1)
1561 ret <8 x double> %res
; Round-up ({ru-sae}), i32 2.
1564 define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1565 ; CHECK-LABEL: test_vmulpd_mask_ru
1566 ; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
1567 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1568 <8 x double> zeroinitializer, i8 %mask, i32 2)
1569 ret <8 x double> %res
; Round-toward-zero ({rz-sae}), i32 3.
1572 define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1573 ; CHECK-LABEL: test_vmulpd_mask_rz
1574 ; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
1575 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1576 <8 x double> zeroinitializer, i8 %mask, i32 3)
1577 ret <8 x double> %res
; Bitwise XOR of 512-bit i32 vectors via the masked pxor.d intrinsic.
; Unmasked form: all-ones mask (i16 -1) and zero passthru -> plain vpxord.
1580 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
1581 ;CHECK-LABEL: test_xor_epi32
1582 ;CHECK: vpxord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
1583 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1584 ret < 16 x i32> %res
; Merge-masked form: %passThru supplies unselected lanes, so the CHECK
; expects vpxord writing %passThru's register (zmm2) under {%k1}.
1587 define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1588 ;CHECK-LABEL: test_mask_xor_epi32
1589 ;CHECK: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
1590 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1591 ret < 16 x i32> %res
1594 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Bitwise OR of 512-bit i32 vectors via the masked por.d intrinsic.
; Unmasked form (i16 -1 mask, zero passthru) -> plain vpord.
1596 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
1597 ;CHECK-LABEL: test_or_epi32
1598 ;CHECK: vpord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
1599 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1600 ret < 16 x i32> %res
; Merge-masked form: destination is %passThru's register (zmm2) under {%k1}.
1603 define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1604 ;CHECK-LABEL: test_mask_or_epi32
1605 ;CHECK: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
1606 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1607 ret < 16 x i32> %res
1610 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Bitwise AND of 512-bit i32 vectors via the masked pand.d intrinsic.
; Unmasked form (i16 -1 mask, zero passthru) -> plain vpandd.
1612 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
1613 ;CHECK-LABEL: test_and_epi32
1614 ;CHECK: vpandd {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
1615 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1616 ret < 16 x i32> %res
; Merge-masked form: destination is %passThru's register (zmm2) under {%k1}.
1619 define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1620 ;CHECK-LABEL: test_mask_and_epi32
1621 ;CHECK: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
1622 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1623 ret < 16 x i32> %res
1626 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1628 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
1629 ;CHECK-LABEL: test_xor_epi64
1630 ;CHECK: vpxorq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
1631 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1635 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1636 ;CHECK-LABEL: test_mask_xor_epi64
1637 ;CHECK: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
1638 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1642 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1644 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
1645 ;CHECK-LABEL: test_or_epi64
1646 ;CHECK: vporq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
1647 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1651 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1652 ;CHECK-LABEL: test_mask_or_epi64
1653 ;CHECK: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
1654 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1658 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1660 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
1661 ;CHECK-LABEL: test_and_epi64
1662 ;CHECK: vpandq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
1663 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1667 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1668 ;CHECK-LABEL: test_mask_and_epi64
1669 ;CHECK: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
1670 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1674 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; vpaddd register-register forms: rr = unmasked (i16 -1, zero passthru),
; rrk = merge-masked into %passThru ({%k1}, dest zmm2),
; rrkz = zero-masked ({%k1} {z}, zero passthru).
1677 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1678 ;CHECK-LABEL: test_mask_add_epi32_rr
1679 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
1680 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1681 ret < 16 x i32> %res
; Merge-masking: unselected lanes come from %passThru.
1684 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1685 ;CHECK-LABEL: test_mask_add_epi32_rrk
1686 ;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
1687 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1688 ret < 16 x i32> %res
; Zero-masking: zero passthru with a live mask -> {%k1} {z} form.
1691 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1692 ;CHECK-LABEL: test_mask_add_epi32_rrkz
1693 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
1694 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1695 ret < 16 x i32> %res
; vpaddd register-memory forms: the second operand is loaded from %ptr_b so
; the CHECK patterns expect a (%rdi) memory operand folded into the add.
; rm = unmasked, rmk = merge-masked, rmkz = zero-masked.
1698 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1699 ;CHECK-LABEL: test_mask_add_epi32_rm
1700 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
1701 %b = load <16 x i32>, <16 x i32>* %ptr_b
1702 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1703 ret < 16 x i32> %res
; Merge-masked memory form.
1706 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1707 ;CHECK-LABEL: test_mask_add_epi32_rmk
1708 ;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
1709 %b = load <16 x i32>, <16 x i32>* %ptr_b
1710 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1711 ret < 16 x i32> %res
; Zero-masked memory form.
1714 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1715 ;CHECK-LABEL: test_mask_add_epi32_rmkz
1716 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
1717 %b = load <16 x i32>, <16 x i32>* %ptr_b
1718 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1719 ret < 16 x i32> %res
; vpaddd broadcast forms: a single i32 is loaded, splatted with
; insertelement + shufflevector(zeroinitializer), and the backend is expected
; to fold that splat into the (%rdi){1to16} embedded-broadcast operand
; (EVEX b bit set: fourth byte 0x58/0x59/0xd9).
; rmb = unmasked, rmbk = merge-masked, rmbkz = zero-masked.
1722 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1723 ;CHECK-LABEL: test_mask_add_epi32_rmb
1724 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
1725 %q = load i32, i32* %ptr_b
1726 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1727 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1728 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1729 ret < 16 x i32> %res
; Merge-masked broadcast form.
1732 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1733 ;CHECK-LABEL: test_mask_add_epi32_rmbk
1734 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
1735 %q = load i32, i32* %ptr_b
1736 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1737 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1738 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1739 ret < 16 x i32> %res
; Zero-masked broadcast form.
1742 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1743 ;CHECK-LABEL: test_mask_add_epi32_rmbkz
1744 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
1745 %q = load i32, i32* %ptr_b
1746 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1747 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1748 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1749 ret < 16 x i32> %res
1752 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; vpsubd register-register forms, mirroring the vpaddd group above:
; rr = unmasked, rrk = merge-masked into %passThru, rrkz = zero-masked.
1754 define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1755 ;CHECK-LABEL: test_mask_sub_epi32_rr
1756 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
1757 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1758 ret < 16 x i32> %res
; Merge-masking: unselected lanes come from %passThru.
1761 define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1762 ;CHECK-LABEL: test_mask_sub_epi32_rrk
1763 ;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
1764 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1765 ret < 16 x i32> %res
; Zero-masking: zero passthru with a live mask -> {%k1} {z} form.
1768 define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1769 ;CHECK-LABEL: test_mask_sub_epi32_rrkz
1770 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
1771 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1772 ret < 16 x i32> %res
; vpsubd register-memory forms: second operand loaded from %ptr_b and
; expected to fold as a (%rdi) memory operand.
; rm = unmasked, rmk = merge-masked, rmkz = zero-masked.
1775 define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1776 ;CHECK-LABEL: test_mask_sub_epi32_rm
1777 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
1778 %b = load <16 x i32>, <16 x i32>* %ptr_b
1779 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1780 ret < 16 x i32> %res
; Merge-masked memory form.
1783 define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1784 ;CHECK-LABEL: test_mask_sub_epi32_rmk
1785 ;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
1786 %b = load <16 x i32>, <16 x i32>* %ptr_b
1787 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1788 ret < 16 x i32> %res
; Zero-masked memory form.
1791 define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1792 ;CHECK-LABEL: test_mask_sub_epi32_rmkz
1793 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
1794 %b = load <16 x i32>, <16 x i32>* %ptr_b
1795 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1796 ret < 16 x i32> %res
; vpsubd broadcast forms: scalar i32 load splatted via insertelement +
; shufflevector, expected to fold into the (%rdi){1to16} embedded-broadcast
; operand. rmb = unmasked, rmbk = merge-masked, rmbkz = zero-masked.
1799 define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1800 ;CHECK-LABEL: test_mask_sub_epi32_rmb
1801 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
1802 %q = load i32, i32* %ptr_b
1803 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1804 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1805 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1806 ret < 16 x i32> %res
; Merge-masked broadcast form.
1809 define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1810 ;CHECK-LABEL: test_mask_sub_epi32_rmbk
1811 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
1812 %q = load i32, i32* %ptr_b
1813 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1814 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1815 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1816 ret < 16 x i32> %res
; Zero-masked broadcast form.
1819 define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1820 ;CHECK-LABEL: test_mask_sub_epi32_rmbkz
1821 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
1822 %q = load i32, i32* %ptr_b
1823 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1824 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1825 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1826 ret < 16 x i32> %res
1829 declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1831 define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
1832 ;CHECK-LABEL: test_mask_add_epi64_rr
1833 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
1834 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1838 define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1839 ;CHECK-LABEL: test_mask_add_epi64_rrk
1840 ;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
1841 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1845 define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1846 ;CHECK-LABEL: test_mask_add_epi64_rrkz
1847 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
1848 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1852 define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
1853 ;CHECK-LABEL: test_mask_add_epi64_rm
1854 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
1855 %b = load <8 x i64>, <8 x i64>* %ptr_b
1856 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1860 define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1861 ;CHECK-LABEL: test_mask_add_epi64_rmk
1862 ;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
1863 %b = load <8 x i64>, <8 x i64>* %ptr_b
1864 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1868 define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
1869 ;CHECK-LABEL: test_mask_add_epi64_rmkz
1870 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
1871 %b = load <8 x i64>, <8 x i64>* %ptr_b
1872 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1876 define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
1877 ;CHECK-LABEL: test_mask_add_epi64_rmb
1878 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
1879 %q = load i64, i64* %ptr_b
1880 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1881 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1882 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1886 define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1887 ;CHECK-LABEL: test_mask_add_epi64_rmbk
1888 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
1889 %q = load i64, i64* %ptr_b
1890 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1891 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1892 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1896 define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
1897 ;CHECK-LABEL: test_mask_add_epi64_rmbkz
1898 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
1899 %q = load i64, i64* %ptr_b
1900 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1901 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1902 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1906 declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1908 define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
1909 ;CHECK-LABEL: test_mask_sub_epi64_rr
1910 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
1911 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1915 define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1916 ;CHECK-LABEL: test_mask_sub_epi64_rrk
1917 ;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
1918 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1922 define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1923 ;CHECK-LABEL: test_mask_sub_epi64_rrkz
1924 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
1925 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1929 define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
1930 ;CHECK-LABEL: test_mask_sub_epi64_rm
1931 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
1932 %b = load <8 x i64>, <8 x i64>* %ptr_b
1933 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1937 define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1938 ;CHECK-LABEL: test_mask_sub_epi64_rmk
1939 ;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
1940 %b = load <8 x i64>, <8 x i64>* %ptr_b
1941 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1945 define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
1946 ;CHECK-LABEL: test_mask_sub_epi64_rmkz
1947 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
1948 %b = load <8 x i64>, <8 x i64>* %ptr_b
1949 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1953 define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
1954 ;CHECK-LABEL: test_mask_sub_epi64_rmb
1955 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
1956 %q = load i64, i64* %ptr_b
1957 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1958 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1959 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1963 define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1964 ;CHECK-LABEL: test_mask_sub_epi64_rmbk
1965 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
1966 %q = load i64, i64* %ptr_b
1967 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1968 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1969 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1973 define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
1974 ;CHECK-LABEL: test_mask_sub_epi64_rmbkz
1975 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
1976 %q = load i64, i64* %ptr_b
1977 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1978 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1979 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1983 declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1985 define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1986 ;CHECK-LABEL: test_mask_mul_epi32_rr
1987 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
1988 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
1992 define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
1993 ;CHECK-LABEL: test_mask_mul_epi32_rrk
1994 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
1995 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
1999 define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
2000 ;CHECK-LABEL: test_mask_mul_epi32_rrkz
2001 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
2002 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2006 define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2007 ;CHECK-LABEL: test_mask_mul_epi32_rm
2008 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
2009 %b = load <16 x i32>, <16 x i32>* %ptr_b
2010 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2014 define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2015 ;CHECK-LABEL: test_mask_mul_epi32_rmk
2016 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
2017 %b = load <16 x i32>, <16 x i32>* %ptr_b
2018 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2022 define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
2023 ;CHECK-LABEL: test_mask_mul_epi32_rmkz
2024 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
2025 %b = load <16 x i32>, <16 x i32>* %ptr_b
2026 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2030 define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
2031 ;CHECK-LABEL: test_mask_mul_epi32_rmb
2032 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
2033 %q = load i64, i64* %ptr_b
2034 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2035 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2036 %b = bitcast <8 x i64> %b64 to <16 x i32>
2037 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2041 define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2042 ;CHECK-LABEL: test_mask_mul_epi32_rmbk
2043 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
2044 %q = load i64, i64* %ptr_b
2045 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2046 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2047 %b = bitcast <8 x i64> %b64 to <16 x i32>
2048 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2052 define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
2053 ;CHECK-LABEL: test_mask_mul_epi32_rmbkz
2054 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
2055 %q = load i64, i64* %ptr_b
2056 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2057 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2058 %b = bitcast <8 x i64> %b64 to <16 x i32>
2059 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2063 declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
2065 define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
2066 ;CHECK-LABEL: test_mask_mul_epu32_rr
2067 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
2068 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2072 define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
2073 ;CHECK-LABEL: test_mask_mul_epu32_rrk
2074 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
2075 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2079 define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
2080 ;CHECK-LABEL: test_mask_mul_epu32_rrkz
2081 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
2082 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2086 define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2087 ;CHECK-LABEL: test_mask_mul_epu32_rm
2088 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
2089 %b = load <16 x i32>, <16 x i32>* %ptr_b
2090 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2094 define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2095 ;CHECK-LABEL: test_mask_mul_epu32_rmk
2096 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
2097 %b = load <16 x i32>, <16 x i32>* %ptr_b
2098 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2102 define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
2103 ;CHECK-LABEL: test_mask_mul_epu32_rmkz
2104 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
2105 %b = load <16 x i32>, <16 x i32>* %ptr_b
2106 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2110 define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
2111 ;CHECK-LABEL: test_mask_mul_epu32_rmb
2112 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
2113 %q = load i64, i64* %ptr_b
2114 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2115 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2116 %b = bitcast <8 x i64> %b64 to <16 x i32>
2117 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2121 define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2122 ;CHECK-LABEL: test_mask_mul_epu32_rmbk
2123 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
2124 %q = load i64, i64* %ptr_b
2125 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2126 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2127 %b = bitcast <8 x i64> %b64 to <16 x i32>
2128 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2132 define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
2133 ;CHECK-LABEL: test_mask_mul_epu32_rmbkz
2134 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
2135 %q = load i64, i64* %ptr_b
2136 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2137 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2138 %b = bitcast <8 x i64> %b64 to <16 x i32>
2139 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2143 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
2145 define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
2146 ;CHECK-LABEL: test_mask_mullo_epi32_rr_512
2147 ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
2148 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2152 define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2153 ;CHECK-LABEL: test_mask_mullo_epi32_rrk_512
2154 ;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
2155 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2156 ret < 16 x i32> %res
2159 define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2160 ;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512
2161 ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
2162 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2163 ret < 16 x i32> %res
2166 define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
2167 ;CHECK-LABEL: test_mask_mullo_epi32_rm_512
2168 ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
2169 %b = load <16 x i32>, <16 x i32>* %ptr_b
2170 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2171 ret < 16 x i32> %res
2174 define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2175 ;CHECK-LABEL: test_mask_mullo_epi32_rmk_512
2176 ;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
2177 %b = load <16 x i32>, <16 x i32>* %ptr_b
2178 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2179 ret < 16 x i32> %res
2182 define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2183 ;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512
2184 ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
2185 %b = load <16 x i32>, <16 x i32>* %ptr_b
2186 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2187 ret < 16 x i32> %res
2190 define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
2191 ;CHECK-LABEL: test_mask_mullo_epi32_rmb_512
2192 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
2193 %q = load i32, i32* %ptr_b
2194 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2195 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2196 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2197 ret < 16 x i32> %res
2200 define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2201 ;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512
2202 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
2203 %q = load i32, i32* %ptr_b
2204 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2205 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2206 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2207 ret < 16 x i32> %res
2210 define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2211 ;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512
2212 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
2213 %q = load i32, i32* %ptr_b
2214 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2215 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2216 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2217 ret < 16 x i32> %res
2220 declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2222 define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2223 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae
2224 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2225 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2226 ret <16 x float> %res
2228 define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2229 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae
2230 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2231 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
2232 ret <16 x float> %res
2234 define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2235 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae
2236 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2237 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2238 ret <16 x float> %res
2241 define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2242 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae
2243 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2244 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2245 ret <16 x float> %res
2249 define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2250 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_current
2251 ;CHECK: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
2252 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2253 ret <16 x float> %res
2256 define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2257 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae
2258 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2259 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2260 ret <16 x float> %res
2262 define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2263 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae
2264 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2265 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2266 ret <16 x float> %res
2268 define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2269 ;CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae
2270 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2271 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2272 ret <16 x float> %res
2275 define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2276 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae
2277 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2278 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2279 ret <16 x float> %res
2283 define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2284 ;CHECK-LABEL: test_mm512_mask_add_round_ps_current
2285 ;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1}
2286 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2287 ret <16 x float> %res
2291 define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2292 ;CHECK-LABEL: test_mm512_add_round_ps_rn_sae
2293 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
2294 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2295 ret <16 x float> %res
2297 define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2298 ;CHECK-LABEL: test_mm512_add_round_ps_rd_sae
2299 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
2300 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2301 ret <16 x float> %res
2303 define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2304 ;CHECK-LABEL: test_mm512_add_round_ps_ru_sae
2305 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
2306 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2307 ret <16 x float> %res
2310 define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2311 ;CHECK-LABEL: test_mm512_add_round_ps_rz_sae
2312 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
2313 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2314 ret <16 x float> %res
2317 define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2318 ;CHECK-LABEL: test_mm512_add_round_ps_current
2319 ;CHECK: vaddps %zmm1, %zmm0, %zmm0
2320 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2321 ret <16 x float> %res
2323 declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2325 define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2326 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae
2327 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2328 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2329 ret <16 x float> %res
2331 define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2332 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae
2333 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2334 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2335 ret <16 x float> %res
2337 define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2338 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae
2339 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2340 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2341 ret <16 x float> %res
2344 define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2345 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae
2346 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2347 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2348 ret <16 x float> %res
2352 define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2353 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_current
2354 ;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1}
2355 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2356 ret <16 x float> %res
2359 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2360 ;CHECK-LABEL: test_mm512_sub_round_ps_rn_sae
2361 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
2362 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2363 ret <16 x float> %res
2365 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2366 ;CHECK-LABEL: test_mm512_sub_round_ps_rd_sae
2367 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
2368 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2369 ret <16 x float> %res
2371 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2372 ;CHECK-LABEL: test_mm512_sub_round_ps_ru_sae
2373 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
2374 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2375 ret <16 x float> %res
2378 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2379 ;CHECK-LABEL: test_mm512_sub_round_ps_rz_sae
2380 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
2381 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2382 ret <16 x float> %res
2385 define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2386 ;CHECK-LABEL: test_mm512_sub_round_ps_current
2387 ;CHECK: vsubps %zmm1, %zmm0, %zmm0
2388 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2389 ret <16 x float> %res
2392 define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2393 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae
2394 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2395 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2396 ret <16 x float> %res
2398 define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2399 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae
2400 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2401 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
2402 ret <16 x float> %res
2404 define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2405 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae
2406 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2407 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2408 ret <16 x float> %res
2411 define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2412 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae
2413 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2414 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2415 ret <16 x float> %res
2419 define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2420 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_current
2421 ;CHECK: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
2422 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2423 ret <16 x float> %res
2426 define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2427 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae
2428 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2429 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2430 ret <16 x float> %res
2432 define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2433 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae
2434 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2435 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2436 ret <16 x float> %res
2438 define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2439 ;CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae
2440 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2441 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2442 ret <16 x float> %res
2445 define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2446 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae
2447 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2448 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2449 ret <16 x float> %res
2453 define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2454 ;CHECK-LABEL: test_mm512_mask_div_round_ps_current
2455 ;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1}
2456 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2457 ret <16 x float> %res
2461 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2462 ;CHECK-LABEL: test_mm512_div_round_ps_rn_sae
2463 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
2464 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2465 ret <16 x float> %res
2467 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2468 ;CHECK-LABEL: test_mm512_div_round_ps_rd_sae
2469 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
2470 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2471 ret <16 x float> %res
2473 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2474 ;CHECK-LABEL: test_mm512_div_round_ps_ru_sae
2475 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
2476 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2477 ret <16 x float> %res
2480 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2481 ;CHECK-LABEL: test_mm512_div_round_ps_rz_sae
2482 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
2483 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2484 ret <16 x float> %res
2487 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2488 ;CHECK-LABEL: test_mm512_div_round_ps_current
2489 ;CHECK: vdivps %zmm1, %zmm0, %zmm0
2490 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2491 ret <16 x float> %res
2493 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2495 define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2496 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_sae
2497 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2498 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2499 ret <16 x float> %res
2502 define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2503 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_current
2504 ;CHECK: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
2505 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2506 ret <16 x float> %res
2509 define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2510 ;CHECK-LABEL: test_mm512_mask_min_round_ps_sae
2511 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2512 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2513 ret <16 x float> %res
2516 define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2517 ;CHECK-LABEL: test_mm512_mask_min_round_ps_current
2518 ;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1}
2519 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2520 ret <16 x float> %res
2523 define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2524 ;CHECK-LABEL: test_mm512_min_round_ps_sae
2525 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0
2526 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2527 ret <16 x float> %res
2530 define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2531 ;CHECK-LABEL: test_mm512_min_round_ps_current
2532 ;CHECK: vminps %zmm1, %zmm0, %zmm0
2533 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2534 ret <16 x float> %res
2536 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2538 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2539 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_sae
2540 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2541 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2542 ret <16 x float> %res
2545 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2546 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_current
2547 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
2548 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2549 ret <16 x float> %res
2552 define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2553 ;CHECK-LABEL: test_mm512_mask_max_round_ps_sae
2554 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2555 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2556 ret <16 x float> %res
2559 define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2560 ;CHECK-LABEL: test_mm512_mask_max_round_ps_current
2561 ;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
2562 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2563 ret <16 x float> %res
2566 define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2567 ;CHECK-LABEL: test_mm512_max_round_ps_sae
2568 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0
2569 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2570 ret <16 x float> %res
2573 define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2574 ;CHECK-LABEL: test_mm512_max_round_ps_current
2575 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0
2576 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2577 ret <16 x float> %res
2579 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2581 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2583 define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2584 ; CHECK-LABEL: test_mask_add_ss_rn
2585 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2586 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
2587 ret <4 x float> %res
2590 define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2591 ; CHECK-LABEL: test_mask_add_ss_rd
2592 ; CHECK: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2593 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
2594 ret <4 x float> %res
2597 define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2598 ; CHECK-LABEL: test_mask_add_ss_ru
2599 ; CHECK: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2600 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
2601 ret <4 x float> %res
2604 define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2605 ; CHECK-LABEL: test_mask_add_ss_rz
2606 ; CHECK: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2607 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
2608 ret <4 x float> %res
2611 define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2612 ; CHECK-LABEL: test_mask_add_ss_current
2613 ; CHECK: vaddss %xmm1, %xmm0, %xmm2 {%k1}
2614 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2615 ret <4 x float> %res
2618 define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2619 ; CHECK-LABEL: test_maskz_add_ss_rn
2620 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2621 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
2622 ret <4 x float> %res
2625 define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
2626 ; CHECK-LABEL: test_add_ss_rn
2627 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
2628 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
2629 ret <4 x float> %res
; Masked scalar-double add with embedded rounding control. The tests below
; cover every rounding immediate; CHECK lines pin the suffix mapping:
; 0 -> {rn-sae}, 1 -> {rd-sae}, 2 -> {ru-sae}, 3 -> {rz-sae}, 4 -> none.
2632 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2634 define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2635 ; CHECK-LABEL: test_mask_add_sd_rn
2636 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2637 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
2638 ret <2 x double> %res
2641 define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2642 ; CHECK-LABEL: test_mask_add_sd_rd
2643 ; CHECK: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2644 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
2645 ret <2 x double> %res
2648 define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2649 ; CHECK-LABEL: test_mask_add_sd_ru
2650 ; CHECK: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2651 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
2652 ret <2 x double> %res
2655 define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2656 ; CHECK-LABEL: test_mask_add_sd_rz
2657 ; CHECK: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2658 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
2659 ret <2 x double> %res
; i32 4 -> no embedded-rounding suffix.
2662 define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2663 ; CHECK-LABEL: test_mask_add_sd_current
2664 ; CHECK: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
2665 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2666 ret <2 x double> %res
; Zero pass-through + live mask -> zero-masking form.
2669 define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2670 ; CHECK-LABEL: test_maskz_add_sd_rn
2671 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2672 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
2673 ret <2 x double> %res
; All-ones mask -> unmasked form.
2676 define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
2677 ; CHECK-LABEL: test_add_sd_rn
2678 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
2679 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
2680 ret <2 x double> %res
; Masked scalar-single max. For vmaxss the rounding operand only selects
; exception suppression: i32 8 -> {sae} suffix, i32 4 -> plain instruction
; (see the CHECK lines below for each pairing).
2683 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2685 define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2686 ; CHECK-LABEL: test_mask_max_ss_sae
2687 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2688 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
2689 ret <4 x float> %res
2692 define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2693 ; CHECK-LABEL: test_maskz_max_ss_sae
2694 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2695 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
2696 ret <4 x float> %res
; All-ones mask -> unmasked {sae} form.
2699 define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
2700 ; CHECK-LABEL: test_max_ss_sae
2701 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0
2702 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
2703 ret <4 x float> %res
; i32 4 -> no {sae} suffix on the emitted instruction.
2706 define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2707 ; CHECK-LABEL: test_mask_max_ss
2708 ; CHECK: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
2709 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2710 ret <4 x float> %res
2713 define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2714 ; CHECK-LABEL: test_maskz_max_ss
2715 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
2716 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
2717 ret <4 x float> %res
2720 define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
2721 ; CHECK-LABEL: test_max_ss
2722 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0
2723 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
2724 ret <4 x float> %res
; Masked scalar-double max - mirrors the vmaxss tests above:
; i32 8 -> {sae}, i32 4 -> plain; mask/maskz/unmasked variants of each.
2726 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2728 define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2729 ; CHECK-LABEL: test_mask_max_sd_sae
2730 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2731 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
2732 ret <2 x double> %res
2735 define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2736 ; CHECK-LABEL: test_maskz_max_sd_sae
2737 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2738 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
2739 ret <2 x double> %res
2742 define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
2743 ; CHECK-LABEL: test_max_sd_sae
2744 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
2745 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
2746 ret <2 x double> %res
2749 define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2750 ; CHECK-LABEL: test_mask_max_sd
2751 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
2752 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2753 ret <2 x double> %res
2756 define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2757 ; CHECK-LABEL: test_maskz_max_sd
2758 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
2759 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
2760 ret <2 x double> %res
2763 define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
2764 ; CHECK-LABEL: test_max_sd
2765 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0
2766 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
2767 ret <2 x double> %res
; Signed int -> scalar float/double conversions with embedded rounding.
; All four use immediate 3, and the CHECKs expect {rz-sae}; the mnemonic
; suffix follows the integer width: 'l' for i32 (%edi), 'q' for i64 (%rdi).
2770 define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
2771 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
2773 ; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
2775 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1]
2776 ret <2 x double> %res
2778 declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone
2780 define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
2781 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
2783 ; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
2785 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1]
2786 ret <2 x double> %res
2788 declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
2790 define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
2791 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
2793 ; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
2795 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1]
2796 ret <4 x float> %res
2798 declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
2800 define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
2801 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
2803 ; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
2805 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1]
2806 ret <4 x float> %res
2808 declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
; Unsigned int -> scalar single conversions. Immediate 1 -> {rd-sae},
; immediate 4 -> no rounding suffix. The *_mem variants verify load folding:
; with explicit rounding the value must be loaded into a GPR first
; (movl + vcvtusi2ssl %eax), while the current-mode form folds the load
; directly as a memory operand ((%rdi)).
2810 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
2811 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
2813 ; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
2816 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
2817 ret <4 x float> %res
2820 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
2821 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
2823 ; CHECK-NEXT: movl (%rdi), %eax
2824 ; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
2827 %b = load i32, i32* %ptr
2828 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
2829 ret <4 x float> %res
2832 define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
2833 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
2835 ; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
2838 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
2839 ret <4 x float> %res
; Current rounding mode: the load is folded into the conversion.
2842 define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
2843 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
2845 ; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
2848 %b = load i32, i32* %ptr
2849 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
2850 ret <4 x float> %res
2852 declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
; 64-bit unsigned source: 'q' mnemonic suffix, %rdi register.
2854 define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
2855 ; CHECK-LABEL: _mm_cvt_roundu64_ss:
2857 ; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
2860 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1]
2861 ret <4 x float> %res
2864 define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
2865 ; CHECK-LABEL: _mm_cvtu64_ss:
2867 ; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
2870 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1]
2871 ret <4 x float> %res
2873 declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone
; Unsigned int -> scalar double conversions. The 32-bit intrinsic
; (@llvm.x86.avx512.cvtusi2sd) takes no rounding operand, unlike its
; 64-bit counterpart.
2875 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
2876 ; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
2878 ; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
2881 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1]
2882 ret <2 x double> %res
2884 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
; NOTE(review): '_mm_cvtu64_sd' below uses the {rd-sae} immediate (1) while
; '_mm_cvt_roundu64_sd' uses current-mode (4) - the opposite of the u32/u64 ss
; tests above, so the two names look swapped. Confirm intent before renaming.
2886 define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
2887 ; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
2889 ; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
2892 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1]
2893 ret <2 x double> %res
2896 define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
2897 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
2899 ; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
2902 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1]
2903 ret <2 x double> %res
2905 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
; Integer min/max encoding checks (the RUN line passes --show-mc-encoding, so
; these CHECKs match exact EVEX byte sequences). All-ones mask plus a
; zeroinitializer pass-through selects the unmasked instruction.
2907 define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
2908 ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
2909 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
2910 <8 x i64>zeroinitializer, i8 -1)
2913 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2915 define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
2916 ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
2917 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
2918 <16 x i32>zeroinitializer, i16 -1)
2921 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2923 define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
2924 ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
2925 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
2926 <16 x i32>zeroinitializer, i16 -1)
2929 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Masked integer min/max tests. Each test calls the intrinsic twice - once
; with a live mask (%x3) and once with an all-ones mask - and adds the two
; results so both calls survive into codegen and both instruction forms are
; exercised under one label.
2931 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_512
2933 ; CHECK: vpmaxsd %zmm
2935 define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2936 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2937 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2938 %res2 = add <16 x i32> %res, %res1
2939 ret <16 x i32> %res2
2942 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_512
2944 ; CHECK: vpmaxsq %zmm
2946 define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2947 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2948 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2949 %res2 = add <8 x i64> %res, %res1
2953 declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2955 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_512
2957 ; CHECK: vpmaxud %zmm
2959 define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2960 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2961 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2962 %res2 = add <16 x i32> %res, %res1
2963 ret <16 x i32> %res2
2966 declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2968 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_512
2970 ; CHECK: vpmaxuq %zmm
2972 define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2973 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2974 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2975 %res2 = add <8 x i64> %res, %res1
2979 declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2981 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_512
2983 ; CHECK: vpminsd %zmm
2985 define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2986 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2987 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2988 %res2 = add <16 x i32> %res, %res1
2989 ret <16 x i32> %res2
2992 declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2994 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_512
2996 ; CHECK: vpminsq %zmm
2998 define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2999 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3000 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3001 %res2 = add <8 x i64> %res, %res1
3005 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_512
3007 ; CHECK: vpminud %zmm
3009 define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3010 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3011 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3012 %res2 = add <16 x i32> %res, %res1
3013 ret <16 x i32> %res2
3016 declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3018 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_512
3020 ; CHECK: vpminuq %zmm
3022 define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3023 %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3024 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3025 %res2 = add <8 x i64> %res, %res1
; vpermi2var (two-source permute, indices in the first operand) for d/pd/ps/q.
; Same masked + all-ones-mask call pair as above; float variants combine the
; two results with fadd, integer variants with add.
3029 declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3031 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512
3034 ; CHECK: vpermi2d {{.*}}{%k1}
3035 define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3036 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3037 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3038 %res2 = add <16 x i32> %res, %res1
3039 ret <16 x i32> %res2
3042 declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
3044 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512
3047 ; CHECK: vpermi2pd {{.*}}{%k1}
3048 define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
3049 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
3050 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
3051 %res2 = fadd <8 x double> %res, %res1
3052 ret <8 x double> %res2
3055 declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
3057 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512
3060 ; CHECK: vpermi2ps {{.*}}{%k1}
3061 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
3062 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
3063 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
3064 %res2 = fadd <16 x float> %res, %res1
3065 ret <16 x float> %res2
3068 declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3070 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512
3073 ; CHECK: vpermi2q {{.*}}{%k1}
3074 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3075 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3076 %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3077 %res2 = add <8 x i64> %res, %res1
; vpermt2var (two-source permute, indices in the first intrinsic operand,
; table operands follow). The 'maskz' intrinsics must select the
; zero-masking form ({%k1} {z}); the 'mask' intrinsic the merge form ({%k1}).
3081 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3083 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512
3086 ; CHECK: vpermt2d {{.*}}{%k1} {z}
3087 define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3088 %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3089 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3090 %res2 = add <16 x i32> %res, %res1
3091 ret <16 x i32> %res2
3094 declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
3096 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512
3099 ; CHECK: vpermt2pd {{.*}}{%k1} {z}
3100 define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3101 %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3102 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3103 %res2 = fadd <8 x double> %res, %res1
3104 ret <8 x double> %res2
3107 declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
3109 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512
3112 ; CHECK: vpermt2ps {{.*}}{%k1} {z}
3113 define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3114 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3115 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3116 %res2 = fadd <16 x float> %res, %res1
3117 ret <16 x float> %res2
3121 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3123 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512
3126 ; CHECK: vpermt2q {{.*}}{%k1} {z}
3127 define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3128 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3129 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3130 %res2 = add <8 x i64> %res, %res1
; Merge-masking variant: CHECK expects {%k1} without {z}.
3134 declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3136 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512
3139 ; CHECK: vpermt2d {{.*}}{%k1}
3141 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3142 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3143 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3144 %res2 = add <16 x i32> %res, %res1
3145 ret <16 x i32> %res2