1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s
3 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
4 ; CHECK-LABEL: test_kortestz
7 define i32 @test_kortestz(i16 %a0, i16 %a1) {
8 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
12 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
13 ; CHECK-LABEL: test_kortestc
16 define i32 @test_kortestc(i16 %a0, i16 %a1) {
17 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
21 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
22 ; CHECK-LABEL: test_kand
25 define i16 @test_kand(i16 %a0, i16 %a1) {
26 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
27 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
31 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
32 ; CHECK-LABEL: test_knot
34 define i16 @test_knot(i16 %a0) {
35 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
39 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
41 ; CHECK-LABEL: unpckbw_test
44 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
45 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
49 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
50 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
51 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
54 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
56 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
57 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
58 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
61 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
63 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
65 define <8 x double> @test7(<8 x double> %a) {
66 ; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
67 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
71 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
73 define <16 x float> @test8(<16 x float> %a) {
74 ; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
75 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
79 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
80 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
81 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
84 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
86 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
87 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
88 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
91 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
93 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
94 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
95 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
98 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
100 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
101 ; CHECK-LABEL: test_sqrt_pd_512
103 %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
104 ret <8 x double> %res
106 declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
108 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
109 ; CHECK-LABEL: test_sqrt_ps_512
111 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
112 ret <16 x float> %res
114 define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
115 ; CHECK-LABEL: test_sqrt_round_ps_512
116 ; CHECK: vsqrtps {rz-sae}
117 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
118 ret <16 x float> %res
120 declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
122 define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
123 ; CHECK-LABEL: test_getexp_pd_512
125 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
126 ret <8 x double> %res
128 define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
129 ; CHECK-LABEL: test_getexp_round_pd_512
130 ; CHECK: vgetexppd {sae}
131 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
132 ret <8 x double> %res
134 declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
136 define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
137 ; CHECK-LABEL: test_getexp_ps_512
139 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
140 ret <16 x float> %res
143 define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
144 ; CHECK-LABEL: test_getexp_round_ps_512
145 ; CHECK: vgetexpps {sae}
146 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
147 ret <16 x float> %res
149 declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
151 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
152 ; CHECK: vsqrtss {{.*}}encoding: [0x62
153 %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
156 declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone
158 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
159 ; CHECK: vsqrtsd {{.*}}encoding: [0x62
160 %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
161 ret <2 x double> %res
163 declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
165 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
166 ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
167 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
170 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
172 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
173 ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
174 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
175 ret <2 x double> %res
177 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
179 define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
180 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
181 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
184 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
187 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
188 ; CHECK: vcvtss2si {{.*}}encoding: [0x62
189 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
192 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
195 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
196 ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
197 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
200 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
203 define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
204 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
205 %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
208 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
210 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
211 ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
212 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
215 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
217 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
218 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
219 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
220 ret <16 x float> %res
222 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
225 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
226 ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
227 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
231 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
233 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
234 ; CHECK: vbroadcastss
235 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
236 ret <16 x float> %res
238 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
240 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
241 ; CHECK: vbroadcastsd
242 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
243 ret <8 x double> %res
245 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
247 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
248 ; CHECK: vbroadcastss
249 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
250 ret <16 x float> %res
252 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
254 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
255 ; CHECK: vbroadcastsd
256 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
257 ret <8 x double> %res
259 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
261 define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
262 ; CHECK: vpbroadcastd
263 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
266 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
268 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
269 ; CHECK: vpbroadcastd
270 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
273 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
275 define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
276 ; CHECK: vpbroadcastq
277 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
280 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
282 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
283 ; CHECK: vpbroadcastq
284 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
287 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
289 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
290 ; CHECK: movw $-1, %ax
293 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
297 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
299 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
300 ; CHECK: movb $-1, %al
303 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
307 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
309 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
311 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
315 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
317 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
321 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
322 ; CHECK: movw $-1, %ax
325 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
329 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
331 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
332 ; CHECK: movb $-1, %al
335 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
339 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
342 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
344 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
348 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
350 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
354 define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
355 ; CHECK-LABEL: test_ctlz_d
357 %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
361 declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly
363 define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
364 ; CHECK-LABEL: test_ctlz_q
366 %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
370 declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
372 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
373 ; CHECK: vblendmps %zmm1, %zmm0
374 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
375 ret <16 x float> %res
378 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
380 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
381 ; CHECK: vblendmpd %zmm1, %zmm0
382 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
383 ret <8 x double> %res
386 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
387 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
388 ; CHECK: vblendmpd (%
389 %b = load <8 x double>, <8 x double>* %ptr
390 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
391 ret <8 x double> %res
393 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
395 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
397 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
400 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
402 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
404 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
407 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
409 define <8 x i32> @test_cvtpd2udq(<8 x double> %a) {
410 ;CHECK: vcvtpd2udq {ru-sae}{{.*}}encoding: [0x62,0xf1,0xfc,0x58,0x79,0xc0]
411 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %a, <8 x i32>zeroinitializer, i8 -1, i32 2)
414 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
416 define <16 x i32> @test_cvtps2udq(<16 x float> %a) {
417 ;CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0]
418 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32>zeroinitializer, i16 -1, i32 1)
421 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
423 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
424 ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
425 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
428 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
430 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
431 ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
432 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
435 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
438 define <16 x float> @test_cvtdq2ps(<16 x i32> %a) {
439 ;CHECK: vcvtdq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x5b,0xc0]
440 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
443 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
445 define <16 x float> @test_cvtudq2ps(<16 x i32> %a) {
446 ;CHECK: vcvtudq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7f,0x38,0x7a,0xc0]
447 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
450 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
452 define <8 x double> @test_cvtdq2pd(<8 x i32> %a) {
453 ;CHECK: vcvtdq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0]
454 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
457 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
459 define <8 x double> @test_cvtudq2pd(<8 x i32> %a) {
460 ;CHECK: vcvtudq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0]
461 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
464 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
467 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
469 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
470 <8 x double>zeroinitializer, i8 -1, i32 4)
471 ret <8 x double> %res
473 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
474 <8 x double>, i8, i32)
476 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
478 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
479 <8 x double>zeroinitializer, i8 -1, i32 4)
480 ret <8 x double> %res
482 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
483 <8 x double>, i8, i32)
485 define <8 x float> @test_cvtpd2ps(<8 x double> %a) {
486 ;CHECK: vcvtpd2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0xfd,0x38,0x5a,0xc0]
487 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %a, <8 x float>zeroinitializer, i8 -1, i32 1)
490 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
492 define <16 x i32> @test_pabsd(<16 x i32> %a) {
493 ;CHECK: vpabsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
494 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a, <16 x i32>zeroinitializer, i16 -1)
497 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
499 define <8 x i64> @test_pabsq(<8 x i64> %a) {
500 ;CHECK: vpabsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
501 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a, <8 x i64>zeroinitializer, i8 -1)
504 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
506 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
507 ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
508 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
511 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
513 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
514 ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
515 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
518 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
520 define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
521 ; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
522 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
526 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
528 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
529 ; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
530 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
534 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
536 define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
537 ; CHECK-LABEL: test_mask_store_aligned_ps:
539 ; CHECK-NEXT: kmovw %esi, %k1
540 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
542 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
546 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
548 define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
549 ; CHECK-LABEL: test_mask_store_aligned_pd:
551 ; CHECK-NEXT: kmovw %esi, %k1
552 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
554 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
558 declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
560 define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
561 ; CHECK-LABEL: test_maskz_load_aligned_ps:
563 ; CHECK-NEXT: kmovw %esi, %k1
564 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
566 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
567 ret <16 x float> %res
570 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
572 define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
573 ; CHECK-LABEL: test_maskz_load_aligned_pd:
575 ; CHECK-NEXT: kmovw %esi, %k1
576 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
578 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
579 ret <8 x double> %res
582 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
584 define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
585 ; CHECK-LABEL: test_load_aligned_ps:
587 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
589 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
590 ret <16 x float> %res
593 define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
594 ; CHECK-LABEL: test_load_aligned_pd:
596 ; CHECK-NEXT: vmovapd (%rdi), %zmm0
598 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
599 ret <8 x double> %res
602 define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) {
603 ; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1]
604 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 -1)
605 ret <16 x float> %res
608 define <16 x float> @test_vpermt2ps_mask(<16 x float>%x, <16 x float>%y, <16 x i32>%perm, i16 %mask) {
609 ; CHECK-LABEL: test_vpermt2ps_mask:
610 ; CHECK: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x7f,0xc1]
611 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 %mask)
612 ret <16 x float> %res
615 declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
617 define <8 x i64> @test_vmovntdqa(i8 *%x) {
618 ; CHECK-LABEL: test_vmovntdqa:
619 ; CHECK: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07]
620 %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %x)
624 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
626 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
627 ; CHECK-LABEL: test_valign_q:
628 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
629 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
633 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
634 ; CHECK-LABEL: test_mask_valign_q:
635 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
636 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
640 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)
642 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
643 ; CHECK-LABEL: test_maskz_valign_d:
644 ; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
645 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask)
649 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)
651 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
652 ; CHECK-LABEL: test_mask_store_ss
653 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
654 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
658 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
660 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
661 ; CHECK-LABEL: test_pcmpeq_d
662 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
663 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
667 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
668 ; CHECK-LABEL: test_mask_pcmpeq_d
669 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
670 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
674 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
676 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
677 ; CHECK-LABEL: test_pcmpeq_q
678 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
679 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
683 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
684 ; CHECK-LABEL: test_mask_pcmpeq_q
685 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
686 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
690 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
692 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
693 ; CHECK-LABEL: test_pcmpgt_d
694 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
695 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
699 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
700 ; CHECK-LABEL: test_mask_pcmpgt_d
701 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
702 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
706 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
708 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
709 ; CHECK-LABEL: test_pcmpgt_q
710 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
711 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
715 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
716 ; CHECK-LABEL: test_mask_pcmpgt_q
717 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
718 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
722 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
724 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
725 ; CHECK-LABEL: test_cmp_d_512
726 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
727 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
728 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
729 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
730 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
731 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
732 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
733 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
734 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
735 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
736 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
737 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
738 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
739 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
740 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
741 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
742 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
743 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
744 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
745 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
746 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
747 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
748 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
749 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
753 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
754 ; CHECK-LABEL: test_mask_cmp_d_512
755 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
756 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
757 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
758 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
759 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
760 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
761 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
762 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
763 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
764 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
765 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
766 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
767 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
768 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
769 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
770 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
771 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
772 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
773 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
774 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
775 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
776 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
777 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
778 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
782 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
784 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
785 ; CHECK-LABEL: test_ucmp_d_512
786 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
787 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
788 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
789 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
790 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
791 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
792 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
793 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
794 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
795 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
796 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
797 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
798 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
799 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
800 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
801 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
802 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
803 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
804 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
805 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
806 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
807 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
808 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
809 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
813 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
814 ; CHECK-LABEL: test_mask_ucmp_d_512
815 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
816 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
817 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
818 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
819 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
820 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
821 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
822 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
823 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
824 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
825 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
826 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
827 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
828 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
829 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
830 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
831 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
832 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
833 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
834 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
835 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
836 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
837 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
838 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
842 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
844 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
845 ; CHECK-LABEL: test_cmp_q_512
846 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
847 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
848 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
849 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
850 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
851 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
852 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
853 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
854 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
855 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
856 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
857 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
858 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
859 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
860 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
861 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
862 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
863 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
864 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
865 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
866 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
867 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
868 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
869 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
873 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
874 ; CHECK-LABEL: test_mask_cmp_q_512
875 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
876 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
877 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
878 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
879 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
880 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
881 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
882 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
883 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
884 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
885 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
886 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
887 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
888 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
889 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
890 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
891 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
892 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
893 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
894 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
895 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
896 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
897 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
898 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
902 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
904 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
905 ; CHECK-LABEL: test_ucmp_q_512
906 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
907 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
908 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
909 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
910 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
911 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
912 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
913 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
914 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
915 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
916 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
917 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
918 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
919 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
920 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
921 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
922 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
923 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
924 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
925 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
926 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
927 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
928 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
929 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
933 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
934 ; CHECK-LABEL: test_mask_ucmp_q_512
935 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
936 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
937 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
938 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
939 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
940 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
941 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
942 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
943 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
944 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
945 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
946 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
947 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
948 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
949 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
950 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
951 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
952 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
953 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
954 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
955 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
956 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
957 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
958 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
962 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
964 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
965 ; CHECK-LABEL: test_mask_vextractf32x4:
966 ; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
967 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
971 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
973 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
974 ; CHECK-LABEL: test_mask_vextracti64x4:
975 ; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
976 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
980 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
982 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
983 ; CHECK-LABEL: test_maskz_vextracti32x4:
984 ; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
985 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
989 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
991 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
992 ; CHECK-LABEL: test_vextractf64x4:
993 ; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
994 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
995 ret <4 x double> %res
998 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
1000 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
1001 ; CHECK-LABEL: test_x86_avx512_pslli_d
1003 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1007 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1008 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
1009 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
1010 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1014 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
1015 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
1016 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
1017 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1021 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1023 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
1024 ; CHECK-LABEL: test_x86_avx512_pslli_q
1026 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1030 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1031 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
1032 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
1033 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1037 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
1038 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
1039 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
1040 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1044 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1046 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1047 ; CHECK-LABEL: test_x86_avx512_psrli_d
1049 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1053 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1054 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
1055 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
1056 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1060 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1061 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
1062 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1063 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1067 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1069 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1070 ; CHECK-LABEL: test_x86_avx512_psrli_q
1072 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1076 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1077 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
1078 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
1079 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1083 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1084 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
1085 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1086 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1090 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1092 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1093 ; CHECK-LABEL: test_x86_avx512_psrai_d
1095 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1099 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1100 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
1101 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
1102 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1106 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1107 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
1108 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1109 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1113 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1115 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1116 ; CHECK-LABEL: test_x86_avx512_psrai_q
1118 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1122 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1123 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
1124 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
1125 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1129 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1130 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
1131 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1132 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1136 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1138 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
1139 ; CHECK-LABEL: test_x86_avx512_psll_d
1141 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1145 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1146 ; CHECK-LABEL: test_x86_avx512_mask_psll_d
1147 ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
1148 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1152 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1153 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
1154 ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
1155 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1159 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1161 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
1162 ; CHECK-LABEL: test_x86_avx512_psll_q
1164 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1168 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1169 ; CHECK-LABEL: test_x86_avx512_mask_psll_q
1170 ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
1171 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1175 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1176 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
1177 ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
1178 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1182 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1184 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
1185 ; CHECK-LABEL: test_x86_avx512_psrl_d
1187 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1191 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1192 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
1193 ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
1194 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1198 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1199 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
1200 ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
1201 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1205 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1207 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
1208 ; CHECK-LABEL: test_x86_avx512_psrl_q
1210 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1214 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1215 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
1216 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
1217 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1221 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1222 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
1223 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
1224 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1228 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1230 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
1231 ; CHECK-LABEL: test_x86_avx512_psra_d
1233 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1237 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1238 ; CHECK-LABEL: test_x86_avx512_mask_psra_d
1239 ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
1240 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1244 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1245 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
1246 ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
1247 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1251 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1253 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
1254 ; CHECK-LABEL: test_x86_avx512_psra_q
1256 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1260 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1261 ; CHECK-LABEL: test_x86_avx512_mask_psra_q
1262 ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
1263 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1267 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1268 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
1269 ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
1270 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1274 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1276 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
1277 ; CHECK-LABEL: test_x86_avx512_psllv_d
1279 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1283 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1284 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
1285 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
1286 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1290 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1291 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
1292 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1293 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1297 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1299 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
1300 ; CHECK-LABEL: test_x86_avx512_psllv_q
1302 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1306 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1307 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
1308 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
1309 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1313 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1314 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
1315 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1316 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1320 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1323 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
1324 ; CHECK-LABEL: test_x86_avx512_psrav_d
1326 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1330 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1331 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
1332 ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
1333 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1337 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1338 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
1339 ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
1340 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1344 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1346 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
1347 ; CHECK-LABEL: test_x86_avx512_psrav_q
1349 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1353 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1354 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
1355 ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
1356 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1360 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1361 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
1362 ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
1363 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1367 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1369 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
1370 ; CHECK-LABEL: test_x86_avx512_psrlv_d
1372 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1376 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1377 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
1378 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
1379 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1383 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1384 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
1385 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1386 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1390 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1392 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
1393 ; CHECK-LABEL: test_x86_avx512_psrlv_q
1395 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1399 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1400 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
1401 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
1402 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1406 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1407 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
1408 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1409 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1413 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1415 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
1416 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
1418 %b = load <8 x i64>, <8 x i64>* %ptr
1419 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
; Declarations for the masked FP arithmetic intrinsics with an explicit
; rounding-mode operand (the trailing i32): 0 = {rn-sae}, 1 = {rd-sae},
; 2 = {ru-sae}, 3 = {rz-sae}, as pinned by the CHECK lines below.
1423 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1424 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1425 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
; vsubps with static rounding, all-ones mask (i16 -1 => unmasked form).
; The encoding bytes differ only in the EVEX byte carrying the rounding mode
; (0x18 / 0x38 / 0x58 / 0x78 across the four tests).
1427 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
1428 ; CHECK-LABEL: test_vsubps_rn
1429 ; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
1430 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1431 <16 x float> zeroinitializer, i16 -1, i32 0)
1432 ret <16 x float> %res
1435 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
1436 ; CHECK-LABEL: test_vsubps_rd
1437 ; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
1438 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1439 <16 x float> zeroinitializer, i16 -1, i32 1)
1440 ret <16 x float> %res
1443 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
1444 ; CHECK-LABEL: test_vsubps_ru
1445 ; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
1446 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1447 <16 x float> zeroinitializer, i16 -1, i32 2)
1448 ret <16 x float> %res
1451 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
1452 ; CHECK-LABEL: test_vsubps_rz
1453 ; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
1454 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1455 <16 x float> zeroinitializer, i16 -1, i32 3)
1456 ret <16 x float> %res
; vmulps with static rounding, unmasked (i16 -1). Same rounding-immediate
; scheme as the vsubps tests: i32 0/1/2/3 -> {rn,rd,ru,rz}-sae.
1459 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
1460 ; CHECK-LABEL: test_vmulps_rn
1461 ; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
1462 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1463 <16 x float> zeroinitializer, i16 -1, i32 0)
1464 ret <16 x float> %res
1467 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
1468 ; CHECK-LABEL: test_vmulps_rd
1469 ; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
1470 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1471 <16 x float> zeroinitializer, i16 -1, i32 1)
1472 ret <16 x float> %res
1475 define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
1476 ; CHECK-LABEL: test_vmulps_ru
1477 ; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
1478 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1479 <16 x float> zeroinitializer, i16 -1, i32 2)
1480 ret <16 x float> %res
1483 define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
1484 ; CHECK-LABEL: test_vmulps_rz
1485 ; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
1486 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1487 <16 x float> zeroinitializer, i16 -1, i32 3)
1488 ret <16 x float> %res
; vmulps with static rounding AND a runtime mask; zeroinitializer passthru
; forces the zero-masking form, so the CHECK lines expect {%k1} {z}.
1492 define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1493 ; CHECK-LABEL: test_vmulps_mask_rn
1494 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
1495 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1496 <16 x float> zeroinitializer, i16 %mask, i32 0)
1497 ret <16 x float> %res
1500 define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1501 ; CHECK-LABEL: test_vmulps_mask_rd
1502 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
1503 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1504 <16 x float> zeroinitializer, i16 %mask, i32 1)
1505 ret <16 x float> %res
1508 define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1509 ; CHECK-LABEL: test_vmulps_mask_ru
1510 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
1511 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1512 <16 x float> zeroinitializer, i16 %mask, i32 2)
1513 ret <16 x float> %res
1516 define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1517 ; CHECK-LABEL: test_vmulps_mask_rz
1518 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
1519 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1520 <16 x float> zeroinitializer, i16 %mask, i32 3)
1521 ret <16 x float> %res
1524 ;; With Passthru value
; Same masked-rounding tests as above, but with a real %passthru vector:
; merge-masking is expected, so the CHECK lines require {%k1} WITHOUT {z}
; and a different ModRM byte (0xd1: result merged into %zmm2 = passthru).
1525 define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1526 ; CHECK-LABEL: test_vmulps_mask_passthru_rn
1527 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
1528 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1529 <16 x float> %passthru, i16 %mask, i32 0)
1530 ret <16 x float> %res
1533 define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1534 ; CHECK-LABEL: test_vmulps_mask_passthru_rd
1535 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
1536 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1537 <16 x float> %passthru, i16 %mask, i32 1)
1538 ret <16 x float> %res
1541 define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1542 ; CHECK-LABEL: test_vmulps_mask_passthru_ru
1543 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
1544 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1545 <16 x float> %passthru, i16 %mask, i32 2)
1546 ret <16 x float> %res
1549 define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1550 ; CHECK-LABEL: test_vmulps_mask_passthru_rz
1551 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
1552 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1553 <16 x float> %passthru, i16 %mask, i32 3)
1554 ret <16 x float> %res
; Double-precision counterparts: vmulpd with static rounding, zero-masking
; (i8 mask, 8 lanes). Encodings use the EVEX.W=1 prefix byte 0xfd.
1558 define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1559 ; CHECK-LABEL: test_vmulpd_mask_rn
1560 ; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
1561 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1562 <8 x double> zeroinitializer, i8 %mask, i32 0)
1563 ret <8 x double> %res
1566 define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1567 ; CHECK-LABEL: test_vmulpd_mask_rd
1568 ; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
1569 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1570 <8 x double> zeroinitializer, i8 %mask, i32 1)
1571 ret <8 x double> %res
1574 define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1575 ; CHECK-LABEL: test_vmulpd_mask_ru
1576 ; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
1577 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1578 <8 x double> zeroinitializer, i8 %mask, i32 2)
1579 ret <8 x double> %res
1582 define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1583 ; CHECK-LABEL: test_vmulpd_mask_rz
1584 ; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
1585 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1586 <8 x double> zeroinitializer, i8 %mask, i32 3)
1587 ret <8 x double> %res
; Masked 512-bit integer bitwise ops on 32-bit elements. Each intrinsic is
; tested unmasked (all-ones i16 -1 mask, zero passthru) and with a runtime
; mask + passthru (expects merge-masked {%k1} into the passthru register).
1590 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
1591 ;CHECK-LABEL: test_xor_epi32
1592 ;CHECK: vpxord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
1593 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1594 ret < 16 x i32> %res
1597 define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1598 ;CHECK-LABEL: test_mask_xor_epi32
1599 ;CHECK: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
1600 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1601 ret < 16 x i32> %res
1604 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1606 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
1607 ;CHECK-LABEL: test_or_epi32
1608 ;CHECK: vpord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
1609 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1610 ret < 16 x i32> %res
1613 define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1614 ;CHECK-LABEL: test_mask_or_epi32
1615 ;CHECK: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
1616 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1617 ret < 16 x i32> %res
1620 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1622 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
1623 ;CHECK-LABEL: test_and_epi32
1624 ;CHECK: vpandd {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
1625 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1626 ret < 16 x i32> %res
1629 define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1630 ;CHECK-LABEL: test_mask_and_epi32
1631 ;CHECK: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
1632 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1633 ret < 16 x i32> %res
1636 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; 64-bit element counterparts of the bitwise tests above: vpxorq/vporq/vpandq
; with an i8 mask over 8 lanes (EVEX.W=1 prefix byte 0xfd in the encodings).
1638 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
1639 ;CHECK-LABEL: test_xor_epi64
1640 ;CHECK: vpxorq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
1641 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1645 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1646 ;CHECK-LABEL: test_mask_xor_epi64
1647 ;CHECK: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
1648 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1652 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1654 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
1655 ;CHECK-LABEL: test_or_epi64
1656 ;CHECK: vporq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
1657 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1661 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1662 ;CHECK-LABEL: test_mask_or_epi64
1663 ;CHECK: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
1664 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1668 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1670 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
1671 ;CHECK-LABEL: test_and_epi64
1672 ;CHECK: vpandq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
1673 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1677 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1678 ;CHECK-LABEL: test_mask_and_epi64
1679 ;CHECK: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
1680 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1684 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; vpaddd test matrix. Suffix convention used throughout the integer tests:
;   rr   = reg/reg, unmasked          rrk  = reg/reg, merge-masked
;   rrkz = reg/reg, zero-masked       rm*  = second operand loaded from memory
;   rmb* = broadcast: a scalar load splatted to all lanes, expected to fold
;          into the {1to16} embedded-broadcast form of the instruction.
1687 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1688 ;CHECK-LABEL: test_mask_add_epi32_rr
1689 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
1690 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1691 ret < 16 x i32> %res
1694 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1695 ;CHECK-LABEL: test_mask_add_epi32_rrk
1696 ;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
1697 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1698 ret < 16 x i32> %res
1701 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1702 ;CHECK-LABEL: test_mask_add_epi32_rrkz
1703 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
1704 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1705 ret < 16 x i32> %res
1708 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1709 ;CHECK-LABEL: test_mask_add_epi32_rm
1710 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
1711 %b = load <16 x i32>, <16 x i32>* %ptr_b
1712 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1713 ret < 16 x i32> %res
1716 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1717 ;CHECK-LABEL: test_mask_add_epi32_rmk
1718 ;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
1719 %b = load <16 x i32>, <16 x i32>* %ptr_b
1720 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1721 ret < 16 x i32> %res
1724 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1725 ;CHECK-LABEL: test_mask_add_epi32_rmkz
1726 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
1727 %b = load <16 x i32>, <16 x i32>* %ptr_b
1728 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1729 ret < 16 x i32> %res
; Broadcast variants: the insertelement+shufflevector splat idiom below is
; the pattern the backend recognizes to fold the scalar load into {1to16}.
1732 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1733 ;CHECK-LABEL: test_mask_add_epi32_rmb
1734 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
1735 %q = load i32, i32* %ptr_b
1736 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1737 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1738 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1739 ret < 16 x i32> %res
1742 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1743 ;CHECK-LABEL: test_mask_add_epi32_rmbk
1744 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
1745 %q = load i32, i32* %ptr_b
1746 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1747 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1748 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1749 ret < 16 x i32> %res
1752 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1753 ;CHECK-LABEL: test_mask_add_epi32_rmbkz
1754 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
1755 %q = load i32, i32* %ptr_b
1756 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1757 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1758 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1759 ret < 16 x i32> %res
1762 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; vpsubd test matrix — identical structure to the vpaddd tests above
; (rr/rrk/rrkz/rm/rmk/rmkz/rmb/rmbk/rmbkz), opcode byte 0xfa.
1764 define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1765 ;CHECK-LABEL: test_mask_sub_epi32_rr
1766 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
1767 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1768 ret < 16 x i32> %res
1771 define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1772 ;CHECK-LABEL: test_mask_sub_epi32_rrk
1773 ;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
1774 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1775 ret < 16 x i32> %res
1778 define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1779 ;CHECK-LABEL: test_mask_sub_epi32_rrkz
1780 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
1781 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1782 ret < 16 x i32> %res
1785 define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1786 ;CHECK-LABEL: test_mask_sub_epi32_rm
1787 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
1788 %b = load <16 x i32>, <16 x i32>* %ptr_b
1789 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1790 ret < 16 x i32> %res
1793 define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1794 ;CHECK-LABEL: test_mask_sub_epi32_rmk
1795 ;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
1796 %b = load <16 x i32>, <16 x i32>* %ptr_b
1797 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1798 ret < 16 x i32> %res
1801 define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1802 ;CHECK-LABEL: test_mask_sub_epi32_rmkz
1803 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
1804 %b = load <16 x i32>, <16 x i32>* %ptr_b
1805 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1806 ret < 16 x i32> %res
1809 define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1810 ;CHECK-LABEL: test_mask_sub_epi32_rmb
1811 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
1812 %q = load i32, i32* %ptr_b
1813 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1814 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1815 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1816 ret < 16 x i32> %res
1819 define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1820 ;CHECK-LABEL: test_mask_sub_epi32_rmbk
1821 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
1822 %q = load i32, i32* %ptr_b
1823 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1824 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1825 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1826 ret < 16 x i32> %res
1829 define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1830 ;CHECK-LABEL: test_mask_sub_epi32_rmbkz
1831 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
1832 %q = load i32, i32* %ptr_b
1833 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1834 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1835 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1836 ret < 16 x i32> %res
1839 declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; vpaddq test matrix: 64-bit element add, i8 mask, broadcast form is {1to8}.
; Same rr/rrk/rrkz/rm*/rmb* suffix scheme as the 32-bit tests.
1841 define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
1842 ;CHECK-LABEL: test_mask_add_epi64_rr
1843 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
1844 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1848 define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1849 ;CHECK-LABEL: test_mask_add_epi64_rrk
1850 ;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
1851 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1855 define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1856 ;CHECK-LABEL: test_mask_add_epi64_rrkz
1857 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
1858 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1862 define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
1863 ;CHECK-LABEL: test_mask_add_epi64_rm
1864 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
1865 %b = load <8 x i64>, <8 x i64>* %ptr_b
1866 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1870 define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1871 ;CHECK-LABEL: test_mask_add_epi64_rmk
1872 ;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
1873 %b = load <8 x i64>, <8 x i64>* %ptr_b
1874 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1878 define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
1879 ;CHECK-LABEL: test_mask_add_epi64_rmkz
1880 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
1881 %b = load <8 x i64>, <8 x i64>* %ptr_b
1882 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
; Broadcast variants: scalar i64 load splatted via insertelement+shufflevector,
; expected to fold into the {1to8} embedded-broadcast memory operand.
1886 define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
1887 ;CHECK-LABEL: test_mask_add_epi64_rmb
1888 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
1889 %q = load i64, i64* %ptr_b
1890 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1891 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1892 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1896 define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1897 ;CHECK-LABEL: test_mask_add_epi64_rmbk
1898 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
1899 %q = load i64, i64* %ptr_b
1900 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1901 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1902 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1906 define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
1907 ;CHECK-LABEL: test_mask_add_epi64_rmbkz
1908 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
1909 %q = load i64, i64* %ptr_b
1910 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1911 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1912 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1916 declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; vpsubq test matrix — mirrors the vpaddq tests above, opcode byte 0xfb.
1918 define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
1919 ;CHECK-LABEL: test_mask_sub_epi64_rr
1920 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
1921 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1925 define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1926 ;CHECK-LABEL: test_mask_sub_epi64_rrk
1927 ;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
1928 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1932 define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1933 ;CHECK-LABEL: test_mask_sub_epi64_rrkz
1934 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
1935 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1939 define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
1940 ;CHECK-LABEL: test_mask_sub_epi64_rm
1941 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
1942 %b = load <8 x i64>, <8 x i64>* %ptr_b
1943 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1947 define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1948 ;CHECK-LABEL: test_mask_sub_epi64_rmk
1949 ;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
1950 %b = load <8 x i64>, <8 x i64>* %ptr_b
1951 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1955 define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
1956 ;CHECK-LABEL: test_mask_sub_epi64_rmkz
1957 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
1958 %b = load <8 x i64>, <8 x i64>* %ptr_b
1959 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1963 define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
1964 ;CHECK-LABEL: test_mask_sub_epi64_rmb
1965 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
1966 %q = load i64, i64* %ptr_b
1967 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1968 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1969 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1973 define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1974 ;CHECK-LABEL: test_mask_sub_epi64_rmbk
1975 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
1976 %q = load i64, i64* %ptr_b
1977 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1978 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1979 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1983 define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
1984 ;CHECK-LABEL: test_mask_sub_epi64_rmbkz
1985 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
1986 %q = load i64, i64* %ptr_b
1987 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1988 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1989 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1993 declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; vpmuldq tests: widening signed multiply, <16 x i32> inputs -> <8 x i64>
; result. Broadcast variants splat an i64 (a pair of i32 lanes) and bitcast
; to <16 x i32>, matching vpmuldq's {1to8} qword broadcast.
1995 define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1996 ;CHECK-LABEL: test_mask_mul_epi32_rr
1997 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
1998 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2002 define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
2003 ;CHECK-LABEL: test_mask_mul_epi32_rrk
2004 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
2005 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2009 define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
2010 ;CHECK-LABEL: test_mask_mul_epi32_rrkz
2011 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
2012 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2016 define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2017 ;CHECK-LABEL: test_mask_mul_epi32_rm
2018 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
2019 %b = load <16 x i32>, <16 x i32>* %ptr_b
2020 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2024 define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2025 ;CHECK-LABEL: test_mask_mul_epi32_rmk
2026 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
2027 %b = load <16 x i32>, <16 x i32>* %ptr_b
2028 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2032 define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
2033 ;CHECK-LABEL: test_mask_mul_epi32_rmkz
2034 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
2035 %b = load <16 x i32>, <16 x i32>* %ptr_b
2036 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2040 define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
2041 ;CHECK-LABEL: test_mask_mul_epi32_rmb
2042 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
2043 %q = load i64, i64* %ptr_b
2044 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2045 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2046 %b = bitcast <8 x i64> %b64 to <16 x i32>
2047 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2051 define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2052 ;CHECK-LABEL: test_mask_mul_epi32_rmbk
2053 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
2054 %q = load i64, i64* %ptr_b
2055 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2056 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2057 %b = bitcast <8 x i64> %b64 to <16 x i32>
2058 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2062 define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
2063 ;CHECK-LABEL: test_mask_mul_epi32_rmbkz
2064 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
2065 %q = load i64, i64* %ptr_b
2066 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2067 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2068 %b = bitcast <8 x i64> %b64 to <16 x i32>
2069 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2073 declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; ==== llvm.x86.avx512.mask.pmulu.dq.512 (vpmuludq): <16 x i32> x <16 x i32> -> <8 x i64> ====
; Same matrix as the pmul.dq tests above it in the original file:
; rr = register operands, rm = full-vector memory operand, rmb = 64-bit scalar
; broadcast ("{1to8}"); k = merge-mask into %passThru, kz = zero-mask,
; no suffix = mask -1 (unmasked). CHECKs pin the EVEX encodings.
2075 define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
2076 ;CHECK-LABEL: test_mask_mul_epu32_rr
2077 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
2078 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2082 define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
2083 ;CHECK-LABEL: test_mask_mul_epu32_rrk
2084 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
2085 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2089 define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
2090 ;CHECK-LABEL: test_mask_mul_epu32_rrkz
2091 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
2092 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2096 define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2097 ;CHECK-LABEL: test_mask_mul_epu32_rm
2098 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
2099 %b = load <16 x i32>, <16 x i32>* %ptr_b
2100 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2104 define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2105 ;CHECK-LABEL: test_mask_mul_epu32_rmk
2106 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
2107 %b = load <16 x i32>, <16 x i32>* %ptr_b
2108 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2112 define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
2113 ;CHECK-LABEL: test_mask_mul_epu32_rmkz
2114 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
2115 %b = load <16 x i32>, <16 x i32>* %ptr_b
2116 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2120 define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
2121 ;CHECK-LABEL: test_mask_mul_epu32_rmb
2122 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
2123 %q = load i64, i64* %ptr_b
2124 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2125 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2126 %b = bitcast <8 x i64> %b64 to <16 x i32>
2127 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2131 define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2132 ;CHECK-LABEL: test_mask_mul_epu32_rmbk
2133 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
2134 %q = load i64, i64* %ptr_b
2135 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2136 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2137 %b = bitcast <8 x i64> %b64 to <16 x i32>
2138 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2142 define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
2143 ;CHECK-LABEL: test_mask_mul_epu32_rmbkz
2144 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
2145 %q = load i64, i64* %ptr_b
2146 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2147 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2148 %b = bitcast <8 x i64> %b64 to <16 x i32>
2149 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2153 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; ==== llvm.x86.avx512.mask.pmull.d.512 (vpmulld): <16 x i32> x <16 x i32> -> <16 x i32> ====
; Same rr/rm/rmb x {unmasked, k, kz} matrix as the vpmuldq/vpmuludq tests, but
; the broadcast operand here is a 32-bit scalar ("{1to16}") and the mask is i16
; (one bit per dword lane). CHECKs pin the EVEX encodings.
; NOTE(review): "ret < 16 x i32>" (extra space) is valid IR, left as-is.
2155 define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
2156 ;CHECK-LABEL: test_mask_mullo_epi32_rr_512
2157 ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
2158 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2162 define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2163 ;CHECK-LABEL: test_mask_mullo_epi32_rrk_512
2164 ;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
2165 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2166 ret < 16 x i32> %res
2169 define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2170 ;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512
2171 ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
2172 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2173 ret < 16 x i32> %res
2176 define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
2177 ;CHECK-LABEL: test_mask_mullo_epi32_rm_512
2178 ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
2179 %b = load <16 x i32>, <16 x i32>* %ptr_b
2180 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2181 ret < 16 x i32> %res
2184 define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2185 ;CHECK-LABEL: test_mask_mullo_epi32_rmk_512
2186 ;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
2187 %b = load <16 x i32>, <16 x i32>* %ptr_b
2188 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2189 ret < 16 x i32> %res
2192 define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2193 ;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512
2194 ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
2195 %b = load <16 x i32>, <16 x i32>* %ptr_b
2196 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2197 ret < 16 x i32> %res
2200 define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
2201 ;CHECK-LABEL: test_mask_mullo_epi32_rmb_512
2202 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
2203 %q = load i32, i32* %ptr_b
2204 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2205 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2206 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2207 ret < 16 x i32> %res
2210 define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2211 ;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512
2212 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
2213 %q = load i32, i32* %ptr_b
2214 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2215 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2216 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2217 ret < 16 x i32> %res
2220 define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2221 ;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512
2222 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
2223 %q = load i32, i32* %ptr_b
2224 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2225 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2226 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2227 ret < 16 x i32> %res
2230 declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; ==== llvm.x86.avx512.mask.add.ps.512: embedded-rounding tests (vaddps) ====
; The trailing i32 argument selects the rounding mode, mirrored in the asm:
; 0 = {rn-sae}, 1 = {rd-sae}, 2 = {ru-sae}, 3 = {rz-sae},
; 4 = current rounding mode (no static-rounding decoration emitted).
; "maskz" tests pass %mask with a zeroinitializer passthru -> "{%k1} {z}".
2232 define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2233 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae
2234 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2235 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2236 ret <16 x float> %res
; Zero-masked vaddps with {rd-sae}. The CHECK previously omitted the
; "{%k1} {z}" decorations that every sibling maskz test (rn/ru/rz/current)
; verifies; since %mask is passed to the intrinsic with a zeroinitializer
; passthru, the zero-masked form is what is emitted, and FileCheck matches
; substrings of a line, so the tightened pattern anchors on the same output.
2238 define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2239 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae
2240 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2241 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
2242 ret <16 x float> %res
; llvm.x86.avx512.mask.add.ps.512 rounding tests, continued:
; "maskz" = zero-masking ("{%k1} {z}"), "mask" = merge-masking into %src
; ("{%k1}" writing %zmm2), and the plain tests pass i16 -1 (unmasked; their
; %mask parameter is intentionally unused). i32 0/1/2/3 select
; {rn-sae}/{rd-sae}/{ru-sae}/{rz-sae}; i32 4 = current rounding mode.
2244 define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2245 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae
2246 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2247 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2248 ret <16 x float> %res
2251 define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2252 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae
2253 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2254 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2255 ret <16 x float> %res
2259 define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2260 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_current
2261 ;CHECK: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
2262 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2263 ret <16 x float> %res
2266 define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2267 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae
2268 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2269 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2270 ret <16 x float> %res
2272 define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2273 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae
2274 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2275 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2276 ret <16 x float> %res
2278 define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2279 ;CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae
2280 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2281 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2282 ret <16 x float> %res
2285 define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2286 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae
2287 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2288 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2289 ret <16 x float> %res
2293 define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2294 ;CHECK-LABEL: test_mm512_mask_add_round_ps_current
2295 ;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1}
2296 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2297 ret <16 x float> %res
2301 define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2302 ;CHECK-LABEL: test_mm512_add_round_ps_rn_sae
2303 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
2304 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2305 ret <16 x float> %res
2307 define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2308 ;CHECK-LABEL: test_mm512_add_round_ps_rd_sae
2309 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
2310 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2311 ret <16 x float> %res
2313 define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2314 ;CHECK-LABEL: test_mm512_add_round_ps_ru_sae
2315 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
2316 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2317 ret <16 x float> %res
2320 define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2321 ;CHECK-LABEL: test_mm512_add_round_ps_rz_sae
2322 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
2323 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2324 ret <16 x float> %res
2327 define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2328 ;CHECK-LABEL: test_mm512_add_round_ps_current
2329 ;CHECK: vaddps %zmm1, %zmm0, %zmm0
2330 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2331 ret <16 x float> %res
2333 declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ==== llvm.x86.avx512.mask.sub.ps.512: embedded-rounding tests (vsubps) ====
; "mask" tests merge into %src ("{%k1}", result in %zmm2); the plain tests
; pass i16 -1 (unmasked; their %mask parameter is intentionally unused).
; i32 0/1/2/3 select {rn-sae}/{rd-sae}/{ru-sae}/{rz-sae}; i32 4 = current mode.
2335 define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2336 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae
2337 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2338 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2339 ret <16 x float> %res
2341 define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2342 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae
2343 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2344 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2345 ret <16 x float> %res
2347 define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2348 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae
2349 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2350 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2351 ret <16 x float> %res
2354 define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2355 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae
2356 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2357 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2358 ret <16 x float> %res
2362 define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2363 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_current
2364 ;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1}
2365 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2366 ret <16 x float> %res
2369 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2370 ;CHECK-LABEL: test_mm512_sub_round_ps_rn_sae
2371 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
2372 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2373 ret <16 x float> %res
2375 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2376 ;CHECK-LABEL: test_mm512_sub_round_ps_rd_sae
2377 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
2378 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2379 ret <16 x float> %res
2381 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2382 ;CHECK-LABEL: test_mm512_sub_round_ps_ru_sae
2383 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
2384 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2385 ret <16 x float> %res
2388 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2389 ;CHECK-LABEL: test_mm512_sub_round_ps_rz_sae
2390 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
2391 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2392 ret <16 x float> %res
2395 define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2396 ;CHECK-LABEL: test_mm512_sub_round_ps_current
2397 ;CHECK: vsubps %zmm1, %zmm0, %zmm0
2398 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2399 ret <16 x float> %res
; ==== llvm.x86.avx512.mask.div.ps.512: embedded-rounding tests (vdivps) ====
; i32 0/1/2/3 select {rn-sae}/{rd-sae}/{ru-sae}/{rz-sae}; i32 4 = current mode.
; maskz: %mask with zeroinitializer passthru -> "{%k1} {z}" in the asm.
2402 define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2403 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae
2404 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2405 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2406 ret <16 x float> %res
; Zero-masked vdivps with {rd-sae}. The CHECK previously omitted the
; "{%k1} {z}" decorations that every sibling maskz test (rn/ru/rz/current)
; verifies; since %mask is passed to the intrinsic with a zeroinitializer
; passthru, the zero-masked form is what is emitted, and FileCheck matches
; substrings of a line, so the tightened pattern anchors on the same output.
2408 define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2409 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae
2410 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2411 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
2412 ret <16 x float> %res
; llvm.x86.avx512.mask.div.ps.512 rounding tests, continued:
; "maskz" = zero-masking ("{%k1} {z}"), "mask" = merge into %src ("{%k1}",
; result in %zmm2), plain tests pass i16 -1 (unmasked; %mask is intentionally
; unused there). i32 0..3 = {rn,rd,ru,rz}-sae, i32 4 = current rounding mode.
2414 define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2415 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae
2416 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2417 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2418 ret <16 x float> %res
2421 define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2422 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae
2423 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2424 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2425 ret <16 x float> %res
2429 define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2430 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_current
2431 ;CHECK: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
2432 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2433 ret <16 x float> %res
2436 define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2437 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae
2438 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2439 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2440 ret <16 x float> %res
2442 define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2443 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae
2444 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2445 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2446 ret <16 x float> %res
2448 define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2449 ;CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae
2450 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2451 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2452 ret <16 x float> %res
2455 define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2456 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae
2457 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2458 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2459 ret <16 x float> %res
2463 define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2464 ;CHECK-LABEL: test_mm512_mask_div_round_ps_current
2465 ;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1}
2466 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2467 ret <16 x float> %res
2471 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2472 ;CHECK-LABEL: test_mm512_div_round_ps_rn_sae
2473 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
2474 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2475 ret <16 x float> %res
2477 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2478 ;CHECK-LABEL: test_mm512_div_round_ps_rd_sae
2479 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
2480 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2481 ret <16 x float> %res
2483 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2484 ;CHECK-LABEL: test_mm512_div_round_ps_ru_sae
2485 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
2486 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2487 ret <16 x float> %res
2490 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2491 ;CHECK-LABEL: test_mm512_div_round_ps_rz_sae
2492 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
2493 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2494 ret <16 x float> %res
2497 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2498 ;CHECK-LABEL: test_mm512_div_round_ps_current
2499 ;CHECK: vdivps %zmm1, %zmm0, %zmm0
2500 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2501 ret <16 x float> %res
2503 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ==== llvm.x86.avx512.mask.min.ps.512 (vminps) ====
; min/max take no rounding mode; the i32 argument only selects exception
; suppression: i32 8 -> "{sae}", i32 4 -> current (no decoration).
; maskz = "{%k1} {z}", mask = merge into %src ("{%k1}"), plain = i16 -1.
2505 define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2506 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_sae
2507 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2508 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2509 ret <16 x float> %res
2512 define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2513 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_current
2514 ;CHECK: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
2515 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2516 ret <16 x float> %res
2519 define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2520 ;CHECK-LABEL: test_mm512_mask_min_round_ps_sae
2521 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2522 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2523 ret <16 x float> %res
2526 define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2527 ;CHECK-LABEL: test_mm512_mask_min_round_ps_current
2528 ;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1}
2529 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2530 ret <16 x float> %res
2533 define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2534 ;CHECK-LABEL: test_mm512_min_round_ps_sae
2535 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0
2536 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2537 ret <16 x float> %res
2540 define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2541 ;CHECK-LABEL: test_mm512_min_round_ps_current
2542 ;CHECK: vminps %zmm1, %zmm0, %zmm0
2543 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2544 ret <16 x float> %res
2546 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ==== llvm.x86.avx512.mask.max.ps.512 (vmaxps) ====
; Mirror of the vminps tests: i32 8 -> "{sae}", i32 4 -> current (no
; decoration); maskz = "{%k1} {z}", mask = merge into %src, plain = i16 -1.
2548 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2549 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_sae
2550 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2551 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2552 ret <16 x float> %res
2555 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2556 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_current
2557 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
2558 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2559 ret <16 x float> %res
2562 define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2563 ;CHECK-LABEL: test_mm512_mask_max_round_ps_sae
2564 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2565 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2566 ret <16 x float> %res
2569 define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2570 ;CHECK-LABEL: test_mm512_mask_max_round_ps_current
2571 ;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
2572 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2573 ret <16 x float> %res
2576 define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2577 ;CHECK-LABEL: test_mm512_max_round_ps_sae
2578 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0
2579 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2580 ret <16 x float> %res
2583 define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2584 ;CHECK-LABEL: test_mm512_max_round_ps_current
2585 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0
2586 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2587 ret <16 x float> %res
2589 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ==== llvm.x86.avx512.mask.add.ss.round (vaddss): scalar single-precision ====
; Scalar variant of the rounding tests: i32 0..3 -> {rn,rd,ru,rz}-sae,
; i32 4 -> current mode. Masked tests merge into %a2 ("{%k1}", %xmm2);
; maskz uses zeroinitializer ("{%k1} {z}"); i8 -1 = unmasked.
2591 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2593 define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2594 ; CHECK-LABEL: test_mask_add_ss_rn
2595 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2596 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
2597 ret <4 x float> %res
2600 define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2601 ; CHECK-LABEL: test_mask_add_ss_rd
2602 ; CHECK: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2603 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
2604 ret <4 x float> %res
2607 define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2608 ; CHECK-LABEL: test_mask_add_ss_ru
2609 ; CHECK: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2610 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
2611 ret <4 x float> %res
2614 define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2615 ; CHECK-LABEL: test_mask_add_ss_rz
2616 ; CHECK: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2617 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
2618 ret <4 x float> %res
2621 define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2622 ; CHECK-LABEL: test_mask_add_ss_current
2623 ; CHECK: vaddss %xmm1, %xmm0, %xmm2 {%k1}
2624 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2625 ret <4 x float> %res
2628 define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2629 ; CHECK-LABEL: test_maskz_add_ss_rn
2630 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2631 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
2632 ret <4 x float> %res
2635 define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
2636 ; CHECK-LABEL: test_add_ss_rn
2637 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
2638 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
2639 ret <4 x float> %res
; vaddsd embedded-rounding tests: scalar-double counterpart of the vaddss
; tests. The trailing i32 immediate selects the rounding mode
; (0 = rn-sae, 1 = rd-sae, 2 = ru-sae, 3 = rz-sae, 4 = CURRENT/MXCSR).
2642 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
; Round-to-nearest (rn-sae, imm 0), merge-masked into %a2.
2644 define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2645 ; CHECK-LABEL: test_mask_add_sd_rn
2646 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2647 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
2648 ret <2 x double> %res
; Round-down (rd-sae, imm 1), merge-masked.
2651 define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2652 ; CHECK-LABEL: test_mask_add_sd_rd
2653 ; CHECK: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2654 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
2655 ret <2 x double> %res
; Round-up (ru-sae, imm 2), merge-masked.
2658 define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2659 ; CHECK-LABEL: test_mask_add_sd_ru
2660 ; CHECK: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2661 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
2662 ret <2 x double> %res
; Round-toward-zero (rz-sae, imm 3), merge-masked.
2665 define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2666 ; CHECK-LABEL: test_mask_add_sd_rz
2667 ; CHECK: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2668 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
2669 ret <2 x double> %res
; CURRENT rounding (imm 4): no {..-sae} suffix in the asm.
2672 define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2673 ; CHECK-LABEL: test_mask_add_sd_current
2674 ; CHECK: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
2675 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2676 ret <2 x double> %res
; Zero-masking form: zeroinitializer passthru selects {%k1} {z}.
2679 define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2680 ; CHECK-LABEL: test_maskz_add_sd_rn
2681 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2682 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
2683 ret <2 x double> %res
; All-ones mask (i8 -1): unmasked instruction expected.
2686 define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
2687 ; CHECK-LABEL: test_add_sd_rn
2688 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
2689 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
2690 ret <2 x double> %res
; vmaxss tests: imm 8 requests {sae} (suppress-all-exceptions, no rounding
; override) and imm 4 requests CURRENT (no suffix). Masked, zero-masked and
; unmasked (i8 -1) forms are each checked.
2693 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
; {sae} + merge-masking into %a2.
2695 define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2696 ; CHECK-LABEL: test_mask_max_ss_sae
2697 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2698 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
2699 ret <4 x float> %res
; {sae} + zero-masking (zeroinitializer passthru -> {%k1} {z}).
2702 define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2703 ; CHECK-LABEL: test_maskz_max_ss_sae
2704 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2705 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
2706 ret <4 x float> %res
; {sae} + all-ones mask: unmasked instruction.
2709 define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
2710 ; CHECK-LABEL: test_max_ss_sae
2711 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0
2712 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
2713 ret <4 x float> %res
; CURRENT (imm 4) + merge-masking: no {sae} in the asm.
2716 define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2717 ; CHECK-LABEL: test_mask_max_ss
2718 ; CHECK: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
2719 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2720 ret <4 x float> %res
; CURRENT + zero-masking.
2723 define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2724 ; CHECK-LABEL: test_maskz_max_ss
2725 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
2726 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
2727 ret <4 x float> %res
; CURRENT + all-ones mask: plain vmaxss.
2730 define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
2731 ; CHECK-LABEL: test_max_ss
2732 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0
2733 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
2734 ret <4 x float> %res
; vmaxsd tests: scalar-double counterpart of the vmaxss tests. imm 8 = {sae},
; imm 4 = CURRENT; masked, zero-masked and unmasked variants.
2736 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
; {sae} + merge-masking into %a2.
2738 define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2739 ; CHECK-LABEL: test_mask_max_sd_sae
2740 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2741 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
2742 ret <2 x double> %res
; {sae} + zero-masking.
2745 define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2746 ; CHECK-LABEL: test_maskz_max_sd_sae
2747 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2748 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
2749 ret <2 x double> %res
; {sae} + all-ones mask: unmasked instruction.
2752 define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
2753 ; CHECK-LABEL: test_max_sd_sae
2754 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
2755 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
2756 ret <2 x double> %res
; CURRENT (imm 4) + merge-masking: no {sae} suffix.
2759 define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2760 ; CHECK-LABEL: test_mask_max_sd
2761 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
2762 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2763 ret <2 x double> %res
; CURRENT + zero-masking.
2766 define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2767 ; CHECK-LABEL: test_maskz_max_sd
2768 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
2769 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
2770 ret <2 x double> %res
; CURRENT + all-ones mask: plain vmaxsd.
2773 define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
2774 ; CHECK-LABEL: test_max_sd
2775 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0
2776 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
2777 ret <2 x double> %res
; Signed int -> scalar float/double conversions with static round-toward-zero
; (imm 3 -> {rz-sae}). Each test checks the l/q mnemonic suffix matching the
; 32/64-bit integer source.
2780 define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
2781 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
2783 ; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
2785 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<2 x double>> [#uses=1]
2786 ret <2 x double> %res
2788 declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone
; 64-bit signed source: expect the q-suffixed form reading %rdi.
2790 define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
2791 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
2793 ; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
2795 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<2 x double>> [#uses=1]
2796 ret <2 x double> %res
2798 declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
; 32-bit signed -> single precision.
2800 define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
2801 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
2803 ; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
2805 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<4 x float>> [#uses=1]
2806 ret <4 x float> %res
2808 declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
; 64-bit signed -> single precision.
2810 define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
2811 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
2813 ; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
2815 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<4 x float>> [#uses=1]
2816 ret <4 x float> %res
2818 declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
; Unsigned int -> scalar single conversions (vcvtusi2ss). imm 1 selects
; {rd-sae}; imm 4 selects CURRENT (no suffix). The *_mem variants also check
; how a loaded operand is handled (register reload vs. folded memory operand).
2820 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
2821 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
2823 ; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
2826 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<4 x float>> [#uses=1]
2827 ret <4 x float> %res
; With rounding, the memory operand cannot be folded: expect an explicit
; load into %eax first.
2830 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
2831 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
2833 ; CHECK-NEXT: movl (%rdi), %eax
2834 ; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
2837 %b = load i32, i32* %ptr
2838 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<4 x float>> [#uses=1]
2839 ret <4 x float> %res
; CURRENT rounding (imm 4): plain register form.
2842 define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
2843 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
2845 ; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
2848 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<4 x float>> [#uses=1]
2849 ret <4 x float> %res
; Without rounding, the load folds directly into the conversion.
2852 define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
2853 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
2855 ; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
2858 %b = load i32, i32* %ptr
2859 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<4 x float>> [#uses=1]
2860 ret <4 x float> %res
2862 declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
; 64-bit unsigned source with {rd-sae}.
2864 define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
2865 ; CHECK-LABEL: _mm_cvt_roundu64_ss:
2867 ; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
2870 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<4 x float>> [#uses=1]
2871 ret <4 x float> %res
; 64-bit unsigned source, CURRENT rounding.
2874 define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
2875 ; CHECK-LABEL: _mm_cvtu64_ss:
2877 ; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
2880 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<4 x float>> [#uses=1]
2881 ret <4 x float> %res
2883 declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone
; Unsigned int -> scalar double conversions (vcvtusi2sd). The 32-bit
; intrinsic takes no rounding operand; the 64-bit one does.
2885 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
2886 ; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
2888 ; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
2891 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<2 x double>> [#uses=1]
2892 ret <2 x double> %res
2894 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
; NOTE(review): the next two test names look swapped — "cvtu64_sd" passes the
; rd-sae immediate (imm 1) while "cvt_roundu64_sd" passes CURRENT (imm 4).
; The CHECK lines are internally consistent with the immediates, so the
; codegen checks are valid; only the naming pairing should be confirmed.
2896 define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
2897 ; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
2899 ; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
2902 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<2 x double>> [#uses=1]
2903 ret <2 x double> %res
; 64-bit unsigned source, CURRENT rounding (imm 4): no {..-sae} suffix.
2906 define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
2907 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
2909 ; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
2912 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<2 x double>> [#uses=1]
2913 ret <2 x double> %res
2915 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
; Encoding tests: unmasked (i8/i16 -1, zeroinitializer passthru) integer
; min/max intrinsics; each CHECK pins the exact EVEX encoding bytes emitted
; by --show-mc-encoding.
2917 define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
2918 ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
2919 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
2920 <8 x i64>zeroinitializer, i8 -1)
2923 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; Unsigned dword minimum, unmasked.
2925 define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
2926 ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
2927 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
2928 <16 x i32>zeroinitializer, i16 -1)
2931 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Signed dword maximum, unmasked.
2933 define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
2934 ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
2935 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
2936 <16 x i32>zeroinitializer, i16 -1)
2939 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Masked min/max pattern: each test calls the intrinsic twice — once with a
; live mask %x3 and once with all-ones (-1) — and adds the results, so both
; the masked and unmasked instruction selections are exercised in one body.
2941 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_512
2943 ; CHECK: vpmaxsd %zmm
2945 define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2946 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2947 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2948 %res2 = add <16 x i32> %res, %res1
2949 ret <16 x i32> %res2
; Signed qword maximum, masked + unmasked.
2952 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_512
2954 ; CHECK: vpmaxsq %zmm
2956 define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2957 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2958 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2959 %res2 = add <8 x i64> %res, %res1
2963 declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Unsigned dword maximum, masked + unmasked.
2965 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_512
2967 ; CHECK: vpmaxud %zmm
2969 define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2970 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2971 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2972 %res2 = add <16 x i32> %res, %res1
2973 ret <16 x i32> %res2
; Masked min/max tests, continued: unsigned qword max, then signed/unsigned
; minimums. Same masked + all-ones double-call pattern as above each define.
2976 declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; Unsigned qword maximum, masked + unmasked.
2978 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_512
2980 ; CHECK: vpmaxuq %zmm
2982 define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2983 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2984 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2985 %res2 = add <8 x i64> %res, %res1
2989 declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Signed dword minimum, masked + unmasked.
2991 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_512
2993 ; CHECK: vpminsd %zmm
2995 define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2996 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2997 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2998 %res2 = add <16 x i32> %res, %res1
2999 ret <16 x i32> %res2
3002 declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; Signed qword minimum, masked + unmasked.
3004 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_512
3006 ; CHECK: vpminsq %zmm
3008 define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3009 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3010 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3011 %res2 = add <8 x i64> %res, %res1
; Unsigned dword minimum, masked + unmasked (declare is earlier in the file).
3015 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_512
3017 ; CHECK: vpminud %zmm
3019 define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3020 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3021 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3022 %res2 = add <16 x i32> %res, %res1
3023 ret <16 x i32> %res2
3026 declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3028 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_512
3030 ; CHECK: vpminuq %zmm
3032 define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3033 %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3034 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3035 %res2 = add <8 x i64> %res, %res1