1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
3 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
4 ; CHECK-LABEL: test_kortestz
7 define i32 @test_kortestz(i16 %a0, i16 %a1) {
8 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
12 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
13 ; CHECK-LABEL: test_kortestc
16 define i32 @test_kortestc(i16 %a0, i16 %a1) {
17 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
21 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
22 ; CHECK-LABEL: test_kand
25 define i16 @test_kand(i16 %a0, i16 %a1) {
26 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
27 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
31 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
32 ; CHECK-LABEL: test_knot
34 define i16 @test_knot(i16 %a0) {
35 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
39 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
41 ; CHECK-LABEL: unpckbw_test
44 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
45 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
49 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
50 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
51 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
54 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
56 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
57 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
58 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
61 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
63 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
65 define <8 x double> @test7(<8 x double> %a) {
66 ; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
67 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
71 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
73 define <16 x float> @test8(<16 x float> %a) {
74 ; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
75 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
79 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
80 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
81 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
84 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
86 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
87 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
88 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
91 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
93 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
94 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
95 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
98 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
100 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
101 ; CHECK-LABEL: test_sqrt_pd_512
103 %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
104 ret <8 x double> %res
106 declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
108 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
109 ; CHECK-LABEL: test_sqrt_ps_512
111 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
112 ret <16 x float> %res
114 define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
115 ; CHECK-LABEL: test_sqrt_round_ps_512
116 ; CHECK: vsqrtps {rz-sae}
117 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
118 ret <16 x float> %res
120 declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
122 define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
123 ; CHECK-LABEL: test_getexp_pd_512
125 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
126 ret <8 x double> %res
128 define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
129 ; CHECK-LABEL: test_getexp_round_pd_512
130 ; CHECK: vgetexppd {sae}
131 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
132 ret <8 x double> %res
134 declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
136 define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
137 ; CHECK-LABEL: test_getexp_ps_512
139 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
140 ret <16 x float> %res
143 define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
144 ; CHECK-LABEL: test_getexp_round_ps_512
145 ; CHECK: vgetexpps {sae}
146 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
147 ret <16 x float> %res
149 declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
151 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
152 ; CHECK: vsqrtss {{.*}}encoding: [0x62
153 %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
156 declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone
158 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
159 ; CHECK: vsqrtsd {{.*}}encoding: [0x62
160 %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
161 ret <2 x double> %res
163 declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
165 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
166 ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
167 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
170 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
172 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
173 ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
174 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
175 ret <2 x double> %res
177 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
179 define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
180 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
181 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
184 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
187 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
188 ; CHECK: vcvtss2si {{.*}}encoding: [0x62
189 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
192 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
195 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
196 ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
197 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
200 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
203 define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
204 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
205 %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
208 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
210 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
211 ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
212 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
215 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
217 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
218 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
219 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
220 ret <16 x float> %res
222 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
225 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
226 ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
227 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
231 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
233 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
234 ; CHECK: vbroadcastss
235 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
236 ret <16 x float> %res
238 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
240 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
241 ; CHECK: vbroadcastsd
242 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
243 ret <8 x double> %res
245 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
247 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
248 ; CHECK: vbroadcastss
249 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
250 ret <16 x float> %res
252 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
254 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
255 ; CHECK: vbroadcastsd
256 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
257 ret <8 x double> %res
259 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
261 define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
262 ; CHECK: vpbroadcastd
263 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
266 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
268 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
269 ; CHECK: vpbroadcastd
270 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
273 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
275 define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
276 ; CHECK: vpbroadcastq
277 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
280 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
282 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
283 ; CHECK: vpbroadcastq
284 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
287 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
289 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
290 ; CHECK: movw $-1, %ax
293 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
297 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
299 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
300 ; CHECK: movb $-1, %al
303 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
307 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
309 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
311 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
315 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
317 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
321 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
322 ; CHECK: movw $-1, %ax
325 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
329 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
331 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
332 ; CHECK: movb $-1, %al
335 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
339 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
342 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
344 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
348 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
350 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
354 define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
355 ; CHECK-LABEL: test_ctlz_d
357 %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
361 declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly
363 define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
364 ; CHECK-LABEL: test_ctlz_q
366 %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
370 declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
372 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
373 ; CHECK: vblendmps %zmm1, %zmm0
374 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
375 ret <16 x float> %res
378 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
380 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
381 ; CHECK: vblendmpd %zmm1, %zmm0
382 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
383 ret <8 x double> %res
386 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
387 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
388 ; CHECK: vblendmpd (%
389 %b = load <8 x double>, <8 x double>* %ptr
390 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
391 ret <8 x double> %res
393 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
395 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
397 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
400 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
402 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
404 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
407 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
409 define <8 x i32> @test_cvtpd2udq(<8 x double> %a) {
410 ;CHECK: vcvtpd2udq {ru-sae}{{.*}}encoding: [0x62,0xf1,0xfc,0x58,0x79,0xc0]
411 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %a, <8 x i32>zeroinitializer, i8 -1, i32 2)
414 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
416 define <16 x i32> @test_cvtps2udq(<16 x float> %a) {
417 ;CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0]
418 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32>zeroinitializer, i16 -1, i32 1)
421 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
423 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
424 ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
425 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
428 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
430 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
431 ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
432 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
435 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
438 define <16 x float> @test_cvtdq2ps(<16 x i32> %a) {
439 ;CHECK: vcvtdq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x5b,0xc0]
440 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
443 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
445 define <16 x float> @test_cvtudq2ps(<16 x i32> %a) {
446 ;CHECK: vcvtudq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7f,0x38,0x7a,0xc0]
447 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
450 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
452 define <8 x double> @test_cvtdq2pd(<8 x i32> %a) {
453 ;CHECK: vcvtdq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0]
454 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
457 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
459 define <8 x double> @test_cvtudq2pd(<8 x i32> %a) {
460 ;CHECK: vcvtudq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0]
461 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
464 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
467 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
469 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
470 <8 x double>zeroinitializer, i8 -1, i32 4)
471 ret <8 x double> %res
473 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
474 <8 x double>, i8, i32)
476 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
478 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
479 <8 x double>zeroinitializer, i8 -1, i32 4)
480 ret <8 x double> %res
482 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
483 <8 x double>, i8, i32)
485 define <8 x float> @test_cvtpd2ps(<8 x double> %a) {
486 ;CHECK: vcvtpd2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0xfd,0x38,0x5a,0xc0]
487 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %a, <8 x float>zeroinitializer, i8 -1, i32 1)
490 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
492 define <16 x i32> @test_pabsd(<16 x i32> %a) {
493 ;CHECK: vpabsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
494 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a, <16 x i32>zeroinitializer, i16 -1)
497 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
499 define <8 x i64> @test_pabsq(<8 x i64> %a) {
500 ;CHECK: vpabsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
501 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a, <8 x i64>zeroinitializer, i8 -1)
504 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
506 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
507 ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
508 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
511 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
513 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
514 ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
515 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
518 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
520 define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
521 ; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
522 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
526 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
528 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
529 ; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
530 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
534 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
536 define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
537 ; CHECK-LABEL: test_mask_store_aligned_ps:
539 ; CHECK-NEXT: kmovw %esi, %k1
540 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
542 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
546 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
548 define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
549 ; CHECK-LABEL: test_mask_store_aligned_pd:
551 ; CHECK-NEXT: kmovw %esi, %k1
552 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
554 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
558 declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
560 define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
561 ; CHECK-LABEL: test_maskz_load_aligned_ps:
563 ; CHECK-NEXT: kmovw %esi, %k1
564 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
566 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
567 ret <16 x float> %res
570 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
572 define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
573 ; CHECK-LABEL: test_maskz_load_aligned_pd:
575 ; CHECK-NEXT: kmovw %esi, %k1
576 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
578 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
579 ret <8 x double> %res
582 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
584 define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
585 ; CHECK-LABEL: test_load_aligned_ps:
587 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
589 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
590 ret <16 x float> %res
593 define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
594 ; CHECK-LABEL: test_load_aligned_pd:
596 ; CHECK-NEXT: vmovapd (%rdi), %zmm0
598 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
599 ret <8 x double> %res
602 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
604 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
605 ; CHECK-LABEL: test_valign_q:
606 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
607 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
611 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
612 ; CHECK-LABEL: test_mask_valign_q:
613 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
614 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
618 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)
620 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
621 ; CHECK-LABEL: test_maskz_valign_d:
622 ; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
623 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask)
627 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)
629 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
630 ; CHECK-LABEL: test_mask_store_ss
631 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
632 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
636 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
638 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
639 ; CHECK-LABEL: test_pcmpeq_d
640 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
641 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
645 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
646 ; CHECK-LABEL: test_mask_pcmpeq_d
647 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
648 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
652 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
654 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
655 ; CHECK-LABEL: test_pcmpeq_q
656 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
657 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
661 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
662 ; CHECK-LABEL: test_mask_pcmpeq_q
663 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
664 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
668 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
670 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
671 ; CHECK-LABEL: test_pcmpgt_d
672 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
673 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
677 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
678 ; CHECK-LABEL: test_mask_pcmpgt_d
679 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
680 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
684 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
686 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
687 ; CHECK-LABEL: test_pcmpgt_q
688 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
689 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
693 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
694 ; CHECK-LABEL: test_mask_pcmpgt_q
695 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
696 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
700 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
702 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
703 ; CHECK-LABEL: test_cmp_d_512
704 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
705 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
706 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
707 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
708 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
709 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
710 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
711 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
712 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
713 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
714 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
715 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
716 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
717 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
718 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
719 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
720 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
721 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
722 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
723 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
724 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
725 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
726 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
727 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
731 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
732 ; CHECK-LABEL: test_mask_cmp_d_512
733 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
734 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
735 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
736 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
737 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
738 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
739 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
740 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
741 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
742 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
743 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
744 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
745 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
746 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
747 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
748 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
749 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
750 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
751 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
752 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
753 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
754 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
755 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
756 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
760 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
762 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
763 ; CHECK-LABEL: test_ucmp_d_512
764 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
765 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
766 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
767 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
768 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
769 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
770 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
771 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
772 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
773 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
774 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
775 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
776 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
777 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
778 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
779 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
780 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
781 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
782 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
783 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
784 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
785 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
786 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
787 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
; Masked unsigned dword compares: each vpcmp*ud must carry the {%k1} write-mask.
; NOTE(review): corrected inert CHECK_LABEL (underscore) to the hyphenated FileCheck form.
791 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
792 ; CHECK-LABEL: test_mask_ucmp_d_512
793 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
794 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
795 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
796 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
797 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
798 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
799 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
800 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
801 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
802 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
803 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
804 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
805 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
806 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
807 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
808 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
809 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
810 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
811 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
812 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
813 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
814 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
815 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
816 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
820 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
; Signed qword compares (all 8 predicates, unmasked) must select the vpcmp*q forms.
; NOTE(review): corrected inert CHECK_LABEL (underscore) to the hyphenated FileCheck form.
822 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
823 ; CHECK-LABEL: test_cmp_q_512
824 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
825 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
826 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
827 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
828 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
829 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
830 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
831 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
832 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
833 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
834 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
835 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
836 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
837 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
838 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
839 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
840 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
841 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
842 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
843 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
844 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
845 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
846 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
847 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
; Masked signed qword compares: each vpcmp*q must carry the {%k1} write-mask.
; NOTE(review): corrected inert CHECK_LABEL (underscore) to the hyphenated FileCheck form.
851 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
852 ; CHECK-LABEL: test_mask_cmp_q_512
853 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
854 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
855 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
856 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
857 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
858 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
859 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
860 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
861 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
862 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
863 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
864 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
865 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
866 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
867 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
868 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
869 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
870 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
871 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
872 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
873 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
874 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
875 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
876 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
880 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
; Unsigned qword compares (all 8 predicates, unmasked) must select the vpcmp*uq forms.
; NOTE(review): corrected inert CHECK_LABEL (underscore) to the hyphenated FileCheck form.
882 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
883 ; CHECK-LABEL: test_ucmp_q_512
884 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
885 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
886 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
887 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
888 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
889 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
890 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
891 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
892 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
893 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
894 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
895 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
896 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
897 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
898 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
899 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
900 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
901 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
902 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
903 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
904 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
905 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
906 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
907 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
; Masked unsigned qword compares: each vpcmp*uq must carry the {%k1} write-mask.
; NOTE(review): corrected inert CHECK_LABEL (underscore) to the hyphenated FileCheck form.
911 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
912 ; CHECK-LABEL: test_mask_ucmp_q_512
913 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
914 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
915 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
916 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
917 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
918 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
919 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
920 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
921 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
922 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
923 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
924 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
925 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
926 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
927 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
928 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
929 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
930 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
931 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
932 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
933 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
934 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
935 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
936 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
940 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
; Section -- 512-bit subvector extract intrinsics (vextractf32x4 / vextracti64x4 /
; vextracti32x4 / vextractf64x4), covering merge-masking, zero-masking {z}, and the
; unmasked (i8 -1) form. The directives below pin the expected asm mnemonic and operands.
942 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
943 ; CHECK-LABEL: test_mask_vextractf32x4:
944 ; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
945 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
949 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
951 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
952 ; CHECK-LABEL: test_mask_vextracti64x4:
953 ; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
954 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
958 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
960 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
961 ; CHECK-LABEL: test_maskz_vextracti32x4:
962 ; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
963 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
967 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
969 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
970 ; CHECK-LABEL: test_vextractf64x4:
971 ; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
972 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
973 ret <4 x double> %res
976 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
; Section -- shift-by-immediate intrinsics (pslli/psrli/psrai, dword and qword).
; Each intrinsic is exercised three ways: unmasked (i16/i8 -1 mask with zero
; pass-through), merge-masked into %a1 ({%k1}), and zero-masked ({%k1} {z}).
978 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
979 ; CHECK-LABEL: test_x86_avx512_pslli_d
981 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
985 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
986 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
987 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
988 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
992 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
993 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
994 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
995 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
999 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1001 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
1002 ; CHECK-LABEL: test_x86_avx512_pslli_q
1004 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1008 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1009 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
1010 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
1011 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1015 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
1016 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
1017 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
1018 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1022 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1024 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1025 ; CHECK-LABEL: test_x86_avx512_psrli_d
1027 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1031 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1032 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
1033 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
1034 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1038 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1039 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
1040 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1041 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1045 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1047 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1048 ; CHECK-LABEL: test_x86_avx512_psrli_q
1050 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1054 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1055 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
1056 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
1057 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1061 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1062 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
1063 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1064 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1068 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1070 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1071 ; CHECK-LABEL: test_x86_avx512_psrai_d
1073 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1077 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1078 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
1079 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
1080 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1084 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1085 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
1086 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1087 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1091 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1093 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1094 ; CHECK-LABEL: test_x86_avx512_psrai_q
1096 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1100 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1101 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
1102 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
1103 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1107 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1108 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
1109 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1110 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1114 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
; Section -- shift-by-xmm-count intrinsics (psll/psrl/psra, dword and qword), where
; the shift amount comes from a 128-bit vector operand. Same three-way pattern as
; above: unmasked, merge-masked ({%k1}), and zero-masked ({%k1} {z}).
1116 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
1117 ; CHECK-LABEL: test_x86_avx512_psll_d
1119 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1123 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1124 ; CHECK-LABEL: test_x86_avx512_mask_psll_d
1125 ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
1126 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1130 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1131 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
1132 ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
1133 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1137 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1139 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
1140 ; CHECK-LABEL: test_x86_avx512_psll_q
1142 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1146 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1147 ; CHECK-LABEL: test_x86_avx512_mask_psll_q
1148 ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
1149 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1153 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1154 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
1155 ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
1156 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1160 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1162 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
1163 ; CHECK-LABEL: test_x86_avx512_psrl_d
1165 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1169 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1170 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
1171 ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
1172 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1176 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1177 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
1178 ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
1179 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1183 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1185 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
1186 ; CHECK-LABEL: test_x86_avx512_psrl_q
1188 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1192 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1193 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
1194 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
1195 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1199 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1200 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
1201 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
1202 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1206 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1208 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
1209 ; CHECK-LABEL: test_x86_avx512_psra_d
1211 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1215 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1216 ; CHECK-LABEL: test_x86_avx512_mask_psra_d
1217 ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
1218 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1222 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1223 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
1224 ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
1225 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1229 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1231 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
1232 ; CHECK-LABEL: test_x86_avx512_psra_q
1234 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1238 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1239 ; CHECK-LABEL: test_x86_avx512_mask_psra_q
1240 ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
1241 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1245 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1246 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
1247 ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
1248 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1252 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
; Section -- per-element variable-shift intrinsics (psllv/psrav/psrlv, dword and
; qword), where the count operand is a full 512-bit vector. Same three-way pattern:
; unmasked, merge-masked ({%k1}), and zero-masked ({%k1} {z}).
1254 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
1255 ; CHECK-LABEL: test_x86_avx512_psllv_d
1257 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1261 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1262 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
1263 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
1264 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1268 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1269 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
1270 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1271 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1275 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1277 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
1278 ; CHECK-LABEL: test_x86_avx512_psllv_q
1280 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1284 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1285 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
1286 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
1287 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1291 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1292 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
1293 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1294 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1298 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1301 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
1302 ; CHECK-LABEL: test_x86_avx512_psrav_d
1304 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1308 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1309 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
1310 ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
1311 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1315 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1316 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
1317 ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
1318 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1322 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1324 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
1325 ; CHECK-LABEL: test_x86_avx512_psrav_q
1327 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1331 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1332 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
1333 ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
1334 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1338 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1339 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
1340 ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
1341 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1345 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1347 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
1348 ; CHECK-LABEL: test_x86_avx512_psrlv_d
1350 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1354 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1355 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
1356 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
1357 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1361 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1362 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
1363 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1364 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1368 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1370 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
1371 ; CHECK-LABEL: test_x86_avx512_psrlv_q
1373 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1377 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1378 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
1379 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
1380 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1384 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1385 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
1386 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1387 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1391 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
; Variable shift with the count loaded from memory (folded memory operand), plus
; forward declarations for the rounding-mode arithmetic intrinsics used below
; (last i32 parameter is the static rounding / SAE control).
1393 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
1394 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
1396 %b = load <8 x i64>, <8 x i64>* %ptr
1397 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1401 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1402 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1403 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
; Section -- static-rounding-mode arithmetic. Rounding argument 0/1/2/3 selects
; {rn-sae}/{rd-sae}/{ru-sae}/{rz-sae}; the encoding bytes pin the EVEX.RC bits
; (0x18/0x38/0x58/0x78 in the prefix) for each mode.
1405 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
1406 ; CHECK-LABEL: test_vsubps_rn
1407 ; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
1408 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1409 <16 x float> zeroinitializer, i16 -1, i32 0)
1410 ret <16 x float> %res
1413 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
1414 ; CHECK-LABEL: test_vsubps_rd
1415 ; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
1416 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1417 <16 x float> zeroinitializer, i16 -1, i32 1)
1418 ret <16 x float> %res
1421 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
1422 ; CHECK-LABEL: test_vsubps_ru
1423 ; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
1424 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1425 <16 x float> zeroinitializer, i16 -1, i32 2)
1426 ret <16 x float> %res
1429 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
1430 ; CHECK-LABEL: test_vsubps_rz
1431 ; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
1432 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1433 <16 x float> zeroinitializer, i16 -1, i32 3)
1434 ret <16 x float> %res
1437 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
1438 ; CHECK-LABEL: test_vmulps_rn
1439 ; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
1440 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1441 <16 x float> zeroinitializer, i16 -1, i32 0)
1442 ret <16 x float> %res
1445 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
1446 ; CHECK-LABEL: test_vmulps_rd
1447 ; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
1448 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1449 <16 x float> zeroinitializer, i16 -1, i32 1)
1450 ret <16 x float> %res
1453 define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
1454 ; CHECK-LABEL: test_vmulps_ru
1455 ; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
1456 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1457 <16 x float> zeroinitializer, i16 -1, i32 2)
1458 ret <16 x float> %res
1461 define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
1462 ; CHECK-LABEL: test_vmulps_rz
1463 ; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
1464 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1465 <16 x float> zeroinitializer, i16 -1, i32 3)
1466 ret <16 x float> %res
; Masked vmulps with a zeroinitializer passthru: because the passthru is
; all-zero, the expected asm uses zeroing-masking ({%k1} {z}); the EVEX
; prefix byte 4 carries both the z bit and the rounding control.
1470 define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1471 ; CHECK-LABEL: test_vmulps_mask_rn
1472 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
1473 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1474 <16 x float> zeroinitializer, i16 %mask, i32 0)
1475 ret <16 x float> %res
1478 define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1479 ; CHECK-LABEL: test_vmulps_mask_rd
1480 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
1481 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1482 <16 x float> zeroinitializer, i16 %mask, i32 1)
1483 ret <16 x float> %res
1486 define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1487 ; CHECK-LABEL: test_vmulps_mask_ru
1488 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
1489 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1490 <16 x float> zeroinitializer, i16 %mask, i32 2)
1491 ret <16 x float> %res
1494 define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1495 ; CHECK-LABEL: test_vmulps_mask_rz
1496 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
1497 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1498 <16 x float> zeroinitializer, i16 %mask, i32 3)
1499 ret <16 x float> %res
1502 ;; With Passthru value
; Masked vmulps with an explicit %passthru vector: unlike the zeroing
; variants above, the expected asm uses merge-masking ({%k1} without {z}),
; so masked-off lanes come from the destination/passthru register.
1503 define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1504 ; CHECK-LABEL: test_vmulps_mask_passthru_rn
1505 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
1506 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1507 <16 x float> %passthru, i16 %mask, i32 0)
1508 ret <16 x float> %res
1511 define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1512 ; CHECK-LABEL: test_vmulps_mask_passthru_rd
1513 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
1514 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1515 <16 x float> %passthru, i16 %mask, i32 1)
1516 ret <16 x float> %res
1519 define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1520 ; CHECK-LABEL: test_vmulps_mask_passthru_ru
1521 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
1522 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1523 <16 x float> %passthru, i16 %mask, i32 2)
1524 ret <16 x float> %res
1527 define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1528 ; CHECK-LABEL: test_vmulps_mask_passthru_rz
1529 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
1530 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1531 <16 x float> %passthru, i16 %mask, i32 3)
1532 ret <16 x float> %res
; Double-precision variant: vmulpd.512 with zeroing-masking ({%k1} {z})
; across the four rounding modes.  The mask is i8 (8 x f64 lanes) and the
; EVEX.W bit shows up as 0xfd in byte 2 of the encoding.
1536 define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1537 ; CHECK-LABEL: test_vmulpd_mask_rn
1538 ; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
1539 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1540 <8 x double> zeroinitializer, i8 %mask, i32 0)
1541 ret <8 x double> %res
1544 define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1545 ; CHECK-LABEL: test_vmulpd_mask_rd
1546 ; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
1547 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1548 <8 x double> zeroinitializer, i8 %mask, i32 1)
1549 ret <8 x double> %res
1552 define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1553 ; CHECK-LABEL: test_vmulpd_mask_ru
1554 ; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
1555 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1556 <8 x double> zeroinitializer, i8 %mask, i32 2)
1557 ret <8 x double> %res
1560 define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1561 ; CHECK-LABEL: test_vmulpd_mask_rz
1562 ; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
1563 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1564 <8 x double> zeroinitializer, i8 %mask, i32 3)
1565 ret <8 x double> %res
; 512-bit dword logical ops through the masked intrinsics
; (vpxord / vpord / vpandd).  The i16 -1 mask selects the unmasked
; encoding; an explicit %mask merges the result into %passThru ({%k1}).
1568 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
1569 ;CHECK-LABEL: test_xor_epi32
1570 ;CHECK: vpxord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
1571 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1572 ret < 16 x i32> %res
1575 define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1576 ;CHECK-LABEL: test_mask_xor_epi32
1577 ;CHECK: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
1578 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1579 ret < 16 x i32> %res
1582 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1584 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
1585 ;CHECK-LABEL: test_or_epi32
1586 ;CHECK: vpord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
1587 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1588 ret < 16 x i32> %res
1591 define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1592 ;CHECK-LABEL: test_mask_or_epi32
1593 ;CHECK: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
1594 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1595 ret < 16 x i32> %res
1598 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1600 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
1601 ;CHECK-LABEL: test_and_epi32
1602 ;CHECK: vpandd {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
1603 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1604 ret < 16 x i32> %res
1607 define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1608 ;CHECK-LABEL: test_mask_and_epi32
1609 ;CHECK: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
1610 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1611 ret < 16 x i32> %res
1614 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Qword counterparts of the dword logical-op tests above
; (vpxorq / vporq / vpandq); i8 mask for 8 x i64 lanes, EVEX.W set (0xfd).
; NOTE(review): the ret/closing-brace lines of these bodies are not
; visible in this excerpt — confirm nothing was dropped in extraction.
1616 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
1617 ;CHECK-LABEL: test_xor_epi64
1618 ;CHECK: vpxorq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
1619 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1623 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1624 ;CHECK-LABEL: test_mask_xor_epi64
1625 ;CHECK: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
1626 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1630 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1632 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
1633 ;CHECK-LABEL: test_or_epi64
1634 ;CHECK: vporq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
1635 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1639 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1640 ;CHECK-LABEL: test_mask_or_epi64
1641 ;CHECK: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
1642 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1646 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1648 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
1649 ;CHECK-LABEL: test_and_epi64
1650 ;CHECK: vpandq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
1651 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1655 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1656 ;CHECK-LABEL: test_mask_and_epi64
1657 ;CHECK: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
1658 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1662 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; vpaddd operand-form matrix.  Suffix convention used throughout this
; file: rr = reg/reg, rm = reg/mem, rmb = embedded broadcast ({1to16}
; of a single i32); trailing k = merge-masked into %passThru, kz =
; zero-masked.  The broadcast variants build %b with an
; insertelement + all-zero shufflevector splat.
1665 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1666 ;CHECK-LABEL: test_mask_add_epi32_rr
1667 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
1668 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1669 ret < 16 x i32> %res
1672 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1673 ;CHECK-LABEL: test_mask_add_epi32_rrk
1674 ;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
1675 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1676 ret < 16 x i32> %res
1679 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1680 ;CHECK-LABEL: test_mask_add_epi32_rrkz
1681 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
1682 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1683 ret < 16 x i32> %res
1686 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1687 ;CHECK-LABEL: test_mask_add_epi32_rm
1688 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
1689 %b = load <16 x i32>, <16 x i32>* %ptr_b
1690 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1691 ret < 16 x i32> %res
1694 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1695 ;CHECK-LABEL: test_mask_add_epi32_rmk
1696 ;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
1697 %b = load <16 x i32>, <16 x i32>* %ptr_b
1698 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1699 ret < 16 x i32> %res
1702 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1703 ;CHECK-LABEL: test_mask_add_epi32_rmkz
1704 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
1705 %b = load <16 x i32>, <16 x i32>* %ptr_b
1706 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1707 ret < 16 x i32> %res
1710 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1711 ;CHECK-LABEL: test_mask_add_epi32_rmb
1712 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
1713 %q = load i32, i32* %ptr_b
1714 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1715 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1716 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1717 ret < 16 x i32> %res
1720 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1721 ;CHECK-LABEL: test_mask_add_epi32_rmbk
1722 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
1723 %q = load i32, i32* %ptr_b
1724 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1725 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1726 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1727 ret < 16 x i32> %res
1730 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1731 ;CHECK-LABEL: test_mask_add_epi32_rmbkz
1732 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
1733 %q = load i32, i32* %ptr_b
1734 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1735 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1736 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1737 ret < 16 x i32> %res
1740 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; vpsubd operand-form matrix (same rr/rm/rmb x unmasked/k/kz layout as
; the vpaddd tests above, opcode byte 0xfa).
1742 define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1743 ;CHECK-LABEL: test_mask_sub_epi32_rr
1744 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
1745 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1746 ret < 16 x i32> %res
1749 define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1750 ;CHECK-LABEL: test_mask_sub_epi32_rrk
1751 ;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
1752 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1753 ret < 16 x i32> %res
1756 define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1757 ;CHECK-LABEL: test_mask_sub_epi32_rrkz
1758 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
1759 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1760 ret < 16 x i32> %res
1763 define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1764 ;CHECK-LABEL: test_mask_sub_epi32_rm
1765 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
1766 %b = load <16 x i32>, <16 x i32>* %ptr_b
1767 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1768 ret < 16 x i32> %res
1771 define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1772 ;CHECK-LABEL: test_mask_sub_epi32_rmk
1773 ;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
1774 %b = load <16 x i32>, <16 x i32>* %ptr_b
1775 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1776 ret < 16 x i32> %res
1779 define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1780 ;CHECK-LABEL: test_mask_sub_epi32_rmkz
1781 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
1782 %b = load <16 x i32>, <16 x i32>* %ptr_b
1783 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1784 ret < 16 x i32> %res
1787 define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1788 ;CHECK-LABEL: test_mask_sub_epi32_rmb
1789 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
1790 %q = load i32, i32* %ptr_b
1791 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1792 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1793 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1794 ret < 16 x i32> %res
1797 define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1798 ;CHECK-LABEL: test_mask_sub_epi32_rmbk
1799 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
1800 %q = load i32, i32* %ptr_b
1801 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1802 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1803 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1804 ret < 16 x i32> %res
1807 define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1808 ;CHECK-LABEL: test_mask_sub_epi32_rmbkz
1809 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
1810 %q = load i32, i32* %ptr_b
1811 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1812 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1813 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1814 ret < 16 x i32> %res
1817 declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; vpaddq operand-form matrix: qword version of the vpaddd tests
; (i8 mask, {1to8} broadcast of a single i64, EVEX.W set).
; NOTE(review): the ret/closing-brace lines of these bodies are not
; visible in this excerpt — confirm nothing was dropped in extraction.
1819 define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
1820 ;CHECK-LABEL: test_mask_add_epi64_rr
1821 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
1822 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1826 define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1827 ;CHECK-LABEL: test_mask_add_epi64_rrk
1828 ;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
1829 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1833 define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1834 ;CHECK-LABEL: test_mask_add_epi64_rrkz
1835 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
1836 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1840 define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
1841 ;CHECK-LABEL: test_mask_add_epi64_rm
1842 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
1843 %b = load <8 x i64>, <8 x i64>* %ptr_b
1844 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1848 define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1849 ;CHECK-LABEL: test_mask_add_epi64_rmk
1850 ;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
1851 %b = load <8 x i64>, <8 x i64>* %ptr_b
1852 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1856 define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
1857 ;CHECK-LABEL: test_mask_add_epi64_rmkz
1858 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
1859 %b = load <8 x i64>, <8 x i64>* %ptr_b
1860 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1864 define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
1865 ;CHECK-LABEL: test_mask_add_epi64_rmb
1866 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
1867 %q = load i64, i64* %ptr_b
1868 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1869 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1870 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1874 define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1875 ;CHECK-LABEL: test_mask_add_epi64_rmbk
1876 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
1877 %q = load i64, i64* %ptr_b
1878 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1879 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1880 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1884 define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
1885 ;CHECK-LABEL: test_mask_add_epi64_rmbkz
1886 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
1887 %q = load i64, i64* %ptr_b
1888 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1889 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1890 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1894 declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; vpsubq operand-form matrix (same layout as the vpaddq tests above,
; opcode byte 0xfb).
1896 define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
1897 ;CHECK-LABEL: test_mask_sub_epi64_rr
1898 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
1899 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1903 define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1904 ;CHECK-LABEL: test_mask_sub_epi64_rrk
1905 ;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
1906 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1910 define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1911 ;CHECK-LABEL: test_mask_sub_epi64_rrkz
1912 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
1913 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1917 define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
1918 ;CHECK-LABEL: test_mask_sub_epi64_rm
1919 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
1920 %b = load <8 x i64>, <8 x i64>* %ptr_b
1921 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1925 define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1926 ;CHECK-LABEL: test_mask_sub_epi64_rmk
1927 ;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
1928 %b = load <8 x i64>, <8 x i64>* %ptr_b
1929 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1933 define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
1934 ;CHECK-LABEL: test_mask_sub_epi64_rmkz
1935 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
1936 %b = load <8 x i64>, <8 x i64>* %ptr_b
1937 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1941 define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
1942 ;CHECK-LABEL: test_mask_sub_epi64_rmb
1943 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
1944 %q = load i64, i64* %ptr_b
1945 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1946 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1947 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1951 define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1952 ;CHECK-LABEL: test_mask_sub_epi64_rmbk
1953 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
1954 %q = load i64, i64* %ptr_b
1955 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1956 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1957 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1961 define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
1962 ;CHECK-LABEL: test_mask_sub_epi64_rmbkz
1963 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
1964 %q = load i64, i64* %ptr_b
1965 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1966 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1967 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1971 declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; vpmuldq matrix: <16 x i32> inputs, <8 x i64> result.  The rmb variants
; broadcast a single i64 ({1to8}) and bitcast the <8 x i64> splat to
; <16 x i32> to feed the intrinsic's dword operand type.
1973 define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1974 ;CHECK-LABEL: test_mask_mul_epi32_rr
1975 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
1976 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
1980 define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
1981 ;CHECK-LABEL: test_mask_mul_epi32_rrk
1982 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
1983 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
1987 define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
1988 ;CHECK-LABEL: test_mask_mul_epi32_rrkz
1989 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
1990 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
1994 define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1995 ;CHECK-LABEL: test_mask_mul_epi32_rm
1996 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
1997 %b = load <16 x i32>, <16 x i32>* %ptr_b
1998 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2002 define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2003 ;CHECK-LABEL: test_mask_mul_epi32_rmk
2004 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
2005 %b = load <16 x i32>, <16 x i32>* %ptr_b
2006 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2010 define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
2011 ;CHECK-LABEL: test_mask_mul_epi32_rmkz
2012 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
2013 %b = load <16 x i32>, <16 x i32>* %ptr_b
2014 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2018 define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
2019 ;CHECK-LABEL: test_mask_mul_epi32_rmb
2020 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
2021 %q = load i64, i64* %ptr_b
2022 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2023 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2024 %b = bitcast <8 x i64> %b64 to <16 x i32>
2025 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2029 define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2030 ;CHECK-LABEL: test_mask_mul_epi32_rmbk
2031 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
2032 %q = load i64, i64* %ptr_b
2033 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2034 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2035 %b = bitcast <8 x i64> %b64 to <16 x i32>
2036 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2040 define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
2041 ;CHECK-LABEL: test_mask_mul_epi32_rmbkz
2042 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
2043 %q = load i64, i64* %ptr_b
2044 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2045 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2046 %b = bitcast <8 x i64> %b64 to <16 x i32>
2047 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2051 declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; vpmuludq matrix: unsigned counterpart of the vpmuldq tests above
; (opcode byte 0xf4); same rr/rm x unmasked/k/kz operand forms.
2053 define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
2054 ;CHECK-LABEL: test_mask_mul_epu32_rr
2055 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
2056 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2060 define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
2061 ;CHECK-LABEL: test_mask_mul_epu32_rrk
2062 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
2063 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2067 define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
2068 ;CHECK-LABEL: test_mask_mul_epu32_rrkz
2069 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
2070 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2074 define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2075 ;CHECK-LABEL: test_mask_mul_epu32_rm
2076 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
2077 %b = load <16 x i32>, <16 x i32>* %ptr_b
2078 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2082 define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2083 ;CHECK-LABEL: test_mask_mul_epu32_rmk
2084 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
2085 %b = load <16 x i32>, <16 x i32>* %ptr_b
2086 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2090 define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
2091 ;CHECK-LABEL: test_mask_mul_epu32_rmkz
2092 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
2093 %b = load <16 x i32>, <16 x i32>* %ptr_b
2094 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2098 define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
2099 ;CHECK-LABEL: test_mask_mul_epu32_rmb
2100 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
2101 %q = load i64, i64* %ptr_b
2102 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2103 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2104 %b = bitcast <8 x i64> %b64 to <16 x i32>
2105 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2109 define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2110 ;CHECK-LABEL: test_mask_mul_epu32_rmbk
2111 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
2112 %q = load i64, i64* %ptr_b
2113 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2114 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2115 %b = bitcast <8 x i64> %b64 to <16 x i32>
2116 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2120 define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
2121 ;CHECK-LABEL: test_mask_mul_epu32_rmbkz
2122 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
2123 %q = load i64, i64* %ptr_b
2124 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2125 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2126 %b = bitcast <8 x i64> %b64 to <16 x i32>
2127 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2131 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
2133 define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
2134 ;CHECK-LABEL: test_mask_mullo_epi32_rr_512
2135 ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
2136 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2140 define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2141 ;CHECK-LABEL: test_mask_mullo_epi32_rrk_512
2142 ;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
2143 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2144 ret < 16 x i32> %res
2147 define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2148 ;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512
2149 ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
2150 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2151 ret < 16 x i32> %res
2154 define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
2155 ;CHECK-LABEL: test_mask_mullo_epi32_rm_512
2156 ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
2157 %b = load <16 x i32>, <16 x i32>* %ptr_b
2158 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2159 ret < 16 x i32> %res
2162 define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2163 ;CHECK-LABEL: test_mask_mullo_epi32_rmk_512
2164 ;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
2165 %b = load <16 x i32>, <16 x i32>* %ptr_b
2166 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2167 ret < 16 x i32> %res
2170 define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2171 ;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512
2172 ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
2173 %b = load <16 x i32>, <16 x i32>* %ptr_b
2174 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2175 ret < 16 x i32> %res
2178 define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
2179 ;CHECK-LABEL: test_mask_mullo_epi32_rmb_512
2180 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
2181 %q = load i32, i32* %ptr_b
2182 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2183 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2184 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2185 ret < 16 x i32> %res
2188 define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2189 ;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512
2190 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
2191 %q = load i32, i32* %ptr_b
2192 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2193 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2194 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2195 ret < 16 x i32> %res
2198 define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2199 ;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512
2200 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
2201 %q = load i32, i32* %ptr_b
2202 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2203 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2204 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2205 ret < 16 x i32> %res
2208 declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2210 define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2211 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae
2212 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2213 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2214 ret <16 x float> %res
2216 define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2217 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae
2218 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2219 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
2220 ret <16 x float> %res
2222 define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2223 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae
2224 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2225 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2226 ret <16 x float> %res
2229 define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2230 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae
2231 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2232 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2233 ret <16 x float> %res
2237 define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2238 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_current
2239 ;CHECK: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
2240 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2241 ret <16 x float> %res
2244 define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2245 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae
2246 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2247 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2248 ret <16 x float> %res
2250 define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2251 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae
2252 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2253 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2254 ret <16 x float> %res
2256 define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2257 ;CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae
2258 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2259 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2260 ret <16 x float> %res
2263 define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2264 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae
2265 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2266 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2267 ret <16 x float> %res
2271 define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2272 ;CHECK-LABEL: test_mm512_mask_add_round_ps_current
2273 ;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1}
2274 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2275 ret <16 x float> %res
2279 define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2280 ;CHECK-LABEL: test_mm512_add_round_ps_rn_sae
2281 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
2282 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2283 ret <16 x float> %res
2285 define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2286 ;CHECK-LABEL: test_mm512_add_round_ps_rd_sae
2287 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
2288 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2289 ret <16 x float> %res
2291 define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2292 ;CHECK-LABEL: test_mm512_add_round_ps_ru_sae
2293 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
2294 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2295 ret <16 x float> %res
2298 define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2299 ;CHECK-LABEL: test_mm512_add_round_ps_rz_sae
2300 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
2301 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2302 ret <16 x float> %res
2305 define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2306 ;CHECK-LABEL: test_mm512_add_round_ps_current
2307 ;CHECK: vaddps %zmm1, %zmm0, %zmm0
2308 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2309 ret <16 x float> %res
2311 declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2313 define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2314 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae
2315 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2316 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2317 ret <16 x float> %res
2319 define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2320 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae
2321 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2322 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2323 ret <16 x float> %res
2325 define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2326 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae
2327 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2328 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2329 ret <16 x float> %res
2332 define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2333 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae
2334 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2335 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2336 ret <16 x float> %res
2340 define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2341 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_current
2342 ;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1}
2343 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2344 ret <16 x float> %res
2347 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2348 ;CHECK-LABEL: test_mm512_sub_round_ps_rn_sae
2349 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
2350 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2351 ret <16 x float> %res
2353 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2354 ;CHECK-LABEL: test_mm512_sub_round_ps_rd_sae
2355 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
2356 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2357 ret <16 x float> %res
2359 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2360 ;CHECK-LABEL: test_mm512_sub_round_ps_ru_sae
2361 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
2362 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2363 ret <16 x float> %res
2366 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2367 ;CHECK-LABEL: test_mm512_sub_round_ps_rz_sae
2368 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
2369 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2370 ret <16 x float> %res
2373 define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2374 ;CHECK-LABEL: test_mm512_sub_round_ps_current
2375 ;CHECK: vsubps %zmm1, %zmm0, %zmm0
2376 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2377 ret <16 x float> %res
2380 define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2381 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae
2382 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2383 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2384 ret <16 x float> %res
2386 define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2387 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae
2388 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2389 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
2390 ret <16 x float> %res
2392 define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2393 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae
2394 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2395 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2396 ret <16 x float> %res
2399 define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2400 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae
2401 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2402 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2403 ret <16 x float> %res
2407 define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2408 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_current
2409 ;CHECK: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
2410 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2411 ret <16 x float> %res
2414 define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2415 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae
2416 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2417 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2418 ret <16 x float> %res
2420 define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2421 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae
2422 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2423 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2424 ret <16 x float> %res
2426 define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2427 ;CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae
2428 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2429 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2430 ret <16 x float> %res
2433 define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2434 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae
2435 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2436 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2437 ret <16 x float> %res
2441 define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2442 ;CHECK-LABEL: test_mm512_mask_div_round_ps_current
2443 ;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1}
2444 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2445 ret <16 x float> %res
2449 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2450 ;CHECK-LABEL: test_mm512_div_round_ps_rn_sae
2451 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
2452 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2453 ret <16 x float> %res
2455 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2456 ;CHECK-LABEL: test_mm512_div_round_ps_rd_sae
2457 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
2458 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2459 ret <16 x float> %res
2461 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2462 ;CHECK-LABEL: test_mm512_div_round_ps_ru_sae
2463 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
2464 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2465 ret <16 x float> %res
2468 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2469 ;CHECK-LABEL: test_mm512_div_round_ps_rz_sae
2470 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
2471 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2472 ret <16 x float> %res
2475 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2476 ;CHECK-LABEL: test_mm512_div_round_ps_current
2477 ;CHECK: vdivps %zmm1, %zmm0, %zmm0
2478 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2479 ret <16 x float> %res
2481 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2483 define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2484 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_sae
2485 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2486 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2487 ret <16 x float> %res
2490 define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2491 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_current
2492 ;CHECK: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
2493 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2494 ret <16 x float> %res
2497 define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2498 ;CHECK-LABEL: test_mm512_mask_min_round_ps_sae
2499 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2500 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2501 ret <16 x float> %res
2504 define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2505 ;CHECK-LABEL: test_mm512_mask_min_round_ps_current
2506 ;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1}
2507 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2508 ret <16 x float> %res
2511 define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2512 ;CHECK-LABEL: test_mm512_min_round_ps_sae
2513 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0
2514 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2515 ret <16 x float> %res
2518 define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2519 ;CHECK-LABEL: test_mm512_min_round_ps_current
2520 ;CHECK: vminps %zmm1, %zmm0, %zmm0
2521 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2522 ret <16 x float> %res
2524 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2526 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2527 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_sae
2528 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2529 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2530 ret <16 x float> %res
2533 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2534 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_current
2535 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
2536 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2537 ret <16 x float> %res
2540 define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2541 ;CHECK-LABEL: test_mm512_mask_max_round_ps_sae
2542 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2543 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2544 ret <16 x float> %res
2547 define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2548 ;CHECK-LABEL: test_mm512_mask_max_round_ps_current
2549 ;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
2550 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2551 ret <16 x float> %res
2554 define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2555 ;CHECK-LABEL: test_mm512_max_round_ps_sae
2556 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0
2557 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2558 ret <16 x float> %res
2561 define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2562 ;CHECK-LABEL: test_mm512_max_round_ps_current
2563 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0
2564 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2565 ret <16 x float> %res
2567 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2569 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2571 define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2572 ; CHECK-LABEL: test_mask_add_ss_rn
2573 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2574 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
2575 ret <4 x float> %res
2578 define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2579 ; CHECK-LABEL: test_mask_add_ss_rd
2580 ; CHECK: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2581 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
2582 ret <4 x float> %res
2585 define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2586 ; CHECK-LABEL: test_mask_add_ss_ru
2587 ; CHECK: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2588 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
2589 ret <4 x float> %res
2592 define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2593 ; CHECK-LABEL: test_mask_add_ss_rz
2594 ; CHECK: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2595 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
2596 ret <4 x float> %res
2599 define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2600 ; CHECK-LABEL: test_mask_add_ss_current
2601 ; CHECK: vaddss %xmm1, %xmm0, %xmm2 {%k1}
2602 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2603 ret <4 x float> %res
2606 define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2607 ; CHECK-LABEL: test_maskz_add_ss_rn
2608 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2609 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
2610 ret <4 x float> %res
2613 define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
2614 ; CHECK-LABEL: test_add_ss_rn
2615 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
2616 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
2617 ret <4 x float> %res
; Scalar ADD (sd) tests for @llvm.x86.avx512.mask.add.sd.round.  The trailing
; i32 operand selects the embedded rounding mode:
; 0 = {rn-sae}, 1 = {rd-sae}, 2 = {ru-sae}, 3 = {rz-sae}, 4 = current (none).
2620 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2622 define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2623 ; CHECK-LABEL: test_mask_add_sd_rn
2624 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2625 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
2626 ret <2 x double> %res
2629 define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2630 ; CHECK-LABEL: test_mask_add_sd_rd
2631 ; CHECK: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2632 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
2633 ret <2 x double> %res
2636 define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2637 ; CHECK-LABEL: test_mask_add_sd_ru
2638 ; CHECK: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2639 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
2640 ret <2 x double> %res
2643 define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2644 ; CHECK-LABEL: test_mask_add_sd_rz
2645 ; CHECK: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2646 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
2647 ret <2 x double> %res
2650 define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2651 ; CHECK-LABEL: test_mask_add_sd_current
2652 ; CHECK: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
2653 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2654 ret <2 x double> %res
; Zero-masked: zeroinitializer passthru => {z} expected in the output.
2657 define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2658 ; CHECK-LABEL: test_maskz_add_sd_rn
2659 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2660 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
2661 ret <2 x double> %res
; Unmasked: mask = -1 => no {%k1} in the output.
2664 define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
2665 ; CHECK-LABEL: test_add_sd_rn
2666 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
2667 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
2668 ret <2 x double> %res
; Scalar MAX (ss) tests for @llvm.x86.avx512.mask.max.ss.round.  The trailing
; i32 operand is the rounding/SAE control: 8 = {sae} (suppress-all-exceptions),
; 4 = current (no suffix).  Masked, zero-masked, and unmasked forms are covered.
2671 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2673 define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2674 ; CHECK-LABEL: test_mask_max_ss_sae
2675 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2676 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
2677 ret <4 x float> %res
2680 define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2681 ; CHECK-LABEL: test_maskz_max_ss_sae
2682 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2683 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
2684 ret <4 x float> %res
2687 define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
2688 ; CHECK-LABEL: test_max_ss_sae
2689 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0
2690 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
2691 ret <4 x float> %res
2694 define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2695 ; CHECK-LABEL: test_mask_max_ss
2696 ; CHECK: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
2697 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2698 ret <4 x float> %res
2701 define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2702 ; CHECK-LABEL: test_maskz_max_ss
2703 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
2704 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
2705 ret <4 x float> %res
2708 define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
2709 ; CHECK-LABEL: test_max_ss
2710 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0
2711 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
2712 ret <4 x float> %res
; Scalar MAX (sd) tests, mirroring the ss tests above: 8 = {sae}, 4 = current.
2714 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2716 define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2717 ; CHECK-LABEL: test_mask_max_sd_sae
2718 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2719 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
2720 ret <2 x double> %res
2723 define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2724 ; CHECK-LABEL: test_maskz_max_sd_sae
2725 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2726 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
2727 ret <2 x double> %res
2730 define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
2731 ; CHECK-LABEL: test_max_sd_sae
2732 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
2733 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
2734 ret <2 x double> %res
2737 define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2738 ; CHECK-LABEL: test_mask_max_sd
2739 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
2740 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2741 ret <2 x double> %res
2744 define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2745 ; CHECK-LABEL: test_maskz_max_sd
2746 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
2747 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
2748 ret <2 x double> %res
2751 define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
2752 ; CHECK-LABEL: test_max_sd
2753 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0
2754 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
2755 ret <2 x double> %res
; Signed int -> scalar FP conversions with embedded rounding; rounding imm 3
; selects {rz-sae} (round toward zero) on vcvtsi2sd/vcvtsi2ss.
2758 define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
2759 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
2761 ; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
2763 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<2 x double>> [#uses=1]
2764 ret <2 x double> %res
2766 declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone
2768 define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
2769 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
2771 ; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
2773 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<2 x double>> [#uses=1]
2774 ret <2 x double> %res
2776 declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
2778 define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
2779 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
2781 ; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
2783 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<4 x float>> [#uses=1]
2784 ret <4 x float> %res
2786 declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
2788 define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
2789 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
2791 ; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
2793 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<4 x float>> [#uses=1]
2794 ret <4 x float> %res
2796 declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
; Unsigned int -> float (ss) conversions: rounding imm 1 = {rd-sae}
; (round down), imm 4 = current rounding (no suffix).  The *_mem variants
; check whether the load is folded into the conversion (it cannot be folded
; when embedded rounding is used, hence the separate movl in that case).
2798 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
2799 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
2801 ; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
2804 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<4 x float>> [#uses=1]
2805 ret <4 x float> %res
2808 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
2809 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
2811 ; CHECK-NEXT: movl (%rdi), %eax
2812 ; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
2815 %b = load i32, i32* %ptr
2816 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<4 x float>> [#uses=1]
2817 ret <4 x float> %res
2820 define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
2821 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
2823 ; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
2826 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<4 x float>> [#uses=1]
2827 ret <4 x float> %res
; With current rounding the memory operand folds directly into the convert.
2830 define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
2831 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
2833 ; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
2836 %b = load i32, i32* %ptr
2837 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<4 x float>> [#uses=1]
2838 ret <4 x float> %res
2840 declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
2842 define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
2843 ; CHECK-LABEL: _mm_cvt_roundu64_ss:
2845 ; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
2848 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<4 x float>> [#uses=1]
2849 ret <4 x float> %res
2852 define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
2853 ; CHECK-LABEL: _mm_cvtu64_ss:
2855 ; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
2858 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<4 x float>> [#uses=1]
2859 ret <4 x float> %res
2861 declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone
; Unsigned int -> double (sd) conversions.  As with the ss tests above,
; rounding imm 1 = {rd-sae}, imm 4 = current rounding (no suffix).
2863 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
2864 ; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
2866 ; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
2869 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<2 x double>> [#uses=1]
2870 ret <2 x double> %res
2872 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
; This test uses rounding imm 1 ({rd-sae}), so it is the "cvt_round" variant;
; the names of the two u64 tests were swapped relative to the ss pair
; (@_mm_cvt_roundu64_ss = imm 1, @_mm_cvtu64_ss = imm 4) and are fixed here.
2874 define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
2875 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
2877 ; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
2880 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<2 x double>> [#uses=1]
2881 ret <2 x double> %res
; Current rounding (imm 4): no rounding suffix expected.
2884 define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
2885 ; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
2887 ; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
2890 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<2 x double>> [#uses=1]
2891 ret <2 x double> %res
2893 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
; Unmasked integer min/max tests: mask = -1 and zero passthru, checking the
; exact EVEX encoding bytes emitted by --show-mc-encoding.
2895 define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
2896 ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
2897 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
2898 <8 x i64>zeroinitializer, i8 -1)
2901 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2903 define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
2904 ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
2905 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
2906 <16 x i32>zeroinitializer, i16 -1)
2909 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2911 define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
2912 ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
2913 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
2914 <16 x i32>zeroinitializer, i16 -1)
2917 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Masked signed/unsigned max tests: each calls the intrinsic once with a live
; mask (%x3) and once unmasked (-1), then adds the results so neither call
; can be dropped as dead.
2919 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_512
2921 ; CHECK: vpmaxsd %zmm
2923 define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2924 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2925 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2926 %res2 = add <16 x i32> %res, %res1
2927 ret <16 x i32> %res2
2930 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_512
2932 ; CHECK: vpmaxsq %zmm
2934 define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2935 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2936 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2937 %res2 = add <8 x i64> %res, %res1
2941 declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2943 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_512
2945 ; CHECK: vpmaxud %zmm
2947 define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2948 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2949 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2950 %res2 = add <16 x i32> %res, %res1
2951 ret <16 x i32> %res2
2954 declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2956 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_512
2958 ; CHECK: vpmaxuq %zmm
2960 define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2961 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2962 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2963 %res2 = add <8 x i64> %res, %res1
; Masked signed/unsigned min tests, same masked + unmasked pattern as the
; max tests above.
2967 declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2969 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_512
2971 ; CHECK: vpminsd %zmm
2973 define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2974 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2975 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2976 %res2 = add <16 x i32> %res, %res1
2977 ret <16 x i32> %res2
2980 declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2982 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_512
2984 ; CHECK: vpminsq %zmm
2986 define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2987 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2988 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2989 %res2 = add <8 x i64> %res, %res1
2993 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_512
2995 ; CHECK: vpminud %zmm
2997 define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2998 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2999 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3000 %res2 = add <16 x i32> %res, %res1
3001 ret <16 x i32> %res2
3004 declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3006 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_512
3008 ; CHECK: vpminuq %zmm
3010 define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3011 %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3012 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3013 %res2 = add <8 x i64> %res, %res1
; vpermi2* (two-source permute, index operand overwritten) tests: masked call
; plus unmasked (-1) call, results combined with add/fadd to keep both live.
3017 declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3019 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512
3022 ; CHECK: vpermi2d {{.*}}{%k1}
3023 define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3024 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3025 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3026 %res2 = add <16 x i32> %res, %res1
3027 ret <16 x i32> %res2
3030 declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
3032 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512
3035 ; CHECK: vpermi2pd {{.*}}{%k1}
3036 define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
3037 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
3038 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
3039 %res2 = fadd <8 x double> %res, %res1
3040 ret <8 x double> %res2
3043 declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
3045 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512
3048 ; CHECK: vpermi2ps {{.*}}{%k1}
3049 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
3050 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
3051 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
3052 %res2 = fadd <16 x float> %res, %res1
3053 ret <16 x float> %res2
3056 declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3058 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512
3061 ; CHECK: vpermi2q {{.*}}{%k1}
3062 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3063 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3064 %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3065 %res2 = add <8 x i64> %res, %res1
; vpermt2* zero-masking tests: the maskz.* intrinsic variants must select the
; {z} form of vpermt2d/pd/ps/q when a real mask is present.
3069 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3071 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512
3074 ; CHECK: vpermt2d {{.*}}{%k1} {z}
3075 define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3076 %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3077 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3078 %res2 = add <16 x i32> %res, %res1
3079 ret <16 x i32> %res2
3082 declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
3084 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512
3087 ; CHECK: vpermt2pd {{.*}}{%k1} {z}
3088 define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3089 %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3090 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3091 %res2 = fadd <8 x double> %res, %res1
3092 ret <8 x double> %res2
3095 declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
3097 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512
3100 ; CHECK: vpermt2ps {{.*}}{%k1} {z}
3101 define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3102 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3103 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3104 %res2 = fadd <16 x float> %res, %res1
3105 ret <16 x float> %res2
3109 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3111 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512
3114 ; CHECK: vpermt2q {{.*}}{%k1} {z}
3115 define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3116 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3117 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3118 %res2 = add <8 x i64> %res, %res1
; vpermt2d merge-masking test: the mask.* variant must select the merging
; (no {z}) masked form.
3122 declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3124 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512
3127 ; CHECK: vpermt2d {{.*}}{%k1}
3129 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3130 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3131 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3132 %res2 = add <16 x i32> %res, %res1
3133 ret <16 x i32> %res2