1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
3 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
4 ; CHECK-LABEL: test_kortestz
7 define i32 @test_kortestz(i16 %a0, i16 %a1) {
8 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
12 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
13 ; CHECK-LABEL: test_kortestc
16 define i32 @test_kortestc(i16 %a0, i16 %a1) {
17 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
21 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
22 ; CHECK-LABEL: test_kand
25 define i16 @test_kand(i16 %a0, i16 %a1) {
26 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
27 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
31 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
32 ; CHECK-LABEL: test_knot
34 define i16 @test_knot(i16 %a0) {
35 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
39 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
41 ; CHECK-LABEL: unpckbw_test
44 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
45 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
49 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
50 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
51 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
54 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
56 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
57 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
58 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
61 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
63 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
65 define <8 x double> @test7(<8 x double> %a) {
66 ; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
67 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
71 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
73 define <16 x float> @test8(<16 x float> %a) {
74 ; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
75 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
79 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
80 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
81 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
84 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
86 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
87 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
88 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
91 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
93 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
94 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
95 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
98 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
100 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
101 ; CHECK-LABEL: test_sqrt_pd_512
103 %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
104 ret <8 x double> %res
106 declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
108 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
109 ; CHECK-LABEL: test_sqrt_ps_512
111 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
112 ret <16 x float> %res
114 define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
115 ; CHECK-LABEL: test_sqrt_round_ps_512
116 ; CHECK: vsqrtps {rz-sae}
117 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
118 ret <16 x float> %res
120 declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
122 define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
123 ; CHECK-LABEL: test_getexp_pd_512
125 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
126 ret <8 x double> %res
128 define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
129 ; CHECK-LABEL: test_getexp_round_pd_512
130 ; CHECK: vgetexppd {sae}
131 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
132 ret <8 x double> %res
134 declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
136 define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
137 ; CHECK-LABEL: test_getexp_ps_512
139 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
140 ret <16 x float> %res
143 define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
144 ; CHECK-LABEL: test_getexp_round_ps_512
145 ; CHECK: vgetexpps {sae}
146 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
147 ret <16 x float> %res
149 declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
151 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
152 ; CHECK: vsqrtss {{.*}}encoding: [0x62
153 %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
156 declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone
158 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
159 ; CHECK: vsqrtsd {{.*}}encoding: [0x62
160 %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
161 ret <2 x double> %res
163 declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
165 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
166 ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
167 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
170 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
172 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
173 ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
174 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
175 ret <2 x double> %res
177 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
179 define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
180 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
181 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
184 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
187 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
188 ; CHECK: vcvtss2si {{.*}}encoding: [0x62
189 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
192 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
195 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
196 ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
197 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
200 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
203 define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
204 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
205 %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
208 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
210 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
211 ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
212 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
215 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
217 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
218 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
219 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
220 ret <16 x float> %res
222 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
225 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
226 ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
227 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
231 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
233 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
234 ; CHECK: vbroadcastss
235 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
236 ret <16 x float> %res
238 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
240 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
241 ; CHECK: vbroadcastsd
242 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
243 ret <8 x double> %res
245 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
247 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
248 ; CHECK: vbroadcastss
249 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
250 ret <16 x float> %res
252 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
254 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
255 ; CHECK: vbroadcastsd
256 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
257 ret <8 x double> %res
259 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
261 define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
262 ; CHECK: vpbroadcastd
263 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
266 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
268 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
269 ; CHECK: vpbroadcastd
270 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
273 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
275 define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
276 ; CHECK: vpbroadcastq
277 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
280 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
282 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
283 ; CHECK: vpbroadcastq
284 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
287 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
289 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
290 ; CHECK: movw $-1, %ax
293 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
297 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
299 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
300 ; CHECK: movb $-1, %al
303 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
307 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
309 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
311 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
315 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
317 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
321 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
322 ; CHECK: movw $-1, %ax
325 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
329 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
331 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
332 ; CHECK: movb $-1, %al
335 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
339 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
342 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
344 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
348 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
350 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
354 define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
355 ; CHECK-LABEL: test_ctlz_d
357 %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
361 declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly
363 define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
364 ; CHECK-LABEL: test_ctlz_q
366 %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
370 declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
372 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
373 ; CHECK: vblendmps %zmm1, %zmm0
374 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
375 ret <16 x float> %res
378 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
380 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
381 ; CHECK: vblendmpd %zmm1, %zmm0
382 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
383 ret <8 x double> %res
386 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
387 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
388 ; CHECK: vblendmpd (%
389 %b = load <8 x double>, <8 x double>* %ptr
390 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
391 ret <8 x double> %res
393 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
395 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
397 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
400 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
402 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
404 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
407 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
409 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
410 ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
411 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
414 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
416 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
417 ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
418 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
421 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
424 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
426 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
427 <8 x double>zeroinitializer, i8 -1, i32 4)
428 ret <8 x double> %res
430 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
431 <8 x double>, i8, i32)
433 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
435 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
436 <8 x double>zeroinitializer, i8 -1, i32 4)
437 ret <8 x double> %res
439 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
440 <8 x double>, i8, i32)
442 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
444 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_512
447 ; CHECK: vpabsd{{.*}}{%k1}
448 define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
449 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
450 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
451 %res2 = add <16 x i32> %res, %res1
455 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
457 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_512
460 ; CHECK: vpabsq{{.*}}{%k1}
461 define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
462 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
463 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
464 %res2 = add <8 x i64> %res, %res1
468 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
469 ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
470 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
473 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
475 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
476 ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
477 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
480 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
482 define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
483 ; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
484 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
488 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
490 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
491 ; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
492 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
496 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
498 define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
499 ; CHECK-LABEL: test_mask_store_aligned_ps:
501 ; CHECK-NEXT: kmovw %esi, %k1
502 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
504 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
508 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
510 define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
511 ; CHECK-LABEL: test_mask_store_aligned_pd:
513 ; CHECK-NEXT: kmovw %esi, %k1
514 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
516 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
520 declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
522 define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
523 ; CHECK-LABEL: test_maskz_load_aligned_ps:
525 ; CHECK-NEXT: kmovw %esi, %k1
526 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
528 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
529 ret <16 x float> %res
532 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
534 define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
535 ; CHECK-LABEL: test_maskz_load_aligned_pd:
537 ; CHECK-NEXT: kmovw %esi, %k1
538 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
540 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
541 ret <8 x double> %res
544 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
546 define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
547 ; CHECK-LABEL: test_load_aligned_ps:
549 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
551 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
552 ret <16 x float> %res
555 define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
556 ; CHECK-LABEL: test_load_aligned_pd:
558 ; CHECK-NEXT: vmovapd (%rdi), %zmm0
560 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
561 ret <8 x double> %res
564 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
566 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
567 ; CHECK-LABEL: test_valign_q:
568 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
569 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
573 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
574 ; CHECK-LABEL: test_mask_valign_q:
575 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
576 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
580 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
582 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
583 ; CHECK-LABEL: test_maskz_valign_d:
584 ; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
585 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
589 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
591 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
592 ; CHECK-LABEL: test_mask_store_ss
593 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
594 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
598 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
600 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
601 ; CHECK-LABEL: test_pcmpeq_d
602 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
603 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
607 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
608 ; CHECK-LABEL: test_mask_pcmpeq_d
609 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
610 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
614 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
616 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
617 ; CHECK-LABEL: test_pcmpeq_q
618 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
619 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
623 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
624 ; CHECK-LABEL: test_mask_pcmpeq_q
625 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
626 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
630 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
632 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
633 ; CHECK-LABEL: test_pcmpgt_d
634 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
635 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
639 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
640 ; CHECK-LABEL: test_mask_pcmpgt_d
641 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
642 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
646 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
648 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
649 ; CHECK-LABEL: test_pcmpgt_q
650 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
651 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
655 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
656 ; CHECK-LABEL: test_mask_pcmpgt_q
657 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
658 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
662 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
664 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
665 ; CHECK-LABEL: test_cmp_d_512
666 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
667 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
668 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
669 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
670 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
671 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
672 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
673 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
674 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
675 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
676 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
677 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
678 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
679 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
680 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
681 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
682 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
683 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
684 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
685 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
686 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
687 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
688 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
689 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
693 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
694 ; CHECK-LABEL: test_mask_cmp_d_512
695 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
696 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
697 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
698 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
699 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
700 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
701 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
702 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
703 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
704 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
705 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
706 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
707 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
708 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
709 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
710 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
711 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
712 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
713 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
714 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
715 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
716 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
717 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
718 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
722 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
724 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
725 ; CHECK-LABEL: test_ucmp_d_512
726 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
727 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
728 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
729 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
730 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
731 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
732 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
733 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
734 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
735 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
736 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
737 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
738 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
739 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
740 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
741 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
742 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
743 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
744 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
745 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
746 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
747 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
748 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
749 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
753 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
754 ; CHECK-LABEL: test_mask_ucmp_d_512
755 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
756 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
757 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
758 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
759 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
760 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
761 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
762 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
763 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
764 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
765 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
766 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
767 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
768 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
769 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
770 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
771 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
772 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
773 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
774 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
775 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
776 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
777 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
778 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
782 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
784 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
785 ; CHECK-LABEL: test_cmp_q_512
786 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
787 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
788 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
789 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
790 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
791 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
792 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
793 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
794 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
795 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
796 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
797 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
798 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
799 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
800 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
801 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
802 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
803 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
804 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
805 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
806 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
807 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
808 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
809 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
813 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
814 ; CHECK-LABEL: test_mask_cmp_q_512
815 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
816 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
817 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
818 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
819 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
820 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
821 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
822 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
823 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
824 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
825 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
826 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
827 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
828 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
829 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
830 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
831 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
832 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
833 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
834 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
835 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
836 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
837 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
838 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
842 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
844 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
845 ; CHECK-LABEL: test_ucmp_q_512
846 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
847 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
848 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
849 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
850 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
851 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
852 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
853 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
854 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
855 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
856 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
857 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
858 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
859 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
860 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
861 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
862 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
863 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
864 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
865 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
866 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
867 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
868 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
869 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
873 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
874 ; CHECK-LABEL: test_mask_ucmp_q_512
875 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
876 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
877 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
878 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
879 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
880 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
881 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
882 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
883 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
884 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
885 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
886 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
887 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
888 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
889 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
890 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
891 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
892 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
893 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
894 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
895 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
896 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
897 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
898 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
902 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
904 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
905 ; CHECK-LABEL: test_mask_vextractf32x4:
906 ; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
907 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
911 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
913 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
914 ; CHECK-LABEL: test_mask_vextracti64x4:
915 ; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
916 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
920 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
922 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
923 ; CHECK-LABEL: test_maskz_vextracti32x4:
924 ; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
925 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
929 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
931 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
932 ; CHECK-LABEL: test_vextractf64x4:
933 ; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
934 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
935 ret <4 x double> %res
938 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
940 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
941 ; CHECK-LABEL: test_x86_avx512_pslli_d
943 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
947 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
948 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
949 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
950 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
954 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
955 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
956 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
957 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
961 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
963 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
964 ; CHECK-LABEL: test_x86_avx512_pslli_q
966 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
970 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
971 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
972 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
973 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
977 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
978 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
979 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
980 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
984 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
986 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
987 ; CHECK-LABEL: test_x86_avx512_psrli_d
989 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
993 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
994 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
995 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
996 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1000 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1001 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
1002 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1003 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1007 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1009 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1010 ; CHECK-LABEL: test_x86_avx512_psrli_q
1012 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1016 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1017 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
1018 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
1019 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1023 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1024 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
1025 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1026 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1030 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1032 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1033 ; CHECK-LABEL: test_x86_avx512_psrai_d
1035 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1039 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1040 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
1041 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
1042 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1046 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1047 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
1048 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1049 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1053 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1055 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1056 ; CHECK-LABEL: test_x86_avx512_psrai_q
1058 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1062 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1063 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
1064 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
1065 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1069 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1070 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
1071 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1072 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1076 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1078 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
1079 ; CHECK-LABEL: test_x86_avx512_psll_d
1081 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1085 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1086 ; CHECK-LABEL: test_x86_avx512_mask_psll_d
1087 ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
1088 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1092 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1093 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
1094 ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
1095 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1099 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1101 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
1102 ; CHECK-LABEL: test_x86_avx512_psll_q
1104 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1108 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1109 ; CHECK-LABEL: test_x86_avx512_mask_psll_q
1110 ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
1111 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1115 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1116 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
1117 ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
1118 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1122 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1124 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
1125 ; CHECK-LABEL: test_x86_avx512_psrl_d
1127 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1131 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1132 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
1133 ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
1134 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1138 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1139 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
1140 ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
1141 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1145 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1147 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
1148 ; CHECK-LABEL: test_x86_avx512_psrl_q
1150 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1154 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1155 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
1156 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
1157 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1161 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1162 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
1163 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
1164 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1168 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1170 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
1171 ; CHECK-LABEL: test_x86_avx512_psra_d
1173 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1177 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1178 ; CHECK-LABEL: test_x86_avx512_mask_psra_d
1179 ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
1180 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1184 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1185 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
1186 ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
1187 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1191 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1193 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
1194 ; CHECK-LABEL: test_x86_avx512_psra_q
1196 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1200 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1201 ; CHECK-LABEL: test_x86_avx512_mask_psra_q
1202 ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
1203 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1207 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1208 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
1209 ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
1210 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1214 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1216 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
1217 ; CHECK-LABEL: test_x86_avx512_psllv_d
1219 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1223 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1224 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
1225 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
1226 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1230 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1231 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
1232 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1233 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1237 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1239 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
1240 ; CHECK-LABEL: test_x86_avx512_psllv_q
1242 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1246 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1247 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
1248 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
1249 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1253 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1254 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
1255 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1256 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1260 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1263 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
1264 ; CHECK-LABEL: test_x86_avx512_psrav_d
1266 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1270 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1271 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
1272 ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
1273 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1277 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1278 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
1279 ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
1280 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1284 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1286 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
1287 ; CHECK-LABEL: test_x86_avx512_psrav_q
1289 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1293 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1294 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
1295 ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
1296 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1300 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1301 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
1302 ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
1303 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1307 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1309 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
1310 ; CHECK-LABEL: test_x86_avx512_psrlv_d
1312 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1316 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1317 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
1318 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
1319 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1323 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1324 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
1325 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1326 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1330 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1332 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
1333 ; CHECK-LABEL: test_x86_avx512_psrlv_q
1335 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1339 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1340 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
1341 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
1342 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1346 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1347 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
1348 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1349 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1353 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1355 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
1356 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
1358 %b = load <8 x i64>, <8 x i64>* %ptr
1359 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1363 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1364 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1365 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
1367 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
1368 ; CHECK-LABEL: test_vsubps_rn
1369 ; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
1370 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1371 <16 x float> zeroinitializer, i16 -1, i32 0)
1372 ret <16 x float> %res
1375 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
1376 ; CHECK-LABEL: test_vsubps_rd
1377 ; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
1378 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1379 <16 x float> zeroinitializer, i16 -1, i32 1)
1380 ret <16 x float> %res
1383 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
1384 ; CHECK-LABEL: test_vsubps_ru
1385 ; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
1386 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1387 <16 x float> zeroinitializer, i16 -1, i32 2)
1388 ret <16 x float> %res
1391 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
1392 ; CHECK-LABEL: test_vsubps_rz
1393 ; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
1394 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1395 <16 x float> zeroinitializer, i16 -1, i32 3)
1396 ret <16 x float> %res
1399 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
1400 ; CHECK-LABEL: test_vmulps_rn
1401 ; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
1402 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1403 <16 x float> zeroinitializer, i16 -1, i32 0)
1404 ret <16 x float> %res
1407 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
1408 ; CHECK-LABEL: test_vmulps_rd
1409 ; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
1410 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1411 <16 x float> zeroinitializer, i16 -1, i32 1)
1412 ret <16 x float> %res
1415 define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
1416 ; CHECK-LABEL: test_vmulps_ru
1417 ; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
1418 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1419 <16 x float> zeroinitializer, i16 -1, i32 2)
1420 ret <16 x float> %res
1423 define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
1424 ; CHECK-LABEL: test_vmulps_rz
1425 ; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
1426 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1427 <16 x float> zeroinitializer, i16 -1, i32 3)
1428 ret <16 x float> %res
; Zero-masked vmulps with static rounding: a variable i16 %mask plus a
; zeroinitializer passthru must select the {%k1} {z} (zeroing) form. The four
; functions cover rounding operands 0..3 ({rn,rd,ru,rz}-sae); the expected
; EVEX prefix byte steps 0x99/0xb9/0xd9/0xf9 accordingly.
1432 define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1433 ; CHECK-LABEL: test_vmulps_mask_rn
1434 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
1435 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1436 <16 x float> zeroinitializer, i16 %mask, i32 0)
1437 ret <16 x float> %res
1440 define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1441 ; CHECK-LABEL: test_vmulps_mask_rd
1442 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
1443 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1444 <16 x float> zeroinitializer, i16 %mask, i32 1)
1445 ret <16 x float> %res
1448 define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1449 ; CHECK-LABEL: test_vmulps_mask_ru
1450 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
1451 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1452 <16 x float> zeroinitializer, i16 %mask, i32 2)
1453 ret <16 x float> %res
1456 define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1457 ; CHECK-LABEL: test_vmulps_mask_rz
1458 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
1459 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1460 <16 x float> zeroinitializer, i16 %mask, i32 3)
1461 ret <16 x float> %res
; Merge-masked vmulps with static rounding: a non-zero %passthru third operand
; must select the merging {%k1} form (no {z} suffix). Note the ModRM byte in
; the expected encodings is 0xd1 rather than 0xc1 — the passthru occupies a
; third register. Rounding operands 0..3 as in the zero-masked group above.
1464 ;; With Passthru value
1465 define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1466 ; CHECK-LABEL: test_vmulps_mask_passthru_rn
1467 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
1468 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1469 <16 x float> %passthru, i16 %mask, i32 0)
1470 ret <16 x float> %res
1473 define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1474 ; CHECK-LABEL: test_vmulps_mask_passthru_rd
1475 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
1476 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1477 <16 x float> %passthru, i16 %mask, i32 1)
1478 ret <16 x float> %res
1481 define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1482 ; CHECK-LABEL: test_vmulps_mask_passthru_ru
1483 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
1484 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1485 <16 x float> %passthru, i16 %mask, i32 2)
1486 ret <16 x float> %res
1489 define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1490 ; CHECK-LABEL: test_vmulps_mask_passthru_rz
1491 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
1492 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1493 <16 x float> %passthru, i16 %mask, i32 3)
1494 ret <16 x float> %res
; Zero-masked vmulpd (double-precision) counterpart of the vmulps group:
; @llvm.x86.avx512.mask.mul.pd.512 with an i8 mask (8 lanes), zeroinitializer
; passthru, and rounding operands 0..3. Expects the {%k1} {z} form with the
; pd prefix byte 0xfd in the encodings.
1498 define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1499 ; CHECK-LABEL: test_vmulpd_mask_rn
1500 ; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
1501 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1502 <8 x double> zeroinitializer, i8 %mask, i32 0)
1503 ret <8 x double> %res
1506 define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1507 ; CHECK-LABEL: test_vmulpd_mask_rd
1508 ; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
1509 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1510 <8 x double> zeroinitializer, i8 %mask, i32 1)
1511 ret <8 x double> %res
1514 define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1515 ; CHECK-LABEL: test_vmulpd_mask_ru
1516 ; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
1517 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1518 <8 x double> zeroinitializer, i8 %mask, i32 2)
1519 ret <8 x double> %res
1522 define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1523 ; CHECK-LABEL: test_vmulpd_mask_rz
1524 ; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
1525 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1526 <8 x double> zeroinitializer, i8 %mask, i32 3)
1527 ret <8 x double> %res
; Masked 512-bit dword logic intrinsics: pxor.d / por.d / pand.d. Each pair
; tests the unmasked form (all-ones i16 mask, zero passthru -> plain vpxord/
; vpord/vpandd) and the merge-masked form (variable mask, %passThru -> the
; {%k1} form writing the passthru register, ModRM 0xd1).
1530 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
1531 ;CHECK-LABEL: test_xor_epi32
1532 ;CHECK: vpxord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
1533 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1534 ret < 16 x i32> %res
1537 define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1538 ;CHECK-LABEL: test_mask_xor_epi32
1539 ;CHECK: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
1540 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1541 ret < 16 x i32> %res
1544 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1546 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
1547 ;CHECK-LABEL: test_or_epi32
1548 ;CHECK: vpord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
1549 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1550 ret < 16 x i32> %res
1553 define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1554 ;CHECK-LABEL: test_mask_or_epi32
1555 ;CHECK: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
1556 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1557 ret < 16 x i32> %res
1560 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1562 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
1563 ;CHECK-LABEL: test_and_epi32
1564 ;CHECK: vpandd {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
1565 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1566 ret < 16 x i32> %res
1569 define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1570 ;CHECK-LABEL: test_mask_and_epi32
1571 ;CHECK: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
1572 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1573 ret < 16 x i32> %res
1576 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Masked 512-bit qword logic intrinsics: pxor.q / por.q / pand.q, the i64/i8
; counterparts of the dword group above (vpxorq/vporq/vpandq, pd prefix byte
; 0xfd in the encodings).
; NOTE(review): the numbering gaps (e.g. 1581 -> 1585) indicate the `ret` and
; closing-brace lines of these functions were dropped from this listing; the
; visible call lines are otherwise intact.
1578 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
1579 ;CHECK-LABEL: test_xor_epi64
1580 ;CHECK: vpxorq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
1581 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1585 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1586 ;CHECK-LABEL: test_mask_xor_epi64
1587 ;CHECK: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
1588 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1592 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1594 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
1595 ;CHECK-LABEL: test_or_epi64
1596 ;CHECK: vporq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
1597 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1601 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1602 ;CHECK-LABEL: test_mask_or_epi64
1603 ;CHECK: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
1604 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1608 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1610 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
1611 ;CHECK-LABEL: test_and_epi64
1612 ;CHECK: vpandq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
1613 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1617 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1618 ;CHECK-LABEL: test_mask_and_epi64
1619 ;CHECK: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
1620 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1624 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; mask.padd.d.512 (vpaddd) form matrix. Suffix convention used throughout the
; rest of this file:
;   rr   = reg/reg, unmasked        rrk  = reg/reg, merge-masked
;   rrkz = reg/reg, zero-masked     rm*  = memory operand (load folded)
;   rmb* = scalar load broadcast via insertelement+shufflevector ({1to16})
; Broadcast forms expect the EVEX.b bit set (0x58/0x59/0xd9 prefix bytes).
1627 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1628 ;CHECK-LABEL: test_mask_add_epi32_rr
1629 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
1630 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1631 ret < 16 x i32> %res
1634 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1635 ;CHECK-LABEL: test_mask_add_epi32_rrk
1636 ;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
1637 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1638 ret < 16 x i32> %res
1641 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1642 ;CHECK-LABEL: test_mask_add_epi32_rrkz
1643 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
1644 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1645 ret < 16 x i32> %res
1648 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1649 ;CHECK-LABEL: test_mask_add_epi32_rm
1650 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
1651 %b = load <16 x i32>, <16 x i32>* %ptr_b
1652 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1653 ret < 16 x i32> %res
1656 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1657 ;CHECK-LABEL: test_mask_add_epi32_rmk
1658 ;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
1659 %b = load <16 x i32>, <16 x i32>* %ptr_b
1660 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1661 ret < 16 x i32> %res
1664 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1665 ;CHECK-LABEL: test_mask_add_epi32_rmkz
1666 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
1667 %b = load <16 x i32>, <16 x i32>* %ptr_b
1668 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1669 ret < 16 x i32> %res
1672 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1673 ;CHECK-LABEL: test_mask_add_epi32_rmb
1674 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
1675 %q = load i32, i32* %ptr_b
1676 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1677 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1678 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1679 ret < 16 x i32> %res
1682 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1683 ;CHECK-LABEL: test_mask_add_epi32_rmbk
1684 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
1685 %q = load i32, i32* %ptr_b
1686 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1687 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1688 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1689 ret < 16 x i32> %res
1692 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1693 ;CHECK-LABEL: test_mask_add_epi32_rmbkz
1694 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
1695 %q = load i32, i32* %ptr_b
1696 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1697 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1698 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1699 ret < 16 x i32> %res
1702 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; mask.psub.d.512 (vpsubd) form matrix — identical structure to the padd.d
; group above (rr/rrk/rrkz/rm/rmk/rmkz/rmb/rmbk/rmbkz), opcode byte 0xfa.
1704 define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1705 ;CHECK-LABEL: test_mask_sub_epi32_rr
1706 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
1707 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1708 ret < 16 x i32> %res
1711 define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1712 ;CHECK-LABEL: test_mask_sub_epi32_rrk
1713 ;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
1714 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1715 ret < 16 x i32> %res
1718 define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1719 ;CHECK-LABEL: test_mask_sub_epi32_rrkz
1720 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
1721 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1722 ret < 16 x i32> %res
1725 define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1726 ;CHECK-LABEL: test_mask_sub_epi32_rm
1727 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
1728 %b = load <16 x i32>, <16 x i32>* %ptr_b
1729 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1730 ret < 16 x i32> %res
1733 define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1734 ;CHECK-LABEL: test_mask_sub_epi32_rmk
1735 ;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
1736 %b = load <16 x i32>, <16 x i32>* %ptr_b
1737 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1738 ret < 16 x i32> %res
1741 define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1742 ;CHECK-LABEL: test_mask_sub_epi32_rmkz
1743 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
1744 %b = load <16 x i32>, <16 x i32>* %ptr_b
1745 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1746 ret < 16 x i32> %res
1749 define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1750 ;CHECK-LABEL: test_mask_sub_epi32_rmb
1751 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
1752 %q = load i32, i32* %ptr_b
1753 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1754 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1755 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1756 ret < 16 x i32> %res
1759 define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1760 ;CHECK-LABEL: test_mask_sub_epi32_rmbk
1761 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
1762 %q = load i32, i32* %ptr_b
1763 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1764 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1765 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1766 ret < 16 x i32> %res
1769 define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1770 ;CHECK-LABEL: test_mask_sub_epi32_rmbkz
1771 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
1772 %q = load i32, i32* %ptr_b
1773 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1774 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1775 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1776 ret < 16 x i32> %res
1779 declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; mask.padd.q.512 (vpaddq) form matrix — qword version of the padd.d group:
; i8 mask, {1to8} broadcast from an i64 scalar, opcode byte 0xd4.
; NOTE(review): `ret`/`}` lines are absent from this listing (numbering gaps),
; as elsewhere in this chunk.
1781 define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
1782 ;CHECK-LABEL: test_mask_add_epi64_rr
1783 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
1784 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1788 define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1789 ;CHECK-LABEL: test_mask_add_epi64_rrk
1790 ;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
1791 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1795 define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1796 ;CHECK-LABEL: test_mask_add_epi64_rrkz
1797 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
1798 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1802 define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
1803 ;CHECK-LABEL: test_mask_add_epi64_rm
1804 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
1805 %b = load <8 x i64>, <8 x i64>* %ptr_b
1806 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1810 define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1811 ;CHECK-LABEL: test_mask_add_epi64_rmk
1812 ;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
1813 %b = load <8 x i64>, <8 x i64>* %ptr_b
1814 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1818 define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
1819 ;CHECK-LABEL: test_mask_add_epi64_rmkz
1820 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
1821 %b = load <8 x i64>, <8 x i64>* %ptr_b
1822 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1826 define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
1827 ;CHECK-LABEL: test_mask_add_epi64_rmb
1828 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
1829 %q = load i64, i64* %ptr_b
1830 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1831 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1832 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1836 define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1837 ;CHECK-LABEL: test_mask_add_epi64_rmbk
1838 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
1839 %q = load i64, i64* %ptr_b
1840 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1841 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1842 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1846 define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
1847 ;CHECK-LABEL: test_mask_add_epi64_rmbkz
1848 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
1849 %q = load i64, i64* %ptr_b
1850 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1851 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1852 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1856 declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; mask.psub.q.512 (vpsubq) form matrix — same nine forms as padd.q above,
; opcode byte 0xfb.
1858 define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
1859 ;CHECK-LABEL: test_mask_sub_epi64_rr
1860 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
1861 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1865 define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1866 ;CHECK-LABEL: test_mask_sub_epi64_rrk
1867 ;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
1868 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1872 define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1873 ;CHECK-LABEL: test_mask_sub_epi64_rrkz
1874 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
1875 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1879 define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
1880 ;CHECK-LABEL: test_mask_sub_epi64_rm
1881 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
1882 %b = load <8 x i64>, <8 x i64>* %ptr_b
1883 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1887 define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1888 ;CHECK-LABEL: test_mask_sub_epi64_rmk
1889 ;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
1890 %b = load <8 x i64>, <8 x i64>* %ptr_b
1891 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1895 define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
1896 ;CHECK-LABEL: test_mask_sub_epi64_rmkz
1897 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
1898 %b = load <8 x i64>, <8 x i64>* %ptr_b
1899 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1903 define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
1904 ;CHECK-LABEL: test_mask_sub_epi64_rmb
1905 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
1906 %q = load i64, i64* %ptr_b
1907 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1908 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1909 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1913 define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1914 ;CHECK-LABEL: test_mask_sub_epi64_rmbk
1915 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
1916 %q = load i64, i64* %ptr_b
1917 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1918 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1919 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1923 define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
1924 ;CHECK-LABEL: test_mask_sub_epi64_rmbkz
1925 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
1926 %q = load i64, i64* %ptr_b
1927 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1928 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1929 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1933 declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; mask.pmul.dq.512 (vpmuldq, signed widening 32x32->64 multiply) form matrix.
; Inputs are <16 x i32>, result/passthru are <8 x i64> with an i8 mask. The
; broadcast (rmb*) forms load one i64, splat it across <8 x i64>, then bitcast
; to <16 x i32> — matching the instruction's {1to8} qword broadcast.
1935 define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1936 ;CHECK-LABEL: test_mask_mul_epi32_rr
1937 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
1938 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
1942 define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
1943 ;CHECK-LABEL: test_mask_mul_epi32_rrk
1944 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
1945 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
1949 define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
1950 ;CHECK-LABEL: test_mask_mul_epi32_rrkz
1951 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
1952 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
1956 define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1957 ;CHECK-LABEL: test_mask_mul_epi32_rm
1958 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
1959 %b = load <16 x i32>, <16 x i32>* %ptr_b
1960 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
1964 define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1965 ;CHECK-LABEL: test_mask_mul_epi32_rmk
1966 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
1967 %b = load <16 x i32>, <16 x i32>* %ptr_b
1968 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
1972 define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
1973 ;CHECK-LABEL: test_mask_mul_epi32_rmkz
1974 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
1975 %b = load <16 x i32>, <16 x i32>* %ptr_b
1976 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
1980 define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
1981 ;CHECK-LABEL: test_mask_mul_epi32_rmb
1982 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
1983 %q = load i64, i64* %ptr_b
1984 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1985 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1986 %b = bitcast <8 x i64> %b64 to <16 x i32>
1987 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
1991 define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1992 ;CHECK-LABEL: test_mask_mul_epi32_rmbk
1993 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
1994 %q = load i64, i64* %ptr_b
1995 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1996 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1997 %b = bitcast <8 x i64> %b64 to <16 x i32>
1998 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2002 define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
2003 ;CHECK-LABEL: test_mask_mul_epi32_rmbkz
2004 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
2005 %q = load i64, i64* %ptr_b
2006 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2007 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2008 %b = bitcast <8 x i64> %b64 to <16 x i32>
2009 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2013 declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; mask.pmulu.dq.512 (vpmuludq, unsigned widening 32x32->64 multiply) form
; matrix — mirrors the signed pmul.dq group above, opcode byte 0xf4.
2015 define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
2016 ;CHECK-LABEL: test_mask_mul_epu32_rr
2017 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
2018 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2022 define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
2023 ;CHECK-LABEL: test_mask_mul_epu32_rrk
2024 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
2025 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2029 define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
2030 ;CHECK-LABEL: test_mask_mul_epu32_rrkz
2031 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
2032 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2036 define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2037 ;CHECK-LABEL: test_mask_mul_epu32_rm
2038 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
2039 %b = load <16 x i32>, <16 x i32>* %ptr_b
2040 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2044 define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2045 ;CHECK-LABEL: test_mask_mul_epu32_rmk
2046 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
2047 %b = load <16 x i32>, <16 x i32>* %ptr_b
2048 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2052 define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
2053 ;CHECK-LABEL: test_mask_mul_epu32_rmkz
2054 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
2055 %b = load <16 x i32>, <16 x i32>* %ptr_b
2056 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2060 define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
2061 ;CHECK-LABEL: test_mask_mul_epu32_rmb
2062 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
2063 %q = load i64, i64* %ptr_b
2064 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2065 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2066 %b = bitcast <8 x i64> %b64 to <16 x i32>
2067 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2071 define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2072 ;CHECK-LABEL: test_mask_mul_epu32_rmbk
2073 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
2074 %q = load i64, i64* %ptr_b
2075 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2076 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2077 %b = bitcast <8 x i64> %b64 to <16 x i32>
2078 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2082 define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
2083 ;CHECK-LABEL: test_mask_mul_epu32_rmbkz
2084 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
2085 %q = load i64, i64* %ptr_b
2086 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2087 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2088 %b = bitcast <8 x i64> %b64 to <16 x i32>
2089 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2093 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; Masked vpmulld (llvm.x86.avx512.mask.pmull.d.512): register-register
; (rr/rrk/rrkz), memory (rm/rmk/rmkz), and {1to16} broadcast (rmb/rmbk/rmbkz)
; variants, each pinning asm plus the EVEX encoding bytes.
2095 define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
2096 ;CHECK-LABEL: test_mask_mullo_epi32_rr_512
2097 ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
2098 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2102 define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2103 ;CHECK-LABEL: test_mask_mullo_epi32_rrk_512
2104 ;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
2105 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2106 ret < 16 x i32> %res
2109 define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2110 ;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512
2111 ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
2112 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2113 ret < 16 x i32> %res
2116 define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
2117 ;CHECK-LABEL: test_mask_mullo_epi32_rm_512
2118 ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
2119 %b = load <16 x i32>, <16 x i32>* %ptr_b
2120 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2121 ret < 16 x i32> %res
2124 define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2125 ;CHECK-LABEL: test_mask_mullo_epi32_rmk_512
2126 ;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
2127 %b = load <16 x i32>, <16 x i32>* %ptr_b
2128 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2129 ret < 16 x i32> %res
2132 define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2133 ;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512
2134 ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
2135 %b = load <16 x i32>, <16 x i32>* %ptr_b
2136 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2137 ret < 16 x i32> %res
; Broadcast forms: scalar i32 splatted via insertelement+shufflevector should
; fold into the {1to16} embedded-broadcast memory operand.
2140 define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
2141 ;CHECK-LABEL: test_mask_mullo_epi32_rmb_512
2142 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
2143 %q = load i32, i32* %ptr_b
2144 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2145 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2146 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2147 ret < 16 x i32> %res
2150 define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2151 ;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512
2152 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
2153 %q = load i32, i32* %ptr_b
2154 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2155 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2156 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2157 ret < 16 x i32> %res
2160 define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2161 ;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512
2162 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
2163 %q = load i32, i32* %ptr_b
2164 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2165 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2166 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2167 ret < 16 x i32> %res
2170 declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; vaddps with an explicit rounding-mode operand.  The trailing i32 of the
; intrinsic selects the rounding override: 0=rn, 1=rd, 2=ru, 3=rz (each with
; sae), 4=current rounding mode.  Zero-masked form: destination under {%k1} {z}.
2172 define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2173 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae
2174 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2175 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2176 ret <16 x float> %res
; Zero-masked vaddps with the round-down ({rd-sae}) override.  The pattern
; must include the {%k1} {z} decorations so the runtime mask operand is
; actually verified; without them this test is weaker than its rn/ru/rz
; siblings (FileCheck's substring match would accept the masked output anyway).
2178 define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2179 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae
2180 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2181 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
2182 ret <16 x float> %res
; Remaining vaddps rounding-mode coverage: zero-masked ru/rz/current, then
; merge-masked (result into %zmm2 under {%k1}), then unmasked forms.
2184 define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2185 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae
2186 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2187 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2188 ret <16 x float> %res
2191 define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2192 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae
2193 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2194 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2195 ret <16 x float> %res
; Rounding operand 4 = use the current rounding mode: no {..-sae} suffix.
2199 define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2200 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_current
2201 ;CHECK: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
2202 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2203 ret <16 x float> %res
2206 define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2207 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae
2208 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2209 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2210 ret <16 x float> %res
2212 define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2213 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae
2214 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2215 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2216 ret <16 x float> %res
2218 define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2219 ;CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae
2220 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2221 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2222 ret <16 x float> %res
2225 define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2226 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae
2227 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2228 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2229 ret <16 x float> %res
2233 define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2234 ;CHECK-LABEL: test_mm512_mask_add_round_ps_current
2235 ;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1}
2236 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2237 ret <16 x float> %res
; Unmasked forms: the %mask parameter is intentionally unused; the call
; hard-codes an all-ones mask (i16 -1), so no mask annotation is expected.
2241 define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2242 ;CHECK-LABEL: test_mm512_add_round_ps_rn_sae
2243 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
2244 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2245 ret <16 x float> %res
2247 define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2248 ;CHECK-LABEL: test_mm512_add_round_ps_rd_sae
2249 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
2250 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2251 ret <16 x float> %res
2253 define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2254 ;CHECK-LABEL: test_mm512_add_round_ps_ru_sae
2255 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
2256 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2257 ret <16 x float> %res
2260 define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2261 ;CHECK-LABEL: test_mm512_add_round_ps_rz_sae
2262 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
2263 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2264 ret <16 x float> %res
2267 define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2268 ;CHECK-LABEL: test_mm512_add_round_ps_current
2269 ;CHECK: vaddps %zmm1, %zmm0, %zmm0
2270 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2271 ret <16 x float> %res
2273 declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; vsubps rounding-mode tests: merge-masked forms write %zmm2 under {%k1};
; unmasked forms pass an all-ones mask so no mask annotation is expected.
2275 define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2276 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae
2277 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2278 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2279 ret <16 x float> %res
2281 define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2282 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae
2283 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2284 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2285 ret <16 x float> %res
2287 define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2288 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae
2289 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2290 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2291 ret <16 x float> %res
2294 define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2295 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae
2296 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2297 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2298 ret <16 x float> %res
2302 define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2303 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_current
2304 ;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1}
2305 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2306 ret <16 x float> %res
2309 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2310 ;CHECK-LABEL: test_mm512_sub_round_ps_rn_sae
2311 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
2312 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2313 ret <16 x float> %res
2315 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2316 ;CHECK-LABEL: test_mm512_sub_round_ps_rd_sae
2317 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
2318 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2319 ret <16 x float> %res
2321 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2322 ;CHECK-LABEL: test_mm512_sub_round_ps_ru_sae
2323 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
2324 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2325 ret <16 x float> %res
2328 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2329 ;CHECK-LABEL: test_mm512_sub_round_ps_rz_sae
2330 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
2331 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2332 ret <16 x float> %res
2335 define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2336 ;CHECK-LABEL: test_mm512_sub_round_ps_current
2337 ;CHECK: vsubps %zmm1, %zmm0, %zmm0
2338 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2339 ret <16 x float> %res
; vdivps rounding-mode tests.  Zero-masked round-to-nearest form: destination
; is written under {%k1} {z}.
2342 define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2343 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae
2344 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2345 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2346 ret <16 x float> %res
; Zero-masked vdivps with the round-down ({rd-sae}) override.  The pattern
; must include the {%k1} {z} decorations so the runtime mask operand is
; actually verified; without them this test is weaker than its rn/ru/rz
; siblings (FileCheck's substring match would accept the masked output anyway).
2348 define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2349 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae
2350 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2351 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
2352 ret <16 x float> %res
; Remaining vdivps rounding-mode coverage: zero-masked ru/rz/current,
; merge-masked into %zmm2 under {%k1}, then unmasked (all-ones mask) forms.
2354 define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2355 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae
2356 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2357 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2358 ret <16 x float> %res
2361 define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2362 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae
2363 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2364 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2365 ret <16 x float> %res
2369 define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2370 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_current
2371 ;CHECK: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
2372 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2373 ret <16 x float> %res
2376 define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2377 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae
2378 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2379 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2380 ret <16 x float> %res
2382 define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2383 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae
2384 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2385 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2386 ret <16 x float> %res
2388 define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2389 ;CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae
2390 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2391 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2392 ret <16 x float> %res
2395 define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2396 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae
2397 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2398 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2399 ret <16 x float> %res
2403 define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2404 ;CHECK-LABEL: test_mm512_mask_div_round_ps_current
2405 ;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1}
2406 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2407 ret <16 x float> %res
2411 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2412 ;CHECK-LABEL: test_mm512_div_round_ps_rn_sae
2413 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
2414 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2415 ret <16 x float> %res
2417 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2418 ;CHECK-LABEL: test_mm512_div_round_ps_rd_sae
2419 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
2420 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2421 ret <16 x float> %res
2423 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2424 ;CHECK-LABEL: test_mm512_div_round_ps_ru_sae
2425 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
2426 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2427 ret <16 x float> %res
2430 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2431 ;CHECK-LABEL: test_mm512_div_round_ps_rz_sae
2432 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
2433 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2434 ret <16 x float> %res
2437 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2438 ;CHECK-LABEL: test_mm512_div_round_ps_current
2439 ;CHECK: vdivps %zmm1, %zmm0, %zmm0
2440 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2441 ret <16 x float> %res
2443 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; vminps tests.  min/max have no rounding behavior, so only two modes are
; exercised: 8 = suppress-all-exceptions ({sae}) and 4 = current mode.
2445 define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2446 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_sae
2447 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2448 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2449 ret <16 x float> %res
2452 define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2453 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_current
2454 ;CHECK: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
2455 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2456 ret <16 x float> %res
2459 define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2460 ;CHECK-LABEL: test_mm512_mask_min_round_ps_sae
2461 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2462 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2463 ret <16 x float> %res
2466 define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2467 ;CHECK-LABEL: test_mm512_mask_min_round_ps_current
2468 ;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1}
2469 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2470 ret <16 x float> %res
2473 define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2474 ;CHECK-LABEL: test_mm512_min_round_ps_sae
2475 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0
2476 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2477 ret <16 x float> %res
2480 define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2481 ;CHECK-LABEL: test_mm512_min_round_ps_current
2482 ;CHECK: vminps %zmm1, %zmm0, %zmm0
2483 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2484 ret <16 x float> %res
2486 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; vmaxps tests, mirroring the vminps coverage above: {sae} (8) and current
; mode (4), in zero-masked, merge-masked, and unmasked variants.
2488 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2489 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_sae
2490 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2491 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2492 ret <16 x float> %res
2495 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2496 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_current
2497 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
2498 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2499 ret <16 x float> %res
2502 define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2503 ;CHECK-LABEL: test_mm512_mask_max_round_ps_sae
2504 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2505 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2506 ret <16 x float> %res
2509 define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2510 ;CHECK-LABEL: test_mm512_mask_max_round_ps_current
2511 ;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
2512 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2513 ret <16 x float> %res
2516 define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2517 ;CHECK-LABEL: test_mm512_max_round_ps_sae
2518 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0
2519 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2520 ret <16 x float> %res
2523 define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2524 ;CHECK-LABEL: test_mm512_max_round_ps_current
2525 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0
2526 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2527 ret <16 x float> %res
2529 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; Scalar vaddss with rounding override (llvm.x86.avx512.mask.add.ss.round):
; merge-masked rn/rd/ru/rz/current, plus zero-masked and unmasked rn forms.
2531 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2533 define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2534 ; CHECK-LABEL: test_mask_add_ss_rn
2535 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2536 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
2537 ret <4 x float> %res
2540 define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2541 ; CHECK-LABEL: test_mask_add_ss_rd
2542 ; CHECK: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2543 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
2544 ret <4 x float> %res
2547 define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2548 ; CHECK-LABEL: test_mask_add_ss_ru
2549 ; CHECK: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2550 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
2551 ret <4 x float> %res
2554 define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2555 ; CHECK-LABEL: test_mask_add_ss_rz
2556 ; CHECK: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2557 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
2558 ret <4 x float> %res
2561 define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2562 ; CHECK-LABEL: test_mask_add_ss_current
2563 ; CHECK: vaddss %xmm1, %xmm0, %xmm2 {%k1}
2564 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2565 ret <4 x float> %res
2568 define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2569 ; CHECK-LABEL: test_maskz_add_ss_rn
2570 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2571 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
2572 ret <4 x float> %res
2575 define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
2576 ; CHECK-LABEL: test_add_ss_rn
2577 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
2578 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
2579 ret <4 x float> %res
; Scalar vaddsd with rounding override (llvm.x86.avx512.mask.add.sd.round),
; mirroring the vaddss coverage above.
2582 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2584 define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2585 ; CHECK-LABEL: test_mask_add_sd_rn
2586 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2587 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
2588 ret <2 x double> %res
2591 define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2592 ; CHECK-LABEL: test_mask_add_sd_rd
2593 ; CHECK: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2594 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
2595 ret <2 x double> %res
2598 define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2599 ; CHECK-LABEL: test_mask_add_sd_ru
2600 ; CHECK: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2601 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
2602 ret <2 x double> %res
2605 define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2606 ; CHECK-LABEL: test_mask_add_sd_rz
2607 ; CHECK: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2608 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
2609 ret <2 x double> %res
2612 define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2613 ; CHECK-LABEL: test_mask_add_sd_current
2614 ; CHECK: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
2615 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2616 ret <2 x double> %res
2619 define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2620 ; CHECK-LABEL: test_maskz_add_sd_rn
2621 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2622 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
2623 ret <2 x double> %res
2626 define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
2627 ; CHECK-LABEL: test_add_sd_rn
2628 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
2629 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
2630 ret <2 x double> %res
; Scalar vmaxss (llvm.x86.avx512.mask.max.ss.round): {sae} (8) and current
; mode (4), each in merge-masked, zero-masked, and unmasked variants.
2633 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2635 define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2636 ; CHECK-LABEL: test_mask_max_ss_sae
2637 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2638 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
2639 ret <4 x float> %res
2642 define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2643 ; CHECK-LABEL: test_maskz_max_ss_sae
2644 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2645 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
2646 ret <4 x float> %res
2649 define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
2650 ; CHECK-LABEL: test_max_ss_sae
2651 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0
2652 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
2653 ret <4 x float> %res
2656 define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2657 ; CHECK-LABEL: test_mask_max_ss
2658 ; CHECK: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
2659 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2660 ret <4 x float> %res
2663 define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2664 ; CHECK-LABEL: test_maskz_max_ss
2665 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
2666 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
2667 ret <4 x float> %res
2670 define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
2671 ; CHECK-LABEL: test_max_ss
2672 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0
2673 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
2674 ret <4 x float> %res
; --- Scalar double-precision max (llvm.x86.avx512.mask.max.sd.round) ---
; Mirrors the max.ss tests above: imm 8 -> {sae} form, imm 4 -> plain form,
; across masked / zero-masked / unmasked (-1) combinations.
2676 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2678 define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2679 ; CHECK-LABEL: test_mask_max_sd_sae
2680 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2681 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
2682 ret <2 x double> %res
2685 define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2686 ; CHECK-LABEL: test_maskz_max_sd_sae
2687 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2688 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
2689 ret <2 x double> %res
2692 define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
2693 ; CHECK-LABEL: test_max_sd_sae
2694 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
2695 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
2696 ret <2 x double> %res
2699 define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2700 ; CHECK-LABEL: test_mask_max_sd
2701 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
2702 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2703 ret <2 x double> %res
2706 define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2707 ; CHECK-LABEL: test_maskz_max_sd
2708 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
2709 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
2710 ret <2 x double> %res
2713 define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
2714 ; CHECK-LABEL: test_max_sd
2715 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0
2716 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
2717 ret <2 x double> %res
; --- Signed int -> scalar FP conversions with embedded rounding ---
; imm 3 selects {rz-sae}; 32-bit sources use the "l" mnemonic suffix with
; %edi, 64-bit sources use the "q" suffix with %rdi.
2720 define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
2721 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
2723 ; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
2725 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<2 x double>> [#uses=1]
2726 ret <2 x double> %res
2728 declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone
2730 define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
2731 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
2733 ; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
2735 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<2 x double>> [#uses=1]
2736 ret <2 x double> %res
2738 declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
2740 define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
2741 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
2743 ; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
2745 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<4 x float>> [#uses=1]
2746 ret <4 x float> %res
2748 declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
2750 define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
2751 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
2753 ; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
2755 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<4 x float>> [#uses=1]
2756 ret <4 x float> %res
2758 declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
; --- Unsigned i32 -> float conversions (llvm.x86.avx512.cvtusi2ss) ---
; imm 1 selects {rd-sae}; imm 4 is the current-rounding form.  The _mem
; variants load the integer operand; with imm 4 the load is expected to fold
; into vcvtusi2ssl, while the {rd-sae} form goes through %eax first.
2760 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
2761 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
2763 ; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
2766 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<4 x float>> [#uses=1]
2767 ret <4 x float> %res
2770 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
2771 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
2773 ; CHECK-NEXT: movl (%rdi), %eax
2774 ; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
2777 %b = load i32, i32* %ptr
2778 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<4 x float>> [#uses=1]
2779 ret <4 x float> %res
2782 define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
2783 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
2785 ; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
2788 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<4 x float>> [#uses=1]
2789 ret <4 x float> %res
2792 define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
2793 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
2795 ; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
2798 %b = load i32, i32* %ptr
2799 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<4 x float>> [#uses=1]
2800 ret <4 x float> %res
2802 declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
; --- Unsigned i64 -> float conversions (llvm.x86.avx512.cvtusi642ss) ---
; "_cvt_round" variant uses imm 1 -> {rd-sae}; plain "_cvtu64" uses imm 4
; (current rounding, no static-rounding suffix).
2804 define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
2805 ; CHECK-LABEL: _mm_cvt_roundu64_ss:
2807 ; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
2810 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<4 x float>> [#uses=1]
2811 ret <4 x float> %res
2814 define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
2815 ; CHECK-LABEL: _mm_cvtu64_ss:
2817 ; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
2820 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<4 x float>> [#uses=1]
2821 ret <4 x float> %res
2823 declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone
; --- Unsigned int -> double conversions ---
; cvtusi2sd (i32 source) takes no rounding immediate; cvtusi642sd (i64
; source) does (imm 1 -> {rd-sae}, imm 4 -> current rounding).
2825 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
2826 ; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
2828 ; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
2831 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<2 x double>> [#uses=1]
2832 ret <2 x double> %res
2834 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
; NOTE(review): the two u64->sd tests below appear name-swapped relative to
; the u64->ss pair above -- here "cvtu64_sd" gets the {rd-sae}/imm-1 form and
; "cvt_roundu64_sd" gets the imm-4 (current rounding) form.  Confirm the
; intended naming before relying on these labels.
2836 define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
2837 ; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
2839 ; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
2842 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<2 x double>> [#uses=1]
2843 ret <2 x double> %res
2846 define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
2847 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
2849 ; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
2852 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<2 x double>> [#uses=1]
2853 ret <2 x double> %res
2855 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
; --- Integer min/max encoding checks ---
; These tests pass an all-ones mask (-1) so the unmasked instruction is
; selected, and pin the exact EVEX encoding bytes via --show-mc-encoding.
2857 define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
2858 ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
2859 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
2860 <8 x i64>zeroinitializer, i8 -1)
2863 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2865 define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
2866 ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
2867 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
2868 <16 x i32>zeroinitializer, i16 -1)
2871 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2873 define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
2874 ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
2875 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
2876 <16 x i32>zeroinitializer, i16 -1)
2879 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; --- Masked integer min/max intrinsic tests ---
; Each test issues the intrinsic twice (mask %x3 and all-ones mask -1) and
; adds the results, so both the masked and unmasked lowerings are exercised.
2881 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_512
2883 ; CHECK: vpmaxsd %zmm
2885 define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2886 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2887 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2888 %res2 = add <16 x i32> %res, %res1
2889 ret <16 x i32> %res2
2892 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_512
2894 ; CHECK: vpmaxsq %zmm
2896 define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2897 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2898 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2899 %res2 = add <8 x i64> %res, %res1
2903 declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2905 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_512
2907 ; CHECK: vpmaxud %zmm
2909 define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2910 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2911 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2912 %res2 = add <16 x i32> %res, %res1
2913 ret <16 x i32> %res2
2916 declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2918 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_512
2920 ; CHECK: vpmaxuq %zmm
2922 define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2923 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2924 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2925 %res2 = add <8 x i64> %res, %res1
2929 declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2931 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_512
2933 ; CHECK: vpminsd %zmm
2935 define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2936 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2937 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2938 %res2 = add <16 x i32> %res, %res1
2939 ret <16 x i32> %res2
2942 declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2944 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_512
2946 ; CHECK: vpminsq %zmm
2948 define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2949 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2950 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2951 %res2 = add <8 x i64> %res, %res1
2955 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_512
2957 ; CHECK: vpminud %zmm
2959 define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2960 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2961 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2962 %res2 = add <16 x i32> %res, %res1
2963 ret <16 x i32> %res2
2966 declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2968 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_512
2970 ; CHECK: vpminuq %zmm
2972 define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2973 %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2974 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2975 %res2 = add <8 x i64> %res, %res1
; --- Two-source permute, index-destroying form (vpermi2*) ---
; Same masked-plus-unmasked pattern: call with %x3, call with -1, combine
; with add/fadd so neither call can be dropped.
2979 declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2981 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512
2984 ; CHECK: vpermi2d {{.*}}{%k1}
2985 define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2986 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2987 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2988 %res2 = add <16 x i32> %res, %res1
2989 ret <16 x i32> %res2
2992 declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
2994 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512
2997 ; CHECK: vpermi2pd {{.*}}{%k1}
2998 define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
2999 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
3000 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
3001 %res2 = fadd <8 x double> %res, %res1
3002 ret <8 x double> %res2
3005 declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
3007 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512
3010 ; CHECK: vpermi2ps {{.*}}{%k1}
3011 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
3012 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
3013 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
3014 %res2 = fadd <16 x float> %res, %res1
3015 ret <16 x float> %res2
3018 declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3020 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512
3023 ; CHECK: vpermi2q {{.*}}{%k1}
3024 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3025 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3026 %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3027 %res2 = add <8 x i64> %res, %res1
; --- Two-source permute, table-destroying zero-masked form (vpermt2* {z}) ---
; The maskz.* intrinsics lower to vpermt2* with {%k1} {z} for a real mask.
3031 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3033 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512
3036 ; CHECK: vpermt2d {{.*}}{%k1} {z}
3037 define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3038 %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3039 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3040 %res2 = add <16 x i32> %res, %res1
3041 ret <16 x i32> %res2
3044 declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
3046 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512
3049 ; CHECK: vpermt2pd {{.*}}{%k1} {z}
3050 define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3051 %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3052 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3053 %res2 = fadd <8 x double> %res, %res1
3054 ret <8 x double> %res2
3057 declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
3059 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512
3062 ; CHECK: vpermt2ps {{.*}}{%k1} {z}
3063 define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3064 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3065 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3066 %res2 = fadd <16 x float> %res, %res1
3067 ret <16 x float> %res2
3071 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3073 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512
3076 ; CHECK: vpermt2q {{.*}}{%k1} {z}
3077 define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3078 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3079 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3080 %res2 = add <8 x i64> %res, %res1
; --- Merge-masked vpermt2d (mask.* variant, {%k1} without {z}) ---
3084 declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3086 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512
3089 ; CHECK: vpermt2d {{.*}}{%k1}
3091 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3092 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3093 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3094 %res2 = add <16 x i32> %res, %res1
3095 ret <16 x i32> %res2
; --- vscalefpd / vscalefps with mask and rounding immediates ---
; Masked call uses a non-default rounding immediate; unmasked call uses
; imm 0; results are fadd-combined so both lowerings survive.
3098 declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
3099 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_512
3102 ; CHECK: vscalefpd{{.*}}{%k1}
3103 define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3104 %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3)
3105 %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
3106 %res2 = fadd <8 x double> %res, %res1
3107 ret <8 x double> %res2
3110 declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
3111 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_512
3114 ; CHECK: vscalefps{{.*}}{%k1}
3115 define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3116 %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2)
3117 %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
3118 %res2 = fadd <16 x float> %res, %res1
3119 ret <16 x float> %res2
; --- FP unpack high/low (vunpckhpd/ps, vunpcklpd/ps) ---
; Note the mask-setup difference pinned by the CHECKs: the i8 mask (pd) is
; zero-extended via movzbl before kmovw, while the i16 mask (ps) feeds kmovw
; directly from %edi.
3122 declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
3124 define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3125 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
3127 ; CHECK-NEXT: movzbl %dil, %eax
3128 ; CHECK-NEXT: kmovw %eax, %k1
3129 ; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1}
3130 ; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm0
3131 %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3132 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3133 %res2 = fadd <8 x double> %res, %res1
3134 ret <8 x double> %res2
3137 declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
3139 define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3140 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
3142 ; CHECK-NEXT: kmovw %edi, %k1
3143 ; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1}
3144 ; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm0
3145 %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3146 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3147 %res2 = fadd <16 x float> %res, %res1
3148 ret <16 x float> %res2
3151 declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
3153 define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3154 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
3156 ; CHECK-NEXT: movzbl %dil, %eax
3157 ; CHECK-NEXT: kmovw %eax, %k1
3158 ; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm2 {%k1}
3159 ; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm0
3160 %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3161 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3162 %res2 = fadd <8 x double> %res, %res1
3163 ret <8 x double> %res2
3166 declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
3168 define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3169 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
3171 ; CHECK-NEXT: kmovw %edi, %k1
3172 ; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1}
3173 ; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm0
3174 %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3175 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3176 %res2 = fadd <16 x float> %res, %res1
3177 ret <16 x float> %res2
; --- Integer quadword unpack (vpunpcklqdq / vpunpckhqdq) ---
; The low-unpack test also exercises zero-masking (zeroinitializer passthru
; -> {%k1} {z}) in addition to merge-masked and unmasked forms.
3180 declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3182 define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3183 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
3185 ; CHECK-NEXT: movzbl %dil, %eax
3186 ; CHECK-NEXT: kmovw %eax, %k1
3187 ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1}
3188 ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 {%k1} {z}
3189 ; CHECK-NEXT: vpunpcklqdq {{.*#+}}
3190 ; CHECK: vpaddq %zmm0, %zmm2, %zmm0
3191 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
3193 %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3194 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3195 %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3)
3196 %res3 = add <8 x i64> %res, %res1
3197 %res4 = add <8 x i64> %res2, %res3
3201 declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3203 define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3204 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
3206 ; CHECK-NEXT: movzbl %dil, %eax
3207 ; CHECK-NEXT: kmovw %eax, %k1
3208 ; CHECK-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1}
3209 ; CHECK-NEXT: vpunpckhqdq {{.*#+}}
3210 ; CHECK: vpaddq %zmm0, %zmm2, %zmm0
3212 %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3213 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3214 %res2 = add <8 x i64> %res, %res1
; --- Integer doubleword unpack (vpunpckhdq / vpunpckldq), i16 mask ---
3218 declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3220 define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3221 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
3223 ; CHECK-NEXT: kmovw %edi, %k1
3224 ; CHECK-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1}
3225 ; CHECK-NEXT: vpunpckhdq {{.*#+}}
3226 ; CHECK: vpaddd %zmm0, %zmm2, %zmm0
3228 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3229 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3230 %res2 = add <16 x i32> %res, %res1
3231 ret <16 x i32> %res2
3234 declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3236 define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3237 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
3239 ; CHECK-NEXT: kmovw %edi, %k1
3240 ; CHECK-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1}
3241 ; CHECK-NEXT: vpunpckldq {{.*#+}}
3242 ; CHECK: vpaddd %zmm0, %zmm2, %zmm0
3244 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3245 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3246 %res2 = add <16 x i32> %res, %res1
3247 ret <16 x i32> %res2
; --- q->b down-converts: truncate (pmov), signed saturate (pmovs),
; --- unsigned saturate (pmovus) ---
; Register tests exercise unmasked (-1), merge-masked (%x2) and zero-masked
; (zeroinitializer passthru) forms; the .mem.512 tests store straight to
; (%rdi), unmasked then masked.
3250 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)
3252 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3253 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
3254 ; CHECK: vpmovqb %zmm0, %xmm1 {%k1}
3255 ; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
3256 ; CHECK-NEXT: vpmovqb %zmm0, %xmm0
3257 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3258 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3259 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3260 %res3 = add <16 x i8> %res0, %res1
3261 %res4 = add <16 x i8> %res3, %res2
3265 declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3267 define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3268 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512:
3269 ; CHECK: vpmovqb %zmm0, (%rdi)
3270 ; CHECK: vpmovqb %zmm0, (%rdi) {%k1}
3271 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3272 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3276 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)
3278 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3279 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
3280 ; CHECK: vpmovsqb %zmm0, %xmm1 {%k1}
3281 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
3282 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
3283 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3284 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3285 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3286 %res3 = add <16 x i8> %res0, %res1
3287 %res4 = add <16 x i8> %res3, %res2
3291 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3293 define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3294 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
3295 ; CHECK: vpmovsqb %zmm0, (%rdi)
3296 ; CHECK: vpmovsqb %zmm0, (%rdi) {%k1}
3297 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3298 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3302 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)
3304 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3305 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
3306 ; CHECK: vpmovusqb %zmm0, %xmm1 {%k1}
3307 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
3308 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
3309 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3310 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3311 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3312 %res3 = add <16 x i8> %res0, %res1
3313 %res4 = add <16 x i8> %res3, %res2
3317 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3319 define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3320 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
3321 ; CHECK: vpmovusqb %zmm0, (%rdi)
3322 ; CHECK: vpmovusqb %zmm0, (%rdi) {%k1}
3323 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3324 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; vpmovqw / vpmovsqw / vpmovusqw: plain, signed-saturating and
; unsigned-saturating truncation of <8 x i64> to <8 x i16>.  Register
; forms test unmasked (mask = -1), merge-masked and zero-masked
; variants (results summed to keep all calls live); the .mem forms test
; the truncating store with and without a mask.
; NOTE(review): the ret/closing-brace lines of these test functions are
; not visible in this extract (the embedded original line numbers skip
; past them) -- confirm against the original file before editing.
3328 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
3330 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3331 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
3332 ; CHECK: vpmovqw %zmm0, %xmm1 {%k1}
3333 ; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
3334 ; CHECK-NEXT: vpmovqw %zmm0, %xmm0
3335 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3336 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3337 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3338 %res3 = add <8 x i16> %res0, %res1
3339 %res4 = add <8 x i16> %res3, %res2
3343 declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3345 define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3346 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512:
3347 ; CHECK: vpmovqw %zmm0, (%rdi)
3348 ; CHECK: vpmovqw %zmm0, (%rdi) {%k1}
3349 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3350 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3354 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)
3356 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3357 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
3358 ; CHECK: vpmovsqw %zmm0, %xmm1 {%k1}
3359 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
3360 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
3361 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3362 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3363 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3364 %res3 = add <8 x i16> %res0, %res1
3365 %res4 = add <8 x i16> %res3, %res2
3369 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3371 define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3372 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
3373 ; CHECK: vpmovsqw %zmm0, (%rdi)
3374 ; CHECK: vpmovsqw %zmm0, (%rdi) {%k1}
3375 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3376 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3380 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)
3382 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3383 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
3384 ; CHECK: vpmovusqw %zmm0, %xmm1 {%k1}
3385 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
3386 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
3387 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3388 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3389 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3390 %res3 = add <8 x i16> %res0, %res1
3391 %res4 = add <8 x i16> %res3, %res2
3395 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3397 define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3398 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
3399 ; CHECK: vpmovusqw %zmm0, (%rdi)
3400 ; CHECK: vpmovusqw %zmm0, (%rdi) {%k1}
3401 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3402 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; vpmovqd / vpmovsqd / vpmovusqd: plain, signed-saturating and
; unsigned-saturating truncation of <8 x i64> to <8 x i32> (zmm -> ymm).
; Register forms test unmasked (mask = -1), merge-masked and zero-masked
; variants (results summed to keep all calls live); the .mem forms test
; the truncating store with and without a mask.
; NOTE(review): the ret/closing-brace lines of these test functions are
; not visible in this extract (the embedded original line numbers skip
; past them) -- confirm against the original file before editing.
3406 declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
3408 define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3409 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
3410 ; CHECK: vpmovqd %zmm0, %ymm1 {%k1}
3411 ; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
3412 ; CHECK-NEXT: vpmovqd %zmm0, %ymm0
3413 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3414 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3415 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3416 %res3 = add <8 x i32> %res0, %res1
3417 %res4 = add <8 x i32> %res3, %res2
3421 declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3423 define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3424 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512:
3425 ; CHECK: vpmovqd %zmm0, (%rdi)
3426 ; CHECK: vpmovqd %zmm0, (%rdi) {%k1}
3427 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3428 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3432 declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)
3434 define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3435 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
3436 ; CHECK: vpmovsqd %zmm0, %ymm1 {%k1}
3437 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
3438 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm0
3439 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3440 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3441 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3442 %res3 = add <8 x i32> %res0, %res1
3443 %res4 = add <8 x i32> %res3, %res2
3447 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3449 define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3450 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
3451 ; CHECK: vpmovsqd %zmm0, (%rdi)
3452 ; CHECK: vpmovsqd %zmm0, (%rdi) {%k1}
3453 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3454 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3458 declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)
3460 define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3461 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
3462 ; CHECK: vpmovusqd %zmm0, %ymm1 {%k1}
3463 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
3464 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm0
3465 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3466 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3467 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3468 %res3 = add <8 x i32> %res0, %res1
3469 %res4 = add <8 x i32> %res3, %res2
3473 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3475 define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3476 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
3477 ; CHECK: vpmovusqd %zmm0, (%rdi)
3478 ; CHECK: vpmovusqd %zmm0, (%rdi) {%k1}
3479 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3480 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; vpmovdb / vpmovsdb / vpmovusdb: plain, signed-saturating and
; unsigned-saturating truncation of <16 x i32> to <16 x i8>.  The mask
; widens to i16 here (16 lanes).  Register forms test unmasked
; (mask = -1), merge-masked and zero-masked variants (results summed to
; keep all calls live); the .mem forms test the truncating store with
; and without a mask.
; NOTE(review): the ret/closing-brace lines of these test functions are
; not visible in this extract (the embedded original line numbers skip
; past them) -- confirm against the original file before editing.
3484 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
3486 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3487 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
3488 ; CHECK: vpmovdb %zmm0, %xmm1 {%k1}
3489 ; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
3490 ; CHECK-NEXT: vpmovdb %zmm0, %xmm0
3491 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3492 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3493 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3494 %res3 = add <16 x i8> %res0, %res1
3495 %res4 = add <16 x i8> %res3, %res2
3499 declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16)
3501 define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3502 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512:
3503 ; CHECK: vpmovdb %zmm0, (%rdi)
3504 ; CHECK: vpmovdb %zmm0, (%rdi) {%k1}
3505 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3506 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3510 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)
3512 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3513 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
3514 ; CHECK: vpmovsdb %zmm0, %xmm1 {%k1}
3515 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
3516 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
3517 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3518 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3519 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3520 %res3 = add <16 x i8> %res0, %res1
3521 %res4 = add <16 x i8> %res3, %res2
3525 declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)
3527 define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3528 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
3529 ; CHECK: vpmovsdb %zmm0, (%rdi)
3530 ; CHECK: vpmovsdb %zmm0, (%rdi) {%k1}
3531 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3532 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3536 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)
3538 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3539 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
3540 ; CHECK: vpmovusdb %zmm0, %xmm1 {%k1}
3541 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
3542 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
3543 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3544 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3545 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3546 %res3 = add <16 x i8> %res0, %res1
3547 %res4 = add <16 x i8> %res3, %res2
3551 declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)
3553 define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3554 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
3555 ; CHECK: vpmovusdb %zmm0, (%rdi)
3556 ; CHECK: vpmovusdb %zmm0, (%rdi) {%k1}
3557 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3558 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; vpmovdw / vpmovsdw / vpmovusdw: plain, signed-saturating and
; unsigned-saturating truncation of <16 x i32> to <16 x i16>
; (zmm -> ymm, i16 mask).  Register forms test unmasked (mask = -1),
; merge-masked and zero-masked variants (results summed to keep all
; calls live); the .mem forms test the truncating store with and
; without a mask.
; NOTE(review): some terminator lines of the .mem functions are not
; visible in this extract (the embedded original line numbers skip
; past them) -- confirm against the original file before editing.
3562 declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)
3564 define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3565 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
3566 ; CHECK: vpmovdw %zmm0, %ymm1 {%k1}
3567 ; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
3568 ; CHECK-NEXT: vpmovdw %zmm0, %ymm0
3569 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3570 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3571 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3572 %res3 = add <16 x i16> %res0, %res1
3573 %res4 = add <16 x i16> %res3, %res2
3574 ret <16 x i16> %res4
3577 declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3579 define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3580 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512:
3581 ; CHECK: vpmovdw %zmm0, (%rdi)
3582 ; CHECK: vpmovdw %zmm0, (%rdi) {%k1}
3583 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3584 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3588 declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)
3590 define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3591 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
3592 ; CHECK: vpmovsdw %zmm0, %ymm1 {%k1}
3593 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
3594 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
3595 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3596 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3597 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3598 %res3 = add <16 x i16> %res0, %res1
3599 %res4 = add <16 x i16> %res3, %res2
3600 ret <16 x i16> %res4
3603 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3605 define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3606 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
3607 ; CHECK: vpmovsdw %zmm0, (%rdi)
3608 ; CHECK: vpmovsdw %zmm0, (%rdi) {%k1}
3609 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3610 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3614 declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)
3616 define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3617 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
3618 ; CHECK: vpmovusdw %zmm0, %ymm1 {%k1}
3619 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
3620 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
3621 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3622 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3623 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3624 %res3 = add <16 x i16> %res0, %res1
3625 %res4 = add <16 x i16> %res3, %res2
3626 ret <16 x i16> %res4
3629 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3631 define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3632 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
3633 ; CHECK: vpmovusdw %zmm0, (%rdi)
3634 ; CHECK: vpmovusdw %zmm0, (%rdi) {%k1}
3635 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3636 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; Masked 512-bit conversion intrinsics: dq2pd, dq2ps, pd2dq, pd2ps,
; pd2udq.  Each test compares a masked call against an unmasked
; (mask = -1) call and combines the two results so neither is dead.
; Where the intrinsic takes a trailing i32 rounding-mode operand,
; the CHECK lines pair i32 4 with the plain instruction and i32 0 /
; 2 / 8 with the {rn-sae} / {ru-sae} / {sae} encodings respectively
; (inferred from the CHECK/operand correspondence below -- verify
; against the intrinsic definition).
; NOTE(review): some ret/closing-brace lines are not visible in this
; extract (the embedded original line numbers skip past them).
3640 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
3642 define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
3643 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
3645 ; CHECK-NEXT: movzbl %dil, %eax
3646 ; CHECK-NEXT: kmovw %eax, %k1
3647 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1}
3648 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
3649 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3651 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
3652 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
3653 %res2 = fadd <8 x double> %res, %res1
3654 ret <8 x double> %res2
3657 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3659 define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
3660 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
3662 ; CHECK-NEXT: kmovw %edi, %k1
3663 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1}
3664 ; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0
3665 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
3667 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
3668 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
3669 %res2 = fadd <16 x float> %res, %res1
3670 ret <16 x float> %res2
3673 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
3675 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3676 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512:
3678 ; CHECK-NEXT: movzbl %dil, %eax
3679 ; CHECK-NEXT: kmovw %eax, %k1
3680 ; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1}
3681 ; CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0
3682 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3684 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3685 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
3686 %res2 = add <8 x i32> %res, %res1
3690 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
3692 define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) {
3693 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512:
3695 ; CHECK-NEXT: movzbl %dil, %eax
3696 ; CHECK-NEXT: kmovw %eax, %k1
3697 ; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1}
3698 ; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0
3699 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
3701 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
3702 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2)
3703 %res2 = fadd <8 x float> %res, %res1
3704 ret <8 x float> %res2
3707 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
3709 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3710 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512:
3712 ; CHECK-NEXT: movzbl %dil, %eax
3713 ; CHECK-NEXT: kmovw %eax, %k1
3714 ; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1}
3715 ; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0
3716 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3718 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2)
3719 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
3720 %res2 = add <8 x i32> %res, %res1
; Masked 512-bit conversion intrinsics, continued: ps2dq, ps2pd,
; ps2udq, the truncating (cvtt*) pd2dq / pd2udq / ps2dq / ps2udq
; forms, and the unsigned-source udq2pd / udq2ps forms.  Pattern as
; above: masked call vs. unmasked (mask = -1) call, results combined so
; neither is dead.  The trailing i32 operand selects rounding/sae:
; judging by the CHECK lines, i32 4 emits the plain instruction while
; 0 / 2 / 8 emit {rn-sae} / {ru-sae} / {sae} -- verify against the
; intrinsic definition.
; NOTE(review): some ret/closing-brace lines are not visible in this
; extract (the embedded original line numbers skip past them).
3724 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)
3726 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3727 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512:
3729 ; CHECK-NEXT: kmovw %edi, %k1
3730 ; CHECK-NEXT: vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1}
3731 ; CHECK-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0
3732 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3734 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
3735 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
3736 %res2 = add <16 x i32> %res, %res1
3737 ret <16 x i32> %res2
3740 declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)
3742 define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) {
3743 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512:
3745 ; CHECK-NEXT: movzbl %dil, %eax
3746 ; CHECK-NEXT: kmovw %eax, %k1
3747 ; CHECK-NEXT: vcvtps2pd %ymm0, %zmm1 {%k1}
3748 ; CHECK-NEXT: vcvtps2pd {sae}, %ymm0, %zmm0
3749 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3751 %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
3752 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
3753 %res2 = fadd <8 x double> %res, %res1
3754 ret <8 x double> %res2
3757 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
3759 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3760 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512:
3762 ; CHECK-NEXT: kmovw %edi, %k1
3763 ; CHECK-NEXT: vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1}
3764 ; CHECK-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0
3765 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3767 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
3768 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
3769 %res2 = add <16 x i32> %res, %res1
3770 ret <16 x i32> %res2
3773 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
3775 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3776 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
3778 ; CHECK-NEXT: movzbl %dil, %eax
3779 ; CHECK-NEXT: kmovw %eax, %k1
3780 ; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
3781 ; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
3782 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3784 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3785 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
3786 %res2 = add <8 x i32> %res, %res1
3790 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
3792 define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
3793 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
3795 ; CHECK-NEXT: movzbl %dil, %eax
3796 ; CHECK-NEXT: kmovw %eax, %k1
3797 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1}
3798 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
3799 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3801 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
3802 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
3803 %res2 = fadd <8 x double> %res, %res1
3804 ret <8 x double> %res2
3808 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3810 define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
3811 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
3813 ; CHECK-NEXT: kmovw %edi, %k1
3814 ; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1}
3815 ; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0
3816 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
3818 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
3819 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
3820 %res2 = fadd <16 x float> %res, %res1
3821 ret <16 x float> %res2
3824 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
3826 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3827 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
3829 ; CHECK-NEXT: movzbl %dil, %eax
3830 ; CHECK-NEXT: kmovw %eax, %k1
3831 ; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
3832 ; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
3833 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3835 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3836 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
3837 %res2 = add <8 x i32> %res, %res1
3841 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
3843 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3844 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
3846 ; CHECK-NEXT: kmovw %edi, %k1
3847 ; CHECK-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
3848 ; CHECK-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
3849 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3851 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
3852 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
3853 %res2 = add <16 x i32> %res, %res1
3854 ret <16 x i32> %res2
3857 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
3859 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3860 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
3862 ; CHECK-NEXT: kmovw %edi, %k1
3863 ; CHECK-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
3864 ; CHECK-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
3865 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3867 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
3868 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
3869 %res2 = add <16 x i32> %res, %res1
3870 ret <16 x i32> %res2
; Masked scalar vscalefss / vscalefsd: masked call with the default
; rounding operand (i32 4) vs. unmasked (mask = -1) call with i32 8,
; which the CHECK lines match as the {rn-sae} encoding; results are
; fadd'ed so both calls stay live.  Unlike the tests above, the CHECK
; comments here precede the define.
; NOTE(review): the closing-brace lines of these functions are not
; visible in this extract (the embedded original line numbers skip
; past them).
3874 declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
3875 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ss
3878 ; CHECK: vscalefss {{.*}}{%k1}
3879 ; CHECK: vscalefss {rn-sae}
3880 define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
3881 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
3882 %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
3883 %res2 = fadd <4 x float> %res, %res1
3884 ret <4 x float> %res2
3887 declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
3888 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_sd
3891 ; CHECK: vscalefsd {{.*}}{%k1}
3892 ; CHECK: vscalefsd {rn-sae}
3893 define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
3894 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
3895 %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
3896 %res2 = fadd <2 x double> %res, %res1
3897 ret <2 x double> %res2
; Masked scalar vgetexpss: four call variants -- merge-masked with
; default rounding (i32 4), merge-masked with i32 8 (matched as {sae}),
; zero-masked with i32 8, and unmasked (mask = -1) with i32 8 -- all
; fadd'ed together so every call stays live.  The leading
; andl $1/kmovw sequence in the CHECKs materializes the 1-bit scalar
; mask into %k1.
; NOTE(review): the closing brace of this function is not visible in
; this extract (the embedded original line numbers skip past it).
3900 declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
3902 define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3903 ; CHECK-LABEL: test_getexp_ss:
3905 ; CHECK-NEXT: andl $1, %edi
3906 ; CHECK-NEXT: kmovw %edi, %k1
3907 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
3908 ; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
3909 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
3910 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
3911 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0
3912 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
3913 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
3914 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
3916 %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
3917 %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
3918 %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
3919 %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
3921 %res.1 = fadd <4 x float> %res0, %res1
3922 %res.2 = fadd <4 x float> %res2, %res3
3923 %res = fadd <4 x float> %res.1, %res.2
3924 ret <4 x float> %res
3927 declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
3929 define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
3930 ; CHECK-LABEL: test_getexp_sd:
3932 ; CHECK-NEXT: andl $1, %edi
3933 ; CHECK-NEXT: kmovw %edi, %k1
3934 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
3935 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
3936 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
3937 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
3938 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
3939 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
3940 ; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
3941 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
3943 %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
3944 %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
3945 %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
3946 %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
3948 %res.1 = fadd <2 x double> %res0, %res1
3949 %res.2 = fadd <2 x double> %res2, %res3
3950 %res = fadd <2 x double> %res.1, %res.2
3951 ret <2 x double> %res