1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
3 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
4 ; CHECK-LABEL: test_kortestz
7 define i32 @test_kortestz(i16 %a0, i16 %a1) {
8 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
12 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
13 ; CHECK-LABEL: test_kortestc
16 define i32 @test_kortestc(i16 %a0, i16 %a1) {
17 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
21 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
22 ; CHECK-LABEL: test_kand
25 define i16 @test_kand(i16 %a0, i16 %a1) {
26 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
27 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
31 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
32 ; CHECK-LABEL: test_knot
34 define i16 @test_knot(i16 %a0) {
35 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
39 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
41 ; CHECK-LABEL: unpckbw_test
44 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
45 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
49 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
50 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
51 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
54 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
56 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
57 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
58 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
61 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
63 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
65 define <8 x double> @test7(<8 x double> %a) {
66 ; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
67 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
71 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
73 define <16 x float> @test8(<16 x float> %a) {
74 ; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
75 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
79 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
80 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
81 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
84 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
86 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
87 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
88 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
91 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
93 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
94 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
95 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
98 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
100 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
101 ; CHECK-LABEL: test_sqrt_pd_512
103 %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
104 ret <8 x double> %res
106 declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
108 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
109 ; CHECK-LABEL: test_sqrt_ps_512
111 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
112 ret <16 x float> %res
114 define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
115 ; CHECK-LABEL: test_sqrt_round_ps_512
116 ; CHECK: vsqrtps {rz-sae}
117 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
118 ret <16 x float> %res
120 declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
122 define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
123 ; CHECK-LABEL: test_getexp_pd_512
125 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
126 ret <8 x double> %res
128 define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
129 ; CHECK-LABEL: test_getexp_round_pd_512
130 ; CHECK: vgetexppd {sae}
131 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
132 ret <8 x double> %res
134 declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
136 define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
137 ; CHECK-LABEL: test_getexp_ps_512
139 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
140 ret <16 x float> %res
143 define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
144 ; CHECK-LABEL: test_getexp_round_ps_512
145 ; CHECK: vgetexpps {sae}
146 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
147 ret <16 x float> %res
149 declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
151 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
152 ; CHECK: vsqrtss {{.*}}encoding: [0x62
153 %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
156 declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone
158 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
159 ; CHECK: vsqrtsd {{.*}}encoding: [0x62
160 %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
161 ret <2 x double> %res
163 declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
165 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
166 ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
167 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
170 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
172 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
173 ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
174 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
175 ret <2 x double> %res
177 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
179 define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
180 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
181 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
184 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
187 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
188 ; CHECK: vcvtss2si {{.*}}encoding: [0x62
189 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
192 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
195 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
196 ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
197 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
200 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
203 define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
204 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
205 %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
208 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
210 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
211 ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
212 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
215 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
217 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
218 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
219 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
220 ret <16 x float> %res
222 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
225 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
226 ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
227 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
231 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
233 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
234 ; CHECK: vbroadcastss
235 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
236 ret <16 x float> %res
238 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
240 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
241 ; CHECK: vbroadcastsd
242 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
243 ret <8 x double> %res
245 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
247 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
248 ; CHECK: vbroadcastss
249 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
250 ret <16 x float> %res
252 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
254 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
255 ; CHECK: vbroadcastsd
256 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
257 ret <8 x double> %res
259 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
261 define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
262 ; CHECK: vpbroadcastd
263 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
266 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
268 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
269 ; CHECK: vpbroadcastd
270 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
273 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
275 define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
276 ; CHECK: vpbroadcastq
277 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
280 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
282 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
283 ; CHECK: vpbroadcastq
284 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
287 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
289 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
290 ; CHECK-LABEL: test_conflict_d:
292 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0
293 ; CHECK-NEXT: retq ## encoding: [0xc3]
294 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
298 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
300 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
301 ; CHECK-LABEL: test_conflict_q:
303 ; CHECK-NEXT: vpconflictq %zmm0, %zmm0
305 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
309 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
311 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
312 ; CHECK-LABEL: test_maskz_conflict_d:
314 ; CHECK-NEXT: kmovw %edi, %k1
315 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z}
317 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
321 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
322 ; CHECK-LABEL: test_mask_conflict_q:
324 ; CHECK-NEXT: movzbl %dil, %eax
325 ; CHECK-NEXT: kmovw %eax, %k1
326 ; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
327 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
329 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
333 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
334 ; CHECK-LABEL: test_lzcnt_d:
336 ; CHECK-NEXT: vplzcntd %zmm0, %zmm0
338 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
342 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
344 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
345 ; CHECK-LABEL: test_lzcnt_q:
347 ; CHECK-NEXT: vplzcntq %zmm0, %zmm0
349 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
353 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
356 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
357 ; CHECK-LABEL: test_mask_lzcnt_d:
359 ; CHECK-NEXT: kmovw %edi, %k1
360 ; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1}
361 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
363 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
367 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
368 ; CHECK-LABEL: test_mask_lzcnt_q:
370 ; CHECK-NEXT: movzbl %dil, %eax
371 ; CHECK-NEXT: kmovw %eax, %k1
372 ; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1}
373 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
374 ; CHECK-NEXT: retq ## encoding: [0xc3]
375 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
379 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
380 ; CHECK: vblendmps %zmm1, %zmm0
381 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
382 ret <16 x float> %res
385 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
387 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
388 ; CHECK: vblendmpd %zmm1, %zmm0
389 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
390 ret <8 x double> %res
393 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
394 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
395 ; CHECK: vblendmpd (%
396 %b = load <8 x double>, <8 x double>* %ptr
397 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
398 ret <8 x double> %res
400 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
402 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
404 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
407 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
409 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
411 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
414 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
416 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
417 ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
418 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
421 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
423 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
424 ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
425 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
428 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
431 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
433 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
434 <8 x double>zeroinitializer, i8 -1, i32 4)
435 ret <8 x double> %res
437 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
438 <8 x double>, i8, i32)
440 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
442 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
443 <8 x double>zeroinitializer, i8 -1, i32 4)
444 ret <8 x double> %res
446 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
447 <8 x double>, i8, i32)
449 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
451 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_512
454 ; CHECK: vpabsd{{.*}}{%k1}
455 define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
456 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
457 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
458 %res2 = add <16 x i32> %res, %res1
462 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
464 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_512
467 ; CHECK: vpabsq{{.*}}{%k1}
468 define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
469 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
470 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
471 %res2 = add <8 x i64> %res, %res1
475 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
476 ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
477 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
480 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
482 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
483 ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
484 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
487 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
489 define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
490 ; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
491 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
495 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
497 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
498 ; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
499 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
503 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
505 define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
506 ; CHECK-LABEL: test_mask_store_aligned_ps:
508 ; CHECK-NEXT: kmovw %esi, %k1
509 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
511 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
515 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
517 define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
518 ; CHECK-LABEL: test_mask_store_aligned_pd:
520 ; CHECK-NEXT: kmovw %esi, %k1
521 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
523 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
527 declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
529 define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
530 ; CHECK-LABEL: test_maskz_load_aligned_ps:
532 ; CHECK-NEXT: kmovw %esi, %k1
533 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
535 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
536 ret <16 x float> %res
539 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
541 define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
542 ; CHECK-LABEL: test_maskz_load_aligned_pd:
544 ; CHECK-NEXT: kmovw %esi, %k1
545 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
547 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
548 ret <8 x double> %res
551 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
553 define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
554 ; CHECK-LABEL: test_load_aligned_ps:
556 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
558 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
559 ret <16 x float> %res
562 define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
563 ; CHECK-LABEL: test_load_aligned_pd:
565 ; CHECK-NEXT: vmovapd (%rdi), %zmm0
567 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
568 ret <8 x double> %res
571 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
573 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
574 ; CHECK-LABEL: test_valign_q:
575 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
576 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
580 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
581 ; CHECK-LABEL: test_mask_valign_q:
582 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
583 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
587 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
589 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
590 ; CHECK-LABEL: test_maskz_valign_d:
591 ; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
592 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
596 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
598 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
599 ; CHECK-LABEL: test_mask_store_ss
600 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
601 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
605 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
607 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
608 ; CHECK-LABEL: test_pcmpeq_d
609 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
610 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
614 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
615 ; CHECK-LABEL: test_mask_pcmpeq_d
616 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
617 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
621 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
623 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
624 ; CHECK-LABEL: test_pcmpeq_q
625 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
626 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
630 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
631 ; CHECK-LABEL: test_mask_pcmpeq_q
632 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
633 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
637 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
639 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
640 ; CHECK-LABEL: test_pcmpgt_d
641 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
642 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
646 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
647 ; CHECK-LABEL: test_mask_pcmpgt_d
648 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
649 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
653 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
655 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
656 ; CHECK-LABEL: test_pcmpgt_q
657 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
658 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
662 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
663 ; CHECK-LABEL: test_mask_pcmpgt_q
664 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
665 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
669 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
671 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
672 ; CHECK-LABEL: test_cmp_d_512
673 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
674 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
675 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
676 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
677 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
678 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
679 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
680 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
681 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
682 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
683 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
684 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
685 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
686 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
687 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
688 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
689 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
690 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
691 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
692 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
693 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
694 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
695 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
696 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
700 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
701 ; CHECK-LABEL: test_mask_cmp_d_512
702 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
703 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
704 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
705 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
706 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
707 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
708 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
709 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
710 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
711 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
712 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
713 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
714 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
715 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
716 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
717 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
718 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
719 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
720 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
721 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
722 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
723 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
724 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
725 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
729 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
731 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
732 ; CHECK-LABEL: test_ucmp_d_512
733 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
734 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
735 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
736 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
737 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
738 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
739 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
740 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
741 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
742 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
743 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
744 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
745 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
746 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
747 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
748 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
749 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
750 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
751 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
752 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
753 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
754 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
755 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
756 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
760 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
761 ; CHECK-LABEL: test_mask_ucmp_d_512
762 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
763 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
764 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
765 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
766 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
767 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
768 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
769 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
770 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
771 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
772 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
773 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
774 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
775 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
776 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
777 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
778 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
779 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
780 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
781 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
782 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
783 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
784 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
785 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
789 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
791 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
792 ; CHECK-LABEL: test_cmp_q_512
793 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
794 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
795 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
796 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
797 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
798 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
799 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
800 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
801 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
802 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
803 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
804 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
805 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
806 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
807 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
808 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
809 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
810 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
811 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
812 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
813 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
814 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
815 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
816 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
820 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
821 ; CHECK-LABEL: test_mask_cmp_q_512
822 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
823 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
824 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
825 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
826 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
827 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
828 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
829 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
830 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
831 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
832 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
833 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
834 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
835 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
836 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
837 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
838 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
839 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
840 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
841 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
842 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
843 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
844 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
845 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
849 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
851 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
852 ; CHECK-LABEL: test_ucmp_q_512
853 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
854 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
855 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
856 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
857 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
858 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
859 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
860 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
861 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
862 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
863 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
864 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
865 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
866 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
867 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
868 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
869 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
870 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
871 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
872 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
873 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
874 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
875 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
876 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
880 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
881 ; CHECK-LABEL: test_mask_ucmp_q_512
882 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
883 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
884 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
885 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
886 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
887 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
888 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
889 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
890 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
891 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
892 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
893 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
894 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
895 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
896 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
897 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
898 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
899 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
900 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
901 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
902 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
903 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
904 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
905 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
909 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
911 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
912 ; CHECK-LABEL: test_mask_vextractf32x4:
913 ; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
914 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
918 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)
920 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
921 ; CHECK-LABEL: test_mask_vextracti64x4:
922 ; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
923 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 2, <4 x i64> %b, i8 %mask)
927 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)
929 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
930 ; CHECK-LABEL: test_maskz_vextracti32x4:
931 ; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
932 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
936 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)
938 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
939 ; CHECK-LABEL: test_vextractf64x4:
940 ; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
941 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 2, <4 x double> zeroinitializer, i8 -1)
942 ret <4 x double> %res
945 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
947 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
948 ; CHECK-LABEL: test_x86_avx512_pslli_d
950 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
954 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
955 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
956 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
957 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
961 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
962 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
963 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
964 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
968 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
970 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
971 ; CHECK-LABEL: test_x86_avx512_pslli_q
973 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
977 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
978 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
979 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
980 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
984 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
985 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
986 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
987 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
991 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
993 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
994 ; CHECK-LABEL: test_x86_avx512_psrli_d
996 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1000 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1001 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
1002 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
1003 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1007 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1008 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
1009 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1010 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1014 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1016 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1017 ; CHECK-LABEL: test_x86_avx512_psrli_q
1019 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1023 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1024 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
1025 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
1026 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1030 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1031 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
1032 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1033 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1037 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1039 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1040 ; CHECK-LABEL: test_x86_avx512_psrai_d
1042 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1046 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1047 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
1048 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
1049 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1053 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1054 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
1055 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1056 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1060 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1062 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1063 ; CHECK-LABEL: test_x86_avx512_psrai_q
1065 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1069 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1070 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
1071 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
1072 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1076 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1077 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
1078 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1079 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1083 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1085 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
1086 ; CHECK-LABEL: test_x86_avx512_psll_d
1088 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1092 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1093 ; CHECK-LABEL: test_x86_avx512_mask_psll_d
1094 ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
1095 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1099 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1100 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
1101 ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
1102 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1106 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1108 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
1109 ; CHECK-LABEL: test_x86_avx512_psll_q
1111 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1115 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1116 ; CHECK-LABEL: test_x86_avx512_mask_psll_q
1117 ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
1118 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1122 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1123 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
1124 ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
1125 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1129 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1131 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
1132 ; CHECK-LABEL: test_x86_avx512_psrl_d
1134 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1138 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1139 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
1140 ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
1141 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1145 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1146 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
1147 ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
1148 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1152 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1154 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
1155 ; CHECK-LABEL: test_x86_avx512_psrl_q
1157 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1161 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1162 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
1163 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
1164 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1168 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1169 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
1170 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
1171 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1175 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1177 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
1178 ; CHECK-LABEL: test_x86_avx512_psra_d
1180 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1184 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1185 ; CHECK-LABEL: test_x86_avx512_mask_psra_d
1186 ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
1187 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1191 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1192 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
1193 ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
1194 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1198 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1200 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
1201 ; CHECK-LABEL: test_x86_avx512_psra_q
1203 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1207 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1208 ; CHECK-LABEL: test_x86_avx512_mask_psra_q
1209 ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
1210 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1214 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1215 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
1216 ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
1217 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1221 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1223 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
1224 ; CHECK-LABEL: test_x86_avx512_psllv_d
1226 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1230 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1231 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
1232 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
1233 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1237 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1238 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
1239 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1240 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1244 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1246 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
1247 ; CHECK-LABEL: test_x86_avx512_psllv_q
1249 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1253 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1254 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
1255 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
1256 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1260 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1261 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
1262 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1263 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1267 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1270 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
1271 ; CHECK-LABEL: test_x86_avx512_psrav_d
1273 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1277 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1278 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
1279 ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
1280 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1284 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1285 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
1286 ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
1287 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1291 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1293 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
1294 ; CHECK-LABEL: test_x86_avx512_psrav_q
1296 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1300 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1301 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
1302 ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
1303 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1307 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1308 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
1309 ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
1310 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1314 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1316 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
1317 ; CHECK-LABEL: test_x86_avx512_psrlv_d
1319 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1323 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1324 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
1325 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
1326 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1330 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1331 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
1332 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1333 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1337 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1339 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
1340 ; CHECK-LABEL: test_x86_avx512_psrlv_q
1342 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1346 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1347 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
1348 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
1349 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1353 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1354 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
1355 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1356 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1360 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1362 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
1363 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
1365 %b = load <8 x i64>, <8 x i64>* %ptr
1366 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1370 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1371 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1372 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
1374 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
1375 ; CHECK-LABEL: test_vsubps_rn
1376 ; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
1377 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1378 <16 x float> zeroinitializer, i16 -1, i32 0)
1379 ret <16 x float> %res
1382 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
1383 ; CHECK-LABEL: test_vsubps_rd
1384 ; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
1385 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1386 <16 x float> zeroinitializer, i16 -1, i32 1)
1387 ret <16 x float> %res
1390 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
1391 ; CHECK-LABEL: test_vsubps_ru
1392 ; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
1393 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1394 <16 x float> zeroinitializer, i16 -1, i32 2)
1395 ret <16 x float> %res
1398 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
1399 ; CHECK-LABEL: test_vsubps_rz
1400 ; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
1401 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1402 <16 x float> zeroinitializer, i16 -1, i32 3)
1403 ret <16 x float> %res
1406 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
1407 ; CHECK-LABEL: test_vmulps_rn
1408 ; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
1409 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1410 <16 x float> zeroinitializer, i16 -1, i32 0)
1411 ret <16 x float> %res
1414 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
1415 ; CHECK-LABEL: test_vmulps_rd
1416 ; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
1417 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1418 <16 x float> zeroinitializer, i16 -1, i32 1)
1419 ret <16 x float> %res
1422 define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
1423 ; CHECK-LABEL: test_vmulps_ru
1424 ; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
1425 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1426 <16 x float> zeroinitializer, i16 -1, i32 2)
1427 ret <16 x float> %res
1430 define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
1431 ; CHECK-LABEL: test_vmulps_rz
1432 ; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
1433 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1434 <16 x float> zeroinitializer, i16 -1, i32 3)
1435 ret <16 x float> %res
; --- vmulps with static rounding and zero-masking ---
; Same rounding-operand scheme as above, but a variable i16 %mask plus a
; zeroinitializer passthru selects zero-masking: the expected assembly
; carries {%k1} {z} and the EVEX byte gains the mask/z bits
; (0x99/0xb9/0xd9/0xf9 vs. 0x18/0x38/0x58/0x78 in the unmasked tests).
define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rn
; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 0)
ret <16 x float> %res
define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rd
; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 1)
ret <16 x float> %res
define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_ru
; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 2)
ret <16 x float> %res
define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rz
; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 3)
ret <16 x float> %res
;; With Passthru value
; --- vmulps with static rounding and merge-masking ---
; A non-zero %passthru operand selects merge-masking: masked-off lanes keep
; the passthru value, so the expected assembly has {%k1} without {z} and the
; destination register in the encoding changes accordingly (…,0xd1 vs 0xc1).
define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rn
; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 0)
ret <16 x float> %res
define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rd
; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 1)
ret <16 x float> %res
define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_ru
; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 2)
ret <16 x float> %res
define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rz
; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 3)
ret <16 x float> %res
; --- vmulpd (double) with static rounding and zero-masking ---
; Double-precision counterpart of the masked vmulps tests above: i8 mask
; for 8 lanes, zeroinitializer passthru, same rounding-operand encoding.
define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rn
; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 0)
ret <8 x double> %res
define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rd
; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 1)
ret <8 x double> %res
define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_ru
; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 2)
ret <8 x double> %res
define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rz
; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 3)
ret <8 x double> %res
; --- masked 512-bit i32 logic ops: vpxord / vpord / vpandd ---
; Each pair tests: (1) all-ones mask + zero passthru, which should fold to the
; plain unmasked instruction, and (2) a variable mask + passthru, which should
; emit the merge-masked {%k1} form writing into the passthru register (%zmm2).
define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_xor_epi32
;CHECK: vpxord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_xor_epi32
;CHECK: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_or_epi32
;CHECK: vpord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_or_epi32
;CHECK: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
%res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_and_epi32
;CHECK: vpandd {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_and_epi32
;CHECK: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; --- masked 512-bit i64 logic ops: vpxorq / vporq / vpandq ---
; Same structure as the i32 logic tests above, with i8 masks for 8 lanes and
; the W=1 EVEX forms (0xfd opcode-prefix byte) of the instructions.
define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
;CHECK-LABEL: test_xor_epi64
;CHECK: vpxorq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi64
;CHECK: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
;CHECK-LABEL: test_or_epi64
;CHECK: vporq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi64
;CHECK: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
;CHECK-LABEL: test_and_epi64
;CHECK: vpandq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi64
;CHECK: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; --- vpaddd (mask.padd.d.512) operand-form matrix ---
; Suffix convention used throughout these tests:
;   rr  = reg,reg         rm  = reg,mem          rmb = reg,mem-broadcast {1to16}
;   k   = merge-masked into %passThru ({%k1})    kz  = zero-masked ({%k1} {z})
; The broadcast variants build %b by splatting one scalar load via
; insertelement + shufflevector, which should fold into the (%rdi){1to16} form.
define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_mask_add_epi32_rr
;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rrk
;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rrkz
;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi32_rm
;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmk
;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmkz
;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi32_rmb
;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmbk
;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmbkz
;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; --- vpsubd (mask.psub.d.512) operand-form matrix ---
; Same rr/rrk/rrkz/rm/rmk/rmkz/rmb/rmbk/rmbkz coverage as the vpaddd tests
; above, for the subtract intrinsic (opcode byte 0xfa vs. vpaddd's 0xfe).
define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_mask_sub_epi32_rr
;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rrk
;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rrkz
;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi32_rm
;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmk
;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmkz
;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi32_rmb
;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmbk
;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmbkz
;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; --- vpaddq (mask.padd.q.512) operand-form matrix ---
; 64-bit element counterpart of the vpaddd tests: i8 masks, {1to8} broadcast,
; and an <8 x i32> shuffle mask to splat the scalar into all 8 lanes.
define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
;CHECK-LABEL: test_mask_add_epi64_rr
;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi64_rrk
;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi64_rrkz
;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi64_rm
;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi64_rmk
;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi64_rmkz
;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi64_rmb
;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi64_rmbk
;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi64_rmbkz
;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; --- vpsubq (mask.psub.q.512) operand-form matrix ---
; Same shape as the vpaddq tests above, for the 64-bit subtract (opcode 0xfb).
define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
;CHECK-LABEL: test_mask_sub_epi64_rr
;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi64_rrk
;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi64_rrkz
;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi64_rm
;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi64_rmk
;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi64_rmkz
;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi64_rmb
;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi64_rmbk
;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi64_rmbkz
;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; --- vpmuldq (mask.pmul.dq.512): signed 32x32->64 widening multiply ---
; Inputs are <16 x i32>, result is <8 x i64>. The broadcast variants splat a
; 64-bit scalar and bitcast to <16 x i32>, which should still select the
; 64-bit {1to8} broadcast form of vpmuldq.
define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_mask_mul_epi32_rr
;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epi32_rrk
;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epi32_rrkz
;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_mul_epi32_rm
;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epi32_rmk
;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epi32_rmkz
;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
;CHECK-LABEL: test_mask_mul_epi32_rmb
;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epi32_rmbk
;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epi32_rmbkz
;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; --- vpmuludq (mask.pmulu.dq.512): unsigned 32x32->64 widening multiply ---
; Mirrors the vpmuldq tests above for the unsigned intrinsic
; (different opcode map/byte: 0xf1 … 0xf4 vs. 0xf2 … 0x28).
define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_mask_mul_epu32_rr
;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rrk
;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rrkz
;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_mul_epu32_rm
;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rmk
;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rmkz
;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
;CHECK-LABEL: test_mask_mul_epu32_rmb
;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rmbk
;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rmbkz
;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; ---- AVX-512 vpmulld: llvm.x86.avx512.mask.pmull.d.512 lowering tests.
; Same rr/rm/rmb x unmasked/k/kz matrix as the vpmuludq tests above, but
; with a 16-lane i32 multiply, an i16 mask, and {1to16} dword broadcast.
2102 define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
2103 ;CHECK-LABEL: test_mask_mullo_epi32_rr_512
2104 ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
2105 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2109 define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2110 ;CHECK-LABEL: test_mask_mullo_epi32_rrk_512
2111 ;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
2112 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2113 ret < 16 x i32> %res
2116 define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2117 ;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512
2118 ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
2119 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2120 ret < 16 x i32> %res
2123 define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
2124 ;CHECK-LABEL: test_mask_mullo_epi32_rm_512
2125 ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
2126 %b = load <16 x i32>, <16 x i32>* %ptr_b
2127 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2128 ret < 16 x i32> %res
2131 define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2132 ;CHECK-LABEL: test_mask_mullo_epi32_rmk_512
2133 ;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
2134 %b = load <16 x i32>, <16 x i32>* %ptr_b
2135 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2136 ret < 16 x i32> %res
2139 define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2140 ;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512
2141 ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
2142 %b = load <16 x i32>, <16 x i32>* %ptr_b
2143 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2144 ret < 16 x i32> %res
; The rmb* variants splat a single i32 so selection folds it into {1to16}.
2147 define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
2148 ;CHECK-LABEL: test_mask_mullo_epi32_rmb_512
2149 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
2150 %q = load i32, i32* %ptr_b
2151 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2152 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2153 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2154 ret < 16 x i32> %res
2157 define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2158 ;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512
2159 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
2160 %q = load i32, i32* %ptr_b
2161 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2162 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2163 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2164 ret < 16 x i32> %res
2167 define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2168 ;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512
2169 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
2170 %q = load i32, i32* %ptr_b
2171 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2172 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2173 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2174 ret < 16 x i32> %res
2177 declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; ---- llvm.x86.avx512.mask.add.ps.512 with explicit rounding control.
; The final i32 argument selects the embedded rounding mode:
; 0 = {rn-sae}, 1 = {rd-sae}, 2 = {ru-sae}, 3 = {rz-sae}, 4 = current mode.
; Zero-mask variant: pass-through is zeroinitializer, so the instruction
; must carry {%k1} {z}.
2179 define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2180 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae
2181 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2182 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2183 ret <16 x float> %res
; Zero-masked vaddps with {rd-sae} embedded rounding (rounding arg = 1).
2185 define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2186 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae
; Check the zero-masked form explicitly, matching the rn/ru/rz siblings.
; The previous pattern omitted "{%k1} {z}" and would therefore also accept
; an (incorrect) unmasked vaddps, since FileCheck matches substrings.
2187 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2188 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
2189 ret <16 x float> %res
; Remaining add.ps.512 rounding-mode tests: maskz (zeroing), mask (merge
; into %src, which arrives in %zmm2), and unmasked (mask = -1) variants.
2191 define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2192 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae
2193 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2194 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2195 ret <16 x float> %res
2198 define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2199 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae
2200 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2201 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2202 ret <16 x float> %res
; Rounding arg 4 = current rounding mode: no {..-sae} annotation expected.
2206 define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2207 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_current
2208 ;CHECK: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
2209 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2210 ret <16 x float> %res
2213 define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2214 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae
2215 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2216 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2217 ret <16 x float> %res
2219 define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2220 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae
2221 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2222 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2223 ret <16 x float> %res
2225 define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2226 ;CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae
2227 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2228 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2229 ret <16 x float> %res
2232 define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2233 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae
2234 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2235 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2236 ret <16 x float> %res
2240 define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2241 ;CHECK-LABEL: test_mm512_mask_add_round_ps_current
2242 ;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1}
2243 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2244 ret <16 x float> %res
; Unmasked (mask = -1) variants: no mask annotation expected at all.
2248 define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2249 ;CHECK-LABEL: test_mm512_add_round_ps_rn_sae
2250 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
2251 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2252 ret <16 x float> %res
2254 define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2255 ;CHECK-LABEL: test_mm512_add_round_ps_rd_sae
2256 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
2257 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2258 ret <16 x float> %res
2260 define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2261 ;CHECK-LABEL: test_mm512_add_round_ps_ru_sae
2262 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
2263 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2264 ret <16 x float> %res
2267 define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2268 ;CHECK-LABEL: test_mm512_add_round_ps_rz_sae
2269 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
2270 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2271 ret <16 x float> %res
2274 define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2275 ;CHECK-LABEL: test_mm512_add_round_ps_current
2276 ;CHECK: vaddps %zmm1, %zmm0, %zmm0
2277 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2278 ret <16 x float> %res
2280 declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ---- llvm.x86.avx512.mask.sub.ps.512 rounding-mode tests (vsubps):
; merge-masked and unmasked variants, same rounding-arg encoding as add.
2282 define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2283 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae
2284 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2285 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2286 ret <16 x float> %res
2288 define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2289 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae
2290 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2291 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2292 ret <16 x float> %res
2294 define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2295 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae
2296 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2297 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2298 ret <16 x float> %res
2301 define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2302 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae
2303 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2304 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2305 ret <16 x float> %res
2309 define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2310 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_current
2311 ;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1}
2312 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2313 ret <16 x float> %res
2316 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2317 ;CHECK-LABEL: test_mm512_sub_round_ps_rn_sae
2318 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
2319 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2320 ret <16 x float> %res
2322 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2323 ;CHECK-LABEL: test_mm512_sub_round_ps_rd_sae
2324 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
2325 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2326 ret <16 x float> %res
2328 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2329 ;CHECK-LABEL: test_mm512_sub_round_ps_ru_sae
2330 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
2331 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2332 ret <16 x float> %res
2335 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2336 ;CHECK-LABEL: test_mm512_sub_round_ps_rz_sae
2337 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
2338 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2339 ret <16 x float> %res
2342 define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2343 ;CHECK-LABEL: test_mm512_sub_round_ps_current
2344 ;CHECK: vsubps %zmm1, %zmm0, %zmm0
2345 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2346 ret <16 x float> %res
; ---- llvm.x86.avx512.mask.div.ps.512 rounding-mode tests (vdivps),
; same structure as the add/sub blocks above. Zero-mask, {rn-sae}.
2349 define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2350 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae
2351 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2352 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2353 ret <16 x float> %res
; Zero-masked vdivps with {rd-sae} embedded rounding (rounding arg = 1).
2355 define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2356 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae
; Check the zero-masked form explicitly, matching the rn/ru/rz siblings.
; The previous pattern omitted "{%k1} {z}" and would therefore also accept
; an (incorrect) unmasked vdivps, since FileCheck matches substrings.
2357 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2358 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
2359 ret <16 x float> %res
; Remaining div.ps.512 rounding-mode tests: maskz, merge-mask, unmasked.
2361 define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2362 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae
2363 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2364 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2365 ret <16 x float> %res
2368 define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2369 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae
2370 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2371 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2372 ret <16 x float> %res
2376 define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2377 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_current
2378 ;CHECK: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
2379 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2380 ret <16 x float> %res
2383 define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2384 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae
2385 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2386 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2387 ret <16 x float> %res
2389 define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2390 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae
2391 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2392 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2393 ret <16 x float> %res
2395 define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2396 ;CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae
2397 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2398 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2399 ret <16 x float> %res
2402 define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2403 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae
2404 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2405 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2406 ret <16 x float> %res
2410 define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2411 ;CHECK-LABEL: test_mm512_mask_div_round_ps_current
2412 ;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1}
2413 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2414 ret <16 x float> %res
2418 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2419 ;CHECK-LABEL: test_mm512_div_round_ps_rn_sae
2420 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
2421 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2422 ret <16 x float> %res
2424 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2425 ;CHECK-LABEL: test_mm512_div_round_ps_rd_sae
2426 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
2427 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2428 ret <16 x float> %res
2430 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2431 ;CHECK-LABEL: test_mm512_div_round_ps_ru_sae
2432 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
2433 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2434 ret <16 x float> %res
2437 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2438 ;CHECK-LABEL: test_mm512_div_round_ps_rz_sae
2439 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
2440 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2441 ret <16 x float> %res
2444 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2445 ;CHECK-LABEL: test_mm512_div_round_ps_current
2446 ;CHECK: vdivps %zmm1, %zmm0, %zmm0
2447 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2448 ret <16 x float> %res
2450 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ---- llvm.x86.avx512.mask.min.ps.512 (vminps): min/max only distinguish
; {sae} (rounding arg = 8, suppress-all-exceptions) vs current mode (4).
2452 define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2453 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_sae
2454 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2455 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2456 ret <16 x float> %res
2459 define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2460 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_current
2461 ;CHECK: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
2462 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2463 ret <16 x float> %res
2466 define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2467 ;CHECK-LABEL: test_mm512_mask_min_round_ps_sae
2468 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2469 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2470 ret <16 x float> %res
2473 define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2474 ;CHECK-LABEL: test_mm512_mask_min_round_ps_current
2475 ;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1}
2476 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2477 ret <16 x float> %res
2480 define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2481 ;CHECK-LABEL: test_mm512_min_round_ps_sae
2482 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0
2483 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2484 ret <16 x float> %res
2487 define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2488 ;CHECK-LABEL: test_mm512_min_round_ps_current
2489 ;CHECK: vminps %zmm1, %zmm0, %zmm0
2490 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2491 ret <16 x float> %res
2493 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ---- llvm.x86.avx512.mask.max.ps.512 (vmaxps): mirror of the vminps
; block above ({sae} vs current mode; maskz, merge-mask, unmasked).
2495 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2496 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_sae
2497 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2498 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2499 ret <16 x float> %res
2502 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2503 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_current
2504 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
2505 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2506 ret <16 x float> %res
2509 define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2510 ;CHECK-LABEL: test_mm512_mask_max_round_ps_sae
2511 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2512 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2513 ret <16 x float> %res
2516 define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2517 ;CHECK-LABEL: test_mm512_mask_max_round_ps_current
2518 ;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
2519 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2520 ret <16 x float> %res
2523 define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2524 ;CHECK-LABEL: test_mm512_max_round_ps_sae
2525 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0
2526 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2527 ret <16 x float> %res
2530 define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2531 ;CHECK-LABEL: test_mm512_max_round_ps_current
2532 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0
2533 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2534 ret <16 x float> %res
2536 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ---- llvm.x86.avx512.mask.add.ss.round (vaddss, scalar single): all five
; rounding modes merge-masked, plus one maskz and one unmasked variant.
2538 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2540 define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2541 ; CHECK-LABEL: test_mask_add_ss_rn
2542 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2543 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
2544 ret <4 x float> %res
2547 define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2548 ; CHECK-LABEL: test_mask_add_ss_rd
2549 ; CHECK: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2550 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
2551 ret <4 x float> %res
2554 define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2555 ; CHECK-LABEL: test_mask_add_ss_ru
2556 ; CHECK: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2557 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
2558 ret <4 x float> %res
2561 define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2562 ; CHECK-LABEL: test_mask_add_ss_rz
2563 ; CHECK: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2564 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
2565 ret <4 x float> %res
2568 define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2569 ; CHECK-LABEL: test_mask_add_ss_current
2570 ; CHECK: vaddss %xmm1, %xmm0, %xmm2 {%k1}
2571 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2572 ret <4 x float> %res
2575 define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2576 ; CHECK-LABEL: test_maskz_add_ss_rn
2577 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2578 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
2579 ret <4 x float> %res
2582 define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
2583 ; CHECK-LABEL: test_add_ss_rn
2584 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
2585 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
2586 ret <4 x float> %res
; ---- llvm.x86.avx512.mask.add.sd.round (vaddsd, scalar double): same
; matrix as the vaddss block above.
2589 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2591 define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2592 ; CHECK-LABEL: test_mask_add_sd_rn
2593 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2594 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
2595 ret <2 x double> %res
2598 define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2599 ; CHECK-LABEL: test_mask_add_sd_rd
2600 ; CHECK: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2601 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
2602 ret <2 x double> %res
2605 define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2606 ; CHECK-LABEL: test_mask_add_sd_ru
2607 ; CHECK: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2608 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
2609 ret <2 x double> %res
2612 define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2613 ; CHECK-LABEL: test_mask_add_sd_rz
2614 ; CHECK: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2615 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
2616 ret <2 x double> %res
2619 define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2620 ; CHECK-LABEL: test_mask_add_sd_current
2621 ; CHECK: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
2622 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2623 ret <2 x double> %res
2626 define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2627 ; CHECK-LABEL: test_maskz_add_sd_rn
2628 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2629 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
2630 ret <2 x double> %res
2633 define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
2634 ; CHECK-LABEL: test_add_sd_rn
2635 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
2636 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
2637 ret <2 x double> %res
; Scalar vmaxss tests: selector i32 8 requests {sae} (suppress-all-exceptions),
; i32 4 requests the current rounding mode (no suffix). Each is exercised with
; merge-masking into %a2, zero-masking, and unmasked (i8 -1).
2640 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2642 define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2643 ; CHECK-LABEL: test_mask_max_ss_sae
2644 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2645 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
2646 ret <4 x float> %res
2649 define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2650 ; CHECK-LABEL: test_maskz_max_ss_sae
2651 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2652 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
2653 ret <4 x float> %res
2656 define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
2657 ; CHECK-LABEL: test_max_ss_sae
2658 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0
2659 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
2660 ret <4 x float> %res
; Same three masking variants without SAE (selector 4 = current mode).
2663 define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2664 ; CHECK-LABEL: test_mask_max_ss
2665 ; CHECK: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
2666 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2667 ret <4 x float> %res
2670 define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2671 ; CHECK-LABEL: test_maskz_max_ss
2672 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
2673 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
2674 ret <4 x float> %res
2677 define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
2678 ; CHECK-LABEL: test_max_ss
2679 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0
2680 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
2681 ret <4 x float> %res
; Scalar vmaxsd tests — double-precision mirror of the vmaxss tests above:
; i32 8 -> {sae}, i32 4 -> current rounding mode, each with merge-masking,
; zero-masking, and unmasked (i8 -1) variants.
2683 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2685 define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2686 ; CHECK-LABEL: test_mask_max_sd_sae
2687 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2688 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
2689 ret <2 x double> %res
2692 define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2693 ; CHECK-LABEL: test_maskz_max_sd_sae
2694 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2695 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
2696 ret <2 x double> %res
2699 define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
2700 ; CHECK-LABEL: test_max_sd_sae
2701 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
2702 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
2703 ret <2 x double> %res
2706 define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2707 ; CHECK-LABEL: test_mask_max_sd
2708 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
2709 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2710 ret <2 x double> %res
2713 define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2714 ; CHECK-LABEL: test_maskz_max_sd
2715 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
2716 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
2717 ret <2 x double> %res
2720 define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
2721 ; CHECK-LABEL: test_max_sd
2722 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0
2723 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
2724 ret <2 x double> %res
; Signed integer -> scalar FP conversions with embedded rounding: each call
; passes rounding selector i32 3, so the emitted vcvtsi2sd*/vcvtsi2ss* must
; carry the {rz-sae} (round-toward-zero) operand.
2727 define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
2728 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
2730 ; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
2732 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<2 x double>> [#uses=1]
2733 ret <2 x double> %res
2735 declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone
2737 define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
2738 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
2740 ; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
2742 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<2 x double>> [#uses=1]
2743 ret <2 x double> %res
2745 declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
2747 define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
2748 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
2750 ; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
2752 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<4 x float>> [#uses=1]
2753 ret <4 x float> %res
2755 declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
2757 define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
2758 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
2760 ; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
2762 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<4 x float>> [#uses=1]
2763 ret <4 x float> %res
2765 declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
; Unsigned 32-bit integer -> float conversions (vcvtusi2ssl).
; Selector i32 1 -> {rd-sae} (round down); i32 4 -> current rounding mode.
; Note the memory-source variants: with embedded rounding the load is NOT
; folded (separate movl, then a register operand), while with the current
; rounding mode the load folds directly into vcvtusi2ssl (%rdi).
2767 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
2768 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
2770 ; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
2773 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<4 x float>> [#uses=1]
2774 ret <4 x float> %res
2777 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
2778 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
2780 ; CHECK-NEXT: movl (%rdi), %eax
2781 ; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
2784 %b = load i32, i32* %ptr
2785 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<4 x float>> [#uses=1]
2786 ret <4 x float> %res
2789 define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
2790 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
2792 ; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
2795 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<4 x float>> [#uses=1]
2796 ret <4 x float> %res
2799 define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
2800 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
2802 ; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
2805 %b = load i32, i32* %ptr
2806 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<4 x float>> [#uses=1]
2807 ret <4 x float> %res
2809 declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
; Unsigned 64-bit integer -> float conversions (vcvtusi2ssq):
; selector i32 1 -> {rd-sae}, i32 4 -> current rounding mode (no suffix).
2811 define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
2812 ; CHECK-LABEL: _mm_cvt_roundu64_ss:
2814 ; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
2817 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<4 x float>> [#uses=1]
2818 ret <4 x float> %res
2821 define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
2822 ; CHECK-LABEL: _mm_cvtu64_ss:
2824 ; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
2827 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<4 x float>> [#uses=1]
2828 ret <4 x float> %res
2830 declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone
; Unsigned integer -> double conversions. The 32-bit intrinsic
; (llvm.x86.avx512.cvtusi2sd) takes no rounding argument, so no {..-sae}
; suffix is expected; the 64-bit form takes a selector (1 -> {rd-sae},
; 4 -> current mode).
2832 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
2833 ; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
2835 ; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
2838 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<2 x double>> [#uses=1]
2839 ret <2 x double> %res
2841 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
2843 define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
2844 ; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
2846 ; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
2849 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<2 x double>> [#uses=1]
2850 ret <2 x double> %res
2853 define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
2854 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
2856 ; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
2859 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<2 x double>> [#uses=1]
2860 ret <2 x double> %res
2862 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
; Unmasked integer min/max tests that also pin the exact EVEX encoding
; (--show-mc-encoding in the RUN line): mask i8/i16 -1 means all lanes active,
; so no {%k} annotation appears and the plain zmm,zmm,zmm form is emitted.
2864 define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
2865 ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
2866 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
2867 <8 x i64>zeroinitializer, i8 -1)
2870 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2872 define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
2873 ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
2874 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
2875 <16 x i32>zeroinitializer, i16 -1)
2878 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2880 define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
2881 ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
2882 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
2883 <16 x i32>zeroinitializer, i16 -1)
2886 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Masked integer min/max intrinsic tests. Each test calls the intrinsic twice
; — once with a live mask %x3 and once unmasked (-1) — and adds the results,
; so both the masked and unmasked instruction forms must be selected.
2888 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_512
2890 ; CHECK: vpmaxsd %zmm
2892 define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2893 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2894 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2895 %res2 = add <16 x i32> %res, %res1
2896 ret <16 x i32> %res2
2899 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_512
2901 ; CHECK: vpmaxsq %zmm
2903 define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2904 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2905 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2906 %res2 = add <8 x i64> %res, %res1
2910 declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2912 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_512
2914 ; CHECK: vpmaxud %zmm
2916 define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2917 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2918 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2919 %res2 = add <16 x i32> %res, %res1
2920 ret <16 x i32> %res2
2923 declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2925 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_512
2927 ; CHECK: vpmaxuq %zmm
2929 define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2930 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2931 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2932 %res2 = add <8 x i64> %res, %res1
2936 declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2938 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_512
2940 ; CHECK: vpminsd %zmm
2942 define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2943 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2944 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2945 %res2 = add <16 x i32> %res, %res1
2946 ret <16 x i32> %res2
2949 declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2951 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_512
2953 ; CHECK: vpminsq %zmm
2955 define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2956 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2957 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2958 %res2 = add <8 x i64> %res, %res1
2962 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_512
2964 ; CHECK: vpminud %zmm
2966 define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2967 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2968 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2969 %res2 = add <16 x i32> %res, %res1
2970 ret <16 x i32> %res2
2973 declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2975 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_512
2977 ; CHECK: vpminuq %zmm
2979 define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2980 %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2981 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2982 %res2 = add <8 x i64> %res, %res1
; Two-source permute (vpermi2*) tests: %x1 supplies the indices, %x0/%x2 the
; data. Each test checks the merge-masked ({%k1}) instruction form by calling
; with mask %x3 and with -1, then combining the results.
2986 declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2988 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512
2991 ; CHECK: vpermi2d {{.*}}{%k1}
2992 define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2993 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2994 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2995 %res2 = add <16 x i32> %res, %res1
2996 ret <16 x i32> %res2
2999 declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
3001 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512
3004 ; CHECK: vpermi2pd {{.*}}{%k1}
3005 define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
3006 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
3007 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
3008 %res2 = fadd <8 x double> %res, %res1
3009 ret <8 x double> %res2
3012 declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
3014 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512
3017 ; CHECK: vpermi2ps {{.*}}{%k1}
3018 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
3019 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
3020 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
3021 %res2 = fadd <16 x float> %res, %res1
3022 ret <16 x float> %res2
3025 declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3027 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512
3030 ; CHECK: vpermi2q {{.*}}{%k1}
3031 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3032 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3033 %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3034 %res2 = add <8 x i64> %res, %res1
; Table-variant permutes (vpermt2*). The .maskz. intrinsics must select the
; zero-masked form ({%k1} {z}); the .mask. variant at the end selects the
; merge-masked form ({%k1}).
3038 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3040 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512
3043 ; CHECK: vpermt2d {{.*}}{%k1} {z}
3044 define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3045 %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3046 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3047 %res2 = add <16 x i32> %res, %res1
3048 ret <16 x i32> %res2
3051 declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
3053 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512
3056 ; CHECK: vpermt2pd {{.*}}{%k1} {z}
3057 define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3058 %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3059 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3060 %res2 = fadd <8 x double> %res, %res1
3061 ret <8 x double> %res2
3064 declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
3066 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512
3069 ; CHECK: vpermt2ps {{.*}}{%k1} {z}
3070 define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3071 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3072 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3073 %res2 = fadd <16 x float> %res, %res1
3074 ret <16 x float> %res2
3078 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3080 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512
3083 ; CHECK: vpermt2q {{.*}}{%k1} {z}
3084 define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3085 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3086 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3087 %res2 = add <8 x i64> %res, %res1
3091 declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3093 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512
3096 ; CHECK: vpermt2d {{.*}}{%k1}
3098 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3099 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3100 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3101 %res2 = add <16 x i32> %res, %res1
3102 ret <16 x i32> %res2
; vscalefpd/vscalefps tests: masked call uses a non-default rounding selector,
; the unmasked call uses selector 0; only the masked {%k1} form is checked.
3105 declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
3106 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_512
3109 ; CHECK: vscalefpd{{.*}}{%k1}
3110 define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3111 %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3)
3112 %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
3113 %res2 = fadd <8 x double> %res, %res1
3114 ret <8 x double> %res2
3117 declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
3118 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_512
3121 ; CHECK: vscalefps{{.*}}{%k1}
3122 define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3123 %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2)
3124 %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
3125 %res2 = fadd <16 x float> %res, %res1
3126 ret <16 x float> %res2
; FP unpack tests (vunpckhpd/ps, vunpcklpd/ps): check the mask move into %k1
; (the i8 masks go through movzbl first, the i16 masks via kmovw directly),
; then the merge-masked and unmasked instruction forms back-to-back.
3129 declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
3131 define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3132 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
3134 ; CHECK-NEXT: movzbl %dil, %eax
3135 ; CHECK-NEXT: kmovw %eax, %k1
3136 ; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1}
3137 ; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm0
3138 %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3139 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3140 %res2 = fadd <8 x double> %res, %res1
3141 ret <8 x double> %res2
3144 declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
3146 define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3147 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
3149 ; CHECK-NEXT: kmovw %edi, %k1
3150 ; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1}
3151 ; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm0
3152 %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3153 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3154 %res2 = fadd <16 x float> %res, %res1
3155 ret <16 x float> %res2
3158 declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
3160 define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3161 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
3163 ; CHECK-NEXT: movzbl %dil, %eax
3164 ; CHECK-NEXT: kmovw %eax, %k1
3165 ; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm2 {%k1}
3166 ; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm0
3167 %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3168 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3169 %res2 = fadd <8 x double> %res, %res1
3170 ret <8 x double> %res2
3173 declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
3175 define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3176 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
3178 ; CHECK-NEXT: kmovw %edi, %k1
3179 ; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1}
3180 ; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm0
3181 %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3182 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3183 %res2 = fadd <16 x float> %res, %res1
3184 ret <16 x float> %res2
; Integer unpack tests (vpunpcklqdq/vpunpckhqdq/vpunpckhdq/vpunpckldq):
; merge-masked, (for the first test) zero-masked, and unmasked forms, with
; results combined via vpaddq/vpaddd so all variants stay live.
3187 declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3189 define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3190 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
3192 ; CHECK-NEXT: movzbl %dil, %eax
3193 ; CHECK-NEXT: kmovw %eax, %k1
3194 ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1}
3195 ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 {%k1} {z}
3196 ; CHECK-NEXT: vpunpcklqdq {{.*#+}}
3197 ; CHECK: vpaddq %zmm0, %zmm2, %zmm0
3198 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
3200 %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3201 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3202 %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3)
3203 %res3 = add <8 x i64> %res, %res1
3204 %res4 = add <8 x i64> %res2, %res3
3208 declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3210 define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3211 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
3213 ; CHECK-NEXT: movzbl %dil, %eax
3214 ; CHECK-NEXT: kmovw %eax, %k1
3215 ; CHECK-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1}
3216 ; CHECK-NEXT: vpunpckhqdq {{.*#+}}
3217 ; CHECK: vpaddq %zmm0, %zmm2, %zmm0
3219 %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3220 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3221 %res2 = add <8 x i64> %res, %res1
3225 declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3227 define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3228 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
3230 ; CHECK-NEXT: kmovw %edi, %k1
3231 ; CHECK-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1}
3232 ; CHECK-NEXT: vpunpckhdq {{.*#+}}
3233 ; CHECK: vpaddd %zmm0, %zmm2, %zmm0
3235 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3236 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3237 %res2 = add <16 x i32> %res, %res1
3238 ret <16 x i32> %res2
3241 declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3243 define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3244 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
3246 ; CHECK-NEXT: kmovw %edi, %k1
3247 ; CHECK-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1}
3248 ; CHECK-NEXT: vpunpckldq {{.*#+}}
3249 ; CHECK: vpaddd %zmm0, %zmm2, %zmm0
3251 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3252 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3253 %res2 = add <16 x i32> %res, %res1
3254 ret <16 x i32> %res2
; Down-converting truncation tests (vpmovqb = q->b truncate, vpmovsqb =
; signed saturate, vpmovusqb = unsigned saturate, vpmovqw = q->w truncate).
; Register forms are exercised unmasked, merge-masked and zero-masked;
; .mem. variants store straight to (%rdi), unmasked and masked.
3257 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)
3259 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3260 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
3261 ; CHECK: vpmovqb %zmm0, %xmm1 {%k1}
3262 ; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
3263 ; CHECK-NEXT: vpmovqb %zmm0, %xmm0
3264 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3265 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3266 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3267 %res3 = add <16 x i8> %res0, %res1
3268 %res4 = add <16 x i8> %res3, %res2
3272 declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3274 define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3275 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512:
3276 ; CHECK: vpmovqb %zmm0, (%rdi)
3277 ; CHECK: vpmovqb %zmm0, (%rdi) {%k1}
3278 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3279 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3283 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)
3285 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3286 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
3287 ; CHECK: vpmovsqb %zmm0, %xmm1 {%k1}
3288 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
3289 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
3290 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3291 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3292 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3293 %res3 = add <16 x i8> %res0, %res1
3294 %res4 = add <16 x i8> %res3, %res2
3298 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3300 define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3301 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
3302 ; CHECK: vpmovsqb %zmm0, (%rdi)
3303 ; CHECK: vpmovsqb %zmm0, (%rdi) {%k1}
3304 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3305 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3309 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)
3311 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3312 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
3313 ; CHECK: vpmovusqb %zmm0, %xmm1 {%k1}
3314 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
3315 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
3316 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3317 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3318 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3319 %res3 = add <16 x i8> %res0, %res1
3320 %res4 = add <16 x i8> %res3, %res2
3324 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3326 define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3327 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
3328 ; CHECK: vpmovusqb %zmm0, (%rdi)
3329 ; CHECK: vpmovusqb %zmm0, (%rdi) {%k1}
3330 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3331 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3335 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
3337 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3338 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
3339 ; CHECK: vpmovqw %zmm0, %xmm1 {%k1}
3340 ; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
3341 ; CHECK-NEXT: vpmovqw %zmm0, %xmm0
3342 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3343 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3344 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3345 %res3 = add <8 x i16> %res0, %res1
3346 %res4 = add <8 x i16> %res3, %res2
3350 declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3352 define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3353 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512:
3354 ; CHECK: vpmovqw %zmm0, (%rdi)
3355 ; CHECK: vpmovqw %zmm0, (%rdi) {%k1}
3356 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3357 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3361 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)
3363 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3364 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
3365 ; CHECK: vpmovsqw %zmm0, %xmm1 {%k1}
3366 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
3367 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
3368 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3369 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3370 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3371 %res3 = add <8 x i16> %res0, %res1
3372 %res4 = add <8 x i16> %res3, %res2
3376 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3378 define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3379 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
3380 ; CHECK: vpmovsqw %zmm0, (%rdi)
3381 ; CHECK: vpmovsqw %zmm0, (%rdi) {%k1}
3382 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3383 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3387 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)
3389 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3390 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
3391 ; CHECK: vpmovusqw %zmm0, %xmm1 {%k1}
3392 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
3393 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
3394 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3395 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3396 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3397 %res3 = add <8 x i16> %res0, %res1
3398 %res4 = add <8 x i16> %res3, %res2
3402 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3404 define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3405 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
3406 ; CHECK: vpmovusqw %zmm0, (%rdi)
3407 ; CHECK: vpmovusqw %zmm0, (%rdi) {%k1}
3408 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3409 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3413 declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
3415 define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3416 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
3417 ; CHECK: vpmovqd %zmm0, %ymm1 {%k1}
3418 ; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
3419 ; CHECK-NEXT: vpmovqd %zmm0, %ymm0
3420 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3421 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3422 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3423 %res3 = add <8 x i32> %res0, %res1
3424 %res4 = add <8 x i32> %res3, %res2
3428 declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3430 define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3431 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512:
3432 ; CHECK: vpmovqd %zmm0, (%rdi)
3433 ; CHECK: vpmovqd %zmm0, (%rdi) {%k1}
3434 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3435 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3439 declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)
3441 define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3442 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
3443 ; CHECK: vpmovsqd %zmm0, %ymm1 {%k1}
3444 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
3445 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm0
3446 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3447 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3448 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3449 %res3 = add <8 x i32> %res0, %res1
3450 %res4 = add <8 x i32> %res3, %res2
3454 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3456 define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3457 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
3458 ; CHECK: vpmovsqd %zmm0, (%rdi)
3459 ; CHECK: vpmovsqd %zmm0, (%rdi) {%k1}
3460 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3461 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3465 declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)
3467 define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3468 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
3469 ; CHECK: vpmovusqd %zmm0, %ymm1 {%k1}
3470 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
3471 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm0
3472 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3473 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3474 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3475 %res3 = add <8 x i32> %res0, %res1
3476 %res4 = add <8 x i32> %res3, %res2
3480 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3482 define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3483 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
3484 ; CHECK: vpmovusqd %zmm0, (%rdi)
3485 ; CHECK: vpmovusqd %zmm0, (%rdi) {%k1}
3486 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3487 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3491 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
3493 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3494 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
3495 ; CHECK: vpmovdb %zmm0, %xmm1 {%k1}
3496 ; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
3497 ; CHECK-NEXT: vpmovdb %zmm0, %xmm0
3498 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3499 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3500 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3501 %res3 = add <16 x i8> %res0, %res1
3502 %res4 = add <16 x i8> %res3, %res2
3506 declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16)
3508 define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3509 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512:
3510 ; CHECK: vpmovdb %zmm0, (%rdi)
3511 ; CHECK: vpmovdb %zmm0, (%rdi) {%k1}
3512 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3513 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3517 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)
3519 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3520 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
3521 ; CHECK: vpmovsdb %zmm0, %xmm1 {%k1}
3522 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
3523 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
3524 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3525 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3526 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3527 %res3 = add <16 x i8> %res0, %res1
3528 %res4 = add <16 x i8> %res3, %res2
3532 declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)
3534 define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3535 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
3536 ; CHECK: vpmovsdb %zmm0, (%rdi)
3537 ; CHECK: vpmovsdb %zmm0, (%rdi) {%k1}
3538 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3539 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3543 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)
3545 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3546 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
3547 ; CHECK: vpmovusdb %zmm0, %xmm1 {%k1}
3548 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
3549 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
3550 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3551 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3552 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3553 %res3 = add <16 x i8> %res0, %res1
3554 %res4 = add <16 x i8> %res3, %res2
3558 declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)
3560 define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3561 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
3562 ; CHECK: vpmovusdb %zmm0, (%rdi)
3563 ; CHECK: vpmovusdb %zmm0, (%rdi) {%k1}
3564 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3565 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3569 declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)
3571 define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3572 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
3573 ; CHECK: vpmovdw %zmm0, %ymm1 {%k1}
3574 ; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
3575 ; CHECK-NEXT: vpmovdw %zmm0, %ymm0
3576 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3577 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3578 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3579 %res3 = add <16 x i16> %res0, %res1
3580 %res4 = add <16 x i16> %res3, %res2
3581 ret <16 x i16> %res4
3584 declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3586 define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3587 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512:
3588 ; CHECK: vpmovdw %zmm0, (%rdi)
3589 ; CHECK: vpmovdw %zmm0, (%rdi) {%k1}
3590 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3591 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3595 declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)
3597 define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3598 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
3599 ; CHECK: vpmovsdw %zmm0, %ymm1 {%k1}
3600 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
3601 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
3602 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3603 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3604 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3605 %res3 = add <16 x i16> %res0, %res1
3606 %res4 = add <16 x i16> %res3, %res2
3607 ret <16 x i16> %res4
3610 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3612 define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3613 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
3614 ; CHECK: vpmovsdw %zmm0, (%rdi)
3615 ; CHECK: vpmovsdw %zmm0, (%rdi) {%k1}
3616 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3617 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3621 declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)
3623 define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3624 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
3625 ; CHECK: vpmovusdw %zmm0, %ymm1 {%k1}
3626 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
3627 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
3628 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3629 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3630 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3631 %res3 = add <16 x i16> %res0, %res1
3632 %res4 = add <16 x i16> %res3, %res2
3633 ret <16 x i16> %res4
3636 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3638 define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3639 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
3640 ; CHECK: vpmovusdw %zmm0, (%rdi)
3641 ; CHECK: vpmovusdw %zmm0, (%rdi) {%k1}
3642 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3643 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; NOTE(review): this section tests the AVX-512 masked int<->fp conversion
; intrinsics. Where the intrinsic takes a trailing i32 rounding-control
; operand, the expected asm in this file maps: 4 -> no embedded-rounding
; suffix (current rounding mode), 0 -> {rn-sae}, 2 -> {ru-sae},
; 8 -> {sae} (suppress-all-exceptions, used by the truncating/widening ops).
; NOTE(review): closing '}' braces (and a few blank/utility lines) appear to
; be missing from this numbered dump -- confirm against the original file.
; vcvtdq2pd: signed i32 -> f64 widening conversion (no rounding operand).
3647 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
3649 define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
3650 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
3652 ; CHECK-NEXT: movzbl %dil, %eax
3653 ; CHECK-NEXT: kmovw %eax, %k1
3654 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1}
3655 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
3656 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3658 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
3659 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
3660 %res2 = fadd <8 x double> %res, %res1
3661 ret <8 x double> %res2
; vcvtdq2ps: signed i32 -> f32, masked (rc 4) plus unmasked {rn-sae} (rc 0).
3664 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3666 define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
3667 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
3669 ; CHECK-NEXT: kmovw %edi, %k1
3670 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1}
3671 ; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0
3672 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
3674 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
3675 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
3676 %res2 = fadd <16 x float> %res, %res1
3677 ret <16 x float> %res2
; vcvtpd2dq: f64 -> signed i32 (narrowing to ymm).
3680 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
3682 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3683 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512:
3685 ; CHECK-NEXT: movzbl %dil, %eax
3686 ; CHECK-NEXT: kmovw %eax, %k1
3687 ; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1}
3688 ; CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0
3689 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3691 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3692 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
3693 %res2 = add <8 x i32> %res, %res1
; vcvtpd2ps: f64 -> f32 narrowing; unmasked call uses {ru-sae} (rc 2).
3697 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
3699 define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) {
3700 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512:
3702 ; CHECK-NEXT: movzbl %dil, %eax
3703 ; CHECK-NEXT: kmovw %eax, %k1
3704 ; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1}
3705 ; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0
3706 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
3708 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
3709 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2)
3710 %res2 = fadd <8 x float> %res, %res1
3711 ret <8 x float> %res2
; vcvtpd2udq: f64 -> unsigned i32; both calls use embedded rounding.
3714 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
3716 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3717 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512:
3719 ; CHECK-NEXT: movzbl %dil, %eax
3720 ; CHECK-NEXT: kmovw %eax, %k1
3721 ; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1}
3722 ; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0
3723 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3725 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2)
3726 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
3727 %res2 = add <8 x i32> %res, %res1
; vcvtps2dq: f32 -> signed i32; both calls use embedded rounding.
3731 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)
3733 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3734 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512:
3736 ; CHECK-NEXT: kmovw %edi, %k1
3737 ; CHECK-NEXT: vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1}
3738 ; CHECK-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0
3739 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3741 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
3742 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
3743 %res2 = add <16 x i32> %res, %res1
3744 ret <16 x i32> %res2
; vcvtps2pd: f32 -> f64 widening; widening ops take {sae} (rc 8), not a
; rounding mode.
3747 declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)
3749 define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) {
3750 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512:
3752 ; CHECK-NEXT: movzbl %dil, %eax
3753 ; CHECK-NEXT: kmovw %eax, %k1
3754 ; CHECK-NEXT: vcvtps2pd %ymm0, %zmm1 {%k1}
3755 ; CHECK-NEXT: vcvtps2pd {sae}, %ymm0, %zmm0
3756 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3758 %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
3759 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
3760 %res2 = fadd <8 x double> %res, %res1
3761 ret <8 x double> %res2
; vcvtps2udq: f32 -> unsigned i32; both calls use embedded rounding.
3764 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
3766 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3767 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512:
3769 ; CHECK-NEXT: kmovw %edi, %k1
3770 ; CHECK-NEXT: vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1}
3771 ; CHECK-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0
3772 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3774 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
3775 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
3776 %res2 = add <16 x i32> %res, %res1
3777 ret <16 x i32> %res2
; vcvttpd2dq: truncating f64 -> signed i32; truncation only allows {sae}.
3780 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
3782 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3783 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
3785 ; CHECK-NEXT: movzbl %dil, %eax
3786 ; CHECK-NEXT: kmovw %eax, %k1
3787 ; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
3788 ; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
3789 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3791 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3792 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
3793 %res2 = add <8 x i32> %res, %res1
; vcvtudq2pd: unsigned i32 -> f64 widening (no rounding operand).
3797 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
3799 define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
3800 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
3802 ; CHECK-NEXT: movzbl %dil, %eax
3803 ; CHECK-NEXT: kmovw %eax, %k1
3804 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1}
3805 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
3806 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3808 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
3809 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
3810 %res2 = fadd <8 x double> %res, %res1
3811 ret <8 x double> %res2
; vcvtudq2ps: unsigned i32 -> f32.
3815 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3817 define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
3818 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
3820 ; CHECK-NEXT: kmovw %edi, %k1
3821 ; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1}
3822 ; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0
3823 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
3825 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
3826 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
3827 %res2 = fadd <16 x float> %res, %res1
3828 ret <16 x float> %res2
; vcvttpd2udq: truncating f64 -> unsigned i32.
3831 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
3833 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3834 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
3836 ; CHECK-NEXT: movzbl %dil, %eax
3837 ; CHECK-NEXT: kmovw %eax, %k1
3838 ; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
3839 ; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
3840 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3842 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3843 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
3844 %res2 = add <8 x i32> %res, %res1
; vcvttps2dq: truncating f32 -> signed i32.
3848 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
3850 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3851 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
3853 ; CHECK-NEXT: kmovw %edi, %k1
3854 ; CHECK-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
3855 ; CHECK-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
3856 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3858 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
3859 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
3860 %res2 = add <16 x i32> %res, %res1
3861 ret <16 x i32> %res2
; vcvttps2udq: truncating f32 -> unsigned i32.
3864 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
3866 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3867 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
3869 ; CHECK-NEXT: kmovw %edi, %k1
3870 ; CHECK-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
3871 ; CHECK-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
3872 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3874 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
3875 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
3876 %res2 = add <16 x i32> %res, %res1
3877 ret <16 x i32> %res2
; Scalar vscalefss/vscalefsd (x = x1 * 2^floor(exp)) tests: one masked call
; with default rounding (rc 4) and one unmasked call with {rn-sae} (rc 8
; per the intrinsic operand; the expected asm shows {rn-sae}).
; NOTE(review): closing '}' braces appear to be missing from this dump --
; confirm against the original test file.
3881 declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
3882 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ss
3885 ; CHECK: vscalefss {{.*}}{%k1}
3886 ; CHECK: vscalefss {rn-sae}
3887 define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
3888 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
3889 %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
3890 %res2 = fadd <4 x float> %res, %res1
3891 ret <4 x float> %res2
; Same pattern for the double-precision scalar form.
3894 declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
3895 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_sd
3898 ; CHECK: vscalefsd {{.*}}{%k1}
3899 ; CHECK: vscalefsd {rn-sae}
3900 define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
3901 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
3902 %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
3903 %res2 = fadd <2 x double> %res, %res1
3904 ret <2 x double> %res2
3907 declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
3909 define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3910 ; CHECK-LABEL: test_getexp_ss:
3912 ; CHECK-NEXT: andl $1, %edi
3913 ; CHECK-NEXT: kmovw %edi, %k1
3914 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
3915 ; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
3916 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
3917 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
3918 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0
3919 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
3920 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
3921 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
3923 %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
3924 %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
3925 %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
3926 %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
3928 %res.1 = fadd <4 x float> %res0, %res1
3929 %res.2 = fadd <4 x float> %res2, %res3
3930 %res = fadd <4 x float> %res.1, %res.2
3931 ret <4 x float> %res
3934 declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
3936 define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
3937 ; CHECK-LABEL: test_getexp_sd:
3939 ; CHECK-NEXT: andl $1, %edi
3940 ; CHECK-NEXT: kmovw %edi, %k1
3941 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
3942 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
3943 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
3944 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
3945 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
3946 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
3947 ; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
3948 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
3950 %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
3951 %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
3952 %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
3953 %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
3955 %res.1 = fadd <2 x double> %res0, %res1
3956 %res.2 = fadd <2 x double> %res2, %res3
3957 %res = fadd <2 x double> %res.1, %res.2
3958 ret <2 x double> %res
3961 declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
3963 define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
3964 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_512:
3966 ; CHECK-NEXT: movzbl %dil, %eax
3967 ; CHECK-NEXT: kmovw %eax, %k1
3968 ; CHECK-NEXT: vgetmantpd $11, %zmm0, %zmm1 {%k1}
3969 ; CHECK-NEXT: vgetmantpd $11,{sae}, %zmm0, %zmm0
3970 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3972 %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
3973 %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8)
3974 %res2 = fadd <8 x double> %res, %res1
3975 ret <8 x double> %res2
; AVX-512 GETMANT, packed float, 512-bit. Same shape as the .pd.512 test
; but with a 16-bit mask (one bit per float element).
; Intrinsic signature: (src, imm8, passthru, mask, rounding).
3978 declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
; Merge-masked call (mask %x3, rounding 4) plus unmasked call (mask -1,
; rounding 8 -> {sae}); sum keeps both calls live.
3980 define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
3981 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_512:
3983 ; CHECK-NEXT: kmovw %edi, %k1
3984 ; CHECK-NEXT: vgetmantps $11, %zmm0, %zmm1 {%k1}
3985 ; CHECK-NEXT: vgetmantps $11,{sae}, %zmm0, %zmm0
3986 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
3988 %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
3989 %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8)
3990 %res2 = fadd <16 x float> %res, %res1
3991 ret <16 x float> %res2
; AVX-512 GETMANT, scalar double: operates on the low element only.
; Intrinsic signature: (src1, src2, imm8, passthru, mask, rounding).
3994 declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32)
; Covers four variants of the scalar intrinsic with imm8 = 11:
;   %res  - merge-masked (passthru %x2, mask %x3, rounding 4)
;   %res1 - zero-masked  (passthru zeroinitializer, mask %x3, rounding 4),
;           expected as the {k1} {z} form below
;   %res2 - merge-masked with rounding 8, expected as the {sae} form
;   %res3 - unmasked (mask -1, rounding 4)
; All four results are chained through fadd so none is dead.
3996 define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
3997 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd:
3999 ; CHECK-NEXT: andl $1, %edi
4000 ; CHECK-NEXT: kmovw %edi, %k1
4001 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
4002 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
4003 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
4004 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
4005 ; CHECK-NEXT: vgetmantsd $11,{sae}, %xmm1, %xmm0, %xmm2 {%k1}
4006 ; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
4007 ; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
4008 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
4010 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
4011 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4)
4012 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 8)
4013 %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 -1, i32 4)
4014 %res11 = fadd <2 x double> %res, %res1
4015 %res12 = fadd <2 x double> %res2, %res3
4016 %res13 = fadd <2 x double> %res11, %res12
4017 ret <2 x double> %res13
; AVX-512 GETMANT, scalar float: low-element counterpart of the .sd test.
; Intrinsic signature: (src1, src2, imm8, passthru, mask, rounding).
4020 declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)
; Four variants with imm8 = 11:
;   %res  - merge-masked (passthru %x2, mask %x3, rounding 4)
;   %res1 - zero-masked  (passthru zeroinitializer, mask %x3, rounding 4),
;           expected as the {k1} {z} form below
;   %res2 - unmasked (mask -1) with rounding 8, expected as the {sae} form
;   %res3 - unmasked (mask -1) with rounding 4
; Results are summed so all four calls stay live.
4022 define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
4023 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss:
4025 ; CHECK-NEXT: andl $1, %edi
4026 ; CHECK-NEXT: kmovw %edi, %k1
4027 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
4028 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z}
4029 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4
4030 ; CHECK-NEXT: vgetmantss $11,{sae}, %xmm1, %xmm0, %xmm0
4031 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
4032 ; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
4033 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4035 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
4036 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4)
4037 %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 8)
4038 %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 4)
4039 %res11 = fadd <4 x float> %res, %res1
4040 %res12 = fadd <4 x float> %res2, %res3
4041 %res13 = fadd <4 x float> %res11, %res12
4042 ret <4 x float> %res13
; AVX-512 SHUFPD, 512-bit: shuffle packed doubles, element selection
; controlled by imm8 (22 here, taken verbatim from the calls below).
; Intrinsic signature: (src1, src2, imm8, passthru, mask).
4045 declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)
; Three variants: merge-masked (mask %x4, passthru %x3), unmasked (mask -1),
; and zero-masked (passthru zeroinitializer, expected as {k1} {z} below).
; Results are summed so all three calls stay live.
4047 define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
4048 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
4050 ; CHECK-NEXT: movzbl %dil, %eax
4051 ; CHECK-NEXT: kmovw %eax, %k1
4052 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1}
4053 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm3 {%k1} {z}
4054 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0
4055 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4056 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
4058 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
4059 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
4060 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
4062 %res3 = fadd <8 x double> %res, %res1
4063 %res4 = fadd <8 x double> %res3, %res2
4064 ret <8 x double> %res4
; AVX-512 SHUFPS, 512-bit: float counterpart of the shuf.pd.512 test,
; with a 16-bit element mask.
; Intrinsic signature: (src1, src2, imm8, passthru, mask).
4067 declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)
; Two variants with imm8 = 22: merge-masked (mask %x4, passthru %x3) and
; unmasked (mask -1); the fadd keeps both calls live.
4069 define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
4070 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
4072 ; CHECK-NEXT: kmovw %edi, %k1
4073 ; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm2 {%k1}
4074 ; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm0
4075 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4077 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
4078 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
4079 %res2 = fadd <16 x float> %res, %res1
4080 ret <16 x float> %res2