; NOTE(review): this listing appears to be a partially extracted copy of an
; LLVM CodeGen/FileCheck test: each line carries a stray leading integer
; (likely the original file's line number) and several function bodies are
; missing their ret/closing brace — confirm against the original test file
; before attempting to run it through llc.
; AVX-512 mask-register (k-register) intrinsic tests: kortestz/kortestc,
; kand, knot and kunpck lowering on -mcpu=knl.
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
3 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
4 ; CHECK-LABEL: test_kortestz
7 define i32 @test_kortestz(i16 %a0, i16 %a1) {
8 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
12 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
13 ; CHECK-LABEL: test_kortestc
16 define i32 @test_kortestc(i16 %a0, i16 %a1) {
17 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
21 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
22 ; CHECK-LABEL: test_kand
; kand is called twice (once with the constant 8) — presumably to exercise
; folding of a constant mask operand; TODO confirm intent in original test.
25 define i16 @test_kand(i16 %a0, i16 %a1) {
26 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
27 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
31 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
32 ; CHECK-LABEL: test_knot
34 define i16 @test_knot(i16 %a0) {
35 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
39 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
41 ; CHECK-LABEL: unpckbw_test
44 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
45 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
; vrcp14ps/vrcp14pd (14-bit precision reciprocal) lowering; the CHECK lines
; pin the exact EVEX encodings with an all-ones (-1) mask.
49 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
50 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
51 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
54 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
56 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
57 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
58 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
61 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
; vrndscalepd/vrndscaleps lowering with immediate 11 and rounding argument 4
; (the last i32 operand of the mask.rndscale intrinsics).
63 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
65 define <8 x double> @test7(<8 x double> %a) {
66 ; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
67 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
71 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
73 define <16 x float> @test8(<16 x float> %a) {
74 ; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
75 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
; vrsqrt14ps (packed) and vrsqrt14ss/vrcp14ss (scalar) lowering; scalar
; variants pass %a0 as both vector operands and a zero passthrough.
79 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
80 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
81 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
84 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
86 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
87 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
88 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
91 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
93 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
94 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
95 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
98 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
; Packed and scalar square-root intrinsic lowering (vsqrtpd/vsqrtps and
; vsqrtss/vsqrtsd). The packed tests have no CHECK line in this listing —
; NOTE(review): likely dropped by the extraction; confirm against original.
100 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
102 %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1]
103 ret <8 x double> %res
105 declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
107 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
109 %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1]
110 ret <16 x float> %res
112 declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
114 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
115 ; CHECK: vsqrtss {{.*}}encoding: [0x62
116 %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
119 declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone
121 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
122 ; CHECK: vsqrtsd {{.*}}encoding: [0x62
123 %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
124 ret <2 x double> %res
126 declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
; Scalar int<->fp conversion tests: the SSE/SSE2 cvt intrinsics plus the
; AVX-512 unsigned variants (cvtusi2sd, cvtsd2usi). Each CHECK only pins the
; EVEX prefix byte 0x62, i.e. that the EVEX-encoded form is selected.
128 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
129 ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
130 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
133 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
135 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
136 ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
137 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
138 ret <2 x double> %res
140 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
142 define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) {
143 ; CHECK: vcvtusi2sdq {{.*}}encoding: [0x62
144 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
145 ret <2 x double> %res
147 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone
149 define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
150 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
151 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
154 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
157 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
158 ; CHECK: vcvtss2si {{.*}}encoding: [0x62
159 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
162 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
165 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
166 ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
167 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
170 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
173 define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
174 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
175 %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
178 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
180 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
181 ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
182 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
185 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
; Half-precision conversion tests: vcvtph2ps (ymm->zmm widen) and vcvtps2ph
; with immediate rounding control 2; exact EVEX encodings are pinned.
187 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
188 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
189 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
190 ret <16 x float> %res
192 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
195 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
196 ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
197 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
201 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
; Broadcast tests: fp broadcasts from memory (i8*) and from vector lanes,
; plus integer vpbroadcastd/vpbroadcastq from vectors and from GPRs. Only
; the mnemonic is checked, not the encoding.
203 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
204 ; CHECK: vbroadcastss
205 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
206 ret <16 x float> %res
208 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
210 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
211 ; CHECK: vbroadcastsd
212 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
213 ret <8 x double> %res
215 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
217 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
218 ; CHECK: vbroadcastss
219 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
220 ret <16 x float> %res
222 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
224 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
225 ; CHECK: vbroadcastsd
226 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
227 ret <8 x double> %res
229 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
231 define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
232 ; CHECK: vpbroadcastd
233 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
236 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
238 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
239 ; CHECK: vpbroadcastd
240 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
243 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
245 define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
246 ; CHECK: vpbroadcastq
247 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
250 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
252 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
253 ; CHECK: vpbroadcastq
254 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
257 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
; CDI conflict/lzcnt intrinsic tests, in unmasked, masked and zero-masked
; forms; the unmasked variants check that the all-ones mask is materialized
; (movw/movb $-1). test_ctlz_* verify that the generic llvm.ctlz intrinsics
; also lower for 512-bit vectors.
259 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
260 ; CHECK: movw $-1, %ax
263 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
267 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
269 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
270 ; CHECK: movb $-1, %al
273 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
277 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
279 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
281 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
285 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
287 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
291 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
292 ; CHECK: movw $-1, %ax
295 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
299 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
301 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
302 ; CHECK: movb $-1, %al
305 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
309 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
312 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
314 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
318 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
320 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
324 define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
325 ; CHECK-LABEL: test_ctlz_d
327 %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
331 declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly
333 define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
334 ; CHECK-LABEL: test_ctlz_q
336 %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
340 declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
; Masked blend intrinsic tests (vblendmps/vblendmpd and the d/q integer
; forms), including one memory-operand variant.
342 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
343 ; CHECK: vblendmps %zmm1, %zmm0
344 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
345 ret <16 x float> %res
348 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
350 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
351 ; CHECK: vblendmpd %zmm1, %zmm0
352 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
353 ret <8 x double> %res
356 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
357 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
358 ; CHECK: vblendmpd (%
359 %b = load <8 x double>, <8 x double>* %ptr
360 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
361 ret <8 x double> %res
363 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
365 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
367 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
370 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
372 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
374 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
377 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
; Packed conversions with static rounding modes ({ru-sae}, {rd-sae}) and the
; vcmpps/vcmppd mask-producing compares; encodings pin the EVEX RC bits.
379 define <8 x i32> @test_cvtpd2udq(<8 x double> %a) {
380 ;CHECK: vcvtpd2udq {ru-sae}{{.*}}encoding: [0x62,0xf1,0xfc,0x58,0x79,0xc0]
381 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %a, <8 x i32>zeroinitializer, i8 -1, i32 2)
384 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
386 define <16 x i32> @test_cvtps2udq(<16 x float> %a) {
387 ;CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0]
388 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32>zeroinitializer, i16 -1, i32 1)
391 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
393 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
394 ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
395 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
398 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
400 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
401 ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
402 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
405 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
408 define <16 x float> @test_cvtdq2ps(<16 x i32> %a) {
409 ;CHECK: vcvtdq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x5b,0xc0]
410 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
413 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
415 define <16 x float> @test_cvtudq2ps(<16 x i32> %a) {
416 ;CHECK: vcvtudq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7f,0x38,0x7a,0xc0]
417 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
420 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
422 define <8 x double> @test_cvtdq2pd(<8 x i32> %a) {
423 ;CHECK: vcvtdq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0]
424 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
427 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
429 define <8 x double> @test_cvtudq2pd(<8 x i32> %a) {
430 ;CHECK: vcvtudq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0]
431 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
434 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
; vmaxpd/vminpd with default rounding (i32 4) and vcvtpd2ps with {rd-sae};
; NOTE(review): the max/min tests have no visible CHECK lines here —
; presumably dropped by the extraction; confirm against original.
437 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
439 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
440 <8 x double>zeroinitializer, i8 -1, i32 4)
441 ret <8 x double> %res
443 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
444 <8 x double>, i8, i32)
446 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
448 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
449 <8 x double>zeroinitializer, i8 -1, i32 4)
450 ret <8 x double> %res
452 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
453 <8 x double>, i8, i32)
455 define <8 x float> @test_cvtpd2ps(<8 x double> %a) {
456 ;CHECK: vcvtpd2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0xfd,0x38,0x5a,0xc0]
457 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %a, <8 x float>zeroinitializer, i8 -1, i32 1)
460 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
; Integer abs/min/max and vptestm tests with exact EVEX encodings; all use
; an all-ones mask and a zero passthrough.
462 define <16 x i32> @test_pabsd(<16 x i32> %a) {
463 ;CHECK: vpabsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
464 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a, <16 x i32>zeroinitializer, i16 -1)
467 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
469 define <8 x i64> @test_pabsq(<8 x i64> %a) {
470 ;CHECK: vpabsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
471 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a, <8 x i64>zeroinitializer, i8 -1)
474 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
476 define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
477 ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
478 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
479 <8 x i64>zeroinitializer, i8 -1)
482 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
484 define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
485 ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
486 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
487 <16 x i32>zeroinitializer, i16 -1)
490 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
492 define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
493 ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
494 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
495 <16 x i32>zeroinitializer, i16 -1)
498 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
500 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
501 ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
502 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
505 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
507 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
508 ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
509 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
512 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
; Masked memory intrinsic tests: unaligned stores (storeu -> vmovups/vmovupd
; with {%k1}), aligned stores (store -> vmovaps/vmovapd), zero-masked
; aligned loads, and unmasked (-1 mask) aligned loads which must lower to
; plain vmovaps/vmovapd without a mask annotation.
514 define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
515 ; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
516 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
520 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
522 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
523 ; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
524 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
528 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
530 define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
531 ; CHECK-LABEL: test_mask_store_aligned_ps:
533 ; CHECK-NEXT: kmovw %esi, %k1
534 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
536 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
540 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
542 define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
543 ; CHECK-LABEL: test_mask_store_aligned_pd:
545 ; CHECK-NEXT: kmovw %esi, %k1
546 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
548 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
552 declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
554 define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
555 ; CHECK-LABEL: test_maskz_load_aligned_ps:
557 ; CHECK-NEXT: kmovw %esi, %k1
558 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
560 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
561 ret <16 x float> %res
564 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
566 define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
567 ; CHECK-LABEL: test_maskz_load_aligned_pd:
569 ; CHECK-NEXT: kmovw %esi, %k1
570 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
572 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
573 ret <8 x double> %res
576 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
578 define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
579 ; CHECK-LABEL: test_load_aligned_ps:
581 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
583 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
584 ret <16 x float> %res
587 define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
588 ; CHECK-LABEL: test_load_aligned_pd:
590 ; CHECK-NEXT: vmovapd (%rdi), %zmm0
592 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
593 ret <8 x double> %res
; vpermt2ps (two-source permute, unmasked and masked), non-temporal load
; vmovntdqa, valignq/valignd lane alignment, and masked scalar vmovss store.
596 define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) {
597 ; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1]
598 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 -1)
599 ret <16 x float> %res
602 define <16 x float> @test_vpermt2ps_mask(<16 x float>%x, <16 x float>%y, <16 x i32>%perm, i16 %mask) {
603 ; CHECK-LABEL: test_vpermt2ps_mask:
604 ; CHECK: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x7f,0xc1]
605 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 %mask)
606 ret <16 x float> %res
609 declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
611 define <8 x i64> @test_vmovntdqa(i8 *%x) {
612 ; CHECK-LABEL: test_vmovntdqa:
613 ; CHECK: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07]
614 %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %x)
618 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
620 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
621 ; CHECK-LABEL: test_valign_q:
622 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
623 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
627 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
628 ; CHECK-LABEL: test_mask_valign_q:
629 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
630 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
634 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)
636 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
637 ; CHECK-LABEL: test_maskz_valign_d:
638 ; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
639 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask)
643 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)
645 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
646 ; CHECK-LABEL: test_mask_store_ss
647 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
648 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
652 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
; vpcmpeqd/q and vpcmpgtd/q mask-producing compares, each in an unmasked
; (-1) and a masked ({%k1}) variant.
654 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
655 ; CHECK-LABEL: test_pcmpeq_d
656 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
657 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
661 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
662 ; CHECK-LABEL: test_mask_pcmpeq_d
663 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
664 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
668 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
670 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
671 ; CHECK-LABEL: test_pcmpeq_q
672 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
673 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
677 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
678 ; CHECK-LABEL: test_mask_pcmpeq_q
679 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
680 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
684 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
686 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
687 ; CHECK-LABEL: test_pcmpgt_d
688 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
689 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
693 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
694 ; CHECK-LABEL: test_mask_pcmpgt_d
695 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
696 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
700 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
702 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
703 ; CHECK-LABEL: test_pcmpgt_q
704 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
705 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
709 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
710 ; CHECK-LABEL: test_mask_pcmpgt_q
711 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
712 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
716 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
; Tests lowering of @llvm.x86.avx512.mask.cmp.d.512 for all eight signed
; predicates (eq/lt/le/unord/neq/nlt/nle/ord), packing each i16 result into
; a lane of a <8 x i16>.
; Fixed: "CHECK_LABEL" (underscore) is not a FileCheck directive and was
; silently ignored; it must be spelled "CHECK-LABEL" for the check to run.
; NOTE(review): the function's ret/closing brace are missing from this
; listing — confirm against the original test file.
718 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
719 ; CHECK-LABEL: test_cmp_d_512
720 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
721 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
722 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
723 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
724 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
725 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
726 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
727 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
728 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
729 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
730 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
731 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
732 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
733 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
734 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
735 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
736 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
737 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
738 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
739 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
740 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
741 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
742 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
743 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
; Masked variant of the eight-predicate signed-compare test: every compare
; is executed under %mask and must print with a {%k1} write-mask.
; Fixed: "CHECK_LABEL" (underscore) is not a FileCheck directive and was
; silently ignored; it must be spelled "CHECK-LABEL" for the check to run.
; NOTE(review): the function's ret/closing brace are missing from this
; listing — confirm against the original test file.
747 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
748 ; CHECK-LABEL: test_mask_cmp_d_512
749 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
750 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
751 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
752 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
753 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
754 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
755 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
756 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
757 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
758 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
759 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
760 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
761 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
762 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
763 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
764 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
765 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
766 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
767 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
768 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
769 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
770 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
771 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
772 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
776 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
778 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
779 ; CHECK-LABEL: test_ucmp_d_512
780 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
781 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
782 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
783 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
784 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
785 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
786 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
787 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
788 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
789 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
790 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
791 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
792 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
793 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
794 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
795 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
796 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
797 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
798 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
799 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
800 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
801 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
802 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
803 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
807 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
808 ; CHECK-LABEL: test_mask_ucmp_d_512
809 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
810 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
811 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
812 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
813 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
814 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
815 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
816 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
817 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
818 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
819 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
820 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
821 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
822 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
823 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
824 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
825 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
826 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
827 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
828 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
829 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
830 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
831 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
832 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
836 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
838 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
839 ; CHECK-LABEL: test_cmp_q_512
840 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
841 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
842 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
843 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
844 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
845 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
846 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
847 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
848 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
849 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
850 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
851 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
852 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
853 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
854 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
855 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
856 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
857 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
858 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
859 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
860 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
861 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
862 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
863 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
867 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
868 ; CHECK-LABEL: test_mask_cmp_q_512
869 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
870 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
871 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
872 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
873 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
874 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
875 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
876 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
877 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
878 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
879 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
880 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
881 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
882 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
883 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
884 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
885 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
886 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
887 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
888 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
889 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
890 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
891 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
892 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
896 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
898 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
899 ; CHECK-LABEL: test_ucmp_q_512
900 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
901 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
902 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
903 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
904 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
905 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
906 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
907 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
908 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
909 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
910 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
911 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
912 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
913 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
914 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
915 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
916 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
917 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
918 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
919 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
920 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
921 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
922 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
923 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
927 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
928 ; CHECK-LABEL: test_mask_ucmp_q_512
929 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
930 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
931 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
932 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
933 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
934 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
935 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
936 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
937 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
938 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
939 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
940 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
941 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
942 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
943 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
944 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
945 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
946 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
947 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
948 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
949 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
950 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
951 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
952 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
956 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
958 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
959 ; CHECK-LABEL: test_mask_vextractf32x4:
960 ; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
961 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
965 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
967 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
968 ; CHECK-LABEL: test_mask_vextracti64x4:
969 ; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
970 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
974 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
976 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
977 ; CHECK-LABEL: test_maskz_vextracti32x4:
978 ; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
979 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
983 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
985 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
986 ; CHECK-LABEL: test_vextractf64x4:
987 ; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
988 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
989 ret <4 x double> %res
992 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
994 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
995 ; CHECK-LABEL: test_x86_avx512_pslli_d
997 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1001 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1002 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
1003 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
1004 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1008 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
1009 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
1010 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
1011 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1015 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1017 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
1018 ; CHECK-LABEL: test_x86_avx512_pslli_q
1020 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1024 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1025 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
1026 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
1027 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1031 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
1032 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
1033 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
1034 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1038 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1040 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1041 ; CHECK-LABEL: test_x86_avx512_psrli_d
1043 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1047 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1048 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
1049 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
1050 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1054 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1055 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
1056 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1057 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1061 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1063 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1064 ; CHECK-LABEL: test_x86_avx512_psrli_q
1066 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1070 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1071 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
1072 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
1073 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1077 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1078 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
1079 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1080 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1084 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1086 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1087 ; CHECK-LABEL: test_x86_avx512_psrai_d
1089 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1093 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1094 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
1095 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
1096 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1100 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1101 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
1102 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1103 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1107 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1109 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1110 ; CHECK-LABEL: test_x86_avx512_psrai_q
1112 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1116 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1117 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
1118 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
1119 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1123 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1124 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
1125 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1126 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1130 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1132 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
1133 ; CHECK-LABEL: test_x86_avx512_psll_d
1135 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1139 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1140 ; CHECK-LABEL: test_x86_avx512_mask_psll_d
1141 ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
1142 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1146 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1147 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
1148 ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
1149 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1153 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1155 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
1156 ; CHECK-LABEL: test_x86_avx512_psll_q
1158 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1162 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1163 ; CHECK-LABEL: test_x86_avx512_mask_psll_q
1164 ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
1165 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1169 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1170 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
1171 ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
1172 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1176 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1178 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
1179 ; CHECK-LABEL: test_x86_avx512_psrl_d
1181 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1185 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1186 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
1187 ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
1188 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1192 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1193 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
1194 ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
1195 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1199 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1201 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
1202 ; CHECK-LABEL: test_x86_avx512_psrl_q
1204 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1208 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1209 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
1210 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
1211 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1215 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1216 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
1217 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
1218 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1222 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1224 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
1225 ; CHECK-LABEL: test_x86_avx512_psra_d
1227 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1231 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1232 ; CHECK-LABEL: test_x86_avx512_mask_psra_d
1233 ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
1234 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1238 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1239 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
1240 ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
1241 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1245 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1247 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
1248 ; CHECK-LABEL: test_x86_avx512_psra_q
1250 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1254 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1255 ; CHECK-LABEL: test_x86_avx512_mask_psra_q
1256 ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
1257 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1261 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1262 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
1263 ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
1264 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1268 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1270 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
1271 ; CHECK-LABEL: test_x86_avx512_psllv_d
1273 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1277 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1278 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
1279 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
1280 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1284 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1285 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
1286 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1287 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1291 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1293 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
1294 ; CHECK-LABEL: test_x86_avx512_psllv_q
1296 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1300 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1301 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
1302 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
1303 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1307 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1308 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
1309 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1310 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1314 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1317 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
1318 ; CHECK-LABEL: test_x86_avx512_psrav_d
1320 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1324 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1325 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
1326 ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
1327 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1331 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1332 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
1333 ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
1334 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1338 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1340 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
1341 ; CHECK-LABEL: test_x86_avx512_psrav_q
1343 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1347 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1348 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
1349 ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
1350 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1354 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1355 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
1356 ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
1357 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1361 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1363 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
1364 ; CHECK-LABEL: test_x86_avx512_psrlv_d
1366 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1370 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1371 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
1372 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
1373 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1377 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1378 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
1379 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1380 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1384 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1386 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
1387 ; CHECK-LABEL: test_x86_avx512_psrlv_q
1389 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1393 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1394 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
1395 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
1396 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1400 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1401 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
1402 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1403 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1407 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1409 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
1410 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
1412 %b = load <8 x i64>, <8 x i64>* %ptr
1413 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1417 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1418 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1419 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
; Static-rounding-mode tests: rounding arg 0..3 selects {rn,rd,ru,rz}-sae,
; which is encoded in the EVEX L'L/RC bits (byte 4 of each encoding).
define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rn
; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rd
; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_ru
; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rz
; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rn
; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rd
; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_ru
; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rz
; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}
; Masked (zeroing) variants: zeroinitializer passthru + %mask should select
; the {%k1} {z} form; EVEX.z and the rounding mode share encoding byte 4.
define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rn
; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rd
; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_ru
; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rz
; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 3)
ret <16 x float> %res
}
1518 ;; With Passthru value
; Merge-masking variants: a non-zero passthru operand must drop the {z} bit
; and write into the passthru register (zmm2 -> 0xd1 ModRM).
define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rn
; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rd
; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_ru
; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rz
; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 3)
ret <16 x float> %res
}
; Double-precision masked (zeroing) rounding variants of vmulpd (EVEX.W=1,
; hence the 0xfd byte vs 0x7c for the ps forms above).
define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rn
; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 0)
ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rd
; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 1)
ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_ru
; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 2)
ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rz
; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 3)
ret <8 x double> %res
}
; Masked 512-bit integer xor (dword element masking -> vpxord).
define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_xor_epi32
;CHECK: vpxord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_xor_epi32
;CHECK: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}
1598 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Masked 512-bit integer or (dword element masking -> vpord).
define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_or_epi32
;CHECK: vpord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_or_epi32
;CHECK: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
%res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}
1614 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Masked 512-bit integer and (dword element masking -> vpandd).
define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_and_epi32
;CHECK: vpandd {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_and_epi32
;CHECK: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}
1630 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Masked 512-bit integer xor (qword element masking -> vpxorq).
define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
;CHECK-LABEL: test_xor_epi64
;CHECK: vpxorq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi64
;CHECK: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}
1646 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; Masked 512-bit integer or (qword element masking -> vporq).
define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
;CHECK-LABEL: test_or_epi64
;CHECK: vporq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_or_epi64
;CHECK: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}
1662 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; Masked 512-bit integer and (qword element masking -> vpandq).
define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
;CHECK-LABEL: test_and_epi64
;CHECK: vpandq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_and_epi64
;CHECK: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}
1678 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; vpaddd matrix: reg/reg, reg/mem and broadcast forms, each unmasked,
; merge-masked (rrk/rmk/rmbk) and zero-masked (rrkz/rmkz/rmbkz).
define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_mask_add_epi32_rr
;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rrk
;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rrkz
;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi32_rm
;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmk
;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmkz
;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi32_rmb
;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmbk
;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi32_rmbkz
;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
}
1756 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; vpsubd matrix: same reg/mem/broadcast x unmasked/merge/zero grid as vpaddd.
define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_mask_sub_epi32_rr
;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rrk
;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rrkz
;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi32_rm
;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmk
;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmkz
;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi32_rmb
;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmbk
;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi32_rmbkz
;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
}
1833 declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; vpaddq matrix: reg/mem/{1to8} broadcast x unmasked/merge/zero forms.
define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
;CHECK-LABEL: test_mask_add_epi64_rr
;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi64_rrk
;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi64_rrkz
;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi64_rm
;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi64_rmk
;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi64_rmkz
;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi64_rmb
;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi64_rmbk
;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi64_rmbkz
;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
1910 declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; vpsubq matrix: same grid as vpaddq above.
define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
;CHECK-LABEL: test_mask_sub_epi64_rr
;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi64_rrk
;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi64_rrkz
;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi64_rm
;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi64_rmk
;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi64_rmkz
;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi64_rmb
;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi64_rmbk
;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi64_rmbkz
;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
1987 declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; vpmuldq (signed 32x32->64 multiply) matrix; note the broadcast forms load a
; 64-bit scalar (the instruction reads qword lanes) and bitcast to <16 x i32>.
define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_mask_mul_epi32_rr
;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epi32_rrk
;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epi32_rrkz
;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_mul_epi32_rm
;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epi32_rmk
;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epi32_rmkz
;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
;CHECK-LABEL: test_mask_mul_epi32_rmb
;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epi32_rmbk
;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epi32_rmbkz
;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
2067 declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; vpmuludq (512-bit unsigned 32x32->64 multiply) tests, covering the full
; matrix of operand/mask forms: register (rr), memory (rm), broadcast (rmb),
; each unmasked, merge-masked (k), and zero-masked (kz).
define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_mask_mul_epu32_rr
;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rrk
;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rrkz
;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_mul_epu32_rm
;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rmk
;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rmkz
;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
;CHECK-LABEL: test_mask_mul_epu32_rmb
;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rmbk
;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_mul_epu32_rmbkz
;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; vpmulld (512-bit low 32-bit multiply) tests: register/memory/broadcast
; operands, each unmasked, merge-masked, and zero-masked.
; (ret spacing normalized to the `ret <16 x i32>` form used elsewhere.)
define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_mask_mullo_epi32_rr_512
;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rrk_512
;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512
;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_mullo_epi32_rm_512
;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rmk_512
;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512
;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_mullo_epi32_rmb_512
;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512
;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512
;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; vaddps with embedded rounding ({rn,rd,ru,rz}-sae) and current-rounding
; (rc = 4) forms: zero-masked, merge-masked, and unmasked variants.
; The rd_sae maskz CHECK now also verifies the {%k1} {z} masking suffix,
; matching its rn/ru/rz siblings (the call is zero-masked with %mask).
define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae
;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae
;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae
;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae
;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_add_round_ps_current
;CHECK: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae
;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae
;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae
;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae
;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_add_round_ps_current
;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_add_round_ps_rn_sae
;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_add_round_ps_rd_sae
;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_add_round_ps_ru_sae
;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_add_round_ps_rz_sae
;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_add_round_ps_current
;CHECK: vaddps %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; vsubps with embedded rounding: merge-masked and unmasked variants.
; (The declare for @llvm.x86.avx512.mask.sub.ps.512 lives elsewhere in
; this file.)
define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae
;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae
;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae
;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae
;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_sub_round_ps_current
;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_sub_round_ps_rn_sae
;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_sub_round_ps_rd_sae
;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_sub_round_ps_ru_sae
;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_sub_round_ps_rz_sae
;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_sub_round_ps_current
;CHECK: vsubps %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
; vdivps with embedded rounding: zero-masked, merge-masked, and unmasked
; variants. The rd_sae maskz CHECK now also verifies the {%k1} {z}
; masking suffix, matching its rn/ru/rz siblings (the call is zero-masked
; with %mask).
define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae
;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae
;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae
;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae
;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_div_round_ps_current
;CHECK: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae
;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae
;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae
;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae
;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_div_round_ps_current
;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_div_round_ps_rn_sae
;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_div_round_ps_rd_sae
;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_div_round_ps_ru_sae
;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_div_round_ps_rz_sae
;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_div_round_ps_current
;CHECK: vdivps %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; vminps tests: {sae} (suppress-all-exceptions, rc = 8) and current
; rounding (rc = 4), in zero-masked, merge-masked, and unmasked forms.
define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_min_round_ps_sae
;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_min_round_ps_current
;CHECK: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_min_round_ps_sae
;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_min_round_ps_current
;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_min_round_ps_sae
;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_min_round_ps_current
;CHECK: vminps %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; vmaxps tests: {sae} (rc = 8) and current rounding (rc = 4), in
; zero-masked, merge-masked, and unmasked forms.
define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_max_round_ps_sae
;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_max_round_ps_current
;CHECK: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_max_round_ps_sae
;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
;CHECK-LABEL: test_mm512_mask_max_round_ps_current
;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_max_round_ps_sae
;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_max_round_ps_current
;CHECK: vmaxps %zmm1, %zmm0, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)