1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
3 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
4 ; CHECK-LABEL: test_kortestz
7 define i32 @test_kortestz(i16 %a0, i16 %a1) {
8 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
12 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
13 ; CHECK-LABEL: test_kortestc
16 define i32 @test_kortestc(i16 %a0, i16 %a1) {
17 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
21 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
22 ; CHECK-LABEL: test_kand
25 define i16 @test_kand(i16 %a0, i16 %a1) {
26 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
27 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
31 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
32 ; CHECK-LABEL: test_knot
34 define i16 @test_knot(i16 %a0) {
35 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
39 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
41 ; CHECK-LABEL: unpckbw_test
44 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
45 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
49 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
50 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
51 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
54 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
56 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
57 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
58 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
61 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
63 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
65 define <8 x double> @test7(<8 x double> %a) {
66 ; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
67 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
71 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
73 define <16 x float> @test8(<16 x float> %a) {
74 ; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
75 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
79 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
80 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
81 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
84 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
86 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
87 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
88 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
91 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
93 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
94 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
95 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
98 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
100 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
102 %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1]
103 ret <8 x double> %res
105 declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
107 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
109 %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1]
110 ret <16 x float> %res
112 declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
114 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
115 ; CHECK: vsqrtss {{.*}}encoding: [0x62
116 %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
119 declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone
121 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
122 ; CHECK: vsqrtsd {{.*}}encoding: [0x62
123 %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
124 ret <2 x double> %res
126 declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
128 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
129 ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
130 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
133 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
135 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
136 ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
137 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
138 ret <2 x double> %res
140 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
142 define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) {
143 ; CHECK: vcvtusi2sdq {{.*}}encoding: [0x62
144 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
145 ret <2 x double> %res
147 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone
149 define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
150 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
151 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
154 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
157 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
158 ; CHECK: vcvtss2si {{.*}}encoding: [0x62
159 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
162 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
165 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
166 ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
167 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
170 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
173 define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
174 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
175 %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
178 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
180 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
181 ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
182 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
185 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
187 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
188 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
189 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
190 ret <16 x float> %res
192 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
195 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
196 ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
197 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
201 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
203 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
204 ; CHECK: vbroadcastss
205 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
206 ret <16 x float> %res
208 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
210 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
211 ; CHECK: vbroadcastsd
212 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
213 ret <8 x double> %res
215 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
217 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
218 ; CHECK: vbroadcastss
219 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
220 ret <16 x float> %res
222 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
224 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
225 ; CHECK: vbroadcastsd
226 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
227 ret <8 x double> %res
229 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
231 define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
232 ; CHECK: vpbroadcastd
233 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
236 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
238 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
239 ; CHECK: vpbroadcastd
240 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
243 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
245 define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
246 ; CHECK: vpbroadcastq
247 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
250 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
252 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
253 ; CHECK: vpbroadcastq
254 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
257 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
259 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
260 ; CHECK: movw $-1, %ax
263 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
267 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
269 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
270 ; CHECK: movb $-1, %al
273 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
277 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
279 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
281 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
285 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
287 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
291 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
292 ; CHECK: movw $-1, %ax
295 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
299 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
301 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
302 ; CHECK: movb $-1, %al
305 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
309 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
312 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
314 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
318 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
320 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
324 define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
325 ; CHECK-LABEL: test_ctlz_d
327 %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
331 declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly
333 define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
334 ; CHECK-LABEL: test_ctlz_q
336 %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
340 declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
342 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
344 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
345 ret <16 x float> %res
348 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
350 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
352 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
353 ret <8 x double> %res
356 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
357 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
358 ; CHECK: vblendmpd (%
359 %b = load <8 x double>* %ptr
360 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
361 ret <8 x double> %res
363 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
365 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
367 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
370 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
372 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
374 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
377 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
379 define <8 x i32> @test_cvtpd2udq(<8 x double> %a) {
380 ;CHECK: vcvtpd2udq {ru-sae}{{.*}}encoding: [0x62,0xf1,0xfc,0x58,0x79,0xc0]
381 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %a, <8 x i32>zeroinitializer, i8 -1, i32 2)
384 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
386 define <16 x i32> @test_cvtps2udq(<16 x float> %a) {
387 ;CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0]
388 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32>zeroinitializer, i16 -1, i32 1)
391 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
393 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
394 ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
395 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
398 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
400 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
401 ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
402 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
405 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
408 define <16 x float> @test_cvtdq2ps(<16 x i32> %a) {
409 ;CHECK: vcvtdq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x5b,0xc0]
410 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
413 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
415 define <16 x float> @test_cvtudq2ps(<16 x i32> %a) {
416 ;CHECK: vcvtudq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7f,0x38,0x7a,0xc0]
417 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
420 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
422 define <8 x double> @test_cvtdq2pd(<8 x i32> %a) {
423 ;CHECK: vcvtdq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0]
424 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
427 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
429 define <8 x double> @test_cvtudq2pd(<8 x i32> %a) {
430 ;CHECK: vcvtudq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0]
431 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
434 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
437 define <16 x float> @test_vmaxps(<16 x float> %a0, <16 x float> %a1) {
439 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1,
440 <16 x float>zeroinitializer, i16 -1, i32 4)
441 ret <16 x float> %res
443 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>,
444 <16 x float>, i16, i32)
446 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
448 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
449 <8 x double>zeroinitializer, i8 -1, i32 4)
450 ret <8 x double> %res
452 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
453 <8 x double>, i8, i32)
455 define <16 x float> @test_vminps(<16 x float> %a0, <16 x float> %a1) {
457 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1,
458 <16 x float>zeroinitializer, i16 -1, i32 4)
459 ret <16 x float> %res
461 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>,
462 <16 x float>, i16, i32)
464 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
466 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
467 <8 x double>zeroinitializer, i8 -1, i32 4)
468 ret <8 x double> %res
470 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
471 <8 x double>, i8, i32)
473 define <8 x float> @test_cvtpd2ps(<8 x double> %a) {
474 ;CHECK: vcvtpd2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0xfd,0x38,0x5a,0xc0]
475 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %a, <8 x float>zeroinitializer, i8 -1, i32 1)
478 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
480 define <16 x i32> @test_pabsd(<16 x i32> %a) {
481 ;CHECK: vpabsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
482 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a, <16 x i32>zeroinitializer, i16 -1)
485 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
487 define <8 x i64> @test_pabsq(<8 x i64> %a) {
488 ;CHECK: vpabsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
489 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a, <8 x i64>zeroinitializer, i8 -1)
492 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
494 define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
495 ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
496 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
497 <8 x i64>zeroinitializer, i8 -1)
500 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
502 define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
503 ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
504 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
505 <16 x i32>zeroinitializer, i16 -1)
508 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
510 define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
511 ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
512 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
513 <16 x i32>zeroinitializer, i16 -1)
516 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
518 define <8 x i64> @test_vpmuludq(<16 x i32> %a0, <16 x i32> %a1) {
519 ; CHECK: vpmuludq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
520 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a0, <16 x i32> %a1,
521 <8 x i64>zeroinitializer, i8 -1)
524 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
526 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
527 ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
528 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
531 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
533 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
534 ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
535 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
538 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
540 define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
541 ; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
542 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
546 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
548 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
549 ; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
550 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
554 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8 )
556 define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) {
557 ; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1]
558 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 -1)
559 ret <16 x float> %res
562 define <16 x float> @test_vpermt2ps_mask(<16 x float>%x, <16 x float>%y, <16 x i32>%perm, i16 %mask) {
563 ; CHECK-LABEL: test_vpermt2ps_mask:
564 ; CHECK: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x7f,0xc1]
565 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 %mask)
566 ret <16 x float> %res
569 declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
571 define <8 x i64> @test_vmovntdqa(i8 *%x) {
572 ; CHECK-LABEL: test_vmovntdqa:
573 ; CHECK: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07]
574 %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %x)
578 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
580 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
581 ; CHECK-LABEL: test_valign_q:
582 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
583 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
587 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
588 ; CHECK-LABEL: test_mask_valign_q:
589 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
590 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
594 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)
596 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
597 ; CHECK-LABEL: test_maskz_valign_d:
598 ; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
599 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask)
603 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)
605 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
606 ; CHECK-LABEL: test_mask_store_ss
607 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
608 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
612 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
614 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
615 ; CHECK-LABEL: test_pcmpeq_d
616 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
617 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
621 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
622 ; CHECK-LABEL: test_mask_pcmpeq_d
623 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
624 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
628 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
630 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
631 ; CHECK-LABEL: test_pcmpeq_q
632 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
633 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
637 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
638 ; CHECK-LABEL: test_mask_pcmpeq_q
639 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
640 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
644 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
646 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
647 ; CHECK-LABEL: test_pcmpgt_d
648 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
649 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
653 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
654 ; CHECK-LABEL: test_mask_pcmpgt_d
655 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
656 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
660 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
662 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
663 ; CHECK-LABEL: test_pcmpgt_q
664 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
665 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
669 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
670 ; CHECK-LABEL: test_mask_pcmpgt_q
671 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
672 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
676 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
678 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
679 ; CHECK-LABEL: test_cmp_d_512
680 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
681 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
682 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
683 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
684 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
685 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
686 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
687 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
688 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
689 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
690 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
691 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
692 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
693 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
694 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
695 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
696 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
697 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
698 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
699 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
700 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
701 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
702 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
703 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
707 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
708 ; CHECK-LABEL: test_mask_cmp_d_512
709 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
710 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
711 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
712 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
713 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
714 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
715 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
716 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
717 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
718 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
719 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
720 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
721 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
722 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
723 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
724 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
725 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
726 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
727 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
728 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
729 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
730 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
731 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
732 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
736 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
738 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
739 ; CHECK-LABEL: test_ucmp_d_512
740 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
741 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
742 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
743 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
744 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
745 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
746 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
747 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
748 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
749 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
750 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
751 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
752 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
753 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
754 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
755 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
756 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
757 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
758 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
759 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
760 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
761 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
762 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
763 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
; Unsigned dword compare, masked form: calls
; llvm.x86.avx512.mask.ucmp.d.512 once per predicate immediate (i32 0 through 7)
; passing the caller-supplied i16 %mask. Each call is expected to produce the
; matching vpcmp*ud instruction writing %k0 under the {%k1} write-mask. Every
; i16 result is inserted into the next lane (0-7) of an <8 x i16> vector.
; The label directive below anchors the following checks to this function.
767 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
768 ; CHECK-LABEL: test_mask_ucmp_d_512
769 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
770 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
771 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
772 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
773 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
774 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
775 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
776 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
777 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
778 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
779 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
780 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
781 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
782 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
783 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
784 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
785 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
786 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
787 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
788 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
789 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
790 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
791 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
792 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
796 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
; Signed qword compare, unmasked form: calls
; llvm.x86.avx512.mask.cmp.q.512 once per predicate immediate (i32 0 through 7)
; with an all-ones mask (i8 -1). Each call is expected to produce the matching
; vpcmp*q instruction writing %k0 with no {%k1} write-mask. Every i8 result is
; inserted into the next lane (0-7) of an <8 x i8> vector.
; The label directive below anchors the following checks to this function.
798 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
799 ; CHECK-LABEL: test_cmp_q_512
800 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
801 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
802 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
803 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
804 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
805 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
806 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
807 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
808 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
809 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
810 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
811 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
812 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
813 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
814 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
815 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
816 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
817 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
818 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
819 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
820 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
821 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
822 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
823 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
; Signed qword compare, masked form: calls
; llvm.x86.avx512.mask.cmp.q.512 once per predicate immediate (i32 0 through 7)
; passing the caller-supplied i8 %mask. Each call is expected to produce the
; matching vpcmp*q instruction writing %k0 under the {%k1} write-mask. Every i8
; result is inserted into the next lane (0-7) of an <8 x i8> vector.
; The label directive below anchors the following checks to this function.
827 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
828 ; CHECK-LABEL: test_mask_cmp_q_512
829 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
830 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
831 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
832 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
833 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
834 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
835 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
836 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
837 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
838 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
839 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
840 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
841 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
842 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
843 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
844 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
845 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
846 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
847 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
848 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
849 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
850 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
851 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
852 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
856 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
; Unsigned qword compare, unmasked form: calls
; llvm.x86.avx512.mask.ucmp.q.512 once per predicate immediate (i32 0 through 7)
; with an all-ones mask (i8 -1). Each call is expected to produce the matching
; vpcmp*uq instruction writing %k0 with no {%k1} write-mask. Every i8 result is
; inserted into the next lane (0-7) of an <8 x i8> vector.
; The label directive below anchors the following checks to this function.
858 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
859 ; CHECK-LABEL: test_ucmp_q_512
860 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
861 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
862 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
863 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
864 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
865 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
866 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
867 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
868 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
869 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
870 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
871 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
872 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
873 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
874 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
875 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
876 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
877 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
878 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
879 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
880 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
881 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
882 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
883 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
; Unsigned qword compare, masked form: calls
; llvm.x86.avx512.mask.ucmp.q.512 once per predicate immediate (i32 0 through 7)
; passing the caller-supplied i8 %mask. Each call is expected to produce the
; matching vpcmp*uq instruction writing %k0 under the {%k1} write-mask. Every i8
; result is inserted into the next lane (0-7) of an <8 x i8> vector.
; The label directive below anchors the following checks to this function.
887 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
888 ; CHECK-LABEL: test_mask_ucmp_q_512
889 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
890 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
891 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
892 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
893 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
894 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
895 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
896 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
897 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
898 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
899 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
900 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
901 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
902 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
903 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
904 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
905 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
906 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
907 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
908 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
909 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
910 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
911 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
912 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
916 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
; Masked 128-bit float extract: calls llvm.x86.avx512.mask.vextractf32x4.512 on
; %a with immediate i8 2, %b as the third operand and %mask as the mask.
; Expects vextractf32x4 $2 from %zmm1 into %xmm0 under the {%k1} write-mask.
918 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
919 ; CHECK-LABEL: test_mask_vextractf32x4:
920 ; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
921 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
925 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
; Masked 256-bit integer extract: calls llvm.x86.avx512.mask.vextracti64x4.512
; on %a with immediate i8 2, %b as the third operand and %mask as the mask.
; Expects vextracti64x4 $2 from %zmm1 into %ymm0 under the {%k1} write-mask.
927 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
928 ; CHECK-LABEL: test_mask_vextracti64x4:
929 ; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
930 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
934 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
; Zero-masked 128-bit integer extract: calls
; llvm.x86.avx512.mask.vextracti32x4.512 on %a with immediate i8 2, a
; zeroinitializer third operand and %mask as the mask.
; Expects the zeroing form: vextracti32x4 $2 ... {%k1} {z}.
936 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
937 ; CHECK-LABEL: test_maskz_vextracti32x4:
938 ; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
939 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
943 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
; Unmasked 256-bit double extract: calls llvm.x86.avx512.mask.vextractf64x4.512
; on %a with immediate i8 2, a zeroinitializer third operand and an all-ones
; mask (i8 -1), then returns the result.
; Expects vextractf64x4 $2 from %zmm0 into %ymm0 with no write-mask annotation.
945 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
946 ; CHECK-LABEL: test_vextractf64x4:
947 ; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
948 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
949 ret <4 x double> %res
952 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
; Unmasked dword left shift by immediate: calls llvm.x86.avx512.mask.pslli.d on
; %a0 with shift count i32 7, a zeroinitializer third operand and an all-ones
; mask (i16 -1).
954 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
955 ; CHECK-LABEL: test_x86_avx512_pslli_d
957 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
; Masked dword left shift by immediate: calls llvm.x86.avx512.mask.pslli.d on
; %a0 with shift count i32 7, %a1 as the third operand and %mask as the mask.
; Expects vpslld $7 from %zmm0 into %zmm1 under the {%k1} write-mask.
961 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
962 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
963 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
964 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
; Zero-masked dword left shift by immediate: calls llvm.x86.avx512.mask.pslli.d
; on %a0 with shift count i32 7, a zeroinitializer third operand and %mask.
; Expects the zeroing form: vpslld $7 ... {%k1} {z}.
968 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
969 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
970 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
971 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
975 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
; Unmasked qword left shift by immediate: calls llvm.x86.avx512.mask.pslli.q on
; %a0 with shift count i32 7, a zeroinitializer third operand and an all-ones
; mask (i8 -1).
977 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
978 ; CHECK-LABEL: test_x86_avx512_pslli_q
980 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
; Masked qword left shift by immediate: calls llvm.x86.avx512.mask.pslli.q on
; %a0 with shift count i32 7, %a1 as the third operand and %mask as the mask.
; Expects vpsllq $7 from %zmm0 into %zmm1 under the {%k1} write-mask.
984 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
985 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
986 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
987 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
; Zero-masked qword left shift by immediate: calls llvm.x86.avx512.mask.pslli.q
; on %a0 with shift count i32 7, a zeroinitializer third operand and %mask.
; Expects the zeroing form: vpsllq $7 ... {%k1} {z}.
991 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
992 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
993 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
994 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
998 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
; Unmasked dword logical right shift by immediate: calls
; llvm.x86.avx512.mask.psrli.d on %a0 with shift count i32 7, a zeroinitializer
; third operand and an all-ones mask (i16 -1).
1000 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1001 ; CHECK-LABEL: test_x86_avx512_psrli_d
1003 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
; Masked dword logical right shift by immediate: calls
; llvm.x86.avx512.mask.psrli.d on %a0 with shift count i32 7, %a1 as the third
; operand and %mask as the mask.
; Expects vpsrld $7 from %zmm0 into %zmm1 under the {%k1} write-mask.
1007 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1008 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
1009 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
1010 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
; Zero-masked dword logical right shift by immediate: calls
; llvm.x86.avx512.mask.psrli.d on %a0 with shift count i32 7, a zeroinitializer
; third operand and %mask.
; Expects the zeroing form: vpsrld $7 ... {%k1} {z}.
1014 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1015 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
1016 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1017 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1021 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
; Unmasked qword logical right shift by immediate: calls
; llvm.x86.avx512.mask.psrli.q on %a0 with shift count i32 7, a zeroinitializer
; third operand and an all-ones mask (i8 -1).
1023 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1024 ; CHECK-LABEL: test_x86_avx512_psrli_q
1026 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
; Masked qword logical right shift by immediate: calls
; llvm.x86.avx512.mask.psrli.q on %a0 with shift count i32 7, %a1 as the third
; operand and %mask as the mask.
; Expects vpsrlq $7 from %zmm0 into %zmm1 under the {%k1} write-mask.
1030 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1031 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
1032 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
1033 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
; Zero-masked qword logical right shift by immediate: calls
; llvm.x86.avx512.mask.psrli.q on %a0 with shift count i32 7, a zeroinitializer
; third operand and %mask.
; Expects the zeroing form: vpsrlq $7 ... {%k1} {z}.
1037 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1038 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
1039 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1040 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1044 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
; Unmasked dword arithmetic right shift by immediate: calls
; llvm.x86.avx512.mask.psrai.d on %a0 with shift count i32 7, a zeroinitializer
; third operand and an all-ones mask (i16 -1).
1046 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1047 ; CHECK-LABEL: test_x86_avx512_psrai_d
1049 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
; Masked dword arithmetic right shift by immediate: calls
; llvm.x86.avx512.mask.psrai.d on %a0 with shift count i32 7, %a1 as the third
; operand and %mask as the mask.
; Expects vpsrad $7 from %zmm0 into %zmm1 under the {%k1} write-mask.
1053 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1054 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
1055 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
1056 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
; Zero-masked dword arithmetic right shift by immediate: calls
; llvm.x86.avx512.mask.psrai.d on %a0 with shift count i32 7, a zeroinitializer
; third operand and %mask.
; Expects the zeroing form: vpsrad $7 ... {%k1} {z}.
1060 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1061 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
1062 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1063 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1067 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
; Unmasked qword arithmetic right shift by immediate: calls
; llvm.x86.avx512.mask.psrai.q on %a0 with shift count i32 7, a zeroinitializer
; third operand and an all-ones mask (i8 -1).
1069 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1070 ; CHECK-LABEL: test_x86_avx512_psrai_q
1072 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
; Masked qword arithmetic right shift by immediate: calls
; llvm.x86.avx512.mask.psrai.q on %a0 with shift count i32 7, %a1 as the third
; operand and %mask as the mask.
; Expects vpsraq $7 from %zmm0 into %zmm1 under the {%k1} write-mask.
1076 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1077 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
1078 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
1079 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
; Zero-masked qword arithmetic right shift by immediate: calls
; llvm.x86.avx512.mask.psrai.q on %a0 with shift count i32 7, a zeroinitializer
; third operand and %mask.
; Expects the zeroing form: vpsraq $7 ... {%k1} {z}.
1083 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1084 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
1085 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1086 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1090 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone