1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
4 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
5 define i32 @test_kortestz(i16 %a0, i16 %a1) {
6 ; CHECK-LABEL: test_kortestz:
8 ; CHECK-NEXT: kmovw %esi, %k0
9 ; CHECK-NEXT: kmovw %edi, %k1
10 ; CHECK-NEXT: kortestw %k0, %k1
11 ; CHECK-NEXT: sete %al
12 ; CHECK-NEXT: kmovw %eax, %k0
13 ; CHECK-NEXT: kmovw %k0, %eax
14 ; CHECK-NEXT: andl $1, %eax
16 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
20 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
21 define i32 @test_kortestc(i16 %a0, i16 %a1) {
22 ; CHECK-LABEL: test_kortestc:
24 ; CHECK-NEXT: kmovw %esi, %k0
25 ; CHECK-NEXT: kmovw %edi, %k1
26 ; CHECK-NEXT: kortestw %k0, %k1
27 ; CHECK-NEXT: sbbl %eax, %eax
28 ; CHECK-NEXT: andl $1, %eax
30 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
34 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
35 define i16 @test_kand(i16 %a0, i16 %a1) {
36 ; CHECK-LABEL: test_kand:
38 ; CHECK-NEXT: movw $8, %ax
39 ; CHECK-NEXT: kmovw %eax, %k0
40 ; CHECK-NEXT: kmovw %edi, %k1
41 ; CHECK-NEXT: kandw %k0, %k1, %k0
42 ; CHECK-NEXT: kmovw %esi, %k1
43 ; CHECK-NEXT: kandw %k1, %k0, %k0
44 ; CHECK-NEXT: kmovw %k0, %eax
46 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
47 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
51 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
52 define i16 @test_knot(i16 %a0) {
53 ; CHECK-LABEL: test_knot:
55 ; CHECK-NEXT: kmovw %edi, %k0
56 ; CHECK-NEXT: knotw %k0, %k0
57 ; CHECK-NEXT: kmovw %k0, %eax
59 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
63 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
65 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
66 ; CHECK-LABEL: unpckbw_test:
68 ; CHECK-NEXT: kmovw %esi, %k0
69 ; CHECK-NEXT: kmovw %edi, %k1
70 ; CHECK-NEXT: kunpckbw %k0, %k1, %k0
71 ; CHECK-NEXT: kmovw %k0, %eax
73 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
77 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
78 ; CHECK-LABEL: test_rcp_ps_512:
80 ; CHECK-NEXT: vrcp14ps %zmm0, %zmm0
82 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
85 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
87 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
88 ; CHECK-LABEL: test_rcp_pd_512:
90 ; CHECK-NEXT: vrcp14pd %zmm0, %zmm0
92 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
95 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
97 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
99 define <8 x double> @test7(<8 x double> %a) {
100 ; CHECK-LABEL: test7:
102 ; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
104 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
108 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
110 define <16 x float> @test8(<16 x float> %a) {
111 ; CHECK-LABEL: test8:
113 ; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0
115 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
119 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
120 ; CHECK-LABEL: test_rsqrt_ps_512:
122 ; CHECK-NEXT: vrsqrt14ps %zmm0, %zmm0
124 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
125 ret <16 x float> %res
127 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
129 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
130 ; CHECK-LABEL: test_rsqrt14_ss:
132 ; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0
134 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
137 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
139 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
140 ; CHECK-LABEL: test_rcp14_ss:
142 ; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
144 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
147 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
149 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
150 ; CHECK-LABEL: test_sqrt_pd_512:
152 ; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
154 %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
155 ret <8 x double> %res
157 declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
159 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
160 ; CHECK-LABEL: test_sqrt_ps_512:
162 ; CHECK-NEXT: vsqrtps %zmm0, %zmm0
164 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
165 ret <16 x float> %res
167 define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
168 ; CHECK-LABEL: test_sqrt_round_ps_512:
170 ; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0
172 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
173 ret <16 x float> %res
175 declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
177 define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
178 ; CHECK-LABEL: test_getexp_pd_512:
180 ; CHECK-NEXT: vgetexppd %zmm0, %zmm0
182 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
183 ret <8 x double> %res
185 define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
186 ; CHECK-LABEL: test_getexp_round_pd_512:
188 ; CHECK-NEXT: vgetexppd {sae}, %zmm0, %zmm0
190 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
191 ret <8 x double> %res
193 declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
195 define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
196 ; CHECK-LABEL: test_getexp_ps_512:
198 ; CHECK-NEXT: vgetexpps %zmm0, %zmm0
200 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
201 ret <16 x float> %res
204 define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
205 ; CHECK-LABEL: test_getexp_round_ps_512:
207 ; CHECK-NEXT: vgetexpps {sae}, %zmm0, %zmm0
209 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
210 ret <16 x float> %res
212 declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
214 declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
216 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
217 ; CHECK-LABEL: test_sqrt_ss:
219 ; CHECK-NEXT: andl $1, %edi
220 ; CHECK-NEXT: kmovw %edi, %k1
221 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
222 ; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
223 ; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
224 ; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
225 ; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
226 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
227 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
228 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
230 %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
231 %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
232 %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
233 %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)
235 %res.1 = fadd <4 x float> %res0, %res1
236 %res.2 = fadd <4 x float> %res2, %res3
237 %res = fadd <4 x float> %res.1, %res.2
241 declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
243 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
244 ; CHECK-LABEL: test_sqrt_sd:
246 ; CHECK-NEXT: andl $1, %edi
247 ; CHECK-NEXT: kmovw %edi, %k1
248 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
249 ; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
250 ; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
251 ; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
252 ; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
253 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
254 ; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0
255 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
257 %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
258 %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
259 %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
260 %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)
262 %res.1 = fadd <2 x double> %res0, %res1
263 %res.2 = fadd <2 x double> %res2, %res3
264 %res = fadd <2 x double> %res.1, %res.2
265 ret <2 x double> %res
268 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
269 ; CHECK-LABEL: test_x86_sse2_cvtsd2si64:
271 ; CHECK-NEXT: vcvtsd2si %xmm0, %rax
273 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
276 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
278 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
279 ; CHECK-LABEL: test_x86_sse2_cvtsi642sd:
281 ; CHECK-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
283 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
284 ret <2 x double> %res
286 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
288 define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
289 ; CHECK-LABEL: test_x86_avx512_cvttsd2si64:
291 ; CHECK-NEXT: vcvttsd2si %xmm0, %rcx
292 ; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %rax
293 ; CHECK-NEXT: addq %rcx, %rax
295 %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ;
296 %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ;
297 %res2 = add i64 %res0, %res1
300 declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone
302 define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) {
303 ; CHECK-LABEL: test_x86_avx512_cvttsd2usi:
305 ; CHECK-NEXT: vcvttsd2usi %xmm0, %ecx
306 ; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %eax
307 ; CHECK-NEXT: addl %ecx, %eax
309 %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
310 %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
311 %res2 = add i32 %res0, %res1
314 declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
316 define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) {
317 ; CHECK-LABEL: test_x86_avx512_cvttsd2si:
319 ; CHECK-NEXT: vcvttsd2si %xmm0, %ecx
320 ; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %eax
321 ; CHECK-NEXT: addl %ecx, %eax
323 %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
324 %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
325 %res2 = add i32 %res0, %res1
328 declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
332 define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
333 ; CHECK-LABEL: test_x86_avx512_cvttsd2usi64:
335 ; CHECK-NEXT: vcvttsd2usi %xmm0, %rcx
336 ; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %rax
337 ; CHECK-NEXT: addq %rcx, %rax
339 %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ;
340 %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ;
341 %res2 = add i64 %res0, %res1
344 declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone
346 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
347 ; CHECK-LABEL: test_x86_sse_cvtss2si64:
349 ; CHECK-NEXT: vcvtss2si %xmm0, %rax
351 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
354 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
357 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
358 ; CHECK-LABEL: test_x86_sse_cvtsi642ss:
360 ; CHECK-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
362 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
365 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
368 define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) {
369 ; CHECK-LABEL: test_x86_avx512_cvttss2si:
371 ; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %ecx
372 ; CHECK-NEXT: vcvttss2si %xmm0, %eax
373 ; CHECK-NEXT: addl %ecx, %eax
375 %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
376 %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
377 %res2 = add i32 %res0, %res1
380 declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
382 define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
383 ; CHECK-LABEL: test_x86_avx512_cvttss2si64:
385 ; CHECK-NEXT: vcvttss2si %xmm0, %rcx
386 ; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %rax
387 ; CHECK-NEXT: addq %rcx, %rax
389 %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ;
390 %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ;
391 %res2 = add i64 %res0, %res1
394 declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone
396 define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
397 ; CHECK-LABEL: test_x86_avx512_cvttss2usi:
399 ; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %ecx
400 ; CHECK-NEXT: vcvttss2usi %xmm0, %eax
401 ; CHECK-NEXT: addl %ecx, %eax
403 %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
404 %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
405 %res2 = add i32 %res0, %res1
408 declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
410 define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
411 ; CHECK-LABEL: test_x86_avx512_cvttss2usi64:
413 ; CHECK-NEXT: vcvttss2usi %xmm0, %rcx
414 ; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %rax
415 ; CHECK-NEXT: addq %rcx, %rax
417 %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ;
418 %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ;
419 %res2 = add i64 %res0, %res1
422 declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
424 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
425 ; CHECK-LABEL: test_x86_avx512_cvtsd2usi64:
427 ; CHECK-NEXT: vcvtsd2usi %xmm0, %rax
429 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
432 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
434 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
435 ; CHECK-LABEL: test_x86_vcvtph2ps_512:
437 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0
439 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
440 ret <16 x float> %res
443 define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
444 ; CHECK-LABEL: test_x86_vcvtph2ps_512_sae:
446 ; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0
448 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
449 ret <16 x float> %res
452 define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
453 ; CHECK-LABEL: test_x86_vcvtph2ps_512_rrk:
455 ; CHECK-NEXT: kmovw %edi, %k1
456 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1}
457 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
459 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
460 ret <16 x float> %res
463 define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
464 ; CHECK-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
466 ; CHECK-NEXT: kmovw %edi, %k1
467 ; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z}
469 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
470 ret <16 x float> %res
473 define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
474 ; CHECK-LABEL: test_x86_vcvtph2ps_512_rrkz:
476 ; CHECK-NEXT: kmovw %edi, %k1
477 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z}
479 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
480 ret <16 x float> %res
483 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
486 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
487 ; CHECK-LABEL: test_x86_vcvtps2ph_256:
489 ; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm0
491 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
495 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
497 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
498 ; CHECK-LABEL: test_x86_vbroadcast_ss_512:
500 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0
502 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
503 ret <16 x float> %res
505 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
507 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
508 ; CHECK-LABEL: test_x86_vbroadcast_sd_512:
510 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
512 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
513 ret <8 x double> %res
515 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
517 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
518 ; CHECK-LABEL: test_x86_vbroadcast_ss_ps_512:
520 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
522 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
523 ret <16 x float> %res
525 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
527 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
528 ; CHECK-LABEL: test_x86_vbroadcast_sd_pd_512:
530 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
532 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
533 ret <8 x double> %res
535 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
537 define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
538 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512:
540 ; CHECK-NEXT: kmovw %edi, %k1
541 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm1 {%k1}
542 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm2 {%k1} {z}
543 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0
544 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
545 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
547 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
548 %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
549 %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
550 %res3 = add <16 x i32> %res, %res1
551 %res4 = add <16 x i32> %res2, %res3
554 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)
556 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
557 ; CHECK-LABEL: test_x86_pbroadcastd_i32_512:
559 ; CHECK-NEXT: vpbroadcastd %edi, %zmm0
561 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
564 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
566 define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
567 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
569 ; CHECK-NEXT: movzbl %dil, %eax
570 ; CHECK-NEXT: kmovw %eax, %k1
571 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm1 {%k1}
572 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm2 {%k1} {z}
573 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm0
574 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
575 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
577 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1)
578 %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask)
579 %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
580 %res3 = add <8 x i64> %res, %res1
581 %res4 = add <8 x i64> %res2, %res3
584 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)
586 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
587 ; CHECK-LABEL: test_x86_pbroadcastq_i64_512:
589 ; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
591 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
594 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
596 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
597 ; CHECK-LABEL: test_conflict_d:
599 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0
601 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
605 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
607 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
608 ; CHECK-LABEL: test_conflict_q:
610 ; CHECK-NEXT: vpconflictq %zmm0, %zmm0
612 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
616 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
618 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
619 ; CHECK-LABEL: test_maskz_conflict_d:
621 ; CHECK-NEXT: kmovw %edi, %k1
622 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z}
624 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
628 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
629 ; CHECK-LABEL: test_mask_conflict_q:
631 ; CHECK-NEXT: movzbl %dil, %eax
632 ; CHECK-NEXT: kmovw %eax, %k1
633 ; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
634 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
636 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
640 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
641 ; CHECK-LABEL: test_lzcnt_d:
643 ; CHECK-NEXT: vplzcntd %zmm0, %zmm0
645 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
649 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
651 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
652 ; CHECK-LABEL: test_lzcnt_q:
654 ; CHECK-NEXT: vplzcntq %zmm0, %zmm0
656 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
660 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
663 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
664 ; CHECK-LABEL: test_mask_lzcnt_d:
666 ; CHECK-NEXT: kmovw %edi, %k1
667 ; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1}
668 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
670 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
674 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
675 ; CHECK-LABEL: test_mask_lzcnt_q:
677 ; CHECK-NEXT: movzbl %dil, %eax
678 ; CHECK-NEXT: kmovw %eax, %k1
679 ; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1}
680 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
682 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
686 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
687 ; CHECK-LABEL: test_x86_mask_blend_ps_512:
689 ; CHECK-NEXT: kmovw %edi, %k1
690 ; CHECK-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1}
692 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
693 ret <16 x float> %res
696 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
698 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
699 ; CHECK-LABEL: test_x86_mask_blend_pd_512:
701 ; CHECK-NEXT: movzbl %dil, %eax
702 ; CHECK-NEXT: kmovw %eax, %k1
703 ; CHECK-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
705 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
706 ret <8 x double> %res
709 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
710 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop:
712 ; CHECK-NEXT: movzbl %sil, %eax
713 ; CHECK-NEXT: kmovw %eax, %k1
714 ; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1}
716 %b = load <8 x double>, <8 x double>* %ptr
717 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
718 ret <8 x double> %res
720 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
722 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
723 ; CHECK-LABEL: test_x86_mask_blend_d_512:
725 ; CHECK-NEXT: kmovw %edi, %k1
726 ; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm0 {%k1}
728 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
731 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
733 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
734 ; CHECK-LABEL: test_x86_mask_blend_q_512:
736 ; CHECK-NEXT: movzbl %dil, %eax
737 ; CHECK-NEXT: kmovw %eax, %k1
738 ; CHECK-NEXT: vpblendmq %zmm1, %zmm0, %zmm0 {%k1}
740 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
743 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
745 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
746 ; CHECK-LABEL: test_cmpps:
748 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
749 ; CHECK-NEXT: kmovw %k0, %eax
751 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
754 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
756 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
757 ; CHECK-LABEL: test_cmppd:
759 ; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0
760 ; CHECK-NEXT: kmovw %k0, %eax
762 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
765 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
768 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
769 ; CHECK-LABEL: test_vmaxpd:
771 ; CHECK-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
773 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
774 <8 x double>zeroinitializer, i8 -1, i32 4)
775 ret <8 x double> %res
777 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
778 <8 x double>, i8, i32)
780 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
781 ; CHECK-LABEL: test_vminpd:
783 ; CHECK-NEXT: vminpd %zmm1, %zmm0, %zmm0
785 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
786 <8 x double>zeroinitializer, i8 -1, i32 4)
787 ret <8 x double> %res
789 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
790 <8 x double>, i8, i32)
792 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
794 define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
795 ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_512:
797 ; CHECK-NEXT: kmovw %edi, %k1
798 ; CHECK-NEXT: vpabsd %zmm0, %zmm1 {%k1}
799 ; CHECK-NEXT: vpabsd %zmm0, %zmm0
800 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
802 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
803 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
804 %res2 = add <16 x i32> %res, %res1
808 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
810 define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
811 ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_512:
813 ; CHECK-NEXT: movzbl %dil, %eax
814 ; CHECK-NEXT: kmovw %eax, %k1
815 ; CHECK-NEXT: vpabsq %zmm0, %zmm1 {%k1}
816 ; CHECK-NEXT: vpabsq %zmm0, %zmm0
817 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
819 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
820 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
821 %res2 = add <8 x i64> %res, %res1
825 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
826 ; CHECK-LABEL: test_vptestmq:
828 ; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0
829 ; CHECK-NEXT: kmovw %k0, %eax
831 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
834 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
836 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
837 ; CHECK-LABEL: test_vptestmd:
839 ; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0
840 ; CHECK-NEXT: kmovw %k0, %eax
842 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
845 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
847 define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
848 ; CHECK-LABEL: test_store1:
850 ; CHECK-NEXT: kmovw %esi, %k1
851 ; CHECK-NEXT: vmovups %zmm0, (%rdi) {%k1}
853 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
857 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
859 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
860 ; CHECK-LABEL: test_store2:
862 ; CHECK-NEXT: kmovw %esi, %k1
863 ; CHECK-NEXT: vmovupd %zmm0, (%rdi) {%k1}
865 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
869 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
871 define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
872 ; CHECK-LABEL: test_mask_store_aligned_ps:
874 ; CHECK-NEXT: kmovw %esi, %k1
875 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
877 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
881 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
883 define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
884 ; CHECK-LABEL: test_mask_store_aligned_pd:
886 ; CHECK-NEXT: kmovw %esi, %k1
887 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
889 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
893 declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
895 define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
896 ; CHECK-LABEL: test_maskz_load_aligned_ps:
898 ; CHECK-NEXT: kmovw %esi, %k1
899 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
901 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
902 ret <16 x float> %res
905 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
907 define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
908 ; CHECK-LABEL: test_maskz_load_aligned_pd:
910 ; CHECK-NEXT: kmovw %esi, %k1
911 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
913 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
914 ret <8 x double> %res
917 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
919 define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
920 ; CHECK-LABEL: test_load_aligned_ps:
922 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
924 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
925 ret <16 x float> %res
928 define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
929 ; CHECK-LABEL: test_load_aligned_pd:
931 ; CHECK-NEXT: vmovapd (%rdi), %zmm0
933 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
934 ret <8 x double> %res
937 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
939 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
940 ; CHECK-LABEL: test_valign_q:
942 ; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm0
944 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
948 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
949 ; CHECK-LABEL: test_mask_valign_q:
951 ; CHECK-NEXT: movzbl %dil, %eax
952 ; CHECK-NEXT: kmovw %eax, %k1
953 ; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
954 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
956 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
960 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
962 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
963 ; CHECK-LABEL: test_maskz_valign_d:
965 ; CHECK-NEXT: kmovw %edi, %k1
966 ; CHECK-NEXT: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z}
968 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
972 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
974 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
975 ; CHECK-LABEL: test_mask_store_ss:
977 ; CHECK-NEXT: kmovw %esi, %k1
978 ; CHECK-NEXT: vmovss %xmm0, (%rdi) {%k1}
980 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
984 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
986 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
987 ; CHECK-LABEL: test_pcmpeq_d:
989 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
990 ; CHECK-NEXT: kmovw %k0, %eax
992 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
996 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
997 ; CHECK-LABEL: test_mask_pcmpeq_d:
999 ; CHECK-NEXT: kmovw %edi, %k1
1000 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1001 ; CHECK-NEXT: kmovw %k0, %eax
1003 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
1007 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
1009 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
1010 ; CHECK-LABEL: test_pcmpeq_q:
1012 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
1013 ; CHECK-NEXT: kmovw %k0, %eax
1015 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
1019 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1020 ; CHECK-LABEL: test_mask_pcmpeq_q:
1022 ; CHECK-NEXT: movzbl %dil, %eax
1023 ; CHECK-NEXT: kmovw %eax, %k1
1024 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
1025 ; CHECK-NEXT: kmovw %k0, %eax
1027 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
1031 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
1033 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
1034 ; CHECK-LABEL: test_pcmpgt_d:
1036 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
1037 ; CHECK-NEXT: kmovw %k0, %eax
1039 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
1043 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1044 ; CHECK-LABEL: test_mask_pcmpgt_d:
1046 ; CHECK-NEXT: kmovw %edi, %k1
1047 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
1048 ; CHECK-NEXT: kmovw %k0, %eax
1050 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
1054 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
1056 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
1057 ; CHECK-LABEL: test_pcmpgt_q:
1059 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
1060 ; CHECK-NEXT: kmovw %k0, %eax
1062 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
1066 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1067 ; CHECK-LABEL: test_mask_pcmpgt_q:
1069 ; CHECK-NEXT: movzbl %dil, %eax
1070 ; CHECK-NEXT: kmovw %eax, %k1
1071 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
1072 ; CHECK-NEXT: kmovw %k0, %eax
1074 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
1078 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
1080 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
1081 ; CHECK-LABEL: test_cmp_d_512:
1083 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1084 ; CHECK-NEXT: kmovw %k0, %r8d
1085 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k0
1086 ; CHECK-NEXT: kmovw %k0, %r9d
1087 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k0
1088 ; CHECK-NEXT: kmovw %k0, %r10d
1089 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k0
1090 ; CHECK-NEXT: kmovw %k0, %esi
1091 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
1092 ; CHECK-NEXT: kmovw %k0, %edi
1093 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
1094 ; CHECK-NEXT: kmovw %k0, %eax
1095 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k0
1096 ; CHECK-NEXT: kmovw %k0, %ecx
1097 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0
1098 ; CHECK-NEXT: kmovw %k0, %edx
1099 ; CHECK-NEXT: vmovd %r8d, %xmm0
1100 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
1101 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
1102 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
1103 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
1104 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1105 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1106 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
1108 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
1109 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
1110 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
1111 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
1112 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
1113 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
1114 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
1115 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
1116 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
1117 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
1118 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
1119 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
1120 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
1121 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
1122 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
1123 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
1127 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1128 ; CHECK-LABEL: test_mask_cmp_d_512:
1130 ; CHECK-NEXT: kmovw %edi, %k1
1131 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1132 ; CHECK-NEXT: kmovw %k0, %r8d
1133 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k0 {%k1}
1134 ; CHECK-NEXT: kmovw %k0, %r9d
1135 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k0 {%k1}
1136 ; CHECK-NEXT: kmovw %k0, %r10d
1137 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k0 {%k1}
1138 ; CHECK-NEXT: kmovw %k0, %esi
1139 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 {%k1}
1140 ; CHECK-NEXT: kmovw %k0, %edi
1141 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
1142 ; CHECK-NEXT: kmovw %k0, %eax
1143 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k0 {%k1}
1144 ; CHECK-NEXT: kmovw %k0, %ecx
1145 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0 {%k1}
1146 ; CHECK-NEXT: kmovw %k0, %edx
1147 ; CHECK-NEXT: vmovd %r8d, %xmm0
1148 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
1149 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
1150 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
1151 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
1152 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1153 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1154 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
1156 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
1157 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
1158 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
1159 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
1160 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
1161 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
1162 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
1163 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
1164 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
1165 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
1166 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
1167 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
1168 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
1169 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
1170 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
1171 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
1175 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
1177 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
1178 ; CHECK-LABEL: test_ucmp_d_512:
1180 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0
1181 ; CHECK-NEXT: kmovw %k0, %r8d
1182 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0
1183 ; CHECK-NEXT: kmovw %k0, %r9d
1184 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k0
1185 ; CHECK-NEXT: kmovw %k0, %r10d
1186 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k0
1187 ; CHECK-NEXT: kmovw %k0, %esi
1188 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0
1189 ; CHECK-NEXT: kmovw %k0, %edi
1190 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k0
1191 ; CHECK-NEXT: kmovw %k0, %eax
1192 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
1193 ; CHECK-NEXT: kmovw %k0, %ecx
1194 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0
1195 ; CHECK-NEXT: kmovw %k0, %edx
1196 ; CHECK-NEXT: vmovd %r8d, %xmm0
1197 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
1198 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
1199 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
1200 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
1201 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1202 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1203 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
1205 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
1206 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
1207 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
1208 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
1209 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
1210 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
1211 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
1212 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
1213 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
1214 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
1215 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
1216 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
1217 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
1218 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
1219 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
1220 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
1224 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1225 ; CHECK-LABEL: test_mask_ucmp_d_512:
1227 ; CHECK-NEXT: kmovw %edi, %k1
1228 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0 {%k1}
1229 ; CHECK-NEXT: kmovw %k0, %r8d
1230 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
1231 ; CHECK-NEXT: kmovw %k0, %r9d
1232 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k0 {%k1}
1233 ; CHECK-NEXT: kmovw %k0, %r10d
1234 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k0 {%k1}
1235 ; CHECK-NEXT: kmovw %k0, %esi
1236 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0 {%k1}
1237 ; CHECK-NEXT: kmovw %k0, %edi
1238 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k0 {%k1}
1239 ; CHECK-NEXT: kmovw %k0, %eax
1240 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 {%k1}
1241 ; CHECK-NEXT: kmovw %k0, %ecx
1242 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0 {%k1}
1243 ; CHECK-NEXT: kmovw %k0, %edx
1244 ; CHECK-NEXT: vmovd %r8d, %xmm0
1245 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
1246 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
1247 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
1248 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
1249 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1250 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1251 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
1253 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
1254 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
1255 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
1256 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
1257 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
1258 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
1259 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
1260 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
1261 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
1262 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
1263 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
1264 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
1265 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
1266 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
1267 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
1268 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
1272 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
1274 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
1275 ; CHECK-LABEL: test_cmp_q_512:
1277 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
1278 ; CHECK-NEXT: kmovw %k0, %r8d
1279 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k0
1280 ; CHECK-NEXT: kmovw %k0, %r9d
1281 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k0
1282 ; CHECK-NEXT: kmovw %k0, %r10d
1283 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k0
1284 ; CHECK-NEXT: kmovw %k0, %r11d
1285 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0
1286 ; CHECK-NEXT: kmovw %k0, %edi
1287 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
1288 ; CHECK-NEXT: kmovw %k0, %eax
1289 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k0
1290 ; CHECK-NEXT: kmovw %k0, %ecx
1291 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0
1292 ; CHECK-NEXT: kmovw %k0, %edx
1293 ; CHECK-NEXT: movzbl %r8b, %esi
1294 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
1295 ; CHECK-NEXT: movzbl %r9b, %esi
1296 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
1297 ; CHECK-NEXT: movzbl %r10b, %esi
1298 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1299 ; CHECK-NEXT: movzbl %r11b, %esi
1300 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
1301 ; CHECK-NEXT: movzbl %dil, %esi
1302 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
1303 ; CHECK-NEXT: movzbl %al, %eax
1304 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1305 ; CHECK-NEXT: movzbl %cl, %eax
1306 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1307 ; CHECK-NEXT: movzbl %dl, %eax
1308 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1310 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
1311 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1312 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
1313 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1314 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
1315 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1316 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
1317 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1318 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
1319 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1320 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
1321 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1322 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
1323 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1324 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
1325 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1329 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1330 ; CHECK-LABEL: test_mask_cmp_q_512:
1332 ; CHECK-NEXT: movzbl %dil, %eax
1333 ; CHECK-NEXT: kmovw %eax, %k1
1334 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
1335 ; CHECK-NEXT: kmovw %k0, %r8d
1336 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k0 {%k1}
1337 ; CHECK-NEXT: kmovw %k0, %r9d
1338 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k0 {%k1}
1339 ; CHECK-NEXT: kmovw %k0, %r10d
1340 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k0 {%k1}
1341 ; CHECK-NEXT: kmovw %k0, %r11d
1342 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k1}
1343 ; CHECK-NEXT: kmovw %k0, %edi
1344 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
1345 ; CHECK-NEXT: kmovw %k0, %eax
1346 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k0 {%k1}
1347 ; CHECK-NEXT: kmovw %k0, %ecx
1348 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0 {%k1}
1349 ; CHECK-NEXT: kmovw %k0, %edx
1350 ; CHECK-NEXT: movzbl %r8b, %esi
1351 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
1352 ; CHECK-NEXT: movzbl %r9b, %esi
1353 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
1354 ; CHECK-NEXT: movzbl %r10b, %esi
1355 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1356 ; CHECK-NEXT: movzbl %r11b, %esi
1357 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
1358 ; CHECK-NEXT: movzbl %dil, %esi
1359 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
1360 ; CHECK-NEXT: movzbl %al, %eax
1361 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1362 ; CHECK-NEXT: movzbl %cl, %eax
1363 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1364 ; CHECK-NEXT: movzbl %dl, %eax
1365 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1367 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
1368 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1369 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
1370 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1371 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
1372 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1373 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
1374 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1375 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
1376 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1377 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
1378 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1379 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
1380 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1381 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
1382 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1386 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
1388 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
1389 ; CHECK-LABEL: test_ucmp_q_512:
1391 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0
1392 ; CHECK-NEXT: kmovw %k0, %r8d
1393 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
1394 ; CHECK-NEXT: kmovw %k0, %r9d
1395 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k0
1396 ; CHECK-NEXT: kmovw %k0, %r10d
1397 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k0
1398 ; CHECK-NEXT: kmovw %k0, %r11d
1399 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0
1400 ; CHECK-NEXT: kmovw %k0, %edi
1401 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0
1402 ; CHECK-NEXT: kmovw %k0, %eax
1403 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0
1404 ; CHECK-NEXT: kmovw %k0, %ecx
1405 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0
1406 ; CHECK-NEXT: kmovw %k0, %edx
1407 ; CHECK-NEXT: movzbl %r8b, %esi
1408 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
1409 ; CHECK-NEXT: movzbl %r9b, %esi
1410 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
1411 ; CHECK-NEXT: movzbl %r10b, %esi
1412 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1413 ; CHECK-NEXT: movzbl %r11b, %esi
1414 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
1415 ; CHECK-NEXT: movzbl %dil, %esi
1416 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
1417 ; CHECK-NEXT: movzbl %al, %eax
1418 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1419 ; CHECK-NEXT: movzbl %cl, %eax
1420 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1421 ; CHECK-NEXT: movzbl %dl, %eax
1422 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1424 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
1425 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1426 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
1427 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1428 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
1429 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1430 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
1431 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1432 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
1433 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1434 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
1435 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1436 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
1437 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1438 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
1439 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1443 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1444 ; CHECK-LABEL: test_mask_ucmp_q_512:
1446 ; CHECK-NEXT: movzbl %dil, %eax
1447 ; CHECK-NEXT: kmovw %eax, %k1
1448 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0 {%k1}
1449 ; CHECK-NEXT: kmovw %k0, %r8d
1450 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
1451 ; CHECK-NEXT: kmovw %k0, %r9d
1452 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k0 {%k1}
1453 ; CHECK-NEXT: kmovw %k0, %r10d
1454 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1}
1455 ; CHECK-NEXT: kmovw %k0, %r11d
1456 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0 {%k1}
1457 ; CHECK-NEXT: kmovw %k0, %edi
1458 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1}
1459 ; CHECK-NEXT: kmovw %k0, %eax
1460 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1}
1461 ; CHECK-NEXT: kmovw %k0, %ecx
1462 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0 {%k1}
1463 ; CHECK-NEXT: kmovw %k0, %edx
1464 ; CHECK-NEXT: movzbl %r8b, %esi
1465 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
1466 ; CHECK-NEXT: movzbl %r9b, %esi
1467 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
1468 ; CHECK-NEXT: movzbl %r10b, %esi
1469 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1470 ; CHECK-NEXT: movzbl %r11b, %esi
1471 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
1472 ; CHECK-NEXT: movzbl %dil, %esi
1473 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
1474 ; CHECK-NEXT: movzbl %al, %eax
1475 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1476 ; CHECK-NEXT: movzbl %cl, %eax
1477 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1478 ; CHECK-NEXT: movzbl %dl, %eax
1479 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1481 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
1482 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1483 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
1484 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1485 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
1486 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1487 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
1488 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1489 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
1490 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1491 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
1492 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1493 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
1494 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1495 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
1496 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1500 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
1502 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
1503 ; CHECK-LABEL: test_mask_vextractf32x4:
1505 ; CHECK-NEXT: kmovw %edi, %k1
1506 ; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
1508 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
1509 ret <4 x float> %res
1512 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)
1514 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
1515 ; CHECK-LABEL: test_mask_vextracti64x4:
1517 ; CHECK-NEXT: kmovw %edi, %k1
1518 ; CHECK-NEXT: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
1520 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 2, <4 x i64> %b, i8 %mask)
1524 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)
1526 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
1527 ; CHECK-LABEL: test_maskz_vextracti32x4:
1529 ; CHECK-NEXT: kmovw %edi, %k1
1530 ; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
1532 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
1536 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)
1538 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
1539 ; CHECK-LABEL: test_vextractf64x4:
1541 ; CHECK-NEXT: vextractf64x4 $2, %zmm0, %ymm0
1543 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 2, <4 x double> zeroinitializer, i8 -1)
1544 ret <4 x double> %res
1547 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
1549 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
1550 ; CHECK-LABEL: test_x86_avx512_pslli_d:
1552 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm0
1554 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1558 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1559 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d:
1561 ; CHECK-NEXT: kmovw %edi, %k1
1562 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm1 {%k1}
1563 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1565 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1569 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
1570 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d:
1572 ; CHECK-NEXT: kmovw %edi, %k1
1573 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z}
1575 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1579 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1581 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
1582 ; CHECK-LABEL: test_x86_avx512_pslli_q:
1584 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0
1586 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1590 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1591 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q:
1593 ; CHECK-NEXT: movzbl %dil, %eax
1594 ; CHECK-NEXT: kmovw %eax, %k1
1595 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1}
1596 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1598 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1602 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
1603 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q:
1605 ; CHECK-NEXT: movzbl %dil, %eax
1606 ; CHECK-NEXT: kmovw %eax, %k1
1607 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
1609 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1613 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1615 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1616 ; CHECK-LABEL: test_x86_avx512_psrli_d:
1618 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0
1620 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1624 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1625 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d:
1627 ; CHECK-NEXT: kmovw %edi, %k1
1628 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1}
1629 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1631 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1635 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1636 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d:
1638 ; CHECK-NEXT: kmovw %edi, %k1
1639 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1641 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1645 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1647 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1648 ; CHECK-LABEL: test_x86_avx512_psrli_q:
1650 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0
1652 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1656 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1657 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q:
1659 ; CHECK-NEXT: movzbl %dil, %eax
1660 ; CHECK-NEXT: kmovw %eax, %k1
1661 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1}
1662 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1664 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1668 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1669 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q:
1671 ; CHECK-NEXT: movzbl %dil, %eax
1672 ; CHECK-NEXT: kmovw %eax, %k1
1673 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1675 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1679 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1681 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1682 ; CHECK-LABEL: test_x86_avx512_psrai_d:
1684 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0
1686 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1690 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1691 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d:
1693 ; CHECK-NEXT: kmovw %edi, %k1
1694 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1}
1695 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1697 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1701 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1702 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d:
1704 ; CHECK-NEXT: kmovw %edi, %k1
1705 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1707 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1711 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1713 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1714 ; CHECK-LABEL: test_x86_avx512_psrai_q:
1716 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0
1718 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1722 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1723 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q:
1725 ; CHECK-NEXT: movzbl %dil, %eax
1726 ; CHECK-NEXT: kmovw %eax, %k1
1727 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1}
1728 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1730 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1734 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1735 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q:
1737 ; CHECK-NEXT: movzbl %dil, %eax
1738 ; CHECK-NEXT: kmovw %eax, %k1
1739 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1741 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1745 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1747 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
1748 ; CHECK-LABEL: test_x86_avx512_psll_d:
1750 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0
1752 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1756 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1757 ; CHECK-LABEL: test_x86_avx512_mask_psll_d:
1759 ; CHECK-NEXT: kmovw %edi, %k1
1760 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1}
1761 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1763 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1767 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1768 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d:
1770 ; CHECK-NEXT: kmovw %edi, %k1
1771 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
1773 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1777 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1779 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
1780 ; CHECK-LABEL: test_x86_avx512_psll_q:
1782 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0
1784 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1788 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1789 ; CHECK-LABEL: test_x86_avx512_mask_psll_q:
1791 ; CHECK-NEXT: movzbl %dil, %eax
1792 ; CHECK-NEXT: kmovw %eax, %k1
1793 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
1794 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1796 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1800 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1801 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q:
1803 ; CHECK-NEXT: movzbl %dil, %eax
1804 ; CHECK-NEXT: kmovw %eax, %k1
1805 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
1807 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1811 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1813 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
1814 ; CHECK-LABEL: test_x86_avx512_psrl_d:
1816 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0
1818 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1822 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1823 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d:
1825 ; CHECK-NEXT: kmovw %edi, %k1
1826 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
1827 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1829 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1833 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1834 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d:
1836 ; CHECK-NEXT: kmovw %edi, %k1
1837 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
1839 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1843 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1845 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
1846 ; CHECK-LABEL: test_x86_avx512_psrl_q:
1848 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
1850 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1854 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1855 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q:
1857 ; CHECK-NEXT: movzbl %dil, %eax
1858 ; CHECK-NEXT: kmovw %eax, %k1
1859 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
1860 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1862 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1866 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1867 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q:
1869 ; CHECK-NEXT: movzbl %dil, %eax
1870 ; CHECK-NEXT: kmovw %eax, %k1
1871 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
1873 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1877 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1879 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
1880 ; CHECK-LABEL: test_x86_avx512_psra_d:
1882 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0
1884 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1888 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1889 ; CHECK-LABEL: test_x86_avx512_mask_psra_d:
1891 ; CHECK-NEXT: kmovw %edi, %k1
1892 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
1893 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1895 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1899 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1900 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d:
1902 ; CHECK-NEXT: kmovw %edi, %k1
1903 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
1905 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1909 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1911 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
1912 ; CHECK-LABEL: test_x86_avx512_psra_q:
1914 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0
1916 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1920 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1921 ; CHECK-LABEL: test_x86_avx512_mask_psra_q:
1923 ; CHECK-NEXT: movzbl %dil, %eax
1924 ; CHECK-NEXT: kmovw %eax, %k1
1925 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
1926 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1928 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1932 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1933 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q:
1935 ; CHECK-NEXT: movzbl %dil, %eax
1936 ; CHECK-NEXT: kmovw %eax, %k1
1937 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
1939 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1943 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1945 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
1946 ; CHECK-LABEL: test_x86_avx512_psllv_d:
1948 ; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
1950 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1954 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1955 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d:
1957 ; CHECK-NEXT: kmovw %edi, %k1
1958 ; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
1959 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1961 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1965 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1966 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d:
1968 ; CHECK-NEXT: kmovw %edi, %k1
1969 ; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1971 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1975 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1977 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
1978 ; CHECK-LABEL: test_x86_avx512_psllv_q:
1980 ; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
1982 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1986 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1987 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q:
1989 ; CHECK-NEXT: movzbl %dil, %eax
1990 ; CHECK-NEXT: kmovw %eax, %k1
1991 ; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
1992 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1994 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1998 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1999 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q:
2001 ; CHECK-NEXT: movzbl %dil, %eax
2002 ; CHECK-NEXT: kmovw %eax, %k1
2003 ; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
2005 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
2009 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
2012 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
2013 ; CHECK-LABEL: test_x86_avx512_psrav_d:
2015 ; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0
2017 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
2021 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
2022 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d:
2024 ; CHECK-NEXT: kmovw %edi, %k1
2025 ; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
2026 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2028 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
2032 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
2033 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d:
2035 ; CHECK-NEXT: kmovw %edi, %k1
2036 ; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
2038 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
2042 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
2044 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
2045 ; CHECK-LABEL: test_x86_avx512_psrav_q:
2047 ; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0
2049 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
2053 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
2054 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q:
2056 ; CHECK-NEXT: movzbl %dil, %eax
2057 ; CHECK-NEXT: kmovw %eax, %k1
2058 ; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
2059 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2061 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
2065 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
2066 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q:
2068 ; CHECK-NEXT: movzbl %dil, %eax
2069 ; CHECK-NEXT: kmovw %eax, %k1
2070 ; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
2072 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
2076 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
2078 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
2079 ; CHECK-LABEL: test_x86_avx512_psrlv_d:
2081 ; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
2083 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
2087 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
2088 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d:
2090 ; CHECK-NEXT: kmovw %edi, %k1
2091 ; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
2092 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2094 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
2098 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
2099 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d:
2101 ; CHECK-NEXT: kmovw %edi, %k1
2102 ; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
2104 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
2108 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
2110 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
2111 ; CHECK-LABEL: test_x86_avx512_psrlv_q:
2113 ; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
2115 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
2119 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
2120 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q:
2122 ; CHECK-NEXT: movzbl %dil, %eax
2123 ; CHECK-NEXT: kmovw %eax, %k1
2124 ; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
2125 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2127 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
2131 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
2132 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q:
2134 ; CHECK-NEXT: movzbl %dil, %eax
2135 ; CHECK-NEXT: kmovw %eax, %k1
2136 ; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
2138 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
2142 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
2144 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
2145 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop:
2147 ; CHECK-NEXT: vpsrlvq (%rdi), %zmm0, %zmm0
2149 %b = load <8 x i64>, <8 x i64>* %ptr
2150 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2154 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2155 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2156 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
2158 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
2159 ; CHECK-LABEL: test_vsubps_rn:
2161 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
2163 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
2164 <16 x float> zeroinitializer, i16 -1, i32 0)
2165 ret <16 x float> %res
2168 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
2169 ; CHECK-LABEL: test_vsubps_rd:
2171 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
2173 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
2174 <16 x float> zeroinitializer, i16 -1, i32 1)
2175 ret <16 x float> %res
2178 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
2179 ; CHECK-LABEL: test_vsubps_ru:
2181 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
2183 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
2184 <16 x float> zeroinitializer, i16 -1, i32 2)
2185 ret <16 x float> %res
2188 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
2189 ; CHECK-LABEL: test_vsubps_rz:
2191 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
2193 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
2194 <16 x float> zeroinitializer, i16 -1, i32 3)
2195 ret <16 x float> %res
2198 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
2199 ; CHECK-LABEL: test_vmulps_rn:
2201 ; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0
2203 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2204 <16 x float> zeroinitializer, i16 -1, i32 0)
2205 ret <16 x float> %res
2208 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
2209 ; CHECK-LABEL: test_vmulps_rd:
2211 ; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0
2213 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2214 <16 x float> zeroinitializer, i16 -1, i32 1)
2215 ret <16 x float> %res
2218 define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
2219 ; CHECK-LABEL: test_vmulps_ru:
2221 ; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0
2223 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2224 <16 x float> zeroinitializer, i16 -1, i32 2)
2225 ret <16 x float> %res
2228 define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
2229 ; CHECK-LABEL: test_vmulps_rz:
2231 ; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0
2233 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2234 <16 x float> zeroinitializer, i16 -1, i32 3)
2235 ret <16 x float> %res
2239 define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2240 ; CHECK-LABEL: test_vmulps_mask_rn:
2242 ; CHECK-NEXT: kmovw %edi, %k1
2243 ; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2245 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2246 <16 x float> zeroinitializer, i16 %mask, i32 0)
2247 ret <16 x float> %res
2250 define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2251 ; CHECK-LABEL: test_vmulps_mask_rd:
2253 ; CHECK-NEXT: kmovw %edi, %k1
2254 ; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2256 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2257 <16 x float> zeroinitializer, i16 %mask, i32 1)
2258 ret <16 x float> %res
2261 define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2262 ; CHECK-LABEL: test_vmulps_mask_ru:
2264 ; CHECK-NEXT: kmovw %edi, %k1
2265 ; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2267 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2268 <16 x float> zeroinitializer, i16 %mask, i32 2)
2269 ret <16 x float> %res
2272 define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2273 ; CHECK-LABEL: test_vmulps_mask_rz:
2275 ; CHECK-NEXT: kmovw %edi, %k1
2276 ; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2278 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2279 <16 x float> zeroinitializer, i16 %mask, i32 3)
2280 ret <16 x float> %res
2283 ;; With Passthru value
2284 define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
2285 ; CHECK-LABEL: test_vmulps_mask_passthru_rn:
2287 ; CHECK-NEXT: kmovw %edi, %k1
2288 ; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2289 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2291 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2292 <16 x float> %passthru, i16 %mask, i32 0)
2293 ret <16 x float> %res
2296 define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
2297 ; CHECK-LABEL: test_vmulps_mask_passthru_rd:
2299 ; CHECK-NEXT: kmovw %edi, %k1
2300 ; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2301 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2303 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2304 <16 x float> %passthru, i16 %mask, i32 1)
2305 ret <16 x float> %res
2308 define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
2309 ; CHECK-LABEL: test_vmulps_mask_passthru_ru:
2311 ; CHECK-NEXT: kmovw %edi, %k1
2312 ; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2313 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2315 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2316 <16 x float> %passthru, i16 %mask, i32 2)
2317 ret <16 x float> %res
2320 define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
2321 ; CHECK-LABEL: test_vmulps_mask_passthru_rz:
2323 ; CHECK-NEXT: kmovw %edi, %k1
2324 ; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2325 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2327 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2328 <16 x float> %passthru, i16 %mask, i32 3)
2329 ret <16 x float> %res
2333 define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
2334 ; CHECK-LABEL: test_vmulpd_mask_rn:
2336 ; CHECK-NEXT: movzbl %dil, %eax
2337 ; CHECK-NEXT: kmovw %eax, %k1
2338 ; CHECK-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2340 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
2341 <8 x double> zeroinitializer, i8 %mask, i32 0)
2342 ret <8 x double> %res
2345 define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
2346 ; CHECK-LABEL: test_vmulpd_mask_rd:
2348 ; CHECK-NEXT: movzbl %dil, %eax
2349 ; CHECK-NEXT: kmovw %eax, %k1
2350 ; CHECK-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2352 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
2353 <8 x double> zeroinitializer, i8 %mask, i32 1)
2354 ret <8 x double> %res
2357 define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
2358 ; CHECK-LABEL: test_vmulpd_mask_ru:
2360 ; CHECK-NEXT: movzbl %dil, %eax
2361 ; CHECK-NEXT: kmovw %eax, %k1
2362 ; CHECK-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2364 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
2365 <8 x double> zeroinitializer, i8 %mask, i32 2)
2366 ret <8 x double> %res
2369 define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
2370 ; CHECK-LABEL: test_vmulpd_mask_rz:
2372 ; CHECK-NEXT: movzbl %dil, %eax
2373 ; CHECK-NEXT: kmovw %eax, %k1
2374 ; CHECK-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2376 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
2377 <8 x double> zeroinitializer, i8 %mask, i32 3)
2378 ret <8 x double> %res
2381 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
2382 ; CHECK-LABEL: test_xor_epi32:
2384 ; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0
2386 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
2387 ret < 16 x i32> %res
2390 define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2391 ; CHECK-LABEL: test_mask_xor_epi32:
2393 ; CHECK-NEXT: kmovw %edi, %k1
2394 ; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1}
2395 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2397 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2398 ret < 16 x i32> %res
2401 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2403 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
2404 ; CHECK-LABEL: test_or_epi32:
2406 ; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
2408 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
2409 ret < 16 x i32> %res
2412 define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2413 ; CHECK-LABEL: test_mask_or_epi32:
2415 ; CHECK-NEXT: kmovw %edi, %k1
2416 ; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1}
2417 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2419 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2420 ret < 16 x i32> %res
2423 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2425 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
2426 ; CHECK-LABEL: test_and_epi32:
2428 ; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0
2430 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
2431 ret < 16 x i32> %res
2434 define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2435 ; CHECK-LABEL: test_mask_and_epi32:
2437 ; CHECK-NEXT: kmovw %edi, %k1
2438 ; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1}
2439 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2441 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2442 ret < 16 x i32> %res
2445 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2447 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
2448 ; CHECK-LABEL: test_xor_epi64:
2450 ; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
2452 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
2456 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2457 ; CHECK-LABEL: test_mask_xor_epi64:
2459 ; CHECK-NEXT: movzbl %dil, %eax
2460 ; CHECK-NEXT: kmovw %eax, %k1
2461 ; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1}
2462 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2464 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2468 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2470 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
2471 ; CHECK-LABEL: test_or_epi64:
2473 ; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
2475 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
2479 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2480 ; CHECK-LABEL: test_mask_or_epi64:
2482 ; CHECK-NEXT: movzbl %dil, %eax
2483 ; CHECK-NEXT: kmovw %eax, %k1
2484 ; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1}
2485 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2487 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2491 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2493 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
2494 ; CHECK-LABEL: test_and_epi64:
2496 ; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
2498 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
2502 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2503 ; CHECK-LABEL: test_mask_and_epi64:
2505 ; CHECK-NEXT: movzbl %dil, %eax
2506 ; CHECK-NEXT: kmovw %eax, %k1
2507 ; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1}
2508 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2510 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2514 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2517 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
2518 ; CHECK-LABEL: test_mask_add_epi32_rr:
2520 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
2522 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2523 ret < 16 x i32> %res
2526 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2527 ; CHECK-LABEL: test_mask_add_epi32_rrk:
2529 ; CHECK-NEXT: kmovw %edi, %k1
2530 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1}
2531 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2533 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2534 ret < 16 x i32> %res
2537 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2538 ; CHECK-LABEL: test_mask_add_epi32_rrkz:
2540 ; CHECK-NEXT: kmovw %edi, %k1
2541 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
2543 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2544 ret < 16 x i32> %res
2547 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2548 ; CHECK-LABEL: test_mask_add_epi32_rm:
2550 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
2552 %b = load <16 x i32>, <16 x i32>* %ptr_b
2553 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2554 ret < 16 x i32> %res
2557 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2558 ; CHECK-LABEL: test_mask_add_epi32_rmk:
2560 ; CHECK-NEXT: kmovw %esi, %k1
2561 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1}
2562 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2564 %b = load <16 x i32>, <16 x i32>* %ptr_b
2565 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2566 ret < 16 x i32> %res
2569 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2570 ; CHECK-LABEL: test_mask_add_epi32_rmkz:
2572 ; CHECK-NEXT: kmovw %esi, %k1
2573 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
2575 %b = load <16 x i32>, <16 x i32>* %ptr_b
2576 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2577 ret < 16 x i32> %res
2580 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
2581 ; CHECK-LABEL: test_mask_add_epi32_rmb:
2583 ; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0
2585 %q = load i32, i32* %ptr_b
2586 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2587 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2588 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2589 ret < 16 x i32> %res
2592 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2593 ; CHECK-LABEL: test_mask_add_epi32_rmbk:
2595 ; CHECK-NEXT: kmovw %esi, %k1
2596 ; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
2597 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2599 %q = load i32, i32* %ptr_b
2600 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2601 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2602 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2603 ret < 16 x i32> %res
2606 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2607 ; CHECK-LABEL: test_mask_add_epi32_rmbkz:
2609 ; CHECK-NEXT: kmovw %esi, %k1
2610 ; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
2612 %q = load i32, i32* %ptr_b
2613 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2614 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2615 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2616 ret < 16 x i32> %res
2619 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2621 define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
2622 ; CHECK-LABEL: test_mask_sub_epi32_rr:
2624 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
2626 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2627 ret < 16 x i32> %res
2630 define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2631 ; CHECK-LABEL: test_mask_sub_epi32_rrk:
2633 ; CHECK-NEXT: kmovw %edi, %k1
2634 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1}
2635 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2637 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2638 ret < 16 x i32> %res
2641 define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2642 ; CHECK-LABEL: test_mask_sub_epi32_rrkz:
2644 ; CHECK-NEXT: kmovw %edi, %k1
2645 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z}
2647 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2648 ret < 16 x i32> %res
2651 define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2652 ; CHECK-LABEL: test_mask_sub_epi32_rm:
2654 ; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0
2656 %b = load <16 x i32>, <16 x i32>* %ptr_b
2657 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2658 ret < 16 x i32> %res
2661 define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2662 ; CHECK-LABEL: test_mask_sub_epi32_rmk:
2664 ; CHECK-NEXT: kmovw %esi, %k1
2665 ; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm1 {%k1}
2666 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2668 %b = load <16 x i32>, <16 x i32>* %ptr_b
2669 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2670 ret < 16 x i32> %res
2673 define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2674 ; CHECK-LABEL: test_mask_sub_epi32_rmkz:
2676 ; CHECK-NEXT: kmovw %esi, %k1
2677 ; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z}
2679 %b = load <16 x i32>, <16 x i32>* %ptr_b
2680 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2681 ret < 16 x i32> %res
2684 define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
2685 ; CHECK-LABEL: test_mask_sub_epi32_rmb:
2687 ; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0
2689 %q = load i32, i32* %ptr_b
2690 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2691 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2692 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2693 ret < 16 x i32> %res
2696 define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2697 ; CHECK-LABEL: test_mask_sub_epi32_rmbk:
2699 ; CHECK-NEXT: kmovw %esi, %k1
2700 ; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
2701 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2703 %q = load i32, i32* %ptr_b
2704 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2705 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2706 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2707 ret < 16 x i32> %res
2710 define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2711 ; CHECK-LABEL: test_mask_sub_epi32_rmbkz:
2713 ; CHECK-NEXT: kmovw %esi, %k1
2714 ; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
2716 %q = load i32, i32* %ptr_b
2717 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2718 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2719 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2720 ret < 16 x i32> %res
2723 declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2725 define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
2726 ; CHECK-LABEL: test_mask_add_epi64_rr:
2728 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
2730 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2734 define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2735 ; CHECK-LABEL: test_mask_add_epi64_rrk:
2737 ; CHECK-NEXT: movzbl %dil, %eax
2738 ; CHECK-NEXT: kmovw %eax, %k1
2739 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1}
2740 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2742 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2746 define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
2747 ; CHECK-LABEL: test_mask_add_epi64_rrkz:
2749 ; CHECK-NEXT: movzbl %dil, %eax
2750 ; CHECK-NEXT: kmovw %eax, %k1
2751 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z}
2753 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2757 define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
2758 ; CHECK-LABEL: test_mask_add_epi64_rm:
2760 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
2762 %b = load <8 x i64>, <8 x i64>* %ptr_b
2763 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2767 define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2768 ; CHECK-LABEL: test_mask_add_epi64_rmk:
2770 ; CHECK-NEXT: movzbl %sil, %eax
2771 ; CHECK-NEXT: kmovw %eax, %k1
2772 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm1 {%k1}
2773 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2775 %b = load <8 x i64>, <8 x i64>* %ptr_b
2776 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2780 define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
2781 ; CHECK-LABEL: test_mask_add_epi64_rmkz:
2783 ; CHECK-NEXT: movzbl %sil, %eax
2784 ; CHECK-NEXT: kmovw %eax, %k1
2785 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z}
2787 %b = load <8 x i64>, <8 x i64>* %ptr_b
2788 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2792 define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
2793 ; CHECK-LABEL: test_mask_add_epi64_rmb:
2795 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
2797 %q = load i64, i64* %ptr_b
2798 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2799 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2800 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2804 define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2805 ; CHECK-LABEL: test_mask_add_epi64_rmbk:
2807 ; CHECK-NEXT: movzbl %sil, %eax
2808 ; CHECK-NEXT: kmovw %eax, %k1
2809 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
2810 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2812 %q = load i64, i64* %ptr_b
2813 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2814 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2815 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2819 define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2820 ; CHECK-LABEL: test_mask_add_epi64_rmbkz:
2822 ; CHECK-NEXT: movzbl %sil, %eax
2823 ; CHECK-NEXT: kmovw %eax, %k1
2824 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
2826 %q = load i64, i64* %ptr_b
2827 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2828 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2829 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2833 declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2835 define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
2836 ; CHECK-LABEL: test_mask_sub_epi64_rr:
2838 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
2840 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2844 define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2845 ; CHECK-LABEL: test_mask_sub_epi64_rrk:
2847 ; CHECK-NEXT: movzbl %dil, %eax
2848 ; CHECK-NEXT: kmovw %eax, %k1
2849 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1}
2850 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2852 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2856 define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
2857 ; CHECK-LABEL: test_mask_sub_epi64_rrkz:
2859 ; CHECK-NEXT: movzbl %dil, %eax
2860 ; CHECK-NEXT: kmovw %eax, %k1
2861 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z}
2863 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2867 define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
2868 ; CHECK-LABEL: test_mask_sub_epi64_rm:
2870 ; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0
2872 %b = load <8 x i64>, <8 x i64>* %ptr_b
2873 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2877 define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2878 ; CHECK-LABEL: test_mask_sub_epi64_rmk:
2880 ; CHECK-NEXT: movzbl %sil, %eax
2881 ; CHECK-NEXT: kmovw %eax, %k1
2882 ; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm1 {%k1}
2883 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2885 %b = load <8 x i64>, <8 x i64>* %ptr_b
2886 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2890 define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
2891 ; CHECK-LABEL: test_mask_sub_epi64_rmkz:
2893 ; CHECK-NEXT: movzbl %sil, %eax
2894 ; CHECK-NEXT: kmovw %eax, %k1
2895 ; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z}
2897 %b = load <8 x i64>, <8 x i64>* %ptr_b
2898 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2902 define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
2903 ; CHECK-LABEL: test_mask_sub_epi64_rmb:
2905 ; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0
2907 %q = load i64, i64* %ptr_b
2908 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2909 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2910 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2914 define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2915 ; CHECK-LABEL: test_mask_sub_epi64_rmbk:
2917 ; CHECK-NEXT: movzbl %sil, %eax
2918 ; CHECK-NEXT: kmovw %eax, %k1
2919 ; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
2920 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2922 %q = load i64, i64* %ptr_b
2923 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2924 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2925 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2929 define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2930 ; CHECK-LABEL: test_mask_sub_epi64_rmbkz:
2932 ; CHECK-NEXT: movzbl %sil, %eax
2933 ; CHECK-NEXT: kmovw %eax, %k1
2934 ; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
2936 %q = load i64, i64* %ptr_b
2937 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2938 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2939 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2943 declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2945 define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
2946 ; CHECK-LABEL: test_mask_mul_epi32_rr:
2948 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0
2950 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2954 define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
2955 ; CHECK-LABEL: test_mask_mul_epi32_rrk:
2957 ; CHECK-NEXT: movzbl %dil, %eax
2958 ; CHECK-NEXT: kmovw %eax, %k1
2959 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1}
2960 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2962 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2966 define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
2967 ; CHECK-LABEL: test_mask_mul_epi32_rrkz:
2969 ; CHECK-NEXT: movzbl %dil, %eax
2970 ; CHECK-NEXT: kmovw %eax, %k1
2971 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z}
2973 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2977 define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2978 ; CHECK-LABEL: test_mask_mul_epi32_rm:
2980 ; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0
2982 %b = load <16 x i32>, <16 x i32>* %ptr_b
2983 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2987 define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2988 ; CHECK-LABEL: test_mask_mul_epi32_rmk:
2990 ; CHECK-NEXT: movzbl %sil, %eax
2991 ; CHECK-NEXT: kmovw %eax, %k1
2992 ; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1}
2993 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2995 %b = load <16 x i32>, <16 x i32>* %ptr_b
2996 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3000 define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
3001 ; CHECK-LABEL: test_mask_mul_epi32_rmkz:
3003 ; CHECK-NEXT: movzbl %sil, %eax
3004 ; CHECK-NEXT: kmovw %eax, %k1
3005 ; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z}
3007 %b = load <16 x i32>, <16 x i32>* %ptr_b
3008 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3012 define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
3013 ; CHECK-LABEL: test_mask_mul_epi32_rmb:
3015 ; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0
3017 %q = load i64, i64* %ptr_b
3018 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3019 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3020 %b = bitcast <8 x i64> %b64 to <16 x i32>
3021 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3025 define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
3026 ; CHECK-LABEL: test_mask_mul_epi32_rmbk:
3028 ; CHECK-NEXT: movzbl %sil, %eax
3029 ; CHECK-NEXT: kmovw %eax, %k1
3030 ; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
3031 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3033 %q = load i64, i64* %ptr_b
3034 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3035 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3036 %b = bitcast <8 x i64> %b64 to <16 x i32>
3037 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3041 define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
3042 ; CHECK-LABEL: test_mask_mul_epi32_rmbkz:
3044 ; CHECK-NEXT: movzbl %sil, %eax
3045 ; CHECK-NEXT: kmovw %eax, %k1
3046 ; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
3048 %q = load i64, i64* %ptr_b
3049 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3050 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3051 %b = bitcast <8 x i64> %b64 to <16 x i32>
3052 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3056 declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
3058 define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
3059 ; CHECK-LABEL: test_mask_mul_epu32_rr:
3061 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
3063 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3067 define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
3068 ; CHECK-LABEL: test_mask_mul_epu32_rrk:
3070 ; CHECK-NEXT: movzbl %dil, %eax
3071 ; CHECK-NEXT: kmovw %eax, %k1
3072 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1}
3073 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3075 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3079 define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
3080 ; CHECK-LABEL: test_mask_mul_epu32_rrkz:
3082 ; CHECK-NEXT: movzbl %dil, %eax
3083 ; CHECK-NEXT: kmovw %eax, %k1
3084 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z}
3086 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3090 define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
3091 ; CHECK-LABEL: test_mask_mul_epu32_rm:
3093 ; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0
3095 %b = load <16 x i32>, <16 x i32>* %ptr_b
3096 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3100 define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
3101 ; CHECK-LABEL: test_mask_mul_epu32_rmk:
3103 ; CHECK-NEXT: movzbl %sil, %eax
3104 ; CHECK-NEXT: kmovw %eax, %k1
3105 ; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1}
3106 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3108 %b = load <16 x i32>, <16 x i32>* %ptr_b
3109 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3113 define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
3114 ; CHECK-LABEL: test_mask_mul_epu32_rmkz:
3116 ; CHECK-NEXT: movzbl %sil, %eax
3117 ; CHECK-NEXT: kmovw %eax, %k1
3118 ; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z}
3120 %b = load <16 x i32>, <16 x i32>* %ptr_b
3121 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3125 define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
3126 ; CHECK-LABEL: test_mask_mul_epu32_rmb:
3128 ; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0
3130 %q = load i64, i64* %ptr_b
3131 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3132 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3133 %b = bitcast <8 x i64> %b64 to <16 x i32>
3134 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3138 define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
3139 ; CHECK-LABEL: test_mask_mul_epu32_rmbk:
3141 ; CHECK-NEXT: movzbl %sil, %eax
3142 ; CHECK-NEXT: kmovw %eax, %k1
3143 ; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
3144 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3146 %q = load i64, i64* %ptr_b
3147 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3148 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3149 %b = bitcast <8 x i64> %b64 to <16 x i32>
3150 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3154 define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
3155 ; CHECK-LABEL: test_mask_mul_epu32_rmbkz:
3157 ; CHECK-NEXT: movzbl %sil, %eax
3158 ; CHECK-NEXT: kmovw %eax, %k1
3159 ; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
3161 %q = load i64, i64* %ptr_b
3162 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3163 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3164 %b = bitcast <8 x i64> %b64 to <16 x i32>
3165 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3169 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
3171 define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
3172 ; CHECK-LABEL: test_mask_mullo_epi32_rr_512:
3174 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
3176 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
3180 define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
3181 ; CHECK-LABEL: test_mask_mullo_epi32_rrk_512:
3183 ; CHECK-NEXT: kmovw %edi, %k1
3184 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1}
3185 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3187 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
3188 ret < 16 x i32> %res
3191 define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
3192 ; CHECK-LABEL: test_mask_mullo_epi32_rrkz_512:
3194 ; CHECK-NEXT: kmovw %edi, %k1
3195 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z}
3197 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
3198 ret < 16 x i32> %res
3201 define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
3202 ; CHECK-LABEL: test_mask_mullo_epi32_rm_512:
3204 ; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0
3206 %b = load <16 x i32>, <16 x i32>* %ptr_b
3207 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
3208 ret < 16 x i32> %res
3211 define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
3212 ; CHECK-LABEL: test_mask_mullo_epi32_rmk_512:
3214 ; CHECK-NEXT: kmovw %esi, %k1
3215 ; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm1 {%k1}
3216 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3218 %b = load <16 x i32>, <16 x i32>* %ptr_b
3219 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
3220 ret < 16 x i32> %res
3223 define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
3224 ; CHECK-LABEL: test_mask_mullo_epi32_rmkz_512:
3226 ; CHECK-NEXT: kmovw %esi, %k1
3227 ; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z}
3229 %b = load <16 x i32>, <16 x i32>* %ptr_b
3230 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
3231 ret < 16 x i32> %res
3234 define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
3235 ; CHECK-LABEL: test_mask_mullo_epi32_rmb_512:
3237 ; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0
3239 %q = load i32, i32* %ptr_b
3240 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
3241 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
3242 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
3243 ret < 16 x i32> %res
3246 define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
3247 ; CHECK-LABEL: test_mask_mullo_epi32_rmbk_512:
3249 ; CHECK-NEXT: kmovw %esi, %k1
3250 ; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1}
3251 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3253 %q = load i32, i32* %ptr_b
3254 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
3255 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
3256 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
3257 ret < 16 x i32> %res
3260 define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
3261 ; CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512:
3263 ; CHECK-NEXT: kmovw %esi, %k1
3264 ; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
3266 %q = load i32, i32* %ptr_b
3267 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
3268 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
3269 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
3270 ret < 16 x i32> %res
3273 declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3275 define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3276 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
3278 ; CHECK-NEXT: kmovw %edi, %k1
3279 ; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3281 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
3282 ret <16 x float> %res
3284 define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3285 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
3287 ; CHECK-NEXT: kmovw %edi, %k1
3288 ; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3290 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
3291 ret <16 x float> %res
3293 define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3294 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
3296 ; CHECK-NEXT: kmovw %edi, %k1
3297 ; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3299 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
3300 ret <16 x float> %res
3303 define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3304 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
3306 ; CHECK-NEXT: kmovw %edi, %k1
3307 ; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3309 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
3310 ret <16 x float> %res
3314 define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3315 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_current:
3317 ; CHECK-NEXT: kmovw %edi, %k1
3318 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
3320 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
3321 ret <16 x float> %res
3324 define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3325 ; CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae:
3327 ; CHECK-NEXT: kmovw %edi, %k1
3328 ; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3329 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3331 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
3332 ret <16 x float> %res
3334 define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3335 ; CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae:
3337 ; CHECK-NEXT: kmovw %edi, %k1
3338 ; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3339 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3341 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
3342 ret <16 x float> %res
3344 define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3345 ; CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae:
3347 ; CHECK-NEXT: kmovw %edi, %k1
3348 ; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3349 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3351 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
3352 ret <16 x float> %res
3355 define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3356 ; CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae:
3358 ; CHECK-NEXT: kmovw %edi, %k1
3359 ; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3360 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3362 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
3363 ret <16 x float> %res
3367 define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3368 ; CHECK-LABEL: test_mm512_mask_add_round_ps_current:
3370 ; CHECK-NEXT: kmovw %edi, %k1
3371 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1}
3372 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3374 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
3375 ret <16 x float> %res
3379 define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3380 ; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
3382 ; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
3384 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
3385 ret <16 x float> %res
3387 define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3388 ; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
3390 ; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
3392 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
3393 ret <16 x float> %res
3395 define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3396 ; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
3398 ; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
3400 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
3401 ret <16 x float> %res
3404 define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3405 ; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
3407 ; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
3409 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
3410 ret <16 x float> %res
3413 define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3414 ; CHECK-LABEL: test_mm512_add_round_ps_current:
3416 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
3418 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
3419 ret <16 x float> %res
3421 declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
3423 define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3424 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
3426 ; CHECK-NEXT: kmovw %edi, %k1
3427 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3428 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3430 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
3431 ret <16 x float> %res
3433 define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3434 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
3436 ; CHECK-NEXT: kmovw %edi, %k1
3437 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3438 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3440 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
3441 ret <16 x float> %res
3443 define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3444 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
3446 ; CHECK-NEXT: kmovw %edi, %k1
3447 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3448 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3450 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
3451 ret <16 x float> %res
3454 define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3455 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
3457 ; CHECK-NEXT: kmovw %edi, %k1
3458 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3459 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3461 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
3462 ret <16 x float> %res
3466 define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3467 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_current:
3469 ; CHECK-NEXT: kmovw %edi, %k1
3470 ; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1}
3471 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3473 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
3474 ret <16 x float> %res
3477 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3478 ; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae:
3480 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
3482 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
3483 ret <16 x float> %res
3485 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3486 ; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
3488 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
3490 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
3491 ret <16 x float> %res
3493 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3494 ; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
3496 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
3498 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
3499 ret <16 x float> %res
3502 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3503 ; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae:
3505 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
3507 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
3508 ret <16 x float> %res
3511 define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3512 ; CHECK-LABEL: test_mm512_sub_round_ps_current:
3514 ; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0
3516 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
3517 ret <16 x float> %res
3520 define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3521 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
3523 ; CHECK-NEXT: kmovw %edi, %k1
3524 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3526 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
3527 ret <16 x float> %res
3529 define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3530 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
3532 ; CHECK-NEXT: kmovw %edi, %k1
3533 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3535 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
3536 ret <16 x float> %res
3538 define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3539 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
3541 ; CHECK-NEXT: kmovw %edi, %k1
3542 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3544 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
3545 ret <16 x float> %res
3548 define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3549 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
3551 ; CHECK-NEXT: kmovw %edi, %k1
3552 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3554 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
3555 ret <16 x float> %res
3559 define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3560 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_current:
3562 ; CHECK-NEXT: kmovw %edi, %k1
3563 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
3565 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
3566 ret <16 x float> %res
3569 define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3570 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae:
3572 ; CHECK-NEXT: kmovw %edi, %k1
3573 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3574 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3576 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
3577 ret <16 x float> %res
3579 define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3580 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae:
3582 ; CHECK-NEXT: kmovw %edi, %k1
3583 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3584 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3586 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
3587 ret <16 x float> %res
3589 define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3590 ; CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae:
3592 ; CHECK-NEXT: kmovw %edi, %k1
3593 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3594 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3596 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
3597 ret <16 x float> %res
3600 define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3601 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae:
3603 ; CHECK-NEXT: kmovw %edi, %k1
3604 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3605 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3607 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
3608 ret <16 x float> %res
3612 define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3613 ; CHECK-LABEL: test_mm512_mask_div_round_ps_current:
3615 ; CHECK-NEXT: kmovw %edi, %k1
3616 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1}
3617 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3619 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
3620 ret <16 x float> %res
3624 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3625 ; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
3627 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
3629 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
3630 ret <16 x float> %res
3632 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3633 ; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
3635 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
3637 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
3638 ret <16 x float> %res
3640 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3641 ; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
3643 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
3645 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
3646 ret <16 x float> %res
3649 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3650 ; CHECK-LABEL: test_mm512_div_round_ps_rz_sae:
3652 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
3654 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
3655 ret <16 x float> %res
3658 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3659 ; CHECK-LABEL: test_mm512_div_round_ps_current:
3661 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0
3663 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
3664 ret <16 x float> %res
3666 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
3668 define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3669 ; CHECK-LABEL: test_mm512_maskz_min_round_ps_sae:
3671 ; CHECK-NEXT: kmovw %edi, %k1
3672 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3674 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
3675 ret <16 x float> %res
3678 define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3679 ; CHECK-LABEL: test_mm512_maskz_min_round_ps_current:
3681 ; CHECK-NEXT: kmovw %edi, %k1
3682 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
3684 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
3685 ret <16 x float> %res
3688 define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3689 ; CHECK-LABEL: test_mm512_mask_min_round_ps_sae:
3691 ; CHECK-NEXT: kmovw %edi, %k1
3692 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
3693 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3695 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
3696 ret <16 x float> %res
3699 define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3700 ; CHECK-LABEL: test_mm512_mask_min_round_ps_current:
3702 ; CHECK-NEXT: kmovw %edi, %k1
3703 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1}
3704 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3706 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
3707 ret <16 x float> %res
3710 define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3711 ; CHECK-LABEL: test_mm512_min_round_ps_sae:
3713 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0
3715 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
3716 ret <16 x float> %res
3719 define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3720 ; CHECK-LABEL: test_mm512_min_round_ps_current:
3722 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0
3724 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
3725 ret <16 x float> %res
3727 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
3729 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3730 ; CHECK-LABEL: test_mm512_maskz_max_round_ps_sae:
3732 ; CHECK-NEXT: kmovw %edi, %k1
3733 ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3735 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
3736 ret <16 x float> %res
3739 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3740 ; CHECK-LABEL: test_mm512_maskz_max_round_ps_current:
3742 ; CHECK-NEXT: kmovw %edi, %k1
3743 ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
3745 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
3746 ret <16 x float> %res
3749 define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3750 ; CHECK-LABEL: test_mm512_mask_max_round_ps_sae:
3752 ; CHECK-NEXT: kmovw %edi, %k1
3753 ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
3754 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3756 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
3757 ret <16 x float> %res
3760 define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3761 ; CHECK-LABEL: test_mm512_mask_max_round_ps_current:
3763 ; CHECK-NEXT: kmovw %edi, %k1
3764 ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
3765 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3767 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
3768 ret <16 x float> %res
3771 define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3772 ; CHECK-LABEL: test_mm512_max_round_ps_sae:
3774 ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0
3776 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
3777 ret <16 x float> %res
3780 define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3781 ; CHECK-LABEL: test_mm512_max_round_ps_current:
3783 ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0
3785 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
3786 ret <16 x float> %res
3788 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
3790 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
3792 define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3793 ; CHECK-LABEL: test_mask_add_ss_rn:
3795 ; CHECK-NEXT: andl $1, %edi
3796 ; CHECK-NEXT: kmovw %edi, %k1
3797 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3798 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3800 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
3801 ret <4 x float> %res
3804 define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3805 ; CHECK-LABEL: test_mask_add_ss_rd:
3807 ; CHECK-NEXT: andl $1, %edi
3808 ; CHECK-NEXT: kmovw %edi, %k1
3809 ; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3810 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3812 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
3813 ret <4 x float> %res
3816 define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3817 ; CHECK-LABEL: test_mask_add_ss_ru:
3819 ; CHECK-NEXT: andl $1, %edi
3820 ; CHECK-NEXT: kmovw %edi, %k1
3821 ; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3822 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3824 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
3825 ret <4 x float> %res
3828 define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3829 ; CHECK-LABEL: test_mask_add_ss_rz:
3831 ; CHECK-NEXT: andl $1, %edi
3832 ; CHECK-NEXT: kmovw %edi, %k1
3833 ; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3834 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3836 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
3837 ret <4 x float> %res
3840 define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3841 ; CHECK-LABEL: test_mask_add_ss_current:
3843 ; CHECK-NEXT: andl $1, %edi
3844 ; CHECK-NEXT: kmovw %edi, %k1
3845 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
3846 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3848 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
3849 ret <4 x float> %res
3852 define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
3853 ; CHECK-LABEL: test_maskz_add_ss_rn:
3855 ; CHECK-NEXT: andl $1, %edi
3856 ; CHECK-NEXT: kmovw %edi, %k1
3857 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
3859 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
3860 ret <4 x float> %res
3863 define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
3864 ; CHECK-LABEL: test_add_ss_rn:
3866 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
3868 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
3869 ret <4 x float> %res
3872 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
3874 define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
3875 ; CHECK-LABEL: test_mask_add_sd_rn:
3877 ; CHECK-NEXT: andl $1, %edi
3878 ; CHECK-NEXT: kmovw %edi, %k1
3879 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3880 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3882 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
3883 ret <2 x double> %res
3886 define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
3887 ; CHECK-LABEL: test_mask_add_sd_rd:
3889 ; CHECK-NEXT: andl $1, %edi
3890 ; CHECK-NEXT: kmovw %edi, %k1
3891 ; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3892 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3894 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
3895 ret <2 x double> %res
3898 define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
3899 ; CHECK-LABEL: test_mask_add_sd_ru:
3901 ; CHECK-NEXT: andl $1, %edi
3902 ; CHECK-NEXT: kmovw %edi, %k1
3903 ; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3904 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3906 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
3907 ret <2 x double> %res
3910 define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
3911 ; CHECK-LABEL: test_mask_add_sd_rz:
3913 ; CHECK-NEXT: andl $1, %edi
3914 ; CHECK-NEXT: kmovw %edi, %k1
3915 ; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3916 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3918 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
3919 ret <2 x double> %res
3922 define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
3923 ; CHECK-LABEL: test_mask_add_sd_current:
3925 ; CHECK-NEXT: andl $1, %edi
3926 ; CHECK-NEXT: kmovw %edi, %k1
3927 ; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
3928 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3930 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
3931 ret <2 x double> %res
3934 define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
3935 ; CHECK-LABEL: test_maskz_add_sd_rn:
3937 ; CHECK-NEXT: andl $1, %edi
3938 ; CHECK-NEXT: kmovw %edi, %k1
3939 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
3941 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
3942 ret <2 x double> %res
3945 define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
3946 ; CHECK-LABEL: test_add_sd_rn:
3948 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
3950 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
3951 ret <2 x double> %res
3954 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
3956 define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3957 ; CHECK-LABEL: test_mask_max_ss_sae:
3959 ; CHECK-NEXT: andl $1, %edi
3960 ; CHECK-NEXT: kmovw %edi, %k1
3961 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
3962 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3964 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
3965 ret <4 x float> %res
3968 define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
3969 ; CHECK-LABEL: test_maskz_max_ss_sae:
3971 ; CHECK-NEXT: andl $1, %edi
3972 ; CHECK-NEXT: kmovw %edi, %k1
3973 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
3975 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
3976 ret <4 x float> %res
3979 define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
3980 ; CHECK-LABEL: test_max_ss_sae:
3982 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0
3984 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
3985 ret <4 x float> %res
3988 define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3989 ; CHECK-LABEL: test_mask_max_ss:
3991 ; CHECK-NEXT: andl $1, %edi
3992 ; CHECK-NEXT: kmovw %edi, %k1
3993 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
3994 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3996 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
3997 ret <4 x float> %res
4000 define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4001 ; CHECK-LABEL: test_maskz_max_ss:
4003 ; CHECK-NEXT: andl $1, %edi
4004 ; CHECK-NEXT: kmovw %edi, %k1
4005 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
4007 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
4008 ret <4 x float> %res
4011 define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
4012 ; CHECK-LABEL: test_max_ss:
4014 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0
4016 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
4017 ret <4 x float> %res
4019 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
4021 define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
4022 ; CHECK-LABEL: test_mask_max_sd_sae:
4024 ; CHECK-NEXT: andl $1, %edi
4025 ; CHECK-NEXT: kmovw %edi, %k1
4026 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
4027 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
4029 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
4030 ret <2 x double> %res
4033 define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
4034 ; CHECK-LABEL: test_maskz_max_sd_sae:
4036 ; CHECK-NEXT: andl $1, %edi
4037 ; CHECK-NEXT: kmovw %edi, %k1
4038 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
4040 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
4041 ret <2 x double> %res
4044 define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
4045 ; CHECK-LABEL: test_max_sd_sae:
4047 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
4049 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
4050 ret <2 x double> %res
4053 define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
4054 ; CHECK-LABEL: test_mask_max_sd:
4056 ; CHECK-NEXT: andl $1, %edi
4057 ; CHECK-NEXT: kmovw %edi, %k1
4058 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
4059 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
4061 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
4062 ret <2 x double> %res
4065 define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
4066 ; CHECK-LABEL: test_maskz_max_sd:
4068 ; CHECK-NEXT: andl $1, %edi
4069 ; CHECK-NEXT: kmovw %edi, %k1
4070 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
4072 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
4073 ret <2 x double> %res
4076 define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
4077 ; CHECK-LABEL: test_max_sd:
4079 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
4081 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
4082 ret <2 x double> %res
4085 define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
4086 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
4088 ; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
4090 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1]
4091 ret <2 x double> %res
4093 declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone
4095 define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
4096 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
4098 ; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
4100 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1]
4101 ret <2 x double> %res
4103 declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
4105 define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
4106 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
4108 ; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
4110 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1]
4111 ret <4 x float> %res
4113 declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
4115 define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
4116 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
4118 ; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
4120 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1]
4121 ret <4 x float> %res
4123 declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
4125 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
4126 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
4128 ; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
4131 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
4132 ret <4 x float> %res
4135 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
4136 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
4138 ; CHECK-NEXT: movl (%rdi), %eax
4139 ; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
4142 %b = load i32, i32* %ptr
4143 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
4144 ret <4 x float> %res
4147 define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
4148 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
4150 ; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
4153 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
4154 ret <4 x float> %res
4157 define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
4158 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
4160 ; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
4163 %b = load i32, i32* %ptr
4164 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
4165 ret <4 x float> %res
4167 declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
4169 define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
4170 ; CHECK-LABEL: _mm_cvt_roundu64_ss:
4172 ; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
4175 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1]
4176 ret <4 x float> %res
4179 define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
4180 ; CHECK-LABEL: _mm_cvtu64_ss:
4182 ; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
4185 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1]
4186 ret <4 x float> %res
4188 declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone
4190 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
4191 ; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
4193 ; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
4196 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1]
4197 ret <2 x double> %res
4199 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
4201 define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
4202 ; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
4204 ; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
4207 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1]
4208 ret <2 x double> %res
4211 define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
4212 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
4214 ; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
4217 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1]
4218 ret <2 x double> %res
4220 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
4222 define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
4223 ; CHECK-LABEL: test_vpmaxq:
4225 ; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
4227 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
4228 <8 x i64>zeroinitializer, i8 -1)
4231 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4233 define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
4234 ; CHECK-LABEL: test_vpminud:
4236 ; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm0
4238 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
4239 <16 x i32>zeroinitializer, i16 -1)
4242 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4244 define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
4245 ; CHECK-LABEL: test_vpmaxsd:
4247 ; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
4249 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
4250 <16 x i32>zeroinitializer, i16 -1)
4253 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4255 define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4256 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
4258 ; CHECK-NEXT: kmovw %edi, %k1
4259 ; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1}
4260 ; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
4261 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4263 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4264 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4265 %res2 = add <16 x i32> %res, %res1
4266 ret <16 x i32> %res2
4269 define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4270 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
4272 ; CHECK-NEXT: movzbl %dil, %eax
4273 ; CHECK-NEXT: kmovw %eax, %k1
4274 ; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm2 {%k1}
4275 ; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
4276 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4278 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4279 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4280 %res2 = add <8 x i64> %res, %res1
4284 declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4286 define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4287 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
4289 ; CHECK-NEXT: kmovw %edi, %k1
4290 ; CHECK-NEXT: vpmaxud %zmm1, %zmm0, %zmm2 {%k1}
4291 ; CHECK-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
4292 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4294 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4295 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4296 %res2 = add <16 x i32> %res, %res1
4297 ret <16 x i32> %res2
4300 declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4302 define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4303 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
4305 ; CHECK-NEXT: movzbl %dil, %eax
4306 ; CHECK-NEXT: kmovw %eax, %k1
4307 ; CHECK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm2 {%k1}
4308 ; CHECK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
4309 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4311 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4312 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4313 %res2 = add <8 x i64> %res, %res1
4317 declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4319 define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4320 ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_d_512:
4322 ; CHECK-NEXT: kmovw %edi, %k1
4323 ; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2 {%k1}
4324 ; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm0
4325 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4327 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4328 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4329 %res2 = add <16 x i32> %res, %res1
4330 ret <16 x i32> %res2
4333 declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4335 define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4336 ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_q_512:
4338 ; CHECK-NEXT: movzbl %dil, %eax
4339 ; CHECK-NEXT: kmovw %eax, %k1
4340 ; CHECK-NEXT: vpminsq %zmm1, %zmm0, %zmm2 {%k1}
4341 ; CHECK-NEXT: vpminsq %zmm1, %zmm0, %zmm0
4342 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4344 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4345 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4346 %res2 = add <8 x i64> %res, %res1
4350 define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4351 ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_d_512:
4353 ; CHECK-NEXT: kmovw %edi, %k1
4354 ; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm2 {%k1}
4355 ; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm0
4356 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4358 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4359 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4360 %res2 = add <16 x i32> %res, %res1
4361 ret <16 x i32> %res2
4364 declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4366 define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4367 ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_q_512:
4369 ; CHECK-NEXT: movzbl %dil, %eax
4370 ; CHECK-NEXT: kmovw %eax, %k1
4371 ; CHECK-NEXT: vpminuq %zmm1, %zmm0, %zmm2 {%k1}
4372 ; CHECK-NEXT: vpminuq %zmm1, %zmm0, %zmm0
4373 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4375 %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4376 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4377 %res2 = add <8 x i64> %res, %res1
4381 declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4383 define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
4384 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
4386 ; CHECK-NEXT: kmovw %esi, %k1
4387 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4388 ; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1}
4389 ; CHECK-NEXT: vpermi2d %zmm2, %zmm0, %zmm1
4390 ; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
4392 %x2 = load <16 x i32>, <16 x i32>* %x2p
4393 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4394 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
4395 %res2 = add <16 x i32> %res, %res1
4396 ret <16 x i32> %res2
4399 declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
4401 define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
4402 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
4404 ; CHECK-NEXT: movzbl %dil, %eax
4405 ; CHECK-NEXT: kmovw %eax, %k1
4406 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4407 ; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3 {%k1}
4408 ; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1
4409 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0
4411 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
4412 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
4413 %res2 = fadd <8 x double> %res, %res1
4414 ret <8 x double> %res2
4417 declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
4419 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
4420 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
4422 ; CHECK-NEXT: kmovw %edi, %k1
4423 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4424 ; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3 {%k1}
4425 ; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1
4426 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
4428 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
4429 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
4430 %res2 = fadd <16 x float> %res, %res1
4431 ret <16 x float> %res2
4434 declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4436 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4437 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
4439 ; CHECK-NEXT: movzbl %dil, %eax
4440 ; CHECK-NEXT: kmovw %eax, %k1
4441 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4442 ; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3 {%k1}
4443 ; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
4444 ; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
4446 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4447 %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4448 %res2 = add <8 x i64> %res, %res1
4452 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4454 define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
4455 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
4457 ; CHECK-NEXT: kmovw %esi, %k1
4458 ; CHECK-NEXT: vmovaps %zmm1, %zmm2
4459 ; CHECK-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z}
4460 ; CHECK-NEXT: vpermt2d %zmm1, %zmm0, %zmm1
4461 ; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm0
4463 %x2 = load <16 x i32>, <16 x i32>* %x2p
4464 %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4465 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x1, i16 -1)
4466 %res2 = add <16 x i32> %res, %res1
4467 ret <16 x i32> %res2
4470 declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
4472 define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
4473 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
4475 ; CHECK-NEXT: movzbl %sil, %eax
4476 ; CHECK-NEXT: kmovw %eax, %k1
4477 ; CHECK-NEXT: vmovaps %zmm1, %zmm2
4478 ; CHECK-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z}
4479 ; CHECK-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1
4480 ; CHECK-NEXT: vaddpd %zmm1, %zmm2, %zmm0
4482 %x2s = load double, double* %x2ptr
4483 %x2ins = insertelement <8 x double> undef, double %x2s, i32 0
4484 %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
4485 %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
4486 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x1, i8 -1)
4487 %res2 = fadd <8 x double> %res, %res1
4488 ret <8 x double> %res2
4491 declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
4493 define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
4494 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
4496 ; CHECK-NEXT: kmovw %edi, %k1
4497 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4498 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 {%k1} {z}
4499 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1
4500 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
4502 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
4503 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
4504 %res2 = fadd <16 x float> %res, %res1
4505 ret <16 x float> %res2
4509 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4511 define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4512 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
4514 ; CHECK-NEXT: movzbl %dil, %eax
4515 ; CHECK-NEXT: kmovw %eax, %k1
4516 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4517 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 {%k1} {z}
4518 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1
4519 ; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
4521 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4522 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4523 %res2 = add <8 x i64> %res, %res1
4527 declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4529 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4530 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
4532 ; CHECK-NEXT: kmovw %edi, %k1
4533 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4534 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 {%k1}
4535 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1
4536 ; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
4538 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4539 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4540 %res2 = add <16 x i32> %res, %res1
4541 ret <16 x i32> %res2
4544 declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
4545 define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
4546 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_pd_512:
4548 ; CHECK-NEXT: movzbl %dil, %eax
4549 ; CHECK-NEXT: kmovw %eax, %k1
4550 ; CHECK-NEXT: vscalefpd {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
4551 ; CHECK-NEXT: vscalefpd {rn-sae}, %zmm1, %zmm0, %zmm0
4552 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4554 %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3)
4555 %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
4556 %res2 = fadd <8 x double> %res, %res1
4557 ret <8 x double> %res2
4560 declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
4561 define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
4562 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ps_512:
4564 ; CHECK-NEXT: kmovw %edi, %k1
4565 ; CHECK-NEXT: vscalefps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
4566 ; CHECK-NEXT: vscalefps {rn-sae}, %zmm1, %zmm0, %zmm0
4567 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4569 %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2)
4570 %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
4571 %res2 = fadd <16 x float> %res, %res1
4572 ret <16 x float> %res2
4575 declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
4577 define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
4578 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
4580 ; CHECK-NEXT: movzbl %dil, %eax
4581 ; CHECK-NEXT: kmovw %eax, %k1
4582 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7]
4583 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
4584 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4586 %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
4587 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
4588 %res2 = fadd <8 x double> %res, %res1
4589 ret <8 x double> %res2
4592 declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
4594 define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
4595 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
4597 ; CHECK-NEXT: kmovw %edi, %k1
4598 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15]
4599 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
4600 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4602 %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
4603 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
4604 %res2 = fadd <16 x float> %res, %res1
4605 ret <16 x float> %res2
4608 declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
4610 define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
4611 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
4613 ; CHECK-NEXT: movzbl %dil, %eax
4614 ; CHECK-NEXT: kmovw %eax, %k1
4615 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6]
4616 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
4617 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4619 %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
4620 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
4621 %res2 = fadd <8 x double> %res, %res1
4622 ret <8 x double> %res2
4625 declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
4627 define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
4628 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
4630 ; CHECK-NEXT: kmovw %edi, %k1
4631 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13]
4632 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
4633 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4635 %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
4636 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
4637 %res2 = fadd <16 x float> %res, %res1
4638 ret <16 x float> %res2
4641 declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4643 define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4644 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
4646 ; CHECK-NEXT: movzbl %dil, %eax
4647 ; CHECK-NEXT: kmovw %eax, %k1
4648 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6]
4649 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm3 = k1[0],zmm0[0],k1[2],zmm0[2],k1[4],zmm0[4],k1[6],zmm0[6]
4650 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
4651 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4652 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4654 %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4655 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4656 %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3)
4657 %res3 = add <8 x i64> %res, %res1
4658 %res4 = add <8 x i64> %res2, %res3
4662 declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4664 define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4665 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
4667 ; CHECK-NEXT: movzbl %dil, %eax
4668 ; CHECK-NEXT: kmovw %eax, %k1
4669 ; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7]
4670 ; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
4671 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4673 %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4674 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4675 %res2 = add <8 x i64> %res, %res1
4679 declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4681 define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4682 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
4684 ; CHECK-NEXT: kmovw %edi, %k1
4685 ; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15]
4686 ; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
4687 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4689 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4690 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4691 %res2 = add <16 x i32> %res, %res1
4692 ret <16 x i32> %res2
4695 declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4697 define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4698 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
4700 ; CHECK-NEXT: kmovw %edi, %k1
4701 ; CHECK-NEXT: vpunpckldq {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13]
4702 ; CHECK-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
4703 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4705 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4706 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4707 %res2 = add <16 x i32> %res, %res1
4708 ret <16 x i32> %res2
4711 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)
4713 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
4714 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
4716 ; CHECK-NEXT: kmovw %edi, %k1
4717 ; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1}
4718 ; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
4719 ; CHECK-NEXT: vpmovqb %zmm0, %xmm0
4720 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
4721 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4723 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
4724 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
4725 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
4726 %res3 = add <16 x i8> %res0, %res1
4727 %res4 = add <16 x i8> %res3, %res2
4731 declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8)
4733 define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4734 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512:
4736 ; CHECK-NEXT: movzbl %sil, %eax
4737 ; CHECK-NEXT: kmovw %eax, %k1
4738 ; CHECK-NEXT: vpmovqb %zmm0, (%rdi)
4739 ; CHECK-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
4741 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4742 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
4746 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)
4748 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
4749 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
4751 ; CHECK-NEXT: kmovw %edi, %k1
4752 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1}
4753 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
4754 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
4755 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
4756 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4758 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
4759 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
4760 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
4761 %res3 = add <16 x i8> %res0, %res1
4762 %res4 = add <16 x i8> %res3, %res2
4766 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8)
4768 define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4769 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
4771 ; CHECK-NEXT: vpmovsqb %zmm0, (%rdi)
4772 ; CHECK-NEXT: kmovw %esi, %k1
4773 ; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) {%k1}
4775 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4776 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
4780 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)
4782 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
4783 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
4785 ; CHECK-NEXT: kmovw %edi, %k1
4786 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1}
4787 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
4788 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
4789 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
4790 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4792 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
4793 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
4794 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
4795 %res3 = add <16 x i8> %res0, %res1
4796 %res4 = add <16 x i8> %res3, %res2
4800 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8)
4802 define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4803 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
4805 ; CHECK-NEXT: vpmovusqb %zmm0, (%rdi)
4806 ; CHECK-NEXT: kmovw %esi, %k1
4807 ; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) {%k1}
4809 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4810 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
4814 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
4816 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
4817 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
4819 ; CHECK-NEXT: movzbl %dil, %eax
4820 ; CHECK-NEXT: kmovw %eax, %k1
4821 ; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1}
4822 ; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
4823 ; CHECK-NEXT: vpmovqw %zmm0, %xmm0
4824 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
4825 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
4827 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
4828 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
4829 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
4830 %res3 = add <8 x i16> %res0, %res1
4831 %res4 = add <8 x i16> %res3, %res2
4835 declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8)
4837 define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4838 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512:
4840 ; CHECK-NEXT: movzbl %sil, %eax
4841 ; CHECK-NEXT: kmovw %eax, %k1
4842 ; CHECK-NEXT: vpmovqw %zmm0, (%rdi)
4843 ; CHECK-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
4845 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4846 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
4850 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)
4852 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
4853 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
4855 ; CHECK-NEXT: movzbl %dil, %eax
4856 ; CHECK-NEXT: kmovw %eax, %k1
4857 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1}
4858 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
4859 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
4860 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
4861 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
4863 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
4864 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
4865 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
4866 %res3 = add <8 x i16> %res0, %res1
4867 %res4 = add <8 x i16> %res3, %res2
4871 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8)
4873 define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4874 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
4876 ; CHECK-NEXT: vpmovsqw %zmm0, (%rdi)
4877 ; CHECK-NEXT: kmovw %esi, %k1
4878 ; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) {%k1}
4880 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4881 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
4885 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)
4887 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
4888 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
4890 ; CHECK-NEXT: movzbl %dil, %eax
4891 ; CHECK-NEXT: kmovw %eax, %k1
4892 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm1 {%k1}
4893 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
4894 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
4895 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
4896 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
4898 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
4899 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
4900 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
4901 %res3 = add <8 x i16> %res0, %res1
4902 %res4 = add <8 x i16> %res3, %res2
4906 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)
4908 define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4909 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
4911 ; CHECK-NEXT: vpmovusqw %zmm0, (%rdi)
4912 ; CHECK-NEXT: kmovw %esi, %k1
4913 ; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) {%k1}
4915 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4916 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
4920 declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
4922 define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
4923 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
4925 ; CHECK-NEXT: movzbl %dil, %eax
4926 ; CHECK-NEXT: kmovw %eax, %k1
4927 ; CHECK-NEXT: vpmovqd %zmm0, %ymm1 {%k1}
4928 ; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
4929 ; CHECK-NEXT: vpmovqd %zmm0, %ymm0
4930 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
4931 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
4933 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
4934 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
4935 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
4936 %res3 = add <8 x i32> %res0, %res1
4937 %res4 = add <8 x i32> %res3, %res2
4941 declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)
4943 define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4944 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512:
4946 ; CHECK-NEXT: movzbl %sil, %eax
4947 ; CHECK-NEXT: kmovw %eax, %k1
4948 ; CHECK-NEXT: vpmovqd %zmm0, (%rdi)
4949 ; CHECK-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
4951 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4952 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
4956 declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)
4958 define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
4959 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
4961 ; CHECK-NEXT: movzbl %dil, %eax
4962 ; CHECK-NEXT: kmovw %eax, %k1
4963 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm1 {%k1}
4964 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
4965 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm0
4966 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
4967 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
4969 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
4970 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
4971 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
4972 %res3 = add <8 x i32> %res0, %res1
4973 %res4 = add <8 x i32> %res3, %res2
4977 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)
4979 define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4980 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
4982 ; CHECK-NEXT: vpmovsqd %zmm0, (%rdi)
4983 ; CHECK-NEXT: kmovw %esi, %k1
4984 ; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) {%k1}
4986 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4987 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
4991 declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)
4993 define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
4994 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
4996 ; CHECK-NEXT: movzbl %dil, %eax
4997 ; CHECK-NEXT: kmovw %eax, %k1
4998 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm1 {%k1}
4999 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
5000 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm0
5001 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
5002 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
5004 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
5005 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
5006 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
5007 %res3 = add <8 x i32> %res0, %res1
5008 %res4 = add <8 x i32> %res3, %res2
5012 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)
5014 define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
5015 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
5017 ; CHECK-NEXT: vpmovusqd %zmm0, (%rdi)
5018 ; CHECK-NEXT: kmovw %esi, %k1
5019 ; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) {%k1}
5021 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
5022 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
5026 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
5028 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
5029 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
5031 ; CHECK-NEXT: kmovw %edi, %k1
5032 ; CHECK-NEXT: vpmovdb %zmm0, %xmm1 {%k1}
5033 ; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
5034 ; CHECK-NEXT: vpmovdb %zmm0, %xmm0
5035 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
5036 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5038 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
5039 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
5040 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
5041 %res3 = add <16 x i8> %res0, %res1
5042 %res4 = add <16 x i8> %res3, %res2
5046 declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16)
5048 define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
5049 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512:
5051 ; CHECK-NEXT: kmovw %esi, %k1
5052 ; CHECK-NEXT: vpmovdb %zmm0, (%rdi)
5053 ; CHECK-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
5055 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
5056 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
5060 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)
5062 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
5063 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
5065 ; CHECK-NEXT: kmovw %edi, %k1
5066 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm1 {%k1}
5067 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
5068 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
5069 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
5070 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5072 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
5073 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
5074 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
5075 %res3 = add <16 x i8> %res0, %res1
5076 %res4 = add <16 x i8> %res3, %res2
5080 declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)
5082 define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
5083 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
5085 ; CHECK-NEXT: vpmovsdb %zmm0, (%rdi)
5086 ; CHECK-NEXT: kmovw %esi, %k1
5087 ; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) {%k1}
5089 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
5090 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
5094 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)
5096 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
5097 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
5099 ; CHECK-NEXT: kmovw %edi, %k1
5100 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm1 {%k1}
5101 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
5102 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
5103 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
5104 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5106 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
5107 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
5108 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
5109 %res3 = add <16 x i8> %res0, %res1
5110 %res4 = add <16 x i8> %res3, %res2
5114 declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)
5116 define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
5117 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
5119 ; CHECK-NEXT: vpmovusdb %zmm0, (%rdi)
5120 ; CHECK-NEXT: kmovw %esi, %k1
5121 ; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) {%k1}
5123 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
5124 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
5128 declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)
5130 define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
5131 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
5133 ; CHECK-NEXT: kmovw %edi, %k1
5134 ; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1}
5135 ; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
5136 ; CHECK-NEXT: vpmovdw %zmm0, %ymm0
5137 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
5138 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
5140 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
5141 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
5142 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
5143 %res3 = add <16 x i16> %res0, %res1
5144 %res4 = add <16 x i16> %res3, %res2
5145 ret <16 x i16> %res4
5148 declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16)
5150 define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
5151 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512:
5153 ; CHECK-NEXT: kmovw %esi, %k1
5154 ; CHECK-NEXT: vpmovdw %zmm0, (%rdi)
5155 ; CHECK-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
5157 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
5158 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
5162 declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)
5164 define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
5165 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
5167 ; CHECK-NEXT: kmovw %edi, %k1
5168 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm1 {%k1}
5169 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
5170 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
5171 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
5172 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
5174 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
5175 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
5176 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
5177 %res3 = add <16 x i16> %res0, %res1
5178 %res4 = add <16 x i16> %res3, %res2
5179 ret <16 x i16> %res4
5182 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)
5184 define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
5185 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
5187 ; CHECK-NEXT: vpmovsdw %zmm0, (%rdi)
5188 ; CHECK-NEXT: kmovw %esi, %k1
5189 ; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) {%k1}
5191 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
5192 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
5196 declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)
5198 define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
5199 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
5201 ; CHECK-NEXT: kmovw %edi, %k1
5202 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm1 {%k1}
5203 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
5204 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
5205 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
5206 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
5208 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
5209 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
5210 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
5211 %res3 = add <16 x i16> %res0, %res1
5212 %res4 = add <16 x i16> %res3, %res2
5213 ret <16 x i16> %res4
5216 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)
5218 define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
5219 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
5221 ; CHECK-NEXT: vpmovusdw %zmm0, (%rdi)
5222 ; CHECK-NEXT: kmovw %esi, %k1
5223 ; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) {%k1}
5225 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
5226 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
5230 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
5232 define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
5233 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
5235 ; CHECK-NEXT: movzbl %dil, %eax
5236 ; CHECK-NEXT: kmovw %eax, %k1
5237 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1}
5238 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
5239 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5241 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
5242 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
5243 %res2 = fadd <8 x double> %res, %res1
5244 ret <8 x double> %res2
5247 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
5249 define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
5250 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
5252 ; CHECK-NEXT: kmovw %edi, %k1
5253 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1}
5254 ; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0
5255 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
5257 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
5258 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
5259 %res2 = fadd <16 x float> %res, %res1
5260 ret <16 x float> %res2
5263 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
5265 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
5266 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512:
5268 ; CHECK-NEXT: movzbl %dil, %eax
5269 ; CHECK-NEXT: kmovw %eax, %k1
5270 ; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1}
5271 ; CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0
5272 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
5274 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
5275 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
5276 %res2 = add <8 x i32> %res, %res1
5280 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
5282 define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) {
5283 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512:
5285 ; CHECK-NEXT: movzbl %dil, %eax
5286 ; CHECK-NEXT: kmovw %eax, %k1
5287 ; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1}
5288 ; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0
5289 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
5291 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
5292 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2)
5293 %res2 = fadd <8 x float> %res, %res1
5294 ret <8 x float> %res2
5297 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
5299 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
5300 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512:
5302 ; CHECK-NEXT: movzbl %dil, %eax
5303 ; CHECK-NEXT: kmovw %eax, %k1
5304 ; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1}
5305 ; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0
5306 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
5308 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2)
5309 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
5310 %res2 = add <8 x i32> %res, %res1
5314 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)
5316 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
5317 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512:
5319 ; CHECK-NEXT: kmovw %edi, %k1
5320 ; CHECK-NEXT: vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1}
5321 ; CHECK-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0
5322 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
5324 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
5325 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
5326 %res2 = add <16 x i32> %res, %res1
5327 ret <16 x i32> %res2
5330 declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)
5332 define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) {
5333 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512:
5335 ; CHECK-NEXT: movzbl %dil, %eax
5336 ; CHECK-NEXT: kmovw %eax, %k1
5337 ; CHECK-NEXT: vcvtps2pd %ymm0, %zmm1 {%k1}
5338 ; CHECK-NEXT: vcvtps2pd {sae}, %ymm0, %zmm0
5339 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5341 %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
5342 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
5343 %res2 = fadd <8 x double> %res, %res1
5344 ret <8 x double> %res2
5347 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
5349 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
5350 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512:
5352 ; CHECK-NEXT: kmovw %edi, %k1
5353 ; CHECK-NEXT: vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1}
5354 ; CHECK-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0
5355 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
5357 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
5358 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
5359 %res2 = add <16 x i32> %res, %res1
5360 ret <16 x i32> %res2
5363 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
5365 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
5366 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
5368 ; CHECK-NEXT: movzbl %dil, %eax
5369 ; CHECK-NEXT: kmovw %eax, %k1
5370 ; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
5371 ; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
5372 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
5374 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
5375 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
5376 %res2 = add <8 x i32> %res, %res1
5380 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
5382 define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
5383 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
5385 ; CHECK-NEXT: movzbl %dil, %eax
5386 ; CHECK-NEXT: kmovw %eax, %k1
5387 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1}
5388 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
5389 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5391 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
5392 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
5393 %res2 = fadd <8 x double> %res, %res1
5394 ret <8 x double> %res2
5398 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
5400 define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
5401 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
5403 ; CHECK-NEXT: kmovw %edi, %k1
5404 ; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1}
5405 ; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0
5406 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
5408 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
5409 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
5410 %res2 = fadd <16 x float> %res, %res1
5411 ret <16 x float> %res2
5414 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
5416 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
5417 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
5419 ; CHECK-NEXT: movzbl %dil, %eax
5420 ; CHECK-NEXT: kmovw %eax, %k1
5421 ; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
5422 ; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
5423 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
5425 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
5426 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
5427 %res2 = add <8 x i32> %res, %res1
5431 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
5433 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
5434 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
5436 ; CHECK-NEXT: kmovw %edi, %k1
5437 ; CHECK-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
5438 ; CHECK-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
5439 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
5441 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
5442 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
5443 %res2 = add <16 x i32> %res, %res1
5444 ret <16 x i32> %res2
5447 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
5449 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
5450 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
5452 ; CHECK-NEXT: kmovw %edi, %k1
5453 ; CHECK-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
5454 ; CHECK-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
5455 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
5457 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
5458 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
5459 %res2 = add <16 x i32> %res, %res1
5460 ret <16 x i32> %res2
5464 declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
5465 define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
5466 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss:
5468 ; CHECK-NEXT: andl $1, %edi
5469 ; CHECK-NEXT: kmovw %edi, %k1
5470 ; CHECK-NEXT: vscalefss %xmm1, %xmm0, %xmm2 {%k1}
5471 ; CHECK-NEXT: vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
5472 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
5474 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
5475 %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
5476 %res2 = fadd <4 x float> %res, %res1
5477 ret <4 x float> %res2
5480 declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
5481 define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
5482 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd:
5484 ; CHECK-NEXT: andl $1, %edi
5485 ; CHECK-NEXT: kmovw %edi, %k1
5486 ; CHECK-NEXT: vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
5487 ; CHECK-NEXT: vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
5488 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
5490 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
5491 %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
5492 %res2 = fadd <2 x double> %res, %res1
5493 ret <2 x double> %res2
5496 declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
5498 define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
5499 ; CHECK-LABEL: test_getexp_ss:
5501 ; CHECK-NEXT: andl $1, %edi
5502 ; CHECK-NEXT: kmovw %edi, %k1
5503 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
5504 ; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
5505 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
5506 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
5507 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0
5508 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
5509 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
5510 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
5512 %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
5513 %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
5514 %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
5515 %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
5517 %res.1 = fadd <4 x float> %res0, %res1
5518 %res.2 = fadd <4 x float> %res2, %res3
5519 %res = fadd <4 x float> %res.1, %res.2
5520 ret <4 x float> %res
5523 declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
5525 define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
5526 ; CHECK-LABEL: test_getexp_sd:
5528 ; CHECK-NEXT: andl $1, %edi
5529 ; CHECK-NEXT: kmovw %edi, %k1
5530 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
5531 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
5532 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
5533 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
5534 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
5535 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
5536 ; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
5537 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
5539 %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
5540 %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
5541 %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
5542 %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
5544 %res.1 = fadd <2 x double> %res0, %res1
5545 %res.2 = fadd <2 x double> %res2, %res3
5546 %res = fadd <2 x double> %res.1, %res.2
5547 ret <2 x double> %res
5550 declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
5552 define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
5553 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd:
5555 ; CHECK-NEXT: andl $1, %edi
5556 ; CHECK-NEXT: kmovw %edi, %k1
5557 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
5558 ; CHECK-NEXT: kmovw %k0, %eax
5559 ; CHECK-NEXT: shlb $7, %al
5560 ; CHECK-NEXT: sarb $7, %al
5563 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
5567 define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
5568 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all:
5570 ; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0
5571 ; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1
5572 ; CHECK-NEXT: korw %k0, %k1, %k0
5573 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1
5574 ; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2
5575 ; CHECK-NEXT: korw %k1, %k2, %k1
5576 ; CHECK-NEXT: andl $1, %edi
5577 ; CHECK-NEXT: kmovw %edi, %k2
5578 ; CHECK-NEXT: kandw %k2, %k1, %k1
5579 ; CHECK-NEXT: korw %k1, %k0, %k0
5580 ; CHECK-NEXT: kmovw %k0, %eax
5581 ; CHECK-NEXT: shlb $7, %al
5582 ; CHECK-NEXT: sarb $7, %al
5585 %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
5586 %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
5587 %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
5588 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
5590 %res11 = or i8 %res1, %res2
5591 %res12 = or i8 %res3, %res4
5592 %res13 = or i8 %res11, %res12
5596 declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
5598 define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
5599 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
5601 ; CHECK-NEXT: andl $1, %edi
5602 ; CHECK-NEXT: kmovw %edi, %k1
5603 ; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
5604 ; CHECK-NEXT: kmovw %k0, %eax
5605 ; CHECK-NEXT: shlb $7, %al
5606 ; CHECK-NEXT: sarb $7, %al
5609 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
5614 define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
5615 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all:
5617 ; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1
5618 ; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1}
5619 ; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k1
5620 ; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
5621 ; CHECK-NEXT: andl $1, %edi
5622 ; CHECK-NEXT: kmovw %edi, %k2
5623 ; CHECK-NEXT: kandw %k2, %k1, %k1
5624 ; CHECK-NEXT: kandw %k1, %k0, %k0
5625 ; CHECK-NEXT: kmovw %k0, %eax
5626 ; CHECK-NEXT: shlb $7, %al
5627 ; CHECK-NEXT: sarb $7, %al
5629 %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
5630 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
5631 %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
5632 %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)
5634 %res11 = and i8 %res1, %res2
5635 %res12 = and i8 %res3, %res4
5636 %res13 = and i8 %res11, %res12
5640 declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
5642 define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
5643 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
5645 ; CHECK-NEXT: kmovw %edi, %k1
5646 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5647 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5648 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
5650 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
5651 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
5652 %res2 = fadd <16 x float> %res, %res1
5653 ret <16 x float> %res2
5656 declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)
5658 define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
5659 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
5661 ; CHECK-NEXT: movzbl %dil, %eax
5662 ; CHECK-NEXT: kmovw %eax, %k1
5663 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5664 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5665 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5666 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
5667 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
5669 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
5670 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
5671 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
5673 %res3 = fadd <8 x double> %res, %res1
5674 %res4 = fadd <8 x double> %res3, %res2
5675 ret <8 x double> %res4
5678 declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
5680 define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
5681 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
5683 ; CHECK-NEXT: kmovw %edi, %k1
5684 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5685 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5686 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
5688 %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
5689 %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
5690 %res2 = add <16 x i32> %res, %res1
5691 ret <16 x i32> %res2
5694 declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
5696 define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
5697 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
5699 ; CHECK-NEXT: movzbl %dil, %eax
5700 ; CHECK-NEXT: kmovw %eax, %k1
5701 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5702 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5703 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
5705 %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
5706 %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
5707 %res2 = add <8 x i64> %res, %res1
5711 declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
5713 define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
5714 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_512:
5716 ; CHECK-NEXT: movzbl %dil, %eax
5717 ; CHECK-NEXT: kmovw %eax, %k1
5718 ; CHECK-NEXT: vgetmantpd $11, %zmm0, %zmm1 {%k1}
5719 ; CHECK-NEXT: vgetmantpd $11,{sae}, %zmm0, %zmm0
5720 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5722 %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
5723 %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8)
5724 %res2 = fadd <8 x double> %res, %res1
5725 ret <8 x double> %res2
5728 declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
5730 define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
5731 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_512:
5733 ; CHECK-NEXT: kmovw %edi, %k1
5734 ; CHECK-NEXT: vgetmantps $11, %zmm0, %zmm1 {%k1}
5735 ; CHECK-NEXT: vgetmantps $11,{sae}, %zmm0, %zmm0
5736 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
5738 %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
5739 %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8)
5740 %res2 = fadd <16 x float> %res, %res1
5741 ret <16 x float> %res2
5744 declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32)
5746 define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
5747 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd:
5749 ; CHECK-NEXT: andl $1, %edi
5750 ; CHECK-NEXT: kmovw %edi, %k1
5751 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
5752 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
5753 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
5754 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
5755 ; CHECK-NEXT: vgetmantsd $11,{sae}, %xmm1, %xmm0, %xmm2 {%k1}
5756 ; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
5757 ; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
5758 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
5760 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
5761 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4)
5762 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 8)
5763 %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 -1, i32 4)
5764 %res11 = fadd <2 x double> %res, %res1
5765 %res12 = fadd <2 x double> %res2, %res3
5766 %res13 = fadd <2 x double> %res11, %res12
5767 ret <2 x double> %res13
5770 declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)
5772 define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
5773 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss:
5775 ; CHECK-NEXT: andl $1, %edi
5776 ; CHECK-NEXT: kmovw %edi, %k1
5777 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
5778 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z}
5779 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4
5780 ; CHECK-NEXT: vgetmantss $11,{sae}, %xmm1, %xmm0, %xmm0
5781 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
5782 ; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
5783 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
5785 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
5786 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4)
5787 %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 8)
5788 %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 4)
5789 %res11 = fadd <4 x float> %res, %res1
5790 %res12 = fadd <4 x float> %res2, %res3
5791 %res13 = fadd <4 x float> %res11, %res12
5792 ret <4 x float> %res13
5795 declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)
5797 define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
5798 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
5800 ; CHECK-NEXT: movzbl %dil, %eax
5801 ; CHECK-NEXT: kmovw %eax, %k1
5802 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 = zmm2[0],k1[1],zmm2[3],k1[2],zmm2[5],k1[4],zmm2[6],k1[6]
5803 ; CHECK-NEXT: vshufpd {{.*#+}} zmm3 = k1[0],zmm0[1],k1[3],zmm0[2],k1[5],zmm0[4],k1[6],zmm0[6]
5804 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
5805 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
5806 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
5808 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
5809 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
5810 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
5812 %res3 = fadd <8 x double> %res, %res1
5813 %res4 = fadd <8 x double> %res3, %res2
5814 ret <8 x double> %res4
5817 declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)
5819 define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
5820 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
5822 ; CHECK-NEXT: kmovw %edi, %k1
5823 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm2[2,1],k1[1,0],zmm2[6,5],k1[5,4],zmm2[10,9],k1[9,8],zmm2[14,13],k1[13,12]
5824 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
5825 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
5827 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
5828 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
5829 %res2 = fadd <16 x float> %res, %res1
5830 ret <16 x float> %res2
5833 declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)
5835 define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
5836 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
5838 ; CHECK-NEXT: movzbl %dil, %eax
5839 ; CHECK-NEXT: kmovw %eax, %k1
5840 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 = zmm1[0,1,3,2,5,4,6,6]
5841 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 = k1[0,1,3,2,5,4,6,6]
5842 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,3,2,5,4,6,6]
5843 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
5844 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5846 %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
5847 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
5848 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
5849 %res3 = fadd <8 x double> %res, %res1
5850 %res4 = fadd <8 x double> %res3, %res2
5851 ret <8 x double> %res4
5854 declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)
5856 define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
5857 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
5859 ; CHECK-NEXT: kmovw %edi, %k1
5860 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 = zmm1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5861 ; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = k1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5862 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5863 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
5864 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
5866 %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
5867 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
5868 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
5869 %res3 = fadd <16 x float> %res, %res1
5870 %res4 = fadd <16 x float> %res3, %res2
5871 ret <16 x float> %res4
5874 declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
5876 define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
5877 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
5879 ; CHECK-NEXT: movzbl %dil, %eax
5880 ; CHECK-NEXT: kmovw %eax, %k1
5881 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
5882 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z}
5883 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0
5884 ; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
5885 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
5887 %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
5888 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
5889 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
5890 %res3 = fadd <8 x double> %res, %res1
5891 %res4 = fadd <8 x double> %res2, %res3
5892 ret <8 x double> %res4
5895 declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
5897 define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
5898 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
5900 ; CHECK-NEXT: kmovw %edi, %k1
5901 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1}
5902 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z}
5903 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0
5904 ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
5905 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
5907 %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
5908 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
5909 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
5910 %res3 = fadd <16 x float> %res, %res1
5911 %res4 = fadd <16 x float> %res2, %res3
5912 ret <16 x float> %res4
5915 declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i8)
5917 define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i8 %x4) {
5918 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
5920 ; CHECK-NEXT: kmovw %edi, %k1
5921 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
5922 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
5923 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
5924 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
5925 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
5927 %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 %x4)
5928 %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 -1)
5929 %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i8 %x4)
5930 %res3 = fadd <16 x float> %res, %res1
5931 %res4 = fadd <16 x float> %res2, %res3
5932 ret <16 x float> %res4
5935 declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i8)
5937 define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i8 %x4) {
5938 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
5940 ; CHECK-NEXT: kmovw %edi, %k1
5941 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
5942 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
5943 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
5944 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
5945 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
5947 %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 %x4)
5948 %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 -1)
5949 %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i8 %x4)
5950 %res3 = add <16 x i32> %res, %res1
5951 %res4 = add <16 x i32> %res2, %res3
5952 ret <16 x i32> %res4
5955 declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)
5957 define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
5958 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
5960 ; CHECK-NEXT: movzbl %dil, %eax
5961 ; CHECK-NEXT: kmovw %eax, %k1
5962 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
5963 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
5964 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
5965 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
5966 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
5968 %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
5969 %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
5970 %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
5971 %res3 = fadd <8 x double> %res, %res1
5972 %res4 = fadd <8 x double> %res2, %res3
5973 ret <8 x double> %res4
5976 declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)
5978 define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
5979 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
5981 ; CHECK-NEXT: movzbl %dil, %eax
5982 ; CHECK-NEXT: kmovw %eax, %k1
5983 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
5984 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
5985 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
5986 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
5987 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
5989 %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
5990 %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
5991 %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
5992 %res3 = add <8 x i64> %res, %res1
5993 %res4 = add <8 x i64> %res2, %res3
5997 declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float>, <4 x float>, <2 x double>, i8, i32)
5999 define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<4 x float> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) {
6000 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round:
6002 ; CHECK-NEXT: andl $1, %edi
6003 ; CHECK-NEXT: kmovw %edi, %k1
6004 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1}
6005 ; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0
6006 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
6008 %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4)
6009 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8)
6010 %res2 = fadd <2 x double> %res, %res1
6011 ret <2 x double> %res2
6014 declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double>, <2 x double>, <4 x float>, i8, i32)
6016 define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<2 x double> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) {
6017 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round:
6019 ; CHECK-NEXT: andl $1, %edi
6020 ; CHECK-NEXT: kmovw %edi, %k1
6021 ; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
6022 ; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
6023 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
6025 %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3)
6026 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
6027 %res2 = fadd <4 x float> %res, %res1
6028 ret <4 x float> %res2
6031 declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
6033 define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
6034 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
6036 ; CHECK-NEXT: kmovw %edi, %k1
6037 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
6038 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1}
6039 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
6040 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
6042 %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
6043 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
6044 %res2 = add <16 x i32> %res, %res1
6045 ret <16 x i32> %res2
6048 declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
6050 define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
6051 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
6053 ; CHECK-NEXT: kmovw %edi, %k1
6054 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
6055 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
6056 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
6057 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
6059 %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
6060 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
6061 %res2 = add <16 x i32> %res, %res1
6062 ret <16 x i32> %res2
6065 declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
6067 define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
6068 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
6070 ; CHECK-NEXT: movzbl %dil, %eax
6071 ; CHECK-NEXT: kmovw %eax, %k1
6072 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
6073 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1}
6074 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
6075 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
6077 %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
6078 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
6079 %res2 = add <8 x i64> %res, %res1
6083 declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
6085 define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
6086 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
6088 ; CHECK-NEXT: movzbl %dil, %eax
6089 ; CHECK-NEXT: kmovw %eax, %k1
6090 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
6091 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
6092 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
6093 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
6095 %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
6096 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
6097 %res2 = add <8 x i64> %res, %res1
6101 declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)
6103 define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
6104 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_512:
6106 ; CHECK-NEXT: kmovw %edi, %k1
6107 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
6108 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
6109 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
6110 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
6111 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
6113 %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
6114 %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
6115 %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
6116 %res3 = fadd <16 x float> %res, %res1
6117 %res4 = fadd <16 x float> %res2, %res3
6118 ret <16 x float> %res4
6121 declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)
6123 define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
6124 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_512:
6126 ; CHECK-NEXT: kmovw %edi, %k1
6127 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
6128 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
6129 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
6130 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
6131 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
6133 %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
6134 %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
6135 %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
6136 %res3 = fadd <16 x float> %res, %res1
6137 %res4 = fadd <16 x float> %res2, %res3
6138 ret <16 x float> %res4
6141 declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)
6143 define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
6144 ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_512:
6146 ; CHECK-NEXT: movzbl %dil, %eax
6147 ; CHECK-NEXT: kmovw %eax, %k1
6148 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6]
6149 ; CHECK-NEXT: vmovddup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6]
6150 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
6151 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
6152 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
6154 %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
6155 %res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
6156 %res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
6157 %res3 = fadd <8 x double> %res, %res1
6158 %res4 = fadd <8 x double> %res2, %res3
6159 ret <8 x double> %res4