1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
3 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
4 ; CHECK-LABEL: test_kortestz
7 define i32 @test_kortestz(i16 %a0, i16 %a1) {
8 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
12 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
13 ; CHECK-LABEL: test_kortestc
16 define i32 @test_kortestc(i16 %a0, i16 %a1) {
17 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
21 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
22 ; CHECK-LABEL: test_kand
25 define i16 @test_kand(i16 %a0, i16 %a1) {
26 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
27 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
31 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
32 ; CHECK-LABEL: test_knot
34 define i16 @test_knot(i16 %a0) {
35 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
39 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
41 ; CHECK-LABEL: unpckbw_test
44 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
45 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
49 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
50 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
51 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
54 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
56 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
57 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
58 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
61 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
63 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
65 define <8 x double> @test7(<8 x double> %a) {
66 ; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
67 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
71 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
73 define <16 x float> @test8(<16 x float> %a) {
74 ; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
75 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
79 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
80 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
81 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
84 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
86 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
87 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
88 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
91 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
93 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
94 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
95 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
98 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
100 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
101 ; CHECK-LABEL: test_sqrt_pd_512
103 %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
104 ret <8 x double> %res
106 declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
108 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
109 ; CHECK-LABEL: test_sqrt_ps_512
111 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
112 ret <16 x float> %res
114 define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
115 ; CHECK-LABEL: test_sqrt_round_ps_512
116 ; CHECK: vsqrtps {rz-sae}
117 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
118 ret <16 x float> %res
120 declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
122 define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
123 ; CHECK-LABEL: test_getexp_pd_512
125 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
126 ret <8 x double> %res
128 define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
129 ; CHECK-LABEL: test_getexp_round_pd_512
130 ; CHECK: vgetexppd {sae}
131 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
132 ret <8 x double> %res
134 declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
136 define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
137 ; CHECK-LABEL: test_getexp_ps_512
139 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
140 ret <16 x float> %res
143 define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
144 ; CHECK-LABEL: test_getexp_round_ps_512
145 ; CHECK: vgetexpps {sae}
146 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
147 ret <16 x float> %res
149 declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
151 declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
153 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
154 ; CHECK-LABEL: test_sqrt_ss:
156 ; CHECK-NEXT: andl $1, %edi
157 ; CHECK-NEXT: kmovw %edi, %k1
158 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
159 ; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
160 ; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
161 ; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
162 ; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
163 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
164 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
165 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
167 %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
168 %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
169 %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
170 %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)
172 %res.1 = fadd <4 x float> %res0, %res1
173 %res.2 = fadd <4 x float> %res2, %res3
174 %res = fadd <4 x float> %res.1, %res.2
178 declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
180 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
181 ; CHECK-LABEL: test_sqrt_sd:
183 ; CHECK-NEXT: andl $1, %edi
184 ; CHECK-NEXT: kmovw %edi, %k1
185 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
186 ; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
187 ; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
188 ; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
189 ; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
190 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
191 ; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0
192 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
194 %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
195 %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
196 %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
197 %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)
199 %res.1 = fadd <2 x double> %res0, %res1
200 %res.2 = fadd <2 x double> %res2, %res3
201 %res = fadd <2 x double> %res.1, %res.2
202 ret <2 x double> %res
205 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
206 ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
207 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
210 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
212 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
213 ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
214 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
215 ret <2 x double> %res
217 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
219 define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
220 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
221 ; CHECK: vcvttsd2si {sae}{{.*}}encoding: [0x62
222 %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ;
223 %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ;
224 %res2 = add i64 %res0, %res1
227 declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone
229 define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) {
230 ; CHECK: vcvttsd2usi {{.*}}encoding: [0x62
231 ; CHECK: vcvttsd2usi {sae}{{.*}}encoding: [0x62
232 %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
233 %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
234 %res2 = add i32 %res0, %res1
237 declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
239 define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) {
240 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
241 ; CHECK: vcvttsd2si {sae}{{.*}}encoding: [0x62
242 %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
243 %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
244 %res2 = add i32 %res0, %res1
247 declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
251 define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
252 ; CHECK: vcvttsd2usi {{.*}}encoding: [0x62
253 ; CHECK: vcvttsd2usi {sae}{{.*}}encoding: [0x62
254 %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ;
255 %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ;
256 %res2 = add i64 %res0, %res1
259 declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone
261 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
262 ; CHECK: vcvtss2si {{.*}}encoding: [0x62
263 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
266 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
269 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
270 ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
271 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
274 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
277 define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) {
278 ; CHECK: vcvttss2si {sae}{{.*}}encoding: [0x62
279 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
280 %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
281 %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
282 %res2 = add i32 %res0, %res1
285 declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
287 define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
288 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
289 ; CHECK: vcvttss2si {sae}{{.*}}encoding: [0x62
290 %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ;
291 %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ;
292 %res2 = add i64 %res0, %res1
295 declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone
297 define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
298 ; CHECK: vcvttss2usi {sae}{{.*}}encoding: [0x62
299 ; CHECK: vcvttss2usi {{.*}}encoding: [0x62
300 %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
301 %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
302 %res2 = add i32 %res0, %res1
305 declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
307 define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
308 ; CHECK: vcvttss2usi {{.*}}encoding: [0x62
309 ; CHECK: vcvttss2usi {sae}{{.*}}encoding: [0x62
310 %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ;
311 %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ;
312 %res2 = add i64 %res0, %res1
315 declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
317 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
318 ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
319 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
322 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
324 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
325 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
326 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
327 ret <16 x float> %res
329 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
332 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
333 ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
334 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
338 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
340 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
341 ; CHECK: vbroadcastss
342 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
343 ret <16 x float> %res
345 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
347 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
348 ; CHECK: vbroadcastsd
349 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
350 ret <8 x double> %res
352 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
354 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
355 ; CHECK: vbroadcastss
356 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
357 ret <16 x float> %res
359 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
361 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
362 ; CHECK: vbroadcastsd
363 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
364 ret <8 x double> %res
366 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
368 define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
369 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512:
371 ; CHECK-NEXT: kmovw %edi, %k1
372 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm1 {%k1}
373 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm2 {%k1} {z}
374 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0
375 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
376 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
378 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
379 %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
380 %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
381 %res3 = add <16 x i32> %res, %res1
382 %res4 = add <16 x i32> %res2, %res3
385 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)
387 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
388 ; CHECK: vpbroadcastd
389 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
392 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
394 define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
395 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
397 ; CHECK-NEXT: movzbl %dil, %eax
398 ; CHECK-NEXT: kmovw %eax, %k1
399 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm1 {%k1}
400 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm2 {%k1} {z}
401 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm0
402 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
403 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
405 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1)
406 %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask)
407 %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
408 %res3 = add <8 x i64> %res, %res1
409 %res4 = add <8 x i64> %res2, %res3
412 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)
414 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
415 ; CHECK: vpbroadcastq
416 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
419 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
421 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
422 ; CHECK-LABEL: test_conflict_d:
424 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0
425 ; CHECK-NEXT: retq ## encoding: [0xc3]
426 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
430 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
432 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
433 ; CHECK-LABEL: test_conflict_q:
435 ; CHECK-NEXT: vpconflictq %zmm0, %zmm0
437 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
441 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
443 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
444 ; CHECK-LABEL: test_maskz_conflict_d:
446 ; CHECK-NEXT: kmovw %edi, %k1
447 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z}
449 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
453 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
454 ; CHECK-LABEL: test_mask_conflict_q:
456 ; CHECK-NEXT: movzbl %dil, %eax
457 ; CHECK-NEXT: kmovw %eax, %k1
458 ; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
459 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
461 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
465 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
466 ; CHECK-LABEL: test_lzcnt_d:
468 ; CHECK-NEXT: vplzcntd %zmm0, %zmm0
470 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
474 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
476 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
477 ; CHECK-LABEL: test_lzcnt_q:
479 ; CHECK-NEXT: vplzcntq %zmm0, %zmm0
481 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
485 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
488 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
489 ; CHECK-LABEL: test_mask_lzcnt_d:
491 ; CHECK-NEXT: kmovw %edi, %k1
492 ; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1}
493 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
495 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
499 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
500 ; CHECK-LABEL: test_mask_lzcnt_q:
502 ; CHECK-NEXT: movzbl %dil, %eax
503 ; CHECK-NEXT: kmovw %eax, %k1
504 ; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1}
505 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
506 ; CHECK-NEXT: retq ## encoding: [0xc3]
507 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
511 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
512 ; CHECK: vblendmps %zmm1, %zmm0
513 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
514 ret <16 x float> %res
517 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
519 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
520 ; CHECK: vblendmpd %zmm1, %zmm0
521 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
522 ret <8 x double> %res
525 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
526 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
527 ; CHECK: vblendmpd (%
528 %b = load <8 x double>, <8 x double>* %ptr
529 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
530 ret <8 x double> %res
532 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
534 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
536 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
539 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
541 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
543 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
546 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
548 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
549 ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
550 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
553 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
555 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
556 ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
557 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
560 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
563 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
565 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
566 <8 x double>zeroinitializer, i8 -1, i32 4)
567 ret <8 x double> %res
569 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
570 <8 x double>, i8, i32)
572 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
574 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
575 <8 x double>zeroinitializer, i8 -1, i32 4)
576 ret <8 x double> %res
578 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
579 <8 x double>, i8, i32)
581 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
583 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_512
586 ; CHECK: vpabsd{{.*}}{%k1}
587 define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
588 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
589 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
590 %res2 = add <16 x i32> %res, %res1
594 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
596 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_512
599 ; CHECK: vpabsq{{.*}}{%k1}
600 define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
601 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
602 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
603 %res2 = add <8 x i64> %res, %res1
607 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
608 ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
609 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
612 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
614 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
615 ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
616 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
619 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
621 define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
622 ; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
623 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
627 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
629 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
630 ; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
631 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
635 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
637 define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
638 ; CHECK-LABEL: test_mask_store_aligned_ps:
640 ; CHECK-NEXT: kmovw %esi, %k1
641 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
643 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
647 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
649 define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
650 ; CHECK-LABEL: test_mask_store_aligned_pd:
652 ; CHECK-NEXT: kmovw %esi, %k1
653 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
655 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
659 declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
661 define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
662 ; CHECK-LABEL: test_maskz_load_aligned_ps:
664 ; CHECK-NEXT: kmovw %esi, %k1
665 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
667 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
668 ret <16 x float> %res
671 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
673 define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
674 ; CHECK-LABEL: test_maskz_load_aligned_pd:
676 ; CHECK-NEXT: kmovw %esi, %k1
677 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
679 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
680 ret <8 x double> %res
683 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
685 define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
686 ; CHECK-LABEL: test_load_aligned_ps:
688 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
690 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
691 ret <16 x float> %res
694 define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
695 ; CHECK-LABEL: test_load_aligned_pd:
697 ; CHECK-NEXT: vmovapd (%rdi), %zmm0
699 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
700 ret <8 x double> %res
703 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
705 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
706 ; CHECK-LABEL: test_valign_q:
707 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
708 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
712 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
713 ; CHECK-LABEL: test_mask_valign_q:
714 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
715 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
719 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
721 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
722 ; CHECK-LABEL: test_maskz_valign_d:
723 ; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
724 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
728 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
730 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
731 ; CHECK-LABEL: test_mask_store_ss
732 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
733 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
737 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
739 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
740 ; CHECK-LABEL: test_pcmpeq_d
741 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
742 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
746 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
747 ; CHECK-LABEL: test_mask_pcmpeq_d
748 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
749 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
753 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
755 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
756 ; CHECK-LABEL: test_pcmpeq_q
757 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
758 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
762 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
763 ; CHECK-LABEL: test_mask_pcmpeq_q
764 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
765 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
769 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
771 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
772 ; CHECK-LABEL: test_pcmpgt_d
773 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
774 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
778 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
779 ; CHECK-LABEL: test_mask_pcmpgt_d
780 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
781 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
785 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
787 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
788 ; CHECK-LABEL: test_pcmpgt_q
789 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
790 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
794 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
795 ; CHECK-LABEL: test_mask_pcmpgt_q
796 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
797 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
801 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
803 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
804 ; CHECK-LABEL: test_cmp_d_512
805 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
806 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
807 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
808 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
809 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
810 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
811 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
812 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
813 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
814 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
815 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
816 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
817 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
818 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
819 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
820 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
821 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
822 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
823 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
824 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
825 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
826 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
827 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
828 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
832 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
833 ; CHECK-LABEL: test_mask_cmp_d_512
834 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
835 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
836 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
837 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
838 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
839 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
840 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
841 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
842 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
843 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
844 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
845 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
846 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
847 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
848 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
849 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
850 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
851 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
852 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
853 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
854 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
855 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
856 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
857 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
861 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
863 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
864 ; CHECK-LABEL: test_ucmp_d_512
865 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
866 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
867 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
868 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
869 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
870 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
871 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
872 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
873 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
874 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
875 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
876 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
877 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
878 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
879 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
880 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
881 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
882 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
883 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
884 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
885 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
886 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
887 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
888 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
892 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
893 ; CHECK-LABEL: test_mask_ucmp_d_512
894 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
895 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
896 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
897 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
898 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
899 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
900 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
901 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
902 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
903 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
904 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
905 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
906 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
907 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
908 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
909 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
910 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
911 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
912 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
913 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
914 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
915 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
916 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
917 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
921 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
923 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
924 ; CHECK-LABEL: test_cmp_q_512
925 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
926 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
927 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
928 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
929 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
930 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
931 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
932 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
933 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
934 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
935 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
936 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
937 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
938 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
939 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
940 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
941 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
942 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
943 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
944 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
945 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
946 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
947 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
948 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
952 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
953 ; CHECK-LABEL: test_mask_cmp_q_512
954 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
955 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
956 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
957 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
958 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
959 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
960 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
961 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
962 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
963 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
964 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
965 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
966 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
967 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
968 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
969 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
970 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
971 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
972 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
973 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
974 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
975 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
976 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
977 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
981 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
983 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
984 ; CHECK-LABEL: test_ucmp_q_512
985 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
986 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
987 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
988 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
989 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
990 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
991 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
992 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
993 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
994 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
995 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
996 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
997 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
998 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
999 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1000 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
1001 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
1002 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1003 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
1004 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
1005 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1006 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
1007 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
1008 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1012 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1013 ; CHECK-LABEL: test_mask_ucmp_q_512
1014 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
1015 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
1016 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1017 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
1018 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
1019 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1020 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
1021 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
1022 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1023 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
1024 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
1025 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1026 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
1027 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
1028 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1029 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
1030 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
1031 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1032 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
1033 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
1034 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1035 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
1036 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
1037 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1041 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
1043 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
1044 ; CHECK-LABEL: test_mask_vextractf32x4:
1045 ; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
1046 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
1047 ret <4 x float> %res
1050 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)
1052 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
1053 ; CHECK-LABEL: test_mask_vextracti64x4:
1054 ; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
1055 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 2, <4 x i64> %b, i8 %mask)
1059 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)
1061 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
1062 ; CHECK-LABEL: test_maskz_vextracti32x4:
1063 ; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
1064 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
1068 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)
1070 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
1071 ; CHECK-LABEL: test_vextractf64x4:
1072 ; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
1073 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 2, <4 x double> zeroinitializer, i8 -1)
1074 ret <4 x double> %res
1077 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
1079 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
1080 ; CHECK-LABEL: test_x86_avx512_pslli_d
1082 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1086 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1087 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
1088 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
1089 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1093 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
1094 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
1095 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
1096 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1100 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1102 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
1103 ; CHECK-LABEL: test_x86_avx512_pslli_q
1105 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1109 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1110 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
1111 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
1112 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1116 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
1117 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
1118 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
1119 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1123 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1125 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1126 ; CHECK-LABEL: test_x86_avx512_psrli_d
1128 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1132 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1133 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
1134 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
1135 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1139 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1140 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
1141 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1142 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1146 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1148 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1149 ; CHECK-LABEL: test_x86_avx512_psrli_q
1151 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1155 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1156 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
1157 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
1158 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1162 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1163 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
1164 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1165 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1169 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1171 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1172 ; CHECK-LABEL: test_x86_avx512_psrai_d
1174 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1178 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1179 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
1180 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
1181 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1185 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1186 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
1187 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1188 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1192 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1194 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1195 ; CHECK-LABEL: test_x86_avx512_psrai_q
1197 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1201 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1202 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
1203 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
1204 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1208 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1209 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
1210 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1211 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1215 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1217 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
1218 ; CHECK-LABEL: test_x86_avx512_psll_d
1220 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1224 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1225 ; CHECK-LABEL: test_x86_avx512_mask_psll_d
1226 ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
1227 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1231 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1232 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
1233 ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
1234 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1238 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1240 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
1241 ; CHECK-LABEL: test_x86_avx512_psll_q
1243 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1247 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1248 ; CHECK-LABEL: test_x86_avx512_mask_psll_q
1249 ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
1250 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1254 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1255 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
1256 ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
1257 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1261 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1263 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
1264 ; CHECK-LABEL: test_x86_avx512_psrl_d
1266 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1270 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1271 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
1272 ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
1273 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1277 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1278 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
1279 ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
1280 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1284 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1286 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
1287 ; CHECK-LABEL: test_x86_avx512_psrl_q
1289 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1293 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1294 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
1295 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
1296 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1300 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1301 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
1302 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
1303 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1307 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1309 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
1310 ; CHECK-LABEL: test_x86_avx512_psra_d
1312 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1316 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1317 ; CHECK-LABEL: test_x86_avx512_mask_psra_d
1318 ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
1319 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1323 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1324 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
1325 ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
1326 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1330 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1332 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
1333 ; CHECK-LABEL: test_x86_avx512_psra_q
1335 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1339 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1340 ; CHECK-LABEL: test_x86_avx512_mask_psra_q
1341 ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
1342 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1346 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1347 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
1348 ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
1349 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1353 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1355 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
1356 ; CHECK-LABEL: test_x86_avx512_psllv_d
1358 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1362 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1363 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
1364 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
1365 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1369 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1370 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
1371 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1372 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1376 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1378 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
1379 ; CHECK-LABEL: test_x86_avx512_psllv_q
1381 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1385 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1386 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
1387 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
1388 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1392 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1393 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
1394 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1395 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1399 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
; --- Variable-shift intrinsic tests (vpsravd/vpsravq/vpsrlvd/vpsrlvq) ---
; Each family exercises three forms of the masked shift intrinsic:
;   unmasked:     mask = -1, passthru = zeroinitializer (plain instruction)
;   merge-masked: passthru = %a2, CHECK expects write under {%k1}
;   zero-masked:  passthru = zeroinitializer, CHECK expects {%k1} {z}
; NOTE(review): some ret/closing-brace lines are absent from this excerpt
; (embedded line numbers jump); code kept byte-identical here.
1402 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
1403 ; CHECK-LABEL: test_x86_avx512_psrav_d
1405 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1409 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1410 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
1411 ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
1412 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1416 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1417 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
1418 ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
1419 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1423 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
; Arithmetic right shift, 64-bit lanes (i8 mask covers 8 elements).
1425 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
1426 ; CHECK-LABEL: test_x86_avx512_psrav_q
1428 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1432 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1433 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
1434 ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
1435 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1439 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1440 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
1441 ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
1442 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1446 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
; Logical right shift, 32-bit lanes.
1448 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
1449 ; CHECK-LABEL: test_x86_avx512_psrlv_d
1451 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1455 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1456 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
1457 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
1458 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1462 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1463 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
1464 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1465 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1469 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
; Logical right shift, 64-bit lanes.
1471 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
1472 ; CHECK-LABEL: test_x86_avx512_psrlv_q
1474 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1478 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1479 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
1480 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
1481 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1485 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1486 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
1487 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1488 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1492 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
; Memory-operand form: the shift counts are loaded, so the instruction
; should fold the load (exact CHECK line is not in this excerpt).
1494 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
1495 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
1497 %b = load <8 x i64>, <8 x i64>* %ptr
1498 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
; --- Static-rounding (embedded RC) FP arithmetic tests ---
; The trailing i32 operand selects the rounding mode; the CHECK lines show
; the mapping: 0 -> {rn-sae}, 1 -> {rd-sae}, 2 -> {ru-sae}, 3 -> {rz-sae}.
; All tests here are unmasked (mask = -1) with a zero passthru, so the
; expected encodings carry no {%k1} writemask bits.
1502 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1503 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1504 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
; vsubps with each of the four rounding modes.
1506 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
1507 ; CHECK-LABEL: test_vsubps_rn
1508 ; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
1509 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1510 <16 x float> zeroinitializer, i16 -1, i32 0)
1511 ret <16 x float> %res
1514 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
1515 ; CHECK-LABEL: test_vsubps_rd
1516 ; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
1517 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1518 <16 x float> zeroinitializer, i16 -1, i32 1)
1519 ret <16 x float> %res
1522 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
1523 ; CHECK-LABEL: test_vsubps_ru
1524 ; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
1525 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1526 <16 x float> zeroinitializer, i16 -1, i32 2)
1527 ret <16 x float> %res
1530 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
1531 ; CHECK-LABEL: test_vsubps_rz
1532 ; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
1533 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1534 <16 x float> zeroinitializer, i16 -1, i32 3)
1535 ret <16 x float> %res
; vmulps with each of the four rounding modes.
1538 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
1539 ; CHECK-LABEL: test_vmulps_rn
1540 ; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
1541 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1542 <16 x float> zeroinitializer, i16 -1, i32 0)
1543 ret <16 x float> %res
1546 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
1547 ; CHECK-LABEL: test_vmulps_rd
1548 ; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
1549 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1550 <16 x float> zeroinitializer, i16 -1, i32 1)
1551 ret <16 x float> %res
1554 define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
1555 ; CHECK-LABEL: test_vmulps_ru
1556 ; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
1557 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1558 <16 x float> zeroinitializer, i16 -1, i32 2)
1559 ret <16 x float> %res
1562 define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
1563 ; CHECK-LABEL: test_vmulps_rz
1564 ; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
1565 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1566 <16 x float> zeroinitializer, i16 -1, i32 3)
1567 ret <16 x float> %res
; --- Masked vmulps with static rounding ---
; First group: zero passthru + live mask, so CHECK expects {%k1} {z}.
1571 define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1572 ; CHECK-LABEL: test_vmulps_mask_rn
1573 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
1574 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1575 <16 x float> zeroinitializer, i16 %mask, i32 0)
1576 ret <16 x float> %res
1579 define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1580 ; CHECK-LABEL: test_vmulps_mask_rd
1581 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
1582 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1583 <16 x float> zeroinitializer, i16 %mask, i32 1)
1584 ret <16 x float> %res
1587 define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1588 ; CHECK-LABEL: test_vmulps_mask_ru
1589 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
1590 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1591 <16 x float> zeroinitializer, i16 %mask, i32 2)
1592 ret <16 x float> %res
1595 define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1596 ; CHECK-LABEL: test_vmulps_mask_rz
1597 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
1598 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1599 <16 x float> zeroinitializer, i16 %mask, i32 3)
1600 ret <16 x float> %res
; Second group: non-zero passthru, so CHECK expects merge-masking ({%k1}
; with no {z}) and the destination register differs (zmm2 passthru).
1603 ;; With Passthru value
1604 define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1605 ; CHECK-LABEL: test_vmulps_mask_passthru_rn
1606 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
1607 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1608 <16 x float> %passthru, i16 %mask, i32 0)
1609 ret <16 x float> %res
1612 define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1613 ; CHECK-LABEL: test_vmulps_mask_passthru_rd
1614 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
1615 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1616 <16 x float> %passthru, i16 %mask, i32 1)
1617 ret <16 x float> %res
1620 define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1621 ; CHECK-LABEL: test_vmulps_mask_passthru_ru
1622 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
1623 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1624 <16 x float> %passthru, i16 %mask, i32 2)
1625 ret <16 x float> %res
1628 define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1629 ; CHECK-LABEL: test_vmulps_mask_passthru_rz
1630 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
1631 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1632 <16 x float> %passthru, i16 %mask, i32 3)
1633 ret <16 x float> %res
; --- Zero-masked vmulpd with static rounding ---
; Double-precision variant of the tests above: i8 mask over 8 lanes,
; zero passthru, so CHECK expects {%k1} {z} for each rounding mode.
1637 define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1638 ; CHECK-LABEL: test_vmulpd_mask_rn
1639 ; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
1640 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1641 <8 x double> zeroinitializer, i8 %mask, i32 0)
1642 ret <8 x double> %res
1645 define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1646 ; CHECK-LABEL: test_vmulpd_mask_rd
1647 ; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
1648 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1649 <8 x double> zeroinitializer, i8 %mask, i32 1)
1650 ret <8 x double> %res
1653 define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1654 ; CHECK-LABEL: test_vmulpd_mask_ru
1655 ; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
1656 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1657 <8 x double> zeroinitializer, i8 %mask, i32 2)
1658 ret <8 x double> %res
1661 define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1662 ; CHECK-LABEL: test_vmulpd_mask_rz
1663 ; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
1664 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1665 <8 x double> zeroinitializer, i8 %mask, i32 3)
1666 ret <8 x double> %res
; --- Masked bitwise logic tests (vpxord/vpord/vpandd, vpxorq/vporq/vpandq) ---
; Each pair checks the unmasked form (mask = -1, zero passthru) and the
; merge-masked form (passthru = %passThru, CHECK expects {%k1}).
1669 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
1670 ;CHECK-LABEL: test_xor_epi32
1671 ;CHECK: vpxord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
1672 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1673 ret < 16 x i32> %res
1676 define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1677 ;CHECK-LABEL: test_mask_xor_epi32
1678 ;CHECK: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
1679 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1680 ret < 16 x i32> %res
1683 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1685 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
1686 ;CHECK-LABEL: test_or_epi32
1687 ;CHECK: vpord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
1688 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1689 ret < 16 x i32> %res
1692 define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1693 ;CHECK-LABEL: test_mask_or_epi32
1694 ;CHECK: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
1695 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1696 ret < 16 x i32> %res
1699 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1701 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
1702 ;CHECK-LABEL: test_and_epi32
1703 ;CHECK: vpandd {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
1704 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1705 ret < 16 x i32> %res
1708 define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1709 ;CHECK-LABEL: test_mask_and_epi32
1710 ;CHECK: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
1711 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1712 ret < 16 x i32> %res
1715 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; 64-bit lane variants (q suffix, i8 mask).
1717 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
1718 ;CHECK-LABEL: test_xor_epi64
1719 ;CHECK: vpxorq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
1720 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1724 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1725 ;CHECK-LABEL: test_mask_xor_epi64
1726 ;CHECK: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
1727 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1731 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1733 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
1734 ;CHECK-LABEL: test_or_epi64
1735 ;CHECK: vporq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
1736 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1740 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1741 ;CHECK-LABEL: test_mask_or_epi64
1742 ;CHECK: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
1743 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1747 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1749 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
1750 ;CHECK-LABEL: test_and_epi64
1751 ;CHECK: vpandq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
1752 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1756 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1757 ;CHECK-LABEL: test_mask_and_epi64
1758 ;CHECK: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
1759 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1763 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; --- vpaddd test matrix ---
; Suffix key (used by all padd/psub matrices below): rr = reg/reg,
; rm = reg/mem (folded load from (%rdi)), rmb = reg/broadcast ({1to16}
; of a scalar load); trailing k = merge-mask ({%k1} into passthru),
; kz = zero-mask ({%k1} {z}), no suffix = unmasked (mask = -1).
1766 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1767 ;CHECK-LABEL: test_mask_add_epi32_rr
1768 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
1769 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1770 ret < 16 x i32> %res
1773 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1774 ;CHECK-LABEL: test_mask_add_epi32_rrk
1775 ;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
1776 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1777 ret < 16 x i32> %res
1780 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1781 ;CHECK-LABEL: test_mask_add_epi32_rrkz
1782 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
1783 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1784 ret < 16 x i32> %res
1787 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1788 ;CHECK-LABEL: test_mask_add_epi32_rm
1789 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
1790 %b = load <16 x i32>, <16 x i32>* %ptr_b
1791 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1792 ret < 16 x i32> %res
1795 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1796 ;CHECK-LABEL: test_mask_add_epi32_rmk
1797 ;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
1798 %b = load <16 x i32>, <16 x i32>* %ptr_b
1799 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1800 ret < 16 x i32> %res
1803 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1804 ;CHECK-LABEL: test_mask_add_epi32_rmkz
1805 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
1806 %b = load <16 x i32>, <16 x i32>* %ptr_b
1807 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1808 ret < 16 x i32> %res
; Broadcast forms: insertelement + zero-index shufflevector builds the
; splat that should fold into the {1to16} embedded-broadcast operand.
1811 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1812 ;CHECK-LABEL: test_mask_add_epi32_rmb
1813 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
1814 %q = load i32, i32* %ptr_b
1815 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1816 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1817 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1818 ret < 16 x i32> %res
1821 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1822 ;CHECK-LABEL: test_mask_add_epi32_rmbk
1823 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
1824 %q = load i32, i32* %ptr_b
1825 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1826 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1827 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1828 ret < 16 x i32> %res
1831 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1832 ;CHECK-LABEL: test_mask_add_epi32_rmbkz
1833 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
1834 %q = load i32, i32* %ptr_b
1835 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1836 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1837 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1838 ret < 16 x i32> %res
1841 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; --- vpsubd test matrix (same rr/rm/rmb x -/k/kz scheme as vpaddd above) ---
1843 define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1844 ;CHECK-LABEL: test_mask_sub_epi32_rr
1845 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
1846 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1847 ret < 16 x i32> %res
1850 define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1851 ;CHECK-LABEL: test_mask_sub_epi32_rrk
1852 ;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
1853 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1854 ret < 16 x i32> %res
1857 define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1858 ;CHECK-LABEL: test_mask_sub_epi32_rrkz
1859 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
1860 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1861 ret < 16 x i32> %res
1864 define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1865 ;CHECK-LABEL: test_mask_sub_epi32_rm
1866 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
1867 %b = load <16 x i32>, <16 x i32>* %ptr_b
1868 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1869 ret < 16 x i32> %res
1872 define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1873 ;CHECK-LABEL: test_mask_sub_epi32_rmk
1874 ;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
1875 %b = load <16 x i32>, <16 x i32>* %ptr_b
1876 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1877 ret < 16 x i32> %res
1880 define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1881 ;CHECK-LABEL: test_mask_sub_epi32_rmkz
1882 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
1883 %b = load <16 x i32>, <16 x i32>* %ptr_b
1884 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1885 ret < 16 x i32> %res
; Broadcast forms: scalar load splatted to 16 lanes, expected to fold
; into the {1to16} embedded-broadcast operand.
1888 define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1889 ;CHECK-LABEL: test_mask_sub_epi32_rmb
1890 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
1891 %q = load i32, i32* %ptr_b
1892 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1893 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1894 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1895 ret < 16 x i32> %res
1898 define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1899 ;CHECK-LABEL: test_mask_sub_epi32_rmbk
1900 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
1901 %q = load i32, i32* %ptr_b
1902 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1903 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1904 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1905 ret < 16 x i32> %res
1908 define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1909 ;CHECK-LABEL: test_mask_sub_epi32_rmbkz
1910 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
1911 %q = load i32, i32* %ptr_b
1912 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1913 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1914 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1915 ret < 16 x i32> %res
1918 declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; --- vpaddq test matrix (64-bit lanes: i8 mask, {1to8} broadcast) ---
; NOTE(review): ret/closing-brace lines are absent from this excerpt.
1920 define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
1921 ;CHECK-LABEL: test_mask_add_epi64_rr
1922 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
1923 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1927 define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1928 ;CHECK-LABEL: test_mask_add_epi64_rrk
1929 ;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
1930 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1934 define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1935 ;CHECK-LABEL: test_mask_add_epi64_rrkz
1936 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
1937 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1941 define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
1942 ;CHECK-LABEL: test_mask_add_epi64_rm
1943 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
1944 %b = load <8 x i64>, <8 x i64>* %ptr_b
1945 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1949 define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1950 ;CHECK-LABEL: test_mask_add_epi64_rmk
1951 ;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
1952 %b = load <8 x i64>, <8 x i64>* %ptr_b
1953 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1957 define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
1958 ;CHECK-LABEL: test_mask_add_epi64_rmkz
1959 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
1960 %b = load <8 x i64>, <8 x i64>* %ptr_b
1961 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
; Broadcast forms: scalar i64 splatted to 8 lanes, expected to fold
; into the {1to8} embedded-broadcast operand.
1965 define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
1966 ;CHECK-LABEL: test_mask_add_epi64_rmb
1967 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
1968 %q = load i64, i64* %ptr_b
1969 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1970 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1971 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1975 define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1976 ;CHECK-LABEL: test_mask_add_epi64_rmbk
1977 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
1978 %q = load i64, i64* %ptr_b
1979 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1980 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1981 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1985 define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
1986 ;CHECK-LABEL: test_mask_add_epi64_rmbkz
1987 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
1988 %q = load i64, i64* %ptr_b
1989 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1990 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1991 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1995 declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; --- vpsubq test matrix (same rr/rm/rmb x -/k/kz scheme, 64-bit lanes) ---
; NOTE(review): ret/closing-brace lines are absent from this excerpt.
1997 define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
1998 ;CHECK-LABEL: test_mask_sub_epi64_rr
1999 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
2000 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2004 define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2005 ;CHECK-LABEL: test_mask_sub_epi64_rrk
2006 ;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
2007 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2011 define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
2012 ;CHECK-LABEL: test_mask_sub_epi64_rrkz
2013 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
2014 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2018 define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
2019 ;CHECK-LABEL: test_mask_sub_epi64_rm
2020 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
2021 %b = load <8 x i64>, <8 x i64>* %ptr_b
2022 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2026 define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2027 ;CHECK-LABEL: test_mask_sub_epi64_rmk
2028 ;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
2029 %b = load <8 x i64>, <8 x i64>* %ptr_b
2030 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2034 define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
2035 ;CHECK-LABEL: test_mask_sub_epi64_rmkz
2036 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
2037 %b = load <8 x i64>, <8 x i64>* %ptr_b
2038 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
; Broadcast forms ({1to8} embedded-broadcast operand).
2042 define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
2043 ;CHECK-LABEL: test_mask_sub_epi64_rmb
2044 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
2045 %q = load i64, i64* %ptr_b
2046 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2047 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2048 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2052 define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2053 ;CHECK-LABEL: test_mask_sub_epi64_rmbk
2054 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
2055 %q = load i64, i64* %ptr_b
2056 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2057 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2058 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2062 define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2063 ;CHECK-LABEL: test_mask_sub_epi64_rmbkz
2064 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
2065 %q = load i64, i64* %ptr_b
2066 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2067 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2068 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2072 declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; --- vpmuldq test matrix ---
; Signed multiply of the even 32-bit lanes producing 64-bit results:
; inputs are <16 x i32>, output is <8 x i64>, i8 mask over 8 lanes.
2074 define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
2075 ;CHECK-LABEL: test_mask_mul_epi32_rr
2076 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
2077 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2081 define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
2082 ;CHECK-LABEL: test_mask_mul_epi32_rrk
2083 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
2084 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2088 define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
2089 ;CHECK-LABEL: test_mask_mul_epi32_rrkz
2090 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
2091 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2095 define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2096 ;CHECK-LABEL: test_mask_mul_epi32_rm
2097 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
2098 %b = load <16 x i32>, <16 x i32>* %ptr_b
2099 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2103 define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2104 ;CHECK-LABEL: test_mask_mul_epi32_rmk
2105 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
2106 %b = load <16 x i32>, <16 x i32>* %ptr_b
2107 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2111 define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
2112 ;CHECK-LABEL: test_mask_mul_epi32_rmkz
2113 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
2114 %b = load <16 x i32>, <16 x i32>* %ptr_b
2115 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2119 define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
2120 ;CHECK-LABEL: test_mask_mul_epi32_rmb
2121 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
2122 %q = load i64, i64* %ptr_b
2123 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2124 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2125 %b = bitcast <8 x i64> %b64 to <16 x i32>
2126 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2130 define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2131 ;CHECK-LABEL: test_mask_mul_epi32_rmbk
2132 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
2133 %q = load i64, i64* %ptr_b
2134 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2135 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2136 %b = bitcast <8 x i64> %b64 to <16 x i32>
2137 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2141 define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
2142 ;CHECK-LABEL: test_mask_mul_epi32_rmbkz
2143 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
2144 %q = load i64, i64* %ptr_b
2145 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2146 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2147 %b = bitcast <8 x i64> %b64 to <16 x i32>
2148 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2152 declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; ---------------------------------------------------------------------------
; VPMULUDQ (llvm.x86.avx512.mask.pmulu.dq.512) — unsigned 32x32->64 multiply.
; Mirrors the VPMULDQ group above: rr/rrk/rrkz, rm/rmk/rmkz and broadcast
; rmb/rmbk/rmbkz forms, with the exact EVEX encodings pinned by FileCheck.
; ---------------------------------------------------------------------------
2154 define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
2155 ;CHECK-LABEL: test_mask_mul_epu32_rr
2156 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
2157 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2161 define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
2162 ;CHECK-LABEL: test_mask_mul_epu32_rrk
2163 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
2164 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2168 define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
2169 ;CHECK-LABEL: test_mask_mul_epu32_rrkz
2170 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
2171 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
; Full 512-bit memory-operand forms: the load should fold into the multiply.
2175 define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2176 ;CHECK-LABEL: test_mask_mul_epu32_rm
2177 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
2178 %b = load <16 x i32>, <16 x i32>* %ptr_b
2179 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2183 define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2184 ;CHECK-LABEL: test_mask_mul_epu32_rmk
2185 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
2186 %b = load <16 x i32>, <16 x i32>* %ptr_b
2187 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2191 define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
2192 ;CHECK-LABEL: test_mask_mul_epu32_rmkz
2193 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
2194 %b = load <16 x i32>, <16 x i32>* %ptr_b
2195 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
; Broadcast forms: i64 splat bitcast to <16 x i32>, folded to a {1to8} operand.
2199 define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
2200 ;CHECK-LABEL: test_mask_mul_epu32_rmb
2201 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
2202 %q = load i64, i64* %ptr_b
2203 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2204 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2205 %b = bitcast <8 x i64> %b64 to <16 x i32>
2206 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2210 define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2211 ;CHECK-LABEL: test_mask_mul_epu32_rmbk
2212 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
2213 %q = load i64, i64* %ptr_b
2214 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2215 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2216 %b = bitcast <8 x i64> %b64 to <16 x i32>
2217 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2221 define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
2222 ;CHECK-LABEL: test_mask_mul_epu32_rmbkz
2223 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
2224 %q = load i64, i64* %ptr_b
2225 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2226 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2227 %b = bitcast <8 x i64> %b64 to <16 x i32>
2228 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2232 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; ---------------------------------------------------------------------------
; VPMULLD (llvm.x86.avx512.mask.pmull.d.512) — 32-bit low multiply, 16 lanes,
; so the mask is i16.  Same rr/rm/rmb matrix as the groups above; broadcast
; forms splat a single i32 to {1to16}.  Exact EVEX encodings pinned.
; ---------------------------------------------------------------------------
2234 define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
2235 ;CHECK-LABEL: test_mask_mullo_epi32_rr_512
2236 ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
2237 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2241 define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2242 ;CHECK-LABEL: test_mask_mullo_epi32_rrk_512
2243 ;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
2244 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2245 ret < 16 x i32> %res
2248 define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2249 ;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512
2250 ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
2251 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2252 ret < 16 x i32> %res
; Full 512-bit memory-operand forms: the load should fold into the multiply.
2255 define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
2256 ;CHECK-LABEL: test_mask_mullo_epi32_rm_512
2257 ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
2258 %b = load <16 x i32>, <16 x i32>* %ptr_b
2259 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2260 ret < 16 x i32> %res
2263 define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2264 ;CHECK-LABEL: test_mask_mullo_epi32_rmk_512
2265 ;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
2266 %b = load <16 x i32>, <16 x i32>* %ptr_b
2267 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2268 ret < 16 x i32> %res
2271 define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2272 ;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512
2273 ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
2274 %b = load <16 x i32>, <16 x i32>* %ptr_b
2275 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2276 ret < 16 x i32> %res
; Broadcast forms: i32 splat folded to a {1to16} embedded-broadcast operand.
2279 define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
2280 ;CHECK-LABEL: test_mask_mullo_epi32_rmb_512
2281 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
2282 %q = load i32, i32* %ptr_b
2283 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2284 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2285 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2286 ret < 16 x i32> %res
2289 define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2290 ;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512
2291 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
2292 %q = load i32, i32* %ptr_b
2293 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2294 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2295 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2296 ret < 16 x i32> %res
2299 define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2300 ;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512
2301 ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
2302 %q = load i32, i32* %ptr_b
2303 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2304 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2305 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2306 ret < 16 x i32> %res
2309 declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; ---------------------------------------------------------------------------
; VADDPS (llvm.x86.avx512.mask.add.ps.512) with an explicit rounding-mode
; operand (last i32 argument): 0 = {rn-sae}, 1 = {rd-sae}, 2 = {ru-sae},
; 3 = {rz-sae}, 4 = current rounding mode (no static-rounding suffix).
; Covers zero-masked (maskz), merge-masked (mask) and unmasked (mask = -1)
; variants for each mode.
; ---------------------------------------------------------------------------
2311 define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2312 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae
2313 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2314 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2315 ret <16 x float> %res
2317 define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2318 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae
; NOTE(review): unlike the sibling rn/ru/rz maskz cases, this CHECK does not
; verify the "{%k1} {z}" masking suffix — possibly an oversight; confirm.
2319 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
2320 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
2321 ret <16 x float> %res
2323 define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2324 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae
2325 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2326 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2327 ret <16 x float> %res
2330 define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2331 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae
2332 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2333 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2334 ret <16 x float> %res
2338 define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2339 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_current
2340 ;CHECK: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
2341 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2342 ret <16 x float> %res
; Merge-masking variants: result merged into %src, checked as "%zmm2 {%k1}".
2345 define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2346 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae
2347 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2348 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2349 ret <16 x float> %res
2351 define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2352 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae
2353 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2354 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2355 ret <16 x float> %res
2357 define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2358 ;CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae
2359 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2360 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2361 ret <16 x float> %res
2364 define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2365 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae
2366 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2367 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2368 ret <16 x float> %res
2372 define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2373 ;CHECK-LABEL: test_mm512_mask_add_round_ps_current
2374 ;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1}
2375 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2376 ret <16 x float> %res
; Unmasked variants (mask = -1): no masking suffix expected in the asm.
2380 define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2381 ;CHECK-LABEL: test_mm512_add_round_ps_rn_sae
2382 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
2383 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2384 ret <16 x float> %res
2386 define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2387 ;CHECK-LABEL: test_mm512_add_round_ps_rd_sae
2388 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
2389 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2390 ret <16 x float> %res
2392 define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2393 ;CHECK-LABEL: test_mm512_add_round_ps_ru_sae
2394 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
2395 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2396 ret <16 x float> %res
2399 define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2400 ;CHECK-LABEL: test_mm512_add_round_ps_rz_sae
2401 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
2402 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2403 ret <16 x float> %res
2406 define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2407 ;CHECK-LABEL: test_mm512_add_round_ps_current
2408 ;CHECK: vaddps %zmm1, %zmm0, %zmm0
2409 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2410 ret <16 x float> %res
2412 declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ---------------------------------------------------------------------------
; VSUBPS (llvm.x86.avx512.mask.sub.ps.512) rounding-mode tests: merge-masked
; ("mask_") and unmasked ("", mask = -1) variants for rounding operand
; 0..3 ({rn,rd,ru,rz}-sae) and 4 (current mode, no static-rounding suffix).
; ---------------------------------------------------------------------------
2414 define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2415 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae
2416 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2417 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2418 ret <16 x float> %res
2420 define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2421 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae
2422 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2423 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2424 ret <16 x float> %res
2426 define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2427 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae
2428 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2429 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2430 ret <16 x float> %res
2433 define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2434 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae
2435 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2436 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2437 ret <16 x float> %res
2441 define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2442 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_current
2443 ;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1}
2444 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2445 ret <16 x float> %res
; Unmasked variants (mask = -1): no masking suffix expected in the asm.
2448 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2449 ;CHECK-LABEL: test_mm512_sub_round_ps_rn_sae
2450 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
2451 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2452 ret <16 x float> %res
2454 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2455 ;CHECK-LABEL: test_mm512_sub_round_ps_rd_sae
2456 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
2457 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2458 ret <16 x float> %res
2460 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2461 ;CHECK-LABEL: test_mm512_sub_round_ps_ru_sae
2462 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
2463 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2464 ret <16 x float> %res
2467 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2468 ;CHECK-LABEL: test_mm512_sub_round_ps_rz_sae
2469 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
2470 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2471 ret <16 x float> %res
2474 define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2475 ;CHECK-LABEL: test_mm512_sub_round_ps_current
2476 ;CHECK: vsubps %zmm1, %zmm0, %zmm0
2477 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2478 ret <16 x float> %res
; ---------------------------------------------------------------------------
; VDIVPS (llvm.x86.avx512.mask.div.ps.512) rounding-mode tests: zero-masked
; ("maskz_"), merge-masked ("mask_") and unmasked (mask = -1) variants for
; rounding operand 0..3 ({rn,rd,ru,rz}-sae) and 4 (current mode).
; ---------------------------------------------------------------------------
2481 define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2482 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae
2483 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2484 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2485 ret <16 x float> %res
2487 define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2488 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae
; NOTE(review): unlike the sibling rn/ru/rz maskz cases, this CHECK does not
; verify the "{%k1} {z}" masking suffix — possibly an oversight; confirm.
2489 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
2490 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
2491 ret <16 x float> %res
2493 define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2494 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae
2495 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2496 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2497 ret <16 x float> %res
2500 define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2501 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae
2502 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2503 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2504 ret <16 x float> %res
2508 define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2509 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_current
2510 ;CHECK: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
2511 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2512 ret <16 x float> %res
; Merge-masking variants: result merged into %src, checked as "%zmm2 {%k1}".
2515 define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2516 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae
2517 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2518 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2519 ret <16 x float> %res
2521 define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2522 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae
2523 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2524 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2525 ret <16 x float> %res
2527 define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2528 ;CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae
2529 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2530 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2531 ret <16 x float> %res
2534 define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2535 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae
2536 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2537 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2538 ret <16 x float> %res
2542 define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2543 ;CHECK-LABEL: test_mm512_mask_div_round_ps_current
2544 ;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1}
2545 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2546 ret <16 x float> %res
; Unmasked variants (mask = -1): no masking suffix expected in the asm.
2550 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2551 ;CHECK-LABEL: test_mm512_div_round_ps_rn_sae
2552 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
2553 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2554 ret <16 x float> %res
2556 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2557 ;CHECK-LABEL: test_mm512_div_round_ps_rd_sae
2558 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
2559 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2560 ret <16 x float> %res
2562 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2563 ;CHECK-LABEL: test_mm512_div_round_ps_ru_sae
2564 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
2565 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2566 ret <16 x float> %res
2569 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2570 ;CHECK-LABEL: test_mm512_div_round_ps_rz_sae
2571 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
2572 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2573 ret <16 x float> %res
2576 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2577 ;CHECK-LABEL: test_mm512_div_round_ps_current
2578 ;CHECK: vdivps %zmm1, %zmm0, %zmm0
2579 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2580 ret <16 x float> %res
2582 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ---------------------------------------------------------------------------
; VMINPS (llvm.x86.avx512.mask.min.ps.512).  Min/max take no rounding mode,
; only suppress-all-exceptions: last operand 8 = {sae}, 4 = current (no
; suffix).  Zero-masked, merge-masked and unmasked variants of each.
; ---------------------------------------------------------------------------
2584 define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2585 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_sae
2586 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2587 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2588 ret <16 x float> %res
2591 define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2592 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_current
2593 ;CHECK: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
2594 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2595 ret <16 x float> %res
2598 define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2599 ;CHECK-LABEL: test_mm512_mask_min_round_ps_sae
2600 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2601 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2602 ret <16 x float> %res
2605 define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2606 ;CHECK-LABEL: test_mm512_mask_min_round_ps_current
2607 ;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1}
2608 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2609 ret <16 x float> %res
2612 define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2613 ;CHECK-LABEL: test_mm512_min_round_ps_sae
2614 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0
2615 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2616 ret <16 x float> %res
2619 define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2620 ;CHECK-LABEL: test_mm512_min_round_ps_current
2621 ;CHECK: vminps %zmm1, %zmm0, %zmm0
2622 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2623 ret <16 x float> %res
2625 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ---------------------------------------------------------------------------
; VMAXPS (llvm.x86.avx512.mask.max.ps.512).  Same shape as the VMINPS group:
; {sae} (operand 8) and current-mode (operand 4) forms, in zero-masked,
; merge-masked and unmasked variants.
; ---------------------------------------------------------------------------
2627 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2628 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_sae
2629 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2630 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2631 ret <16 x float> %res
2634 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2635 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_current
2636 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
2637 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2638 ret <16 x float> %res
2641 define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2642 ;CHECK-LABEL: test_mm512_mask_max_round_ps_sae
2643 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2644 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2645 ret <16 x float> %res
2648 define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2649 ;CHECK-LABEL: test_mm512_mask_max_round_ps_current
2650 ;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
2651 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2652 ret <16 x float> %res
2655 define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2656 ;CHECK-LABEL: test_mm512_max_round_ps_sae
2657 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0
2658 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2659 ret <16 x float> %res
2662 define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2663 ;CHECK-LABEL: test_mm512_max_round_ps_current
2664 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0
2665 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2666 ret <16 x float> %res
2668 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; --- llvm.x86.avx512.mask.add.ss.round: each static rounding mode
; (0=rn, 1=rd, 2=ru, 3=rz, 4=current) plus maskz and unmasked forms ---
2670 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2672 define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2673 ; CHECK-LABEL: test_mask_add_ss_rn
2674 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2675 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
2676 ret <4 x float> %res
2679 define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2680 ; CHECK-LABEL: test_mask_add_ss_rd
2681 ; CHECK: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2682 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
2683 ret <4 x float> %res
2686 define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2687 ; CHECK-LABEL: test_mask_add_ss_ru
2688 ; CHECK: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2689 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
2690 ret <4 x float> %res
2693 define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2694 ; CHECK-LABEL: test_mask_add_ss_rz
2695 ; CHECK: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2696 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
2697 ret <4 x float> %res
2700 define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2701 ; CHECK-LABEL: test_mask_add_ss_current
2702 ; CHECK: vaddss %xmm1, %xmm0, %xmm2 {%k1}
2703 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2704 ret <4 x float> %res
2707 define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2708 ; CHECK-LABEL: test_maskz_add_ss_rn
2709 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2710 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
2711 ret <4 x float> %res
2714 define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
2715 ; CHECK-LABEL: test_add_ss_rn
2716 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
2717 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
2718 ret <4 x float> %res
; --- llvm.x86.avx512.mask.add.sd.round: double-precision twin of the ss tests above ---
2721 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2723 define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2724 ; CHECK-LABEL: test_mask_add_sd_rn
2725 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2726 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
2727 ret <2 x double> %res
2730 define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2731 ; CHECK-LABEL: test_mask_add_sd_rd
2732 ; CHECK: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2733 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
2734 ret <2 x double> %res
2737 define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2738 ; CHECK-LABEL: test_mask_add_sd_ru
2739 ; CHECK: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2740 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
2741 ret <2 x double> %res
2744 define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2745 ; CHECK-LABEL: test_mask_add_sd_rz
2746 ; CHECK: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2747 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
2748 ret <2 x double> %res
2751 define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2752 ; CHECK-LABEL: test_mask_add_sd_current
2753 ; CHECK: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
2754 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2755 ret <2 x double> %res
2758 define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2759 ; CHECK-LABEL: test_maskz_add_sd_rn
2760 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2761 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
2762 ret <2 x double> %res
2765 define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
2766 ; CHECK-LABEL: test_add_sd_rn
2767 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
2768 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
2769 ret <2 x double> %res
; --- llvm.x86.avx512.mask.max.ss.round: mask/maskz/unmasked with {sae} (rc=8) and current (rc=4) ---
2772 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2774 define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2775 ; CHECK-LABEL: test_mask_max_ss_sae
2776 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2777 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
2778 ret <4 x float> %res
2781 define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2782 ; CHECK-LABEL: test_maskz_max_ss_sae
2783 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2784 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
2785 ret <4 x float> %res
2788 define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
2789 ; CHECK-LABEL: test_max_ss_sae
2790 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0
2791 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
2792 ret <4 x float> %res
2795 define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2796 ; CHECK-LABEL: test_mask_max_ss
2797 ; CHECK: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
2798 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2799 ret <4 x float> %res
2802 define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2803 ; CHECK-LABEL: test_maskz_max_ss
2804 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
2805 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
2806 ret <4 x float> %res
2809 define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
2810 ; CHECK-LABEL: test_max_ss
2811 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0
2812 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
2813 ret <4 x float> %res
; --- llvm.x86.avx512.mask.max.sd.round: double-precision twin of the max.ss tests above ---
2815 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2817 define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2818 ; CHECK-LABEL: test_mask_max_sd_sae
2819 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2820 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
2821 ret <2 x double> %res
2824 define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2825 ; CHECK-LABEL: test_maskz_max_sd_sae
2826 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2827 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
2828 ret <2 x double> %res
2831 define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
2832 ; CHECK-LABEL: test_max_sd_sae
2833 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
2834 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
2835 ret <2 x double> %res
2838 define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2839 ; CHECK-LABEL: test_mask_max_sd
2840 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
2841 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2842 ret <2 x double> %res
2845 define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2846 ; CHECK-LABEL: test_maskz_max_sd
2847 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
2848 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
2849 ret <2 x double> %res
2852 define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
2853 ; CHECK-LABEL: test_max_sd
2854 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0
2855 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
2856 ret <2 x double> %res
; --- llvm.x86.avx512.cvtsi2sd/ss (signed int -> scalar fp) with static {rz-sae} rounding (rc=3) ---
2859 define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
2860 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
2862 ; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
2864 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<2 x double>> [#uses=1]
2865 ret <2 x double> %res
2867 declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone
2869 define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
2870 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
2872 ; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
2874 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<2 x double>> [#uses=1]
2875 ret <2 x double> %res
2877 declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
2879 define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
2880 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
2882 ; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
2884 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<4 x float>> [#uses=1]
2885 ret <4 x float> %res
2887 declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
2889 define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
2890 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
2892 ; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
2894 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<4 x float>> [#uses=1]
2895 ret <4 x float> %res
2897 declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
; --- llvm.x86.avx512.cvtusi2ss / cvtusi642ss (unsigned int -> float): register and memory
; operands, {rd-sae} (rc=1) vs. current rounding (rc=4) ---
2899 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
2900 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
2902 ; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
2905 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<4 x float>> [#uses=1]
2906 ret <4 x float> %res
2909 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
2910 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
2912 ; CHECK-NEXT: movl (%rdi), %eax
2913 ; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
2916 %b = load i32, i32* %ptr
2917 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<4 x float>> [#uses=1]
2918 ret <4 x float> %res
2921 define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
2922 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
2924 ; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
2927 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<4 x float>> [#uses=1]
2928 ret <4 x float> %res
2931 define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
2932 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
2934 ; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
2937 %b = load i32, i32* %ptr
2938 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<4 x float>> [#uses=1]
2939 ret <4 x float> %res
2941 declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
2943 define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
2944 ; CHECK-LABEL: _mm_cvt_roundu64_ss:
2946 ; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
2949 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<4 x float>> [#uses=1]
2950 ret <4 x float> %res
2953 define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
2954 ; CHECK-LABEL: _mm_cvtu64_ss:
2956 ; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
2959 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<4 x float>> [#uses=1]
2960 ret <4 x float> %res
2962 declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone
; --- llvm.x86.avx512.cvtusi2sd / cvtusi642sd (unsigned int -> double) ---
2964 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
2965 ; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
2967 ; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
2970 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<2 x double>> [#uses=1]
2971 ret <2 x double> %res
2973 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
; NOTE(review): the next two test names appear swapped relative to the ss pair above
; (@_mm_cvt_roundu64_ss tests {rd-sae}/rc=1 and @_mm_cvtu64_ss tests rc=4, but here
; "cvtu64_sd" tests rc=1 and "cvt_roundu64_sd" tests rc=4) — confirm and rename the pair.
2975 define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
2976 ; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
2978 ; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
2981 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<2 x double>> [#uses=1]
2982 ret <2 x double> %res
2985 define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
2986 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
2988 ; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
2991 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<2 x double>> [#uses=1]
2992 ret <2 x double> %res
2994 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
; --- Integer min/max via mask intrinsics, unmasked (mask = -1): instruction + EVEX encoding checks ---
2996 define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
2997 ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
2998 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
2999 <8 x i64>zeroinitializer, i8 -1)
3002 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3004 define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
3005 ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
3006 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
3007 <16 x i32>zeroinitializer, i16 -1)
3010 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3012 define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
3013 ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
3014 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
3015 <16 x i32>zeroinitializer, i16 -1)
3018 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; --- mask.pmaxs / mask.pmaxu: masked + unmasked calls, results summed so both are kept live ---
3020 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_512
3022 ; CHECK: vpmaxsd %zmm
3024 define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3025 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3026 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3027 %res2 = add <16 x i32> %res, %res1
3028 ret <16 x i32> %res2
3031 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_512
3033 ; CHECK: vpmaxsq %zmm
3035 define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3036 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3037 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3038 %res2 = add <8 x i64> %res, %res1
3042 declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3044 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_512
3046 ; CHECK: vpmaxud %zmm
3048 define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3049 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3050 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3051 %res2 = add <16 x i32> %res, %res1
3052 ret <16 x i32> %res2
3055 declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3057 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_512
3059 ; CHECK: vpmaxuq %zmm
3061 define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3062 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3063 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3064 %res2 = add <8 x i64> %res, %res1
; --- mask.pmins / mask.pminu: same masked + unmasked pattern as the pmax tests above ---
3068 declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3070 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_512
3072 ; CHECK: vpminsd %zmm
3074 define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3075 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3076 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3077 %res2 = add <16 x i32> %res, %res1
3078 ret <16 x i32> %res2
3081 declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3083 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_512
3085 ; CHECK: vpminsq %zmm
3087 define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3088 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3089 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3090 %res2 = add <8 x i64> %res, %res1
3094 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_512
3096 ; CHECK: vpminud %zmm
3098 define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3099 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3100 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3101 %res2 = add <16 x i32> %res, %res1
3102 ret <16 x i32> %res2
3105 declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3107 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_512
3109 ; CHECK: vpminuq %zmm
3111 define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3112 %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3113 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3114 %res2 = add <8 x i64> %res, %res1
; --- mask.vpermi2var (two-source permute, index operand overwritten): d/pd/ps/q element types ---
3118 declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3120 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512
3123 ; CHECK: vpermi2d {{.*}}{%k1}
3124 define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3125 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3126 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3127 %res2 = add <16 x i32> %res, %res1
3128 ret <16 x i32> %res2
3131 declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
3133 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512
3136 ; CHECK: vpermi2pd {{.*}}{%k1}
3137 define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
3138 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
3139 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
3140 %res2 = fadd <8 x double> %res, %res1
3141 ret <8 x double> %res2
3144 declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
3146 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512
3149 ; CHECK: vpermi2ps {{.*}}{%k1}
3150 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
3151 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
3152 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
3153 %res2 = fadd <16 x float> %res, %res1
3154 ret <16 x float> %res2
3157 declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3159 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512
3162 ; CHECK: vpermi2q {{.*}}{%k1}
3163 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3164 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3165 %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3166 %res2 = add <8 x i64> %res, %res1
; --- maskz.vpermt2var (two-source permute, zero-masked): d/pd/ps/q element types ---
3170 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3172 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512
3175 ; CHECK: vpermt2d {{.*}}{%k1} {z}
3176 define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3177 %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3178 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3179 %res2 = add <16 x i32> %res, %res1
3180 ret <16 x i32> %res2
3183 declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
3185 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512
3188 ; CHECK: vpermt2pd {{.*}}{%k1} {z}
3189 define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3190 %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3191 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3192 %res2 = fadd <8 x double> %res, %res1
3193 ret <8 x double> %res2
3196 declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
3198 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512
3201 ; CHECK: vpermt2ps {{.*}}{%k1} {z}
3202 define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3203 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3204 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3205 %res2 = fadd <16 x float> %res, %res1
3206 ret <16 x float> %res2
3210 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3212 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512
3215 ; CHECK: vpermt2q {{.*}}{%k1} {z}
3216 define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3217 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3218 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3219 %res2 = add <8 x i64> %res, %res1
; --- mask.vpermt2var.d (merge-masked) and mask.scalef pd/ps (with rounding-control operand) ---
3223 declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3225 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512
3228 ; CHECK: vpermt2d {{.*}}{%k1}
3230 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3231 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3232 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3233 %res2 = add <16 x i32> %res, %res1
3234 ret <16 x i32> %res2
3237 declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
3238 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_512
3241 ; CHECK: vscalefpd{{.*}}{%k1}
3242 define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3243 %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3)
3244 %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
3245 %res2 = fadd <8 x double> %res, %res1
3246 ret <8 x double> %res2
3249 declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
3250 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_512
3253 ; CHECK: vscalefps{{.*}}{%k1}
3254 define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3255 %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2)
3256 %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
3257 %res2 = fadd <16 x float> %res, %res1
3258 ret <16 x float> %res2
; --- mask.unpckh / mask.unpckl pd/ps: masked and unmasked forms; i8 masks go through
; movzbl+kmovw, i16 masks through kmovw directly ---
3261 declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
3263 define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3264 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
3266 ; CHECK-NEXT: movzbl %dil, %eax
3267 ; CHECK-NEXT: kmovw %eax, %k1
3268 ; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1}
3269 ; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm0
3270 %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3271 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3272 %res2 = fadd <8 x double> %res, %res1
3273 ret <8 x double> %res2
3276 declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
3278 define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3279 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
3281 ; CHECK-NEXT: kmovw %edi, %k1
3282 ; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1}
3283 ; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm0
3284 %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3285 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3286 %res2 = fadd <16 x float> %res, %res1
3287 ret <16 x float> %res2
3290 declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
3292 define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3293 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
3295 ; CHECK-NEXT: movzbl %dil, %eax
3296 ; CHECK-NEXT: kmovw %eax, %k1
3297 ; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm2 {%k1}
3298 ; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm0
3299 %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3300 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3301 %res2 = fadd <8 x double> %res, %res1
3302 ret <8 x double> %res2
3305 declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
3307 define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3308 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
3310 ; CHECK-NEXT: kmovw %edi, %k1
3311 ; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1}
3312 ; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm0
3313 %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3314 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3315 %res2 = fadd <16 x float> %res, %res1
3316 ret <16 x float> %res2
; --- mask.punpcklqd / mask.punpckhqd: merge-masked, zero-masked, and unmasked qword unpacks ---
3319 declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3321 define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3322 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
3324 ; CHECK-NEXT: movzbl %dil, %eax
3325 ; CHECK-NEXT: kmovw %eax, %k1
3326 ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1}
3327 ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 {%k1} {z}
3328 ; CHECK-NEXT: vpunpcklqdq {{.*#+}}
3329 ; CHECK: vpaddq %zmm0, %zmm2, %zmm0
3330 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
3332 %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3333 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3334 %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3)
3335 %res3 = add <8 x i64> %res, %res1
3336 %res4 = add <8 x i64> %res2, %res3
3340 declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3342 define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3343 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
3345 ; CHECK-NEXT: movzbl %dil, %eax
3346 ; CHECK-NEXT: kmovw %eax, %k1
3347 ; CHECK-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1}
3348 ; CHECK-NEXT: vpunpckhqdq {{.*#+}}
3349 ; CHECK: vpaddq %zmm0, %zmm2, %zmm0
3351 %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3352 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3353 %res2 = add <8 x i64> %res, %res1
3354 declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3359 define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3360 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
3362 ; CHECK-NEXT: kmovw %edi, %k1
3363 ; CHECK-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1}
3364 ; CHECK-NEXT: vpunpckhdq {{.*#+}}
3365 ; CHECK: vpaddd %zmm0, %zmm2, %zmm0
3367 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3368 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3369 %res2 = add <16 x i32> %res, %res1
3370 ret <16 x i32> %res2
3373 declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3375 define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3376 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
3378 ; CHECK-NEXT: kmovw %edi, %k1
3379 ; CHECK-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1}
3380 ; CHECK-NEXT: vpunpckldq {{.*#+}}
3381 ; CHECK: vpaddd %zmm0, %zmm2, %zmm0
3383 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3384 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3385 %res2 = add <16 x i32> %res, %res1
3386 ret <16 x i32> %res2
3389 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)
3391 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3392 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
3393 ; CHECK: vpmovqb %zmm0, %xmm1 {%k1}
3394 ; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
3395 ; CHECK-NEXT: vpmovqb %zmm0, %xmm0
3396 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3397 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3398 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3399 %res3 = add <16 x i8> %res0, %res1
3400 %res4 = add <16 x i8> %res3, %res2
3404 declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3406 define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3407 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512:
3408 ; CHECK: vpmovqb %zmm0, (%rdi)
3409 ; CHECK: vpmovqb %zmm0, (%rdi) {%k1}
3410 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3411 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3415 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)
3417 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3418 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
3419 ; CHECK: vpmovsqb %zmm0, %xmm1 {%k1}
3420 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
3421 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
3422 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3423 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3424 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3425 %res3 = add <16 x i8> %res0, %res1
3426 %res4 = add <16 x i8> %res3, %res2
3430 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3432 define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3433 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
3434 ; CHECK: vpmovsqb %zmm0, (%rdi)
3435 ; CHECK: vpmovsqb %zmm0, (%rdi) {%k1}
3436 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3437 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3441 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)
3443 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3444 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
3445 ; CHECK: vpmovusqb %zmm0, %xmm1 {%k1}
3446 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
3447 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
3448 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3449 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3450 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3451 %res3 = add <16 x i8> %res0, %res1
3452 %res4 = add <16 x i8> %res3, %res2
3456 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3458 define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3459 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
3460 ; CHECK: vpmovusqb %zmm0, (%rdi)
3461 ; CHECK: vpmovusqb %zmm0, (%rdi) {%k1}
3462 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3463 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3467 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
3469 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3470 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
3471 ; CHECK: vpmovqw %zmm0, %xmm1 {%k1}
3472 ; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
3473 ; CHECK-NEXT: vpmovqw %zmm0, %xmm0
3474 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3475 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3476 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3477 %res3 = add <8 x i16> %res0, %res1
3478 %res4 = add <8 x i16> %res3, %res2
3482 declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3484 define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3485 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512:
3486 ; CHECK: vpmovqw %zmm0, (%rdi)
3487 ; CHECK: vpmovqw %zmm0, (%rdi) {%k1}
3488 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3489 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3493 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)
3495 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3496 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
3497 ; CHECK: vpmovsqw %zmm0, %xmm1 {%k1}
3498 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
3499 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
3500 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3501 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3502 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3503 %res3 = add <8 x i16> %res0, %res1
3504 %res4 = add <8 x i16> %res3, %res2
3508 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3510 define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3511 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
3512 ; CHECK: vpmovsqw %zmm0, (%rdi)
3513 ; CHECK: vpmovsqw %zmm0, (%rdi) {%k1}
3514 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3515 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3519 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)
3521 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3522 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
3523 ; CHECK: vpmovusqw %zmm0, %xmm1 {%k1}
3524 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
3525 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
3526 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3527 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3528 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3529 %res3 = add <8 x i16> %res0, %res1
3530 %res4 = add <8 x i16> %res3, %res2
3534 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3536 define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3537 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
3538 ; CHECK: vpmovusqw %zmm0, (%rdi)
3539 ; CHECK: vpmovusqw %zmm0, (%rdi) {%k1}
3540 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3541 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3545 declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
3547 define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3548 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
3549 ; CHECK: vpmovqd %zmm0, %ymm1 {%k1}
3550 ; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
3551 ; CHECK-NEXT: vpmovqd %zmm0, %ymm0
3552 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3553 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3554 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3555 %res3 = add <8 x i32> %res0, %res1
3556 %res4 = add <8 x i32> %res3, %res2
3560 declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3562 define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3563 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512:
3564 ; CHECK: vpmovqd %zmm0, (%rdi)
3565 ; CHECK: vpmovqd %zmm0, (%rdi) {%k1}
3566 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3567 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3571 declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)
3573 define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3574 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
3575 ; CHECK: vpmovsqd %zmm0, %ymm1 {%k1}
3576 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
3577 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm0
3578 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3579 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3580 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3581 %res3 = add <8 x i32> %res0, %res1
3582 %res4 = add <8 x i32> %res3, %res2
3586 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3588 define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3589 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
3590 ; CHECK: vpmovsqd %zmm0, (%rdi)
3591 ; CHECK: vpmovsqd %zmm0, (%rdi) {%k1}
3592 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3593 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3597 declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)
3599 define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3600 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
3601 ; CHECK: vpmovusqd %zmm0, %ymm1 {%k1}
3602 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
3603 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm0
3604 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3605 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3606 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3607 %res3 = add <8 x i32> %res0, %res1
3608 %res4 = add <8 x i32> %res3, %res2
3612 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3614 define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3615 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
3616 ; CHECK: vpmovusqd %zmm0, (%rdi)
3617 ; CHECK: vpmovusqd %zmm0, (%rdi) {%k1}
3618 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3619 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3623 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
3625 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3626 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
3627 ; CHECK: vpmovdb %zmm0, %xmm1 {%k1}
3628 ; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
3629 ; CHECK-NEXT: vpmovdb %zmm0, %xmm0
3630 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3631 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3632 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3633 %res3 = add <16 x i8> %res0, %res1
3634 %res4 = add <16 x i8> %res3, %res2
3638 declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16)
3640 define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3641 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512:
3642 ; CHECK: vpmovdb %zmm0, (%rdi)
3643 ; CHECK: vpmovdb %zmm0, (%rdi) {%k1}
3644 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3645 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3649 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)
3651 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3652 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
3653 ; CHECK: vpmovsdb %zmm0, %xmm1 {%k1}
3654 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
3655 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
3656 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3657 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3658 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3659 %res3 = add <16 x i8> %res0, %res1
3660 %res4 = add <16 x i8> %res3, %res2
3664 declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)
3666 define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3667 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
3668 ; CHECK: vpmovsdb %zmm0, (%rdi)
3669 ; CHECK: vpmovsdb %zmm0, (%rdi) {%k1}
3670 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3671 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3675 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)
3677 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3678 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
3679 ; CHECK: vpmovusdb %zmm0, %xmm1 {%k1}
3680 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
3681 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
3682 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3683 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3684 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3685 %res3 = add <16 x i8> %res0, %res1
3686 %res4 = add <16 x i8> %res3, %res2
3690 declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)
3692 define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3693 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
3694 ; CHECK: vpmovusdb %zmm0, (%rdi)
3695 ; CHECK: vpmovusdb %zmm0, (%rdi) {%k1}
3696 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3697 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3701 declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)
3703 define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3704 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
3705 ; CHECK: vpmovdw %zmm0, %ymm1 {%k1}
3706 ; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
3707 ; CHECK-NEXT: vpmovdw %zmm0, %ymm0
3708 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3709 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3710 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3711 %res3 = add <16 x i16> %res0, %res1
3712 %res4 = add <16 x i16> %res3, %res2
3713 ret <16 x i16> %res4
3716 declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3718 define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3719 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512:
3720 ; CHECK: vpmovdw %zmm0, (%rdi)
3721 ; CHECK: vpmovdw %zmm0, (%rdi) {%k1}
3722 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3723 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3727 declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)
3729 define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3730 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
3731 ; CHECK: vpmovsdw %zmm0, %ymm1 {%k1}
3732 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
3733 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
3734 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3735 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3736 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3737 %res3 = add <16 x i16> %res0, %res1
3738 %res4 = add <16 x i16> %res3, %res2
3739 ret <16 x i16> %res4
3742 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3744 define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3745 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
3746 ; CHECK: vpmovsdw %zmm0, (%rdi)
3747 ; CHECK: vpmovsdw %zmm0, (%rdi) {%k1}
3748 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3749 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3753 declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)
3755 define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3756 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
3757 ; CHECK: vpmovusdw %zmm0, %ymm1 {%k1}
3758 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
3759 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
3760 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3761 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3762 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3763 %res3 = add <16 x i16> %res0, %res1
3764 %res4 = add <16 x i16> %res3, %res2
3765 ret <16 x i16> %res4
3768 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3770 define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3771 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
3772 ; CHECK: vpmovusdw %zmm0, (%rdi)
3773 ; CHECK: vpmovusdw %zmm0, (%rdi) {%k1}
3774 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3775 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3779 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
3781 define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
3782 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
3784 ; CHECK-NEXT: movzbl %dil, %eax
3785 ; CHECK-NEXT: kmovw %eax, %k1
3786 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1}
3787 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
3788 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3790 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
3791 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
3792 %res2 = fadd <8 x double> %res, %res1
3793 ret <8 x double> %res2
3796 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3798 define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
3799 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
3801 ; CHECK-NEXT: kmovw %edi, %k1
3802 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1}
3803 ; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0
3804 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
3806 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
3807 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
3808 %res2 = fadd <16 x float> %res, %res1
3809 ret <16 x float> %res2
3812 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
3814 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3815 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512:
3817 ; CHECK-NEXT: movzbl %dil, %eax
3818 ; CHECK-NEXT: kmovw %eax, %k1
3819 ; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1}
3820 ; CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0
3821 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3823 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3824 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
3825 %res2 = add <8 x i32> %res, %res1
3829 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
3831 define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) {
3832 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512:
3834 ; CHECK-NEXT: movzbl %dil, %eax
3835 ; CHECK-NEXT: kmovw %eax, %k1
3836 ; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1}
3837 ; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0
3838 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
3840 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
3841 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2)
3842 %res2 = fadd <8 x float> %res, %res1
3843 ret <8 x float> %res2
3846 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
3848 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3849 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512:
3851 ; CHECK-NEXT: movzbl %dil, %eax
3852 ; CHECK-NEXT: kmovw %eax, %k1
3853 ; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1}
3854 ; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0
3855 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3857 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2)
3858 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
3859 %res2 = add <8 x i32> %res, %res1
3863 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)
3865 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3866 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512:
3868 ; CHECK-NEXT: kmovw %edi, %k1
3869 ; CHECK-NEXT: vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1}
3870 ; CHECK-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0
3871 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3873 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
3874 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
3875 %res2 = add <16 x i32> %res, %res1
3876 ret <16 x i32> %res2
3879 declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)
3881 define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) {
3882 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512:
3884 ; CHECK-NEXT: movzbl %dil, %eax
3885 ; CHECK-NEXT: kmovw %eax, %k1
3886 ; CHECK-NEXT: vcvtps2pd %ymm0, %zmm1 {%k1}
3887 ; CHECK-NEXT: vcvtps2pd {sae}, %ymm0, %zmm0
3888 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3890 %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
3891 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
3892 %res2 = fadd <8 x double> %res, %res1
3893 ret <8 x double> %res2
3896 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
3898 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3899 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512:
3901 ; CHECK-NEXT: kmovw %edi, %k1
3902 ; CHECK-NEXT: vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1}
3903 ; CHECK-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0
3904 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3906 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
3907 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
3908 %res2 = add <16 x i32> %res, %res1
3909 ret <16 x i32> %res2
3912 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
3914 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3915 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
3917 ; CHECK-NEXT: movzbl %dil, %eax
3918 ; CHECK-NEXT: kmovw %eax, %k1
3919 ; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
3920 ; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
3921 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3923 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3924 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
3925 %res2 = add <8 x i32> %res, %res1
3929 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
3931 define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
3932 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
3934 ; CHECK-NEXT: movzbl %dil, %eax
3935 ; CHECK-NEXT: kmovw %eax, %k1
3936 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1}
3937 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
3938 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3940 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
3941 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
3942 %res2 = fadd <8 x double> %res, %res1
3943 ret <8 x double> %res2
3947 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3949 define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
3950 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
3952 ; CHECK-NEXT: kmovw %edi, %k1
3953 ; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1}
3954 ; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0
3955 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
3957 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
3958 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
3959 %res2 = fadd <16 x float> %res, %res1
3960 ret <16 x float> %res2
3963 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
3965 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3966 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
3968 ; CHECK-NEXT: movzbl %dil, %eax
3969 ; CHECK-NEXT: kmovw %eax, %k1
3970 ; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
3971 ; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
3972 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3974 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3975 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
3976 %res2 = add <8 x i32> %res, %res1
3980 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
3982 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3983 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
3985 ; CHECK-NEXT: kmovw %edi, %k1
3986 ; CHECK-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
3987 ; CHECK-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
3988 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3990 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
3991 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
3992 %res2 = add <16 x i32> %res, %res1
3993 ret <16 x i32> %res2
3996 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
3998 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3999 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
4001 ; CHECK-NEXT: kmovw %edi, %k1
4002 ; CHECK-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
4003 ; CHECK-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
4004 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
4006 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
4007 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
4008 %res2 = add <16 x i32> %res, %res1
4009 ret <16 x i32> %res2
4013 declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
4014 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ss
4017 ; CHECK: vscalefss {{.*}}{%k1}
4018 ; CHECK: vscalefss {rn-sae}
4019 define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
4020 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
4021 %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
4022 %res2 = fadd <4 x float> %res, %res1
4023 ret <4 x float> %res2
4026 declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
4027 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_sd
4030 ; CHECK: vscalefsd {{.*}}{%k1}
4031 ; CHECK: vscalefsd {rn-sae}
4032 define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
4033 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
4034 %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
4035 %res2 = fadd <2 x double> %res, %res1
4036 ret <2 x double> %res2
4039 declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
4041 define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
4042 ; CHECK-LABEL: test_getexp_ss:
4044 ; CHECK-NEXT: andl $1, %edi
4045 ; CHECK-NEXT: kmovw %edi, %k1
4046 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
4047 ; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
4048 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
4049 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
4050 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0
4051 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
4052 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
4053 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4055 %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
4056 %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
4057 %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
4058 %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
4060 %res.1 = fadd <4 x float> %res0, %res1
4061 %res.2 = fadd <4 x float> %res2, %res3
4062 %res = fadd <4 x float> %res.1, %res.2
4063 ret <4 x float> %res
4066 declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
4068 define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
4069 ; CHECK-LABEL: test_getexp_sd:
4071 ; CHECK-NEXT: andl $1, %edi
4072 ; CHECK-NEXT: kmovw %edi, %k1
4073 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
4074 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
4075 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
4076 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
4077 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
4078 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
4079 ; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
4080 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
4082 %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
4083 %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
4084 %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
4085 %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
4087 %res.1 = fadd <2 x double> %res0, %res1
4088 %res.2 = fadd <2 x double> %res2, %res3
4089 %res = fadd <2 x double> %res.1, %res.2
4090 ret <2 x double> %res
4093 declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
4095 define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
4096 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd:
4098 ; CHECK-NEXT: andl $1, %edi
4099 ; CHECK-NEXT: kmovw %edi, %k1
4100 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
4101 ; CHECK-NEXT: kmovw %k0, %eax
4102 ; CHECK-NEXT: shlb $7, %al
4103 ; CHECK-NEXT: sarb $7, %al
4106 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
4110 define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
4111 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all:
4113 ; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0
4114 ; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1
4115 ; CHECK-NEXT: korw %k0, %k1, %k0
4116 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1
4117 ; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2
4118 ; CHECK-NEXT: korw %k1, %k2, %k1
4119 ; CHECK-NEXT: andl $1, %edi
4120 ; CHECK-NEXT: kmovw %edi, %k2
4121 ; CHECK-NEXT: kandw %k2, %k1, %k1
4122 ; CHECK-NEXT: korw %k1, %k0, %k0
4123 ; CHECK-NEXT: kmovw %k0, %eax
4124 ; CHECK-NEXT: shlb $7, %al
4125 ; CHECK-NEXT: sarb $7, %al
4128 %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
4129 %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
4130 %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
4131 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
4133 %res11 = or i8 %res1, %res2
4134 %res12 = or i8 %res3, %res4
4135 %res13 = or i8 %res11, %res12
4139 declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
4141 define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
4142 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
4144 ; CHECK-NEXT: andl $1, %edi
4145 ; CHECK-NEXT: kmovw %edi, %k1
4146 ; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
4147 ; CHECK-NEXT: kmovw %k0, %eax
4148 ; CHECK-NEXT: shlb $7, %al
4149 ; CHECK-NEXT: sarb $7, %al
4152 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
4157 define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
4158 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all:
4160 ; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1
4161 ; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1}
4162 ; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k1
4163 ; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
4164 ; CHECK-NEXT: andl $1, %edi
4165 ; CHECK-NEXT: kmovw %edi, %k2
4166 ; CHECK-NEXT: kandw %k2, %k1, %k1
4167 ; CHECK-NEXT: kandw %k1, %k0, %k0
4168 ; CHECK-NEXT: kmovw %k0, %eax
4169 ; CHECK-NEXT: shlb $7, %al
4170 ; CHECK-NEXT: sarb $7, %al
4172 %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
4173 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
4174 %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
4175 %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)
4177 %res11 = and i8 %res1, %res2
4178 %res12 = and i8 %res3, %res4
4179 %res13 = and i8 %res11, %res12
4183 declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
4185 define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
4186 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
4188 ; CHECK-NEXT: kmovw %edi, %k1
4189 ; CHECK-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1}
4190 ; CHECK-NEXT: ## zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
4191 ; CHECK-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm0
4192 ; CHECK-NEXT: ## zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
4193 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4195 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
4196 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
4197 %res2 = fadd <16 x float> %res, %res1
4198 ret <16 x float> %res2
4201 declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)
4203 define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
4204 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
4206 ; CHECK-NEXT: movzbl %dil, %eax
4207 ; CHECK-NEXT: kmovw %eax, %k1
4208 ; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1}
4209 ; CHECK-NEXT: ## zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4210 ; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm3 {%k1} {z}
4211 ; CHECK-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4212 ; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm0
4213 ; CHECK-NEXT: ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4214 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4215 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
4217 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
4218 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
4219 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
4221 %res3 = fadd <8 x double> %res, %res1
4222 %res4 = fadd <8 x double> %res3, %res2
4223 ret <8 x double> %res4
4226 declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
4228 define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
4229 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
4231 ; CHECK-NEXT: kmovw %edi, %k1
4232 ; CHECK-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1}
4233 ; CHECK-NEXT: ## zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
4234 ; CHECK-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm0
4235 ; CHECK-NEXT: ## zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
4236 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4238 %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
4239 %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
4240 %res2 = add <16 x i32> %res, %res1
4241 ret <16 x i32> %res2
4244 declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
4246 define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
4247 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
4249 ; CHECK-NEXT: movzbl %dil, %eax
4250 ; CHECK-NEXT: kmovw %eax, %k1
4251 ; CHECK-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1}
4252 ; CHECK-NEXT: ## zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4253 ; CHECK-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm0
4254 ; CHECK-NEXT: ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4255 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4257 %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
4258 %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
4259 %res2 = add <8 x i64> %res, %res1
4263 declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
4265 define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
4266 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_512:
4268 ; CHECK-NEXT: movzbl %dil, %eax
4269 ; CHECK-NEXT: kmovw %eax, %k1
4270 ; CHECK-NEXT: vgetmantpd $11, %zmm0, %zmm1 {%k1}
4271 ; CHECK-NEXT: vgetmantpd $11,{sae}, %zmm0, %zmm0
4272 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
4274 %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
4275 %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8)
4276 %res2 = fadd <8 x double> %res, %res1
4277 ret <8 x double> %res2
4280 declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
4282 define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
4283 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_512:
4285 ; CHECK-NEXT: kmovw %edi, %k1
4286 ; CHECK-NEXT: vgetmantps $11, %zmm0, %zmm1 {%k1}
4287 ; CHECK-NEXT: vgetmantps $11,{sae}, %zmm0, %zmm0
4288 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4290 %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
4291 %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8)
4292 %res2 = fadd <16 x float> %res, %res1
4293 ret <16 x float> %res2
4296 declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32)
4298 define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
4299 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd:
4301 ; CHECK-NEXT: andl $1, %edi
4302 ; CHECK-NEXT: kmovw %edi, %k1
4303 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
4304 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
4305 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
4306 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
4307 ; CHECK-NEXT: vgetmantsd $11,{sae}, %xmm1, %xmm0, %xmm2 {%k1}
4308 ; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
4309 ; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
4310 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
4312 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
4313 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4)
4314 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 8)
4315 %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 -1, i32 4)
4316 %res11 = fadd <2 x double> %res, %res1
4317 %res12 = fadd <2 x double> %res2, %res3
4318 %res13 = fadd <2 x double> %res11, %res12
4319 ret <2 x double> %res13
4322 declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)
4324 define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
4325 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss:
4327 ; CHECK-NEXT: andl $1, %edi
4328 ; CHECK-NEXT: kmovw %edi, %k1
4329 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
4330 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z}
4331 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4
4332 ; CHECK-NEXT: vgetmantss $11,{sae}, %xmm1, %xmm0, %xmm0
4333 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
4334 ; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
4335 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4337 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
4338 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4)
4339 %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 8)
4340 %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 4)
4341 %res11 = fadd <4 x float> %res, %res1
4342 %res12 = fadd <4 x float> %res2, %res3
4343 %res13 = fadd <4 x float> %res11, %res12
4344 ret <4 x float> %res13
4347 declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)
4349 define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
4350 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
4352 ; CHECK-NEXT: movzbl %dil, %eax
4353 ; CHECK-NEXT: kmovw %eax, %k1
4354 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1}
4355 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm3 {%k1} {z}
4356 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0
4357 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4358 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
4360 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
4361 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
4362 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
4364 %res3 = fadd <8 x double> %res, %res1
4365 %res4 = fadd <8 x double> %res3, %res2
4366 ret <8 x double> %res4
4369 declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)
4371 define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
4372 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
4374 ; CHECK-NEXT: kmovw %edi, %k1
4375 ; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm2 {%k1}
4376 ; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm0
4377 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4379 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
4380 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
4381 %res2 = fadd <16 x float> %res, %res1
4382 ret <16 x float> %res2
4385 declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)
4387 define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
4388 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
4390 ; CHECK-NEXT: movzbl %dil, %eax
4391 ; CHECK-NEXT: kmovw %eax, %k1
4392 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm1 {%k1}
4393 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm2 {%k1} {z}
4394 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm0
4395 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
4396 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
4398 %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
4399 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
4400 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
4401 %res3 = fadd <8 x double> %res, %res1
4402 %res4 = fadd <8 x double> %res3, %res2
4403 ret <8 x double> %res4
4406 declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)
4408 define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
4409 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
4411 ; CHECK-NEXT: kmovw %edi, %k1
4412 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm1 {%k1}
4413 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm2 {%k1} {z}
4414 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm0
4415 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
4416 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4418 %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
4419 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
4420 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
4421 %res3 = fadd <16 x float> %res, %res1
4422 %res4 = fadd <16 x float> %res3, %res2
4423 ret <16 x float> %res4
4426 declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
4428 define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
4429 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
4431 ; CHECK-NEXT: movzbl %dil, %eax
4432 ; CHECK-NEXT: kmovw %eax, %k1
4433 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
4434 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z}
4435 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0
4436 ; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
4437 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
4439 %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
4440 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
4441 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
4442 %res3 = fadd <8 x double> %res, %res1
4443 %res4 = fadd <8 x double> %res2, %res3
4444 ret <8 x double> %res4
4447 declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
4449 define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
4450 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
4452 ; CHECK-NEXT: kmovw %edi, %k1
4453 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1}
4454 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z}
4455 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0
4456 ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
4457 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
4459 %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
4460 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
4461 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
4462 %res3 = fadd <16 x float> %res, %res1
4463 %res4 = fadd <16 x float> %res2, %res3
4464 ret <16 x float> %res4
4467 declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i8)
4469 define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i8 %x4) {
4470 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
4472 ; CHECK-NEXT: kmovw %edi, %k1
4473 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
4474 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
4475 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
4476 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4477 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
4479 %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 %x4)
4480 %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 -1)
4481 %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i8 %x4)
4482 %res3 = fadd <16 x float> %res, %res1
4483 %res4 = fadd <16 x float> %res2, %res3
4484 ret <16 x float> %res4
4487 declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i8)
4489 define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i8 %x4) {
4490 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
4492 ; CHECK-NEXT: kmovw %edi, %k1
4493 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
4494 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
4495 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
4496 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4497 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
4499 %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 %x4)
4500 %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 -1)
4501 %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i8 %x4)
4502 %res3 = add <16 x i32> %res, %res1
4503 %res4 = add <16 x i32> %res2, %res3
4504 ret <16 x i32> %res4
4507 declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)
4509 define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
4510 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
4512 ; CHECK-NEXT: movzbl %dil, %eax
4513 ; CHECK-NEXT: kmovw %eax, %k1
4514 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
4515 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
4516 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
4517 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4518 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
4520 %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
4521 %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
4522 %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
4523 %res3 = fadd <8 x double> %res, %res1
4524 %res4 = fadd <8 x double> %res2, %res3
4525 ret <8 x double> %res4
4528 declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)
4530 define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
4531 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
4533 ; CHECK-NEXT: movzbl %dil, %eax
4534 ; CHECK-NEXT: kmovw %eax, %k1
4535 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
4536 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
4537 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
4538 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4539 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4541 %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
4542 %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
4543 %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
4544 %res3 = add <8 x i64> %res, %res1
4545 %res4 = add <8 x i64> %res2, %res3
4549 declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float>, <4 x float>, <2 x double>, i8, i32)
4551 define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<4 x float> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) {
4552 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round:
4553 ; CHECK: kmovw %edi, %k1
4554 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1}
4555 ; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0
4556 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
4558 %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4)
4559 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8)
4560 %res2 = fadd <2 x double> %res, %res1
4561 ret <2 x double> %res2
4564 declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double>, <2 x double>, <4 x float>, i8, i32)
4566 define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<2 x double> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) {
4567 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round:
4568 ; CHECK: kmovw %edi, %k1
4569 ; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
4570 ; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
4571 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
4573 %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3)
4574 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
4575 %res2 = fadd <4 x float> %res, %res1
4576 ret <4 x float> %res2
4579 declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
4581 define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
4582 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
4584 ; CHECK-NEXT: kmovw %edi, %k1
4585 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4586 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1}
4587 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
4588 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
4590 %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
4591 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
4592 %res2 = add <16 x i32> %res, %res1
4593 ret <16 x i32> %res2
4596 declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
4598 define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
4599 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
4601 ; CHECK-NEXT: kmovw %edi, %k1
4602 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4603 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
4604 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
4605 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
4607 %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
4608 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
4609 %res2 = add <16 x i32> %res, %res1
4610 ret <16 x i32> %res2
4613 declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
4615 define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
4616 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
4618 ; CHECK-NEXT: movzbl %dil, %eax
4619 ; CHECK-NEXT: kmovw %eax, %k1
4620 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4621 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1}
4622 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
4623 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4625 %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
4626 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
4627 %res2 = add <8 x i64> %res, %res1
4631 declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
4633 define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
4634 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
4636 ; CHECK-NEXT: movzbl %dil, %eax
4637 ; CHECK-NEXT: kmovw %eax, %k1
4638 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4639 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
4640 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
4641 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4643 %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
4644 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
4645 %res2 = add <8 x i64> %res, %res1