1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
3 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
4 ; CHECK-LABEL: test_kortestz
7 define i32 @test_kortestz(i16 %a0, i16 %a1) {
8 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
12 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
13 ; CHECK-LABEL: test_kortestc
16 define i32 @test_kortestc(i16 %a0, i16 %a1) {
17 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
21 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
22 ; CHECK-LABEL: test_kand
25 define i16 @test_kand(i16 %a0, i16 %a1) {
26 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
27 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
31 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
32 ; CHECK-LABEL: test_knot
34 define i16 @test_knot(i16 %a0) {
35 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
39 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
41 ; CHECK-LABEL: unpckbw_test
44 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
45 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
49 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
50 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
51 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
54 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
56 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
57 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
58 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
61 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
63 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
65 define <8 x double> @test7(<8 x double> %a) {
66 ; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
67 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
71 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
73 define <16 x float> @test8(<16 x float> %a) {
74 ; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
75 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
79 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
80 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
81 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
84 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
86 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
87 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
88 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
91 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
93 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
94 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
95 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
98 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
100 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
101 ; CHECK-LABEL: test_sqrt_pd_512
103 %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
104 ret <8 x double> %res
106 declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
108 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
109 ; CHECK-LABEL: test_sqrt_ps_512
111 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
112 ret <16 x float> %res
114 define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
115 ; CHECK-LABEL: test_sqrt_round_ps_512
116 ; CHECK: vsqrtps {rz-sae}
117 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
118 ret <16 x float> %res
120 declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
122 define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
123 ; CHECK-LABEL: test_getexp_pd_512
125 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
126 ret <8 x double> %res
128 define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
129 ; CHECK-LABEL: test_getexp_round_pd_512
130 ; CHECK: vgetexppd {sae}
131 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
132 ret <8 x double> %res
134 declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
136 define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
137 ; CHECK-LABEL: test_getexp_ps_512
139 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
140 ret <16 x float> %res
143 define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
144 ; CHECK-LABEL: test_getexp_round_ps_512
145 ; CHECK: vgetexpps {sae}
146 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
147 ret <16 x float> %res
149 declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
151 declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
153 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
154 ; CHECK-LABEL: test_sqrt_ss:
156 ; CHECK-NEXT: andl $1, %edi
157 ; CHECK-NEXT: kmovw %edi, %k1
158 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
159 ; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
160 ; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
161 ; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
162 ; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
163 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
164 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
165 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
167 %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
168 %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
169 %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
170 %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)
172 %res.1 = fadd <4 x float> %res0, %res1
173 %res.2 = fadd <4 x float> %res2, %res3
174 %res = fadd <4 x float> %res.1, %res.2
178 declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
180 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
181 ; CHECK-LABEL: test_sqrt_sd:
183 ; CHECK-NEXT: andl $1, %edi
184 ; CHECK-NEXT: kmovw %edi, %k1
185 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
186 ; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
187 ; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
188 ; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
189 ; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
190 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
191 ; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0
192 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
194 %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
195 %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
196 %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
197 %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)
199 %res.1 = fadd <2 x double> %res0, %res1
200 %res.2 = fadd <2 x double> %res2, %res3
201 %res = fadd <2 x double> %res.1, %res.2
202 ret <2 x double> %res
205 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
206 ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
207 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
210 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
212 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
213 ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
214 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
215 ret <2 x double> %res
217 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
219 define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
220 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
221 ; CHECK: vcvttsd2si {sae}{{.*}}encoding: [0x62
222 %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ;
223 %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ;
224 %res2 = add i64 %res0, %res1
227 declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone
229 define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) {
230 ; CHECK: vcvttsd2usi {{.*}}encoding: [0x62
231 ; CHECK: vcvttsd2usi {sae}{{.*}}encoding: [0x62
232 %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
233 %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
234 %res2 = add i32 %res0, %res1
237 declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
239 define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) {
240 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
241 ; CHECK: vcvttsd2si {sae}{{.*}}encoding: [0x62
242 %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
243 %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
244 %res2 = add i32 %res0, %res1
247 declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
251 define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
252 ; CHECK: vcvttsd2usi {{.*}}encoding: [0x62
253 ; CHECK: vcvttsd2usi {sae}{{.*}}encoding: [0x62
254 %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ;
255 %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ;
256 %res2 = add i64 %res0, %res1
259 declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone
261 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
262 ; CHECK: vcvtss2si {{.*}}encoding: [0x62
263 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
266 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
269 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
270 ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
271 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
274 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
277 define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) {
278 ; CHECK: vcvttss2si {sae}{{.*}}encoding: [0x62
279 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
280 %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
281 %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
282 %res2 = add i32 %res0, %res1
285 declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
287 define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
288 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
289 ; CHECK: vcvttss2si {sae}{{.*}}encoding: [0x62
290 %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ;
291 %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ;
292 %res2 = add i64 %res0, %res1
295 declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone
297 define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
298 ; CHECK: vcvttss2usi {sae}{{.*}}encoding: [0x62
299 ; CHECK: vcvttss2usi {{.*}}encoding: [0x62
300 %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
301 %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
302 %res2 = add i32 %res0, %res1
305 declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
307 define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
308 ; CHECK: vcvttss2usi {{.*}}encoding: [0x62
309 ; CHECK: vcvttss2usi {sae}{{.*}}encoding: [0x62
310 %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ;
311 %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ;
312 %res2 = add i64 %res0, %res1
315 declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
317 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
318 ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
319 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
322 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
324 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
325 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
326 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
327 ret <16 x float> %res
329 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
332 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
333 ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
334 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
338 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
340 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
341 ; CHECK: vbroadcastss
342 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
343 ret <16 x float> %res
345 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
347 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
348 ; CHECK: vbroadcastsd
349 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
350 ret <8 x double> %res
352 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
354 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
355 ; CHECK: vbroadcastss
356 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
357 ret <16 x float> %res
359 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
361 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
362 ; CHECK: vbroadcastsd
363 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
364 ret <8 x double> %res
366 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
368 define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
369 ; CHECK: vpbroadcastd
370 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
373 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
375 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
376 ; CHECK: vpbroadcastd
377 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
380 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
382 define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
383 ; CHECK: vpbroadcastq
384 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
387 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
389 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
390 ; CHECK: vpbroadcastq
391 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
394 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
396 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
397 ; CHECK-LABEL: test_conflict_d:
399 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0
400 ; CHECK-NEXT: retq ## encoding: [0xc3]
401 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
405 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
407 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
408 ; CHECK-LABEL: test_conflict_q:
410 ; CHECK-NEXT: vpconflictq %zmm0, %zmm0
412 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
416 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
418 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
419 ; CHECK-LABEL: test_maskz_conflict_d:
421 ; CHECK-NEXT: kmovw %edi, %k1
422 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z}
424 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
428 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
429 ; CHECK-LABEL: test_mask_conflict_q:
431 ; CHECK-NEXT: movzbl %dil, %eax
432 ; CHECK-NEXT: kmovw %eax, %k1
433 ; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
434 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
436 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
440 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
441 ; CHECK-LABEL: test_lzcnt_d:
443 ; CHECK-NEXT: vplzcntd %zmm0, %zmm0
445 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
449 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
451 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
452 ; CHECK-LABEL: test_lzcnt_q:
454 ; CHECK-NEXT: vplzcntq %zmm0, %zmm0
456 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
460 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
463 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
464 ; CHECK-LABEL: test_mask_lzcnt_d:
466 ; CHECK-NEXT: kmovw %edi, %k1
467 ; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1}
468 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
470 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
474 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
475 ; CHECK-LABEL: test_mask_lzcnt_q:
477 ; CHECK-NEXT: movzbl %dil, %eax
478 ; CHECK-NEXT: kmovw %eax, %k1
479 ; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1}
480 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
481 ; CHECK-NEXT: retq ## encoding: [0xc3]
482 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
486 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
487 ; CHECK: vblendmps %zmm1, %zmm0
488 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
489 ret <16 x float> %res
492 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
494 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
495 ; CHECK: vblendmpd %zmm1, %zmm0
496 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
497 ret <8 x double> %res
500 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
501 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
502 ; CHECK: vblendmpd (%
503 %b = load <8 x double>, <8 x double>* %ptr
504 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
505 ret <8 x double> %res
507 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
509 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
511 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
514 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
516 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
518 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
521 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
523 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
524 ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
525 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
528 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
530 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
531 ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
532 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
535 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
538 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
540 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
541 <8 x double>zeroinitializer, i8 -1, i32 4)
542 ret <8 x double> %res
544 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
545 <8 x double>, i8, i32)
547 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
549 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
550 <8 x double>zeroinitializer, i8 -1, i32 4)
551 ret <8 x double> %res
553 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
554 <8 x double>, i8, i32)
556 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
558 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_512
561 ; CHECK: vpabsd{{.*}}{%k1}
562 define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
563 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
564 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
565 %res2 = add <16 x i32> %res, %res1
569 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
571 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_512
574 ; CHECK: vpabsq{{.*}}{%k1}
575 define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
576 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
577 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
578 %res2 = add <8 x i64> %res, %res1
582 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
583 ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
584 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
587 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
589 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
590 ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
591 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
594 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
596 define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
597 ; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
598 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
602 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
604 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
605 ; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
606 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
610 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
612 define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
613 ; CHECK-LABEL: test_mask_store_aligned_ps:
615 ; CHECK-NEXT: kmovw %esi, %k1
616 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
618 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
622 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
624 define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
625 ; CHECK-LABEL: test_mask_store_aligned_pd:
627 ; CHECK-NEXT: kmovw %esi, %k1
628 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
630 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
634 declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
636 define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
637 ; CHECK-LABEL: test_maskz_load_aligned_ps:
639 ; CHECK-NEXT: kmovw %esi, %k1
640 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
642 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
643 ret <16 x float> %res
646 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
648 define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
649 ; CHECK-LABEL: test_maskz_load_aligned_pd:
651 ; CHECK-NEXT: kmovw %esi, %k1
652 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
654 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
655 ret <8 x double> %res
658 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
660 define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
661 ; CHECK-LABEL: test_load_aligned_ps:
663 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
665 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
666 ret <16 x float> %res
669 define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
670 ; CHECK-LABEL: test_load_aligned_pd:
672 ; CHECK-NEXT: vmovapd (%rdi), %zmm0
674 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
675 ret <8 x double> %res
678 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
680 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
681 ; CHECK-LABEL: test_valign_q:
682 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
683 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
687 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
688 ; CHECK-LABEL: test_mask_valign_q:
689 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
690 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
694 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
696 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
697 ; CHECK-LABEL: test_maskz_valign_d:
698 ; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
699 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
703 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
705 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
706 ; CHECK-LABEL: test_mask_store_ss
707 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
708 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
712 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
714 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
715 ; CHECK-LABEL: test_pcmpeq_d
716 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
717 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
721 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
722 ; CHECK-LABEL: test_mask_pcmpeq_d
723 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
724 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
728 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
730 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
731 ; CHECK-LABEL: test_pcmpeq_q
732 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
733 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
737 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
738 ; CHECK-LABEL: test_mask_pcmpeq_q
739 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
740 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
744 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
746 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
747 ; CHECK-LABEL: test_pcmpgt_d
748 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
749 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
753 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
754 ; CHECK-LABEL: test_mask_pcmpgt_d
755 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
756 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
760 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
762 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
763 ; CHECK-LABEL: test_pcmpgt_q
764 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
765 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
769 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
770 ; CHECK-LABEL: test_mask_pcmpgt_q
771 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
772 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
776 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
778 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
779 ; CHECK-LABEL: test_cmp_d_512
780 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
781 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
782 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
783 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
784 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
785 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
786 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
787 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
788 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
789 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
790 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
791 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
792 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
793 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
794 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
795 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
796 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
797 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
798 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
799 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
800 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
801 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
802 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
803 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
807 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
808 ; CHECK-LABEL: test_mask_cmp_d_512
809 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
810 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
811 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
812 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
813 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
814 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
815 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
816 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
817 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
818 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
819 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
820 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
821 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
822 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
823 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
824 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
825 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
826 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
827 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
828 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
829 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
830 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
831 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
832 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
836 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
838 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
839 ; CHECK-LABEL: test_ucmp_d_512
840 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
841 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
842 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
843 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
844 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
845 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
846 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
847 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
848 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
849 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
850 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
851 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
852 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
853 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
854 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
855 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
856 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
857 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
858 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
859 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
860 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
861 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
862 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
863 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
867 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
868 ; CHECK-LABEL: test_mask_ucmp_d_512
869 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
870 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
871 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
872 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
873 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
874 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
875 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
876 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
877 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
878 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
879 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
880 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
881 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
882 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
883 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
884 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
885 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
886 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
887 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
888 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
889 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
890 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
891 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
892 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
896 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
898 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
899 ; CHECK-LABEL: test_cmp_q_512
900 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
901 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
902 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
903 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
904 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
905 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
906 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
907 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
908 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
909 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
910 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
911 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
912 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
913 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
914 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
915 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
916 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
917 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
918 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
919 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
920 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
921 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
922 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
923 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
927 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
928 ; CHECK-LABEL: test_mask_cmp_q_512
929 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
930 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
931 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
932 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
933 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
934 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
935 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
936 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
937 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
938 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
939 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
940 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
941 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
942 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
943 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
944 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
945 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
946 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
947 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
948 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
949 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
950 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
951 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
952 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
956 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
958 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
959 ; CHECK-LABEL: test_ucmp_q_512
960 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
961 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
962 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
963 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
964 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
965 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
966 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
967 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
968 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
969 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
970 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
971 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
972 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
973 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
974 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
975 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
976 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
977 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
978 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
979 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
980 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
981 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
982 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
983 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
987 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
988 ; CHECK-LABEL: test_mask_ucmp_q_512
989 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
990 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
991 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
992 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
993 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
994 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
995 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
996 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
997 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
998 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
999 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
1000 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1001 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
1002 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
1003 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1004 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
1005 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
1006 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1007 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
1008 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
1009 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1010 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
1011 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
1012 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1016 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
1018 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
1019 ; CHECK-LABEL: test_mask_vextractf32x4:
1020 ; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
1021 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
1022 ret <4 x float> %res
1025 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)
1027 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
1028 ; CHECK-LABEL: test_mask_vextracti64x4:
1029 ; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
1030 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 2, <4 x i64> %b, i8 %mask)
1034 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)
1036 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
1037 ; CHECK-LABEL: test_maskz_vextracti32x4:
1038 ; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
1039 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
1043 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)
1045 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
1046 ; CHECK-LABEL: test_vextractf64x4:
1047 ; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
1048 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 2, <4 x double> zeroinitializer, i8 -1)
1049 ret <4 x double> %res
1052 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
1054 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
1055 ; CHECK-LABEL: test_x86_avx512_pslli_d
1057 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1061 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1062 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
1063 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
1064 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1068 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
1069 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
1070 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
1071 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1075 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1077 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
1078 ; CHECK-LABEL: test_x86_avx512_pslli_q
1080 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1084 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1085 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
1086 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
1087 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1091 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
1092 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
1093 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
1094 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1098 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1100 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1101 ; CHECK-LABEL: test_x86_avx512_psrli_d
1103 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1107 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1108 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
1109 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
1110 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1114 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1115 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
1116 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1117 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1121 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1123 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1124 ; CHECK-LABEL: test_x86_avx512_psrli_q
1126 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1130 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1131 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
1132 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
1133 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1137 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1138 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
1139 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1140 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1144 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1146 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1147 ; CHECK-LABEL: test_x86_avx512_psrai_d
1149 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1153 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1154 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
1155 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
1156 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1160 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1161 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
1162 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1163 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1167 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1169 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1170 ; CHECK-LABEL: test_x86_avx512_psrai_q
1172 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1176 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1177 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
1178 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
1179 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1183 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1184 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
1185 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1186 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1190 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1192 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
1193 ; CHECK-LABEL: test_x86_avx512_psll_d
1195 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1199 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1200 ; CHECK-LABEL: test_x86_avx512_mask_psll_d
1201 ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
1202 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1206 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1207 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
1208 ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
1209 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1213 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1215 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
1216 ; CHECK-LABEL: test_x86_avx512_psll_q
1218 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1222 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1223 ; CHECK-LABEL: test_x86_avx512_mask_psll_q
1224 ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
1225 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1229 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1230 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
1231 ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
1232 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1236 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1238 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
1239 ; CHECK-LABEL: test_x86_avx512_psrl_d
1241 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1245 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1246 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
1247 ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
1248 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1252 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1253 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
1254 ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
1255 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1259 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1261 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
1262 ; CHECK-LABEL: test_x86_avx512_psrl_q
1264 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1268 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1269 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
1270 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
1271 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1275 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1276 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
1277 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
1278 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1282 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1284 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
1285 ; CHECK-LABEL: test_x86_avx512_psra_d
1287 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1291 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1292 ; CHECK-LABEL: test_x86_avx512_mask_psra_d
1293 ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
1294 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1298 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1299 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
1300 ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
1301 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1305 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1307 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
1308 ; CHECK-LABEL: test_x86_avx512_psra_q
1310 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1314 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1315 ; CHECK-LABEL: test_x86_avx512_mask_psra_q
1316 ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
1317 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1321 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1322 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
1323 ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
1324 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1328 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1330 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
1331 ; CHECK-LABEL: test_x86_avx512_psllv_d
1333 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1337 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1338 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
1339 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
1340 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1344 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1345 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
1346 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1347 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1351 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1353 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
1354 ; CHECK-LABEL: test_x86_avx512_psllv_q
1356 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1360 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1361 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
1362 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
1363 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1367 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1368 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
1369 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1370 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1374 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1377 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
1378 ; CHECK-LABEL: test_x86_avx512_psrav_d
1380 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1384 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1385 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
1386 ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
1387 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1391 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1392 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
1393 ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
1394 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1398 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1400 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
1401 ; CHECK-LABEL: test_x86_avx512_psrav_q
1403 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1407 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1408 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
1409 ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
1410 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1414 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1415 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
1416 ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
1417 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1421 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1423 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
1424 ; CHECK-LABEL: test_x86_avx512_psrlv_d
1426 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1430 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1431 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
1432 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
1433 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1437 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1438 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
1439 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1440 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1444 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
; VPSRLVQ — per-element variable logical right shift of <8 x i64>.
; Unmasked / merge-masked / zero-masked variants; the i8 mask covers
; 8 qword lanes.
1446 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
1447 ; CHECK-LABEL: test_x86_avx512_psrlv_q
1449 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1453 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1454 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
1455 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
1456 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1460 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1461 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
1462 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1463 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1467 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
; VPSRLVQ with the shift-count vector loaded from memory — exercises
; selection of the memory-operand form of the instruction.
1469 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
1470 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
1472 %b = load <8 x i64>, <8 x i64>* %ptr
1473 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
; Masked FP sub/mul intrinsics whose trailing i32 selects the embedded
; rounding mode. The tests below pass rounding arg 0..3 and pin the
; corresponding EVEX assembly/encoding: {rn-sae}=0, {rd-sae}=1,
; {ru-sae}=2, {rz-sae}=3 (the rounding bits are visible in the fourth
; encoding byte: 0x18/0x38/0x58/0x78).
1477 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1478 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1479 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
1481 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
1482 ; CHECK-LABEL: test_vsubps_rn
1483 ; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
1484 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1485 <16 x float> zeroinitializer, i16 -1, i32 0)
1486 ret <16 x float> %res
1489 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
1490 ; CHECK-LABEL: test_vsubps_rd
1491 ; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
1492 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1493 <16 x float> zeroinitializer, i16 -1, i32 1)
1494 ret <16 x float> %res
1497 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
1498 ; CHECK-LABEL: test_vsubps_ru
1499 ; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
1500 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1501 <16 x float> zeroinitializer, i16 -1, i32 2)
1502 ret <16 x float> %res
1505 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
1506 ; CHECK-LABEL: test_vsubps_rz
1507 ; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
1508 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1509 <16 x float> zeroinitializer, i16 -1, i32 3)
1510 ret <16 x float> %res
; Unmasked VMULPS with each of the four embedded rounding modes
; (rounding arg 0..3); mask is all-ones (i16 -1) so no {%k1} appears
; in the checked assembly.
1513 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
1514 ; CHECK-LABEL: test_vmulps_rn
1515 ; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
1516 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1517 <16 x float> zeroinitializer, i16 -1, i32 0)
1518 ret <16 x float> %res
1521 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
1522 ; CHECK-LABEL: test_vmulps_rd
1523 ; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
1524 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1525 <16 x float> zeroinitializer, i16 -1, i32 1)
1526 ret <16 x float> %res
1529 define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
1530 ; CHECK-LABEL: test_vmulps_ru
1531 ; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
1532 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1533 <16 x float> zeroinitializer, i16 -1, i32 2)
1534 ret <16 x float> %res
1537 define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
1538 ; CHECK-LABEL: test_vmulps_rz
1539 ; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
1540 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1541 <16 x float> zeroinitializer, i16 -1, i32 3)
1542 ret <16 x float> %res
; Zero-masked VMULPS with rounding: a variable mask plus a
; zeroinitializer passthru must select the {%k1} {z} form for each
; rounding mode.
1546 define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1547 ; CHECK-LABEL: test_vmulps_mask_rn
1548 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
1549 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1550 <16 x float> zeroinitializer, i16 %mask, i32 0)
1551 ret <16 x float> %res
1554 define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1555 ; CHECK-LABEL: test_vmulps_mask_rd
1556 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
1557 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1558 <16 x float> zeroinitializer, i16 %mask, i32 1)
1559 ret <16 x float> %res
1562 define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1563 ; CHECK-LABEL: test_vmulps_mask_ru
1564 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
1565 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1566 <16 x float> zeroinitializer, i16 %mask, i32 2)
1567 ret <16 x float> %res
1570 define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1571 ; CHECK-LABEL: test_vmulps_mask_rz
1572 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
1573 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1574 <16 x float> zeroinitializer, i16 %mask, i32 3)
1575 ret <16 x float> %res
;; With Passthru value
; Merge-masked VMULPS with rounding: a non-zero %passthru must select
; the plain {%k1} form (no {z}) for each rounding mode.
1579 define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1580 ; CHECK-LABEL: test_vmulps_mask_passthru_rn
1581 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
1582 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1583 <16 x float> %passthru, i16 %mask, i32 0)
1584 ret <16 x float> %res
1587 define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1588 ; CHECK-LABEL: test_vmulps_mask_passthru_rd
1589 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
1590 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1591 <16 x float> %passthru, i16 %mask, i32 1)
1592 ret <16 x float> %res
1595 define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1596 ; CHECK-LABEL: test_vmulps_mask_passthru_ru
1597 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
1598 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1599 <16 x float> %passthru, i16 %mask, i32 2)
1600 ret <16 x float> %res
1603 define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1604 ; CHECK-LABEL: test_vmulps_mask_passthru_rz
1605 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
1606 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1607 <16 x float> %passthru, i16 %mask, i32 3)
1608 ret <16 x float> %res
; Zero-masked VMULPD (double-precision, i8 mask for 8 lanes) with each
; embedded rounding mode; checks the {%k1} {z} encodings.
1612 define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1613 ; CHECK-LABEL: test_vmulpd_mask_rn
1614 ; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
1615 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1616 <8 x double> zeroinitializer, i8 %mask, i32 0)
1617 ret <8 x double> %res
1620 define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1621 ; CHECK-LABEL: test_vmulpd_mask_rd
1622 ; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
1623 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1624 <8 x double> zeroinitializer, i8 %mask, i32 1)
1625 ret <8 x double> %res
1628 define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1629 ; CHECK-LABEL: test_vmulpd_mask_ru
1630 ; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
1631 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1632 <8 x double> zeroinitializer, i8 %mask, i32 2)
1633 ret <8 x double> %res
1636 define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1637 ; CHECK-LABEL: test_vmulpd_mask_rz
1638 ; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
1639 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1640 <8 x double> zeroinitializer, i8 %mask, i32 3)
1641 ret <8 x double> %res
; Masked dword bitwise ops: VPXORD / VPORD / VPANDD. For each op the
; unmasked form (mask = i16 -1, no write-mask in the checked assembly)
; and the merge-masked form (result into %passThru / zmm2 {%k1}) are
; tested with their exact encodings.
1644 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
1645 ;CHECK-LABEL: test_xor_epi32
1646 ;CHECK: vpxord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
1647 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1648 ret < 16 x i32> %res
1651 define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1652 ;CHECK-LABEL: test_mask_xor_epi32
1653 ;CHECK: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
1654 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1655 ret < 16 x i32> %res
1658 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1660 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
1661 ;CHECK-LABEL: test_or_epi32
1662 ;CHECK: vpord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
1663 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1664 ret < 16 x i32> %res
1667 define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1668 ;CHECK-LABEL: test_mask_or_epi32
1669 ;CHECK: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
1670 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1671 ret < 16 x i32> %res
1674 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1676 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
1677 ;CHECK-LABEL: test_and_epi32
1678 ;CHECK: vpandd {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
1679 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1680 ret < 16 x i32> %res
1683 define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1684 ;CHECK-LABEL: test_mask_and_epi32
1685 ;CHECK: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
1686 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1687 ret < 16 x i32> %res
1690 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Masked qword bitwise ops: VPXORQ / VPORQ / VPANDQ — qword
; counterparts of the dword tests above, with i8 masks for 8 lanes.
1692 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
1693 ;CHECK-LABEL: test_xor_epi64
1694 ;CHECK: vpxorq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
1695 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1699 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1700 ;CHECK-LABEL: test_mask_xor_epi64
1701 ;CHECK: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
1702 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1706 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1708 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
1709 ;CHECK-LABEL: test_or_epi64
1710 ;CHECK: vporq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
1711 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1715 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1716 ;CHECK-LABEL: test_mask_or_epi64
1717 ;CHECK: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
1718 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1722 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1724 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
1725 ;CHECK-LABEL: test_and_epi64
1726 ;CHECK: vpandq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
1727 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1731 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1732 ;CHECK-LABEL: test_mask_and_epi64
1733 ;CHECK: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
1734 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1738 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; VPADDD operand-form matrix: rr = reg/reg, rm = reg/mem, rmb =
; reg/broadcast-mem; suffix k = merge-masked, kz = zero-masked.
; The rmb* tests splat one i32 through insertelement + shufflevector so
; isel folds the load into the (%rdi){1to16} embedded-broadcast form.
1741 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1742 ;CHECK-LABEL: test_mask_add_epi32_rr
1743 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
1744 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1745 ret < 16 x i32> %res
1748 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1749 ;CHECK-LABEL: test_mask_add_epi32_rrk
1750 ;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
1751 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1752 ret < 16 x i32> %res
1755 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1756 ;CHECK-LABEL: test_mask_add_epi32_rrkz
1757 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
1758 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1759 ret < 16 x i32> %res
1762 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1763 ;CHECK-LABEL: test_mask_add_epi32_rm
1764 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
1765 %b = load <16 x i32>, <16 x i32>* %ptr_b
1766 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1767 ret < 16 x i32> %res
1770 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1771 ;CHECK-LABEL: test_mask_add_epi32_rmk
1772 ;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
1773 %b = load <16 x i32>, <16 x i32>* %ptr_b
1774 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1775 ret < 16 x i32> %res
1778 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1779 ;CHECK-LABEL: test_mask_add_epi32_rmkz
1780 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
1781 %b = load <16 x i32>, <16 x i32>* %ptr_b
1782 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1783 ret < 16 x i32> %res
1786 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1787 ;CHECK-LABEL: test_mask_add_epi32_rmb
1788 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
1789 %q = load i32, i32* %ptr_b
1790 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1791 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1792 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1793 ret < 16 x i32> %res
1796 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1797 ;CHECK-LABEL: test_mask_add_epi32_rmbk
1798 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
1799 %q = load i32, i32* %ptr_b
1800 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1801 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1802 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1803 ret < 16 x i32> %res
1806 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1807 ;CHECK-LABEL: test_mask_add_epi32_rmbkz
1808 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
1809 %q = load i32, i32* %ptr_b
1810 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1811 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1812 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1813 ret < 16 x i32> %res
1816 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; VPSUBD operand-form matrix (same rr/rm/rmb × unmasked/k/kz scheme as
; the padd.d tests above).
1818 define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1819 ;CHECK-LABEL: test_mask_sub_epi32_rr
1820 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
1821 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1822 ret < 16 x i32> %res
1825 define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1826 ;CHECK-LABEL: test_mask_sub_epi32_rrk
1827 ;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
1828 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1829 ret < 16 x i32> %res
1832 define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1833 ;CHECK-LABEL: test_mask_sub_epi32_rrkz
1834 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
1835 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1836 ret < 16 x i32> %res
1839 define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1840 ;CHECK-LABEL: test_mask_sub_epi32_rm
1841 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
1842 %b = load <16 x i32>, <16 x i32>* %ptr_b
1843 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1844 ret < 16 x i32> %res
1847 define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1848 ;CHECK-LABEL: test_mask_sub_epi32_rmk
1849 ;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
1850 %b = load <16 x i32>, <16 x i32>* %ptr_b
1851 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1852 ret < 16 x i32> %res
1855 define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1856 ;CHECK-LABEL: test_mask_sub_epi32_rmkz
1857 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
1858 %b = load <16 x i32>, <16 x i32>* %ptr_b
1859 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1860 ret < 16 x i32> %res
1863 define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1864 ;CHECK-LABEL: test_mask_sub_epi32_rmb
1865 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
1866 %q = load i32, i32* %ptr_b
1867 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1868 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1869 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1870 ret < 16 x i32> %res
1873 define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1874 ;CHECK-LABEL: test_mask_sub_epi32_rmbk
1875 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
1876 %q = load i32, i32* %ptr_b
1877 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1878 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1879 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1880 ret < 16 x i32> %res
1883 define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1884 ;CHECK-LABEL: test_mask_sub_epi32_rmbkz
1885 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
1886 %q = load i32, i32* %ptr_b
1887 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1888 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1889 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1890 ret < 16 x i32> %res
1893 declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; VPADDQ operand-form matrix: qword version of the padd.d tests —
; i8 mask, i64 scalar splat, and the (%rdi){1to8} broadcast form.
1895 define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
1896 ;CHECK-LABEL: test_mask_add_epi64_rr
1897 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
1898 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1902 define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1903 ;CHECK-LABEL: test_mask_add_epi64_rrk
1904 ;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
1905 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1909 define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1910 ;CHECK-LABEL: test_mask_add_epi64_rrkz
1911 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
1912 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1916 define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
1917 ;CHECK-LABEL: test_mask_add_epi64_rm
1918 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
1919 %b = load <8 x i64>, <8 x i64>* %ptr_b
1920 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1924 define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1925 ;CHECK-LABEL: test_mask_add_epi64_rmk
1926 ;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
1927 %b = load <8 x i64>, <8 x i64>* %ptr_b
1928 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1932 define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
1933 ;CHECK-LABEL: test_mask_add_epi64_rmkz
1934 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
1935 %b = load <8 x i64>, <8 x i64>* %ptr_b
1936 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1940 define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
1941 ;CHECK-LABEL: test_mask_add_epi64_rmb
1942 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
1943 %q = load i64, i64* %ptr_b
1944 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1945 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1946 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1950 define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1951 ;CHECK-LABEL: test_mask_add_epi64_rmbk
1952 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
1953 %q = load i64, i64* %ptr_b
1954 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1955 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1956 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1960 define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
1961 ;CHECK-LABEL: test_mask_add_epi64_rmbkz
1962 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
1963 %q = load i64, i64* %ptr_b
1964 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1965 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1966 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1970 declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; VPSUBQ operand-form matrix (same scheme as the padd.q tests above).
1972 define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
1973 ;CHECK-LABEL: test_mask_sub_epi64_rr
1974 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
1975 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1979 define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1980 ;CHECK-LABEL: test_mask_sub_epi64_rrk
1981 ;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
1982 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1986 define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1987 ;CHECK-LABEL: test_mask_sub_epi64_rrkz
1988 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
1989 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1993 define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
1994 ;CHECK-LABEL: test_mask_sub_epi64_rm
1995 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
1996 %b = load <8 x i64>, <8 x i64>* %ptr_b
1997 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2001 define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2002 ;CHECK-LABEL: test_mask_sub_epi64_rmk
2003 ;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
2004 %b = load <8 x i64>, <8 x i64>* %ptr_b
2005 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2009 define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
2010 ;CHECK-LABEL: test_mask_sub_epi64_rmkz
2011 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
2012 %b = load <8 x i64>, <8 x i64>* %ptr_b
2013 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2017 define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
2018 ;CHECK-LABEL: test_mask_sub_epi64_rmb
2019 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
2020 %q = load i64, i64* %ptr_b
2021 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2022 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2023 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2027 define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2028 ;CHECK-LABEL: test_mask_sub_epi64_rmbk
2029 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
2030 %q = load i64, i64* %ptr_b
2031 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2032 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2033 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2037 define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2038 ;CHECK-LABEL: test_mask_sub_epi64_rmbkz
2039 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
2040 %q = load i64, i64* %ptr_b
2041 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2042 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2043 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2047 declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; VPMULDQ — widening signed multiply taking <16 x i32> inputs and
; producing <8 x i64>; mask is i8 over the 8 qword results. The rmb
; test splats an i64 and bitcasts it to <16 x i32> so the {1to8}
; qword-broadcast form is selected.
2049 define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
2050 ;CHECK-LABEL: test_mask_mul_epi32_rr
2051 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
2052 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2056 define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
2057 ;CHECK-LABEL: test_mask_mul_epi32_rrk
2058 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
2059 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2063 define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
2064 ;CHECK-LABEL: test_mask_mul_epi32_rrkz
2065 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
2066 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2070 define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2071 ;CHECK-LABEL: test_mask_mul_epi32_rm
2072 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
2073 %b = load <16 x i32>, <16 x i32>* %ptr_b
2074 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2078 define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2079 ;CHECK-LABEL: test_mask_mul_epi32_rmk
2080 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
2081 %b = load <16 x i32>, <16 x i32>* %ptr_b
2082 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2086 define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
2087 ;CHECK-LABEL: test_mask_mul_epi32_rmkz
2088 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
2089 %b = load <16 x i32>, <16 x i32>* %ptr_b
2090 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2094 define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
2095 ;CHECK-LABEL: test_mask_mul_epi32_rmb
2096 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
2097 %q = load i64, i64* %ptr_b
2098 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2099 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2100 %b = bitcast <8 x i64> %b64 to <16 x i32>
2101 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2105 define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2106 ;CHECK-LABEL: test_mask_mul_epi32_rmbk
2107 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
2108 %q = load i64, i64* %ptr_b
2109 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2110 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2111 %b = bitcast <8 x i64> %b64 to <16 x i32>
2112 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2116 define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
2117 ;CHECK-LABEL: test_mask_mul_epi32_rmbkz
2118 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
2119 %q = load i64, i64* %ptr_b
2120 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2121 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2122 %b = bitcast <8 x i64> %b64 to <16 x i32>
2123 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2127 declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
2129 define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
2130 ;CHECK-LABEL: test_mask_mul_epu32_rr
2131 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
2132 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2136 define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
2137 ;CHECK-LABEL: test_mask_mul_epu32_rrk
2138 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
2139 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2143 define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
2144 ;CHECK-LABEL: test_mask_mul_epu32_rrkz
2145 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
2146 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2150 define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2151 ;CHECK-LABEL: test_mask_mul_epu32_rm
2152 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
2153 %b = load <16 x i32>, <16 x i32>* %ptr_b
2154 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2158 define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2159 ;CHECK-LABEL: test_mask_mul_epu32_rmk
2160 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
2161 %b = load <16 x i32>, <16 x i32>* %ptr_b
2162 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2166 define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
2167 ;CHECK-LABEL: test_mask_mul_epu32_rmkz
2168 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
2169 %b = load <16 x i32>, <16 x i32>* %ptr_b
2170 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2174 define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
2175 ;CHECK-LABEL: test_mask_mul_epu32_rmb
2176 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
2177 %q = load i64, i64* %ptr_b
2178 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2179 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2180 %b = bitcast <8 x i64> %b64 to <16 x i32>
2181 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2185 define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2186 ;CHECK-LABEL: test_mask_mul_epu32_rmbk
2187 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
2188 %q = load i64, i64* %ptr_b
2189 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2190 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2191 %b = bitcast <8 x i64> %b64 to <16 x i32>
2192 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2196 define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
2197 ;CHECK-LABEL: test_mask_mul_epu32_rmbkz
2198 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
2199 %q = load i64, i64* %ptr_b
2200 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2201 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2202 %b = bitcast <8 x i64> %b64 to <16 x i32>
2203 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2207 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
2209 define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
2210 ;CHECK-LABEL: test_mask_mullo_epi32_rr_512
2211 ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
2212 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
; Merging-masked vpmulld: lanes cleared in %mask keep %passThru.
define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rrk_512
;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
; Zeroing-masked vpmulld: lanes cleared in %mask are zeroed ({z}).
define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512
;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
; Unmasked vpmulld with a full-vector memory operand folded from %ptr_b.
define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_mullo_epi32_rm_512
;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
; Merging-masked vpmulld with a folded memory operand.
define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rmk_512
;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
; Zeroing-masked vpmulld with a folded memory operand.
define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512
;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
; Unmasked vpmulld with an embedded-broadcast operand: the scalar splat
; built by insertelement+shufflevector must fold to (%rdi){1to16}.
define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_mullo_epi32_rmb_512
;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
; Merging-masked vpmulld with an embedded-broadcast operand.
define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512
;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
; Zeroing-masked vpmulld with an embedded-broadcast operand.
define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512
;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
2284 declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2286 define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2287 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae
2288 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2289 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2290 ret <16 x float> %res
; Zeroing-masked vaddps with round-down ({rd-sae}, imm 1). The CHECK now
; pins the {%k1} {z} decorators, matching the rn/ru/rz sibling tests;
; the previous pattern omitted them and so could not detect a dropped mask.
define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae
;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 1)
  ret <16 x float> %res
2298 define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2299 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae
2300 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2301 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2302 ret <16 x float> %res
2305 define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2306 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae
2307 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2308 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2309 ret <16 x float> %res
2313 define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2314 ;CHECK-LABEL: test_mm512_maskz_add_round_ps_current
2315 ;CHECK: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
2316 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2317 ret <16 x float> %res
2320 define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2321 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae
2322 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2323 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2324 ret <16 x float> %res
2326 define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2327 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae
2328 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2329 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2330 ret <16 x float> %res
2332 define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2333 ;CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae
2334 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2335 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2336 ret <16 x float> %res
2339 define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2340 ;CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae
2341 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2342 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2343 ret <16 x float> %res
2347 define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2348 ;CHECK-LABEL: test_mm512_mask_add_round_ps_current
2349 ;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1}
2350 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2351 ret <16 x float> %res
2355 define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2356 ;CHECK-LABEL: test_mm512_add_round_ps_rn_sae
2357 ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
2358 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2359 ret <16 x float> %res
2361 define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2362 ;CHECK-LABEL: test_mm512_add_round_ps_rd_sae
2363 ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
2364 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2365 ret <16 x float> %res
2367 define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2368 ;CHECK-LABEL: test_mm512_add_round_ps_ru_sae
2369 ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
2370 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2371 ret <16 x float> %res
2374 define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2375 ;CHECK-LABEL: test_mm512_add_round_ps_rz_sae
2376 ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
2377 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2378 ret <16 x float> %res
2381 define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2382 ;CHECK-LABEL: test_mm512_add_round_ps_current
2383 ;CHECK: vaddps %zmm1, %zmm0, %zmm0
2384 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2385 ret <16 x float> %res
2387 declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2389 define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2390 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae
2391 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2392 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2393 ret <16 x float> %res
2395 define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2396 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae
2397 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2398 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2399 ret <16 x float> %res
2401 define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2402 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae
2403 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2404 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2405 ret <16 x float> %res
2408 define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2409 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae
2410 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2411 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2412 ret <16 x float> %res
2416 define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2417 ;CHECK-LABEL: test_mm512_mask_sub_round_ps_current
2418 ;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1}
2419 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2420 ret <16 x float> %res
2423 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2424 ;CHECK-LABEL: test_mm512_sub_round_ps_rn_sae
2425 ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
2426 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2427 ret <16 x float> %res
2429 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2430 ;CHECK-LABEL: test_mm512_sub_round_ps_rd_sae
2431 ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
2432 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2433 ret <16 x float> %res
2435 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2436 ;CHECK-LABEL: test_mm512_sub_round_ps_ru_sae
2437 ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
2438 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2439 ret <16 x float> %res
2442 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2443 ;CHECK-LABEL: test_mm512_sub_round_ps_rz_sae
2444 ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
2445 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2446 ret <16 x float> %res
2449 define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2450 ;CHECK-LABEL: test_mm512_sub_round_ps_current
2451 ;CHECK: vsubps %zmm1, %zmm0, %zmm0
2452 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2453 ret <16 x float> %res
2456 define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2457 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae
2458 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2459 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
2460 ret <16 x float> %res
; Zeroing-masked vdivps with round-down ({rd-sae}, imm 1). The CHECK now
; pins the {%k1} {z} decorators, matching the rn/ru/rz sibling tests;
; the previous pattern omitted them and so could not detect a dropped mask.
define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
;CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae
;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 1)
  ret <16 x float> %res
2468 define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2469 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae
2470 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2471 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
2472 ret <16 x float> %res
2475 define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2476 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae
2477 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2478 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
2479 ret <16 x float> %res
2483 define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2484 ;CHECK-LABEL: test_mm512_maskz_div_round_ps_current
2485 ;CHECK: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
2486 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2487 ret <16 x float> %res
2490 define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2491 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae
2492 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2493 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2494 ret <16 x float> %res
2496 define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2497 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae
2498 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2499 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2500 ret <16 x float> %res
2502 define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2503 ;CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae
2504 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2505 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2506 ret <16 x float> %res
2509 define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2510 ;CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae
2511 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2512 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2513 ret <16 x float> %res
2517 define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2518 ;CHECK-LABEL: test_mm512_mask_div_round_ps_current
2519 ;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1}
2520 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2521 ret <16 x float> %res
2525 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2526 ;CHECK-LABEL: test_mm512_div_round_ps_rn_sae
2527 ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
2528 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2529 ret <16 x float> %res
2531 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2532 ;CHECK-LABEL: test_mm512_div_round_ps_rd_sae
2533 ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
2534 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2535 ret <16 x float> %res
2537 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2538 ;CHECK-LABEL: test_mm512_div_round_ps_ru_sae
2539 ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
2540 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2541 ret <16 x float> %res
2544 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2545 ;CHECK-LABEL: test_mm512_div_round_ps_rz_sae
2546 ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
2547 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2548 ret <16 x float> %res
2551 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2552 ;CHECK-LABEL: test_mm512_div_round_ps_current
2553 ;CHECK: vdivps %zmm1, %zmm0, %zmm0
2554 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2555 ret <16 x float> %res
2557 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2559 define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2560 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_sae
2561 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2562 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2563 ret <16 x float> %res
2566 define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2567 ;CHECK-LABEL: test_mm512_maskz_min_round_ps_current
2568 ;CHECK: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
2569 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2570 ret <16 x float> %res
2573 define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2574 ;CHECK-LABEL: test_mm512_mask_min_round_ps_sae
2575 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2576 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2577 ret <16 x float> %res
2580 define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2581 ;CHECK-LABEL: test_mm512_mask_min_round_ps_current
2582 ;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1}
2583 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2584 ret <16 x float> %res
2587 define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2588 ;CHECK-LABEL: test_mm512_min_round_ps_sae
2589 ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0
2590 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2591 ret <16 x float> %res
2594 define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2595 ;CHECK-LABEL: test_mm512_min_round_ps_current
2596 ;CHECK: vminps %zmm1, %zmm0, %zmm0
2597 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2598 ret <16 x float> %res
2600 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2602 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2603 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_sae
2604 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2605 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2606 ret <16 x float> %res
2609 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2610 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_current
2611 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
2612 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2613 ret <16 x float> %res
2616 define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2617 ;CHECK-LABEL: test_mm512_mask_max_round_ps_sae
2618 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2619 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2620 ret <16 x float> %res
2623 define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2624 ;CHECK-LABEL: test_mm512_mask_max_round_ps_current
2625 ;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
2626 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2627 ret <16 x float> %res
2630 define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2631 ;CHECK-LABEL: test_mm512_max_round_ps_sae
2632 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0
2633 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2634 ret <16 x float> %res
2637 define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2638 ;CHECK-LABEL: test_mm512_max_round_ps_current
2639 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0
2640 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2641 ret <16 x float> %res
2643 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; --- llvm.x86.avx512.mask.add.ss.round: scalar float add, one test per
; embedded-rounding immediate: 0={rn-sae}, 1={rd-sae}, 2={ru-sae}, 3={rz-sae},
; 4=current rounding mode (no modifier emitted).
2645 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2647 define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2648 ; CHECK-LABEL: test_mask_add_ss_rn
2649 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2650 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
2651 ret <4 x float> %res
2654 define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2655 ; CHECK-LABEL: test_mask_add_ss_rd
2656 ; CHECK: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2657 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
2658 ret <4 x float> %res
2661 define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2662 ; CHECK-LABEL: test_mask_add_ss_ru
2663 ; CHECK: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2664 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
2665 ret <4 x float> %res
2668 define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2669 ; CHECK-LABEL: test_mask_add_ss_rz
2670 ; CHECK: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2671 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
2672 ret <4 x float> %res
2675 define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2676 ; CHECK-LABEL: test_mask_add_ss_current
2677 ; CHECK: vaddss %xmm1, %xmm0, %xmm2 {%k1}
2678 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2679 ret <4 x float> %res
; Zero-masking variant: passthru is zeroinitializer, so the asm carries {z}.
2682 define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2683 ; CHECK-LABEL: test_maskz_add_ss_rn
2684 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2685 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
2686 ret <4 x float> %res
; Unmasked variant: mask i8 -1 means all lanes, so no {%k1} in the asm.
2689 define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
2690 ; CHECK-LABEL: test_add_ss_rn
2691 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
2692 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
2693 ret <4 x float> %res
; --- llvm.x86.avx512.mask.add.sd.round: scalar double add; mirrors the ss
; tests above, one test per rounding immediate (0..3 = {rn,rd,ru,rz}-sae,
; 4 = current rounding mode).
2696 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2698 define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2699 ; CHECK-LABEL: test_mask_add_sd_rn
2700 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2701 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
2702 ret <2 x double> %res
2705 define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2706 ; CHECK-LABEL: test_mask_add_sd_rd
2707 ; CHECK: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2708 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
2709 ret <2 x double> %res
2712 define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2713 ; CHECK-LABEL: test_mask_add_sd_ru
2714 ; CHECK: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2715 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
2716 ret <2 x double> %res
2719 define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2720 ; CHECK-LABEL: test_mask_add_sd_rz
2721 ; CHECK: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2722 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
2723 ret <2 x double> %res
2726 define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2727 ; CHECK-LABEL: test_mask_add_sd_current
2728 ; CHECK: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
2729 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2730 ret <2 x double> %res
; Zero-masking variant (zeroinitializer passthru -> {z} in the asm).
2733 define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2734 ; CHECK-LABEL: test_maskz_add_sd_rn
2735 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2736 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
2737 ret <2 x double> %res
; Unmasked variant (mask i8 -1 -> no {%k1}).
2740 define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
2741 ; CHECK-LABEL: test_add_sd_rn
2742 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
2743 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
2744 ret <2 x double> %res
; --- llvm.x86.avx512.mask.max.ss.round: scalar float max. Only {sae} (i32 8)
; and current-mode (i32 4) are tested: max has no rounding, only exception
; suppression. Covers merge-masked, zero-masked, and unmasked forms for each.
2747 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2749 define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2750 ; CHECK-LABEL: test_mask_max_ss_sae
2751 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2752 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
2753 ret <4 x float> %res
2756 define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2757 ; CHECK-LABEL: test_maskz_max_ss_sae
2758 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2759 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
2760 ret <4 x float> %res
2763 define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
2764 ; CHECK-LABEL: test_max_ss_sae
2765 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0
2766 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
2767 ret <4 x float> %res
2770 define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2771 ; CHECK-LABEL: test_mask_max_ss
2772 ; CHECK: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
2773 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2774 ret <4 x float> %res
2777 define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2778 ; CHECK-LABEL: test_maskz_max_ss
2779 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
2780 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
2781 ret <4 x float> %res
2784 define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
2785 ; CHECK-LABEL: test_max_ss
2786 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0
2787 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
2788 ret <4 x float> %res
; --- llvm.x86.avx512.mask.max.sd.round: scalar double max; mirrors the ss
; tests above ({sae} = i32 8 and current mode = i32 4, each in merge-masked,
; zero-masked, and unmasked form).
2790 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2792 define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2793 ; CHECK-LABEL: test_mask_max_sd_sae
2794 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2795 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
2796 ret <2 x double> %res
2799 define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2800 ; CHECK-LABEL: test_maskz_max_sd_sae
2801 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2802 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
2803 ret <2 x double> %res
2806 define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
2807 ; CHECK-LABEL: test_max_sd_sae
2808 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
2809 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
2810 ret <2 x double> %res
2813 define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2814 ; CHECK-LABEL: test_mask_max_sd
2815 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
2816 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2817 ret <2 x double> %res
2820 define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2821 ; CHECK-LABEL: test_maskz_max_sd
2822 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
2823 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
2824 ret <2 x double> %res
2827 define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
2828 ; CHECK-LABEL: test_max_sd
2829 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0
2830 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
2831 ret <2 x double> %res
; --- Signed int -> scalar float/double conversions with embedded rounding.
; Each test passes i32 3 ({rz-sae}, round toward zero) and expects the
; modifier in the emitted vcvtsi2sd/ss instruction.
2834 define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
2835 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
2837 ; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
2839 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1]
2840 ret <2 x double> %res
2842 declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone
2844 define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
2845 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
2847 ; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
2849 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1]
2850 ret <2 x double> %res
2852 declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
2854 define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
2855 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
2857 ; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
2859 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1]
2860 ret <4 x float> %res
2862 declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
2864 define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
2865 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
2867 ; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
2869 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1]
2870 ret <4 x float> %res
2872 declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
; --- Unsigned 32-bit int -> scalar float (vcvtusi2ssl). Naming convention:
; "cvt_round*" tests use an explicit rounding mode (here i32 1 = {rd-sae}),
; plain "cvtu32_ss" tests use i32 4 = current mode. The *_mem variants load
; the integer operand through a pointer; note the current-mode case folds the
; load into the instruction while the {rd-sae} case goes through %eax.
2874 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
2875 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
2877 ; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
2880 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
2881 ret <4 x float> %res
2884 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
2885 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
2887 ; CHECK-NEXT: movl (%rdi), %eax
2888 ; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
2891 %b = load i32, i32* %ptr
2892 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
2893 ret <4 x float> %res
2896 define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
2897 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
2899 ; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
2902 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
2903 ret <4 x float> %res
2906 define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
2907 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
2909 ; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
2912 %b = load i32, i32* %ptr
2913 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
2914 ret <4 x float> %res
2916 declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
; --- Unsigned 64-bit int -> scalar float (vcvtusi2ssq): explicit {rd-sae}
; rounding (i32 1) in the _round variant, current mode (i32 4) in the plain
; variant — same convention as the u32 tests above.
2918 define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
2919 ; CHECK-LABEL: _mm_cvt_roundu64_ss:
2921 ; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
2924 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1]
2925 ret <4 x float> %res
2928 define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
2929 ; CHECK-LABEL: _mm_cvtu64_ss:
2931 ; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
2934 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1]
2935 ret <4 x float> %res
2937 declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone
; --- Unsigned 32-bit int -> scalar double (vcvtusi2sdl). This intrinsic takes
; no rounding operand (u32 -> f64 is always exact), hence the two-argument
; declare below.
2939 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
2940 ; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
2942 ; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
2945 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1]
2946 ret <2 x double> %res
2948 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
; --- Unsigned 64-bit int -> scalar double (vcvtusi2sdq).
; Fixed: the two test names were swapped relative to their rounding operands.
; By the convention used by every other cvt test in this file (see the u32/u64
; ss tests above), "cvt_round*" exercises an explicit rounding mode (i32 1 =
; {rd-sae}) and the plain name exercises the current mode (i32 4).
2950 define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
2951 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
2953 ; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
2956 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1]
2957 ret <2 x double> %res
2960 define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
2961 ; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
2963 ; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
2966 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1]
2967 ret <2 x double> %res
2969 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
; --- Unmasked integer min/max: each test passes an all-ones mask and a zero
; passthru and only pins down the EVEX encoding of the emitted instruction.
2971 define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
2972 ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
2973 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
2974 <8 x i64>zeroinitializer, i8 -1)
2977 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2979 define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
2980 ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
2981 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
2982 <16 x i32>zeroinitializer, i16 -1)
2985 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2987 define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
2988 ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
2989 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
2990 <16 x i32>zeroinitializer, i16 -1)
2993 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; --- Masked integer min/max (signed/unsigned, d/q): each test calls the
; intrinsic once with a live mask and once with -1 (all lanes), then adds the
; results so both calls stay live and both instruction forms are emitted.
2995 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_512
2997 ; CHECK: vpmaxsd %zmm
2999 define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3000 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3001 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3002 %res2 = add <16 x i32> %res, %res1
3003 ret <16 x i32> %res2
3006 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_512
3008 ; CHECK: vpmaxsq %zmm
3010 define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3011 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3012 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3013 %res2 = add <8 x i64> %res, %res1
3017 declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3019 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_512
3021 ; CHECK: vpmaxud %zmm
3023 define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3024 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3025 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3026 %res2 = add <16 x i32> %res, %res1
3027 ret <16 x i32> %res2
3030 declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3032 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_512
3034 ; CHECK: vpmaxuq %zmm
3036 define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3037 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3038 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3039 %res2 = add <8 x i64> %res, %res1
3043 declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3045 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_512
3047 ; CHECK: vpminsd %zmm
3049 define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3050 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3051 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3052 %res2 = add <16 x i32> %res, %res1
3053 ret <16 x i32> %res2
3056 declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3058 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_512
3060 ; CHECK: vpminsq %zmm
3062 define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3063 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3064 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3065 %res2 = add <8 x i64> %res, %res1
3069 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_512
3071 ; CHECK: vpminud %zmm
3073 define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3074 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3075 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3076 %res2 = add <16 x i32> %res, %res1
3077 ret <16 x i32> %res2
3080 declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3082 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_512
3084 ; CHECK: vpminuq %zmm
3086 define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3087 %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3088 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3089 %res2 = add <8 x i64> %res, %res1
; --- vpermi2var (two-source permute, index operand overwritten) for d/pd/ps/q.
; Same masked + unmasked call pattern as above; integer results combined with
; add, floating-point results with fadd.
3093 declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3095 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512
3098 ; CHECK: vpermi2d {{.*}}{%k1}
3099 define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3100 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3101 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3102 %res2 = add <16 x i32> %res, %res1
3103 ret <16 x i32> %res2
3106 declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
3108 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512
3111 ; CHECK: vpermi2pd {{.*}}{%k1}
3112 define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
3113 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
3114 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
3115 %res2 = fadd <8 x double> %res, %res1
3116 ret <8 x double> %res2
3119 declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
3121 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512
3124 ; CHECK: vpermi2ps {{.*}}{%k1}
3125 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
3126 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
3127 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
3128 %res2 = fadd <16 x float> %res, %res1
3129 ret <16 x float> %res2
3132 declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3134 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512
3137 ; CHECK: vpermi2q {{.*}}{%k1}
3138 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3139 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3140 %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3141 %res2 = add <8 x i64> %res, %res1
; --- vpermt2var (two-source permute, table operand overwritten): zero-masked
; (maskz, expects {%k1} {z}) variants for d/pd/ps/q, then the merge-masked
; variant for d. Same live-mask + all-ones-mask call pattern as above.
3145 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3147 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512
3150 ; CHECK: vpermt2d {{.*}}{%k1} {z}
3151 define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3152 %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3153 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3154 %res2 = add <16 x i32> %res, %res1
3155 ret <16 x i32> %res2
3158 declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
3160 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512
3163 ; CHECK: vpermt2pd {{.*}}{%k1} {z}
3164 define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3165 %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3166 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3167 %res2 = fadd <8 x double> %res, %res1
3168 ret <8 x double> %res2
3171 declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
3173 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512
3176 ; CHECK: vpermt2ps {{.*}}{%k1} {z}
3177 define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3178 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3179 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3180 %res2 = fadd <16 x float> %res, %res1
3181 ret <16 x float> %res2
3185 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3187 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512
3190 ; CHECK: vpermt2q {{.*}}{%k1} {z}
3191 define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3192 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3193 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3194 %res2 = add <8 x i64> %res, %res1
3198 declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3200 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512
3203 ; CHECK: vpermt2d {{.*}}{%k1}
3205 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3206 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3207 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3208 %res2 = add <16 x i32> %res, %res1
3209 ret <16 x i32> %res2
; --- vscalefpd/vscalefps: masked call with an explicit rounding immediate
; (pd: i32 3, ps: i32 2) plus an unmasked call with i32 0; results combined
; with fadd so both instructions survive.
3212 declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
3213 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_512
3216 ; CHECK: vscalefpd{{.*}}{%k1}
3217 define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3218 %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3)
3219 %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
3220 %res2 = fadd <8 x double> %res, %res1
3221 ret <8 x double> %res2
3224 declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
3225 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_512
3228 ; CHECK: vscalefps{{.*}}{%k1}
3229 define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3230 %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2)
3231 %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
3232 %res2 = fadd <16 x float> %res, %res1
3233 ret <16 x float> %res2
; --- Floating-point unpack high/low (vunpckhpd/ps, vunpcklpd/ps): masked and
; unmasked calls, fadd-combined. Note the i8 mask (pd) is moved through
; movzbl %dil before kmovw, while the i16 mask (ps) uses kmovw %edi directly.
3236 declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
3238 define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3239 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
3241 ; CHECK-NEXT: movzbl %dil, %eax
3242 ; CHECK-NEXT: kmovw %eax, %k1
3243 ; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1}
3244 ; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm0
3245 %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3246 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3247 %res2 = fadd <8 x double> %res, %res1
3248 ret <8 x double> %res2
3251 declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
3253 define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3254 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
3256 ; CHECK-NEXT: kmovw %edi, %k1
3257 ; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1}
3258 ; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm0
3259 %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3260 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3261 %res2 = fadd <16 x float> %res, %res1
3262 ret <16 x float> %res2
3265 declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
3267 define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3268 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
3270 ; CHECK-NEXT: movzbl %dil, %eax
3271 ; CHECK-NEXT: kmovw %eax, %k1
3272 ; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm2 {%k1}
3273 ; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm0
3274 %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3275 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3276 %res2 = fadd <8 x double> %res, %res1
3277 ret <8 x double> %res2
3280 declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
3282 define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3283 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
3285 ; CHECK-NEXT: kmovw %edi, %k1
3286 ; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1}
3287 ; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm0
3288 %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3289 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3290 %res2 = fadd <16 x float> %res, %res1
3291 ret <16 x float> %res2
; --- Integer unpack (vpunpcklqdq/vpunpckhqdq/vpunpckhdq/vpunpckldq): masked,
; unmasked, and (for punpcklqd) zero-masked calls, summed with vpaddq/vpaddd.
3294 declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3296 define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3297 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
3299 ; CHECK-NEXT: movzbl %dil, %eax
3300 ; CHECK-NEXT: kmovw %eax, %k1
3301 ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1}
3302 ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 {%k1} {z}
3303 ; CHECK-NEXT: vpunpcklqdq {{.*#+}}
3304 ; CHECK: vpaddq %zmm0, %zmm2, %zmm0
3305 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
3307 %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3308 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3309 %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3)
3310 %res3 = add <8 x i64> %res, %res1
3311 %res4 = add <8 x i64> %res2, %res3
3315 declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3317 define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3318 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
3320 ; CHECK-NEXT: movzbl %dil, %eax
3321 ; CHECK-NEXT: kmovw %eax, %k1
3322 ; CHECK-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1}
3323 ; CHECK-NEXT: vpunpckhqdq {{.*#+}}
3324 ; CHECK: vpaddq %zmm0, %zmm2, %zmm0
3326 %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3327 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3328 %res2 = add <8 x i64> %res, %res1
3332 declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3334 define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3335 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
3337 ; CHECK-NEXT: kmovw %edi, %k1
3338 ; CHECK-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1}
3339 ; CHECK-NEXT: vpunpckhdq {{.*#+}}
3340 ; CHECK: vpaddd %zmm0, %zmm2, %zmm0
3342 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3343 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3344 %res2 = add <16 x i32> %res, %res1
3345 ret <16 x i32> %res2
3348 declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3350 define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3351 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
3353 ; CHECK-NEXT: kmovw %edi, %k1
3354 ; CHECK-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1}
3355 ; CHECK-NEXT: vpunpckldq {{.*#+}}
3356 ; CHECK: vpaddd %zmm0, %zmm2, %zmm0
3358 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3359 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3360 %res2 = add <16 x i32> %res, %res1
3361 ret <16 x i32> %res2
; --- Truncating qword -> byte move (pmov.qb); the test using this declare
; continues past this chunk.
3364 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)
3366 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3367 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
3368 ; CHECK: vpmovqb %zmm0, %xmm1 {%k1}
3369 ; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
3370 ; CHECK-NEXT: vpmovqb %zmm0, %xmm0
3371 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3372 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3373 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3374 %res3 = add <16 x i8> %res0, %res1
3375 %res4 = add <16 x i8> %res3, %res2
3379 declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3381 define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3382 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512:
3383 ; CHECK: vpmovqb %zmm0, (%rdi)
3384 ; CHECK: vpmovqb %zmm0, (%rdi) {%k1}
3385 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3386 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3390 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)
3392 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3393 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
3394 ; CHECK: vpmovsqb %zmm0, %xmm1 {%k1}
3395 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
3396 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
3397 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3398 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3399 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3400 %res3 = add <16 x i8> %res0, %res1
3401 %res4 = add <16 x i8> %res3, %res2
3405 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3407 define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3408 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
3409 ; CHECK: vpmovsqb %zmm0, (%rdi)
3410 ; CHECK: vpmovsqb %zmm0, (%rdi) {%k1}
3411 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3412 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3416 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)
3418 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3419 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
3420 ; CHECK: vpmovusqb %zmm0, %xmm1 {%k1}
3421 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
3422 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
3423 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3424 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3425 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3426 %res3 = add <16 x i8> %res0, %res1
3427 %res4 = add <16 x i8> %res3, %res2
3431 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3433 define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3434 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
3435 ; CHECK: vpmovusqb %zmm0, (%rdi)
3436 ; CHECK: vpmovusqb %zmm0, (%rdi) {%k1}
3437 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3438 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3442 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
3444 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3445 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
3446 ; CHECK: vpmovqw %zmm0, %xmm1 {%k1}
3447 ; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
3448 ; CHECK-NEXT: vpmovqw %zmm0, %xmm0
3449 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3450 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3451 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3452 %res3 = add <8 x i16> %res0, %res1
3453 %res4 = add <8 x i16> %res3, %res2
3457 declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3459 define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3460 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512:
3461 ; CHECK: vpmovqw %zmm0, (%rdi)
3462 ; CHECK: vpmovqw %zmm0, (%rdi) {%k1}
3463 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3464 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3468 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)
3470 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3471 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
3472 ; CHECK: vpmovsqw %zmm0, %xmm1 {%k1}
3473 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
3474 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
3475 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3476 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3477 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3478 %res3 = add <8 x i16> %res0, %res1
3479 %res4 = add <8 x i16> %res3, %res2
3483 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3485 define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3486 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
3487 ; CHECK: vpmovsqw %zmm0, (%rdi)
3488 ; CHECK: vpmovsqw %zmm0, (%rdi) {%k1}
3489 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3490 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3494 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)
3496 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3497 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
3498 ; CHECK: vpmovusqw %zmm0, %xmm1 {%k1}
3499 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
3500 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
3501 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3502 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3503 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3504 %res3 = add <8 x i16> %res0, %res1
3505 %res4 = add <8 x i16> %res3, %res2
3509 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3511 define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3512 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
3513 ; CHECK: vpmovusqw %zmm0, (%rdi)
3514 ; CHECK: vpmovusqw %zmm0, (%rdi) {%k1}
3515 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3516 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3520 declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
3522 define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3523 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
3524 ; CHECK: vpmovqd %zmm0, %ymm1 {%k1}
3525 ; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
3526 ; CHECK-NEXT: vpmovqd %zmm0, %ymm0
3527 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3528 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3529 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3530 %res3 = add <8 x i32> %res0, %res1
3531 %res4 = add <8 x i32> %res3, %res2
3535 declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3537 define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3538 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512:
3539 ; CHECK: vpmovqd %zmm0, (%rdi)
3540 ; CHECK: vpmovqd %zmm0, (%rdi) {%k1}
3541 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3542 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3546 declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)
3548 define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3549 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
3550 ; CHECK: vpmovsqd %zmm0, %ymm1 {%k1}
3551 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
3552 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm0
3553 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3554 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3555 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3556 %res3 = add <8 x i32> %res0, %res1
3557 %res4 = add <8 x i32> %res3, %res2
3561 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3563 define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3564 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
3565 ; CHECK: vpmovsqd %zmm0, (%rdi)
3566 ; CHECK: vpmovsqd %zmm0, (%rdi) {%k1}
3567 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3568 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3572 declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)
3574 define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3575 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
3576 ; CHECK: vpmovusqd %zmm0, %ymm1 {%k1}
3577 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
3578 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm0
3579 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3580 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3581 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3582 %res3 = add <8 x i32> %res0, %res1
3583 %res4 = add <8 x i32> %res3, %res2
3587 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3589 define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3590 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
3591 ; CHECK: vpmovusqd %zmm0, (%rdi)
3592 ; CHECK: vpmovusqd %zmm0, (%rdi) {%k1}
3593 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3594 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3598 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
3600 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3601 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
3602 ; CHECK: vpmovdb %zmm0, %xmm1 {%k1}
3603 ; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
3604 ; CHECK-NEXT: vpmovdb %zmm0, %xmm0
3605 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3606 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3607 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3608 %res3 = add <16 x i8> %res0, %res1
3609 %res4 = add <16 x i8> %res3, %res2
3613 declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16)
3615 define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3616 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512:
3617 ; CHECK: vpmovdb %zmm0, (%rdi)
3618 ; CHECK: vpmovdb %zmm0, (%rdi) {%k1}
3619 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3620 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3624 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)
3626 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3627 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
3628 ; CHECK: vpmovsdb %zmm0, %xmm1 {%k1}
3629 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
3630 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
3631 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3632 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3633 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3634 %res3 = add <16 x i8> %res0, %res1
3635 %res4 = add <16 x i8> %res3, %res2
3639 declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)
3641 define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3642 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
3643 ; CHECK: vpmovsdb %zmm0, (%rdi)
3644 ; CHECK: vpmovsdb %zmm0, (%rdi) {%k1}
3645 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3646 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3650 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)
3652 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3653 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
3654 ; CHECK: vpmovusdb %zmm0, %xmm1 {%k1}
3655 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
3656 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
3657 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3658 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3659 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3660 %res3 = add <16 x i8> %res0, %res1
3661 %res4 = add <16 x i8> %res3, %res2
3665 declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)
3667 define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3668 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
3669 ; CHECK: vpmovusdb %zmm0, (%rdi)
3670 ; CHECK: vpmovusdb %zmm0, (%rdi) {%k1}
3671 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3672 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3676 declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)
3678 define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3679 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
3680 ; CHECK: vpmovdw %zmm0, %ymm1 {%k1}
3681 ; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
3682 ; CHECK-NEXT: vpmovdw %zmm0, %ymm0
3683 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3684 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3685 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3686 %res3 = add <16 x i16> %res0, %res1
3687 %res4 = add <16 x i16> %res3, %res2
3688 ret <16 x i16> %res4
3691 declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3693 define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3694 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512:
3695 ; CHECK: vpmovdw %zmm0, (%rdi)
3696 ; CHECK: vpmovdw %zmm0, (%rdi) {%k1}
3697 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3698 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3702 declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)
3704 define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3705 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
3706 ; CHECK: vpmovsdw %zmm0, %ymm1 {%k1}
3707 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
3708 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
3709 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3710 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3711 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3712 %res3 = add <16 x i16> %res0, %res1
3713 %res4 = add <16 x i16> %res3, %res2
3714 ret <16 x i16> %res4
3717 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3719 define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3720 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
3721 ; CHECK: vpmovsdw %zmm0, (%rdi)
3722 ; CHECK: vpmovsdw %zmm0, (%rdi) {%k1}
3723 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3724 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3728 declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)
3730 define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3731 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
3732 ; CHECK: vpmovusdw %zmm0, %ymm1 {%k1}
3733 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
3734 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
3735 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3736 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3737 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3738 %res3 = add <16 x i16> %res0, %res1
3739 %res4 = add <16 x i16> %res3, %res2
3740 ret <16 x i16> %res4
3743 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3745 define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3746 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
3747 ; CHECK: vpmovusdw %zmm0, (%rdi)
3748 ; CHECK: vpmovusdw %zmm0, (%rdi) {%k1}
3749 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3750 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3754 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
3756 define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
3757 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
3759 ; CHECK-NEXT: movzbl %dil, %eax
3760 ; CHECK-NEXT: kmovw %eax, %k1
3761 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1}
3762 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
3763 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3765 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
3766 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
3767 %res2 = fadd <8 x double> %res, %res1
3768 ret <8 x double> %res2
3771 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3773 define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
3774 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
3776 ; CHECK-NEXT: kmovw %edi, %k1
3777 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1}
3778 ; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0
3779 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
3781 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
3782 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
3783 %res2 = fadd <16 x float> %res, %res1
3784 ret <16 x float> %res2
3787 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
3789 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3790 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512:
3792 ; CHECK-NEXT: movzbl %dil, %eax
3793 ; CHECK-NEXT: kmovw %eax, %k1
3794 ; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1}
3795 ; CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0
3796 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3798 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3799 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
3800 %res2 = add <8 x i32> %res, %res1
3804 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
3806 define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) {
3807 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512:
3809 ; CHECK-NEXT: movzbl %dil, %eax
3810 ; CHECK-NEXT: kmovw %eax, %k1
3811 ; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1}
3812 ; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0
3813 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
3815 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
3816 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2)
3817 %res2 = fadd <8 x float> %res, %res1
3818 ret <8 x float> %res2
3821 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
3823 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3824 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512:
3826 ; CHECK-NEXT: movzbl %dil, %eax
3827 ; CHECK-NEXT: kmovw %eax, %k1
3828 ; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1}
3829 ; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0
3830 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3832 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2)
3833 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
3834 %res2 = add <8 x i32> %res, %res1
3838 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)
3840 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3841 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512:
3843 ; CHECK-NEXT: kmovw %edi, %k1
3844 ; CHECK-NEXT: vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1}
3845 ; CHECK-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0
3846 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3848 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
3849 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
3850 %res2 = add <16 x i32> %res, %res1
3851 ret <16 x i32> %res2
3854 declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)
3856 define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) {
3857 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512:
3859 ; CHECK-NEXT: movzbl %dil, %eax
3860 ; CHECK-NEXT: kmovw %eax, %k1
3861 ; CHECK-NEXT: vcvtps2pd %ymm0, %zmm1 {%k1}
3862 ; CHECK-NEXT: vcvtps2pd {sae}, %ymm0, %zmm0
3863 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3865 %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
3866 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
3867 %res2 = fadd <8 x double> %res, %res1
3868 ret <8 x double> %res2
3871 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
3873 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3874 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512:
3876 ; CHECK-NEXT: kmovw %edi, %k1
3877 ; CHECK-NEXT: vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1}
3878 ; CHECK-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0
3879 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3881 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
3882 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
3883 %res2 = add <16 x i32> %res, %res1
3884 ret <16 x i32> %res2
3887 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
3889 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3890 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
3892 ; CHECK-NEXT: movzbl %dil, %eax
3893 ; CHECK-NEXT: kmovw %eax, %k1
3894 ; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
3895 ; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
3896 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3898 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3899 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
3900 %res2 = add <8 x i32> %res, %res1
3904 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
3906 define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
3907 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
3909 ; CHECK-NEXT: movzbl %dil, %eax
3910 ; CHECK-NEXT: kmovw %eax, %k1
3911 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1}
3912 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
3913 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3915 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
3916 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
3917 %res2 = fadd <8 x double> %res, %res1
3918 ret <8 x double> %res2
3922 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3924 define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
3925 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
3927 ; CHECK-NEXT: kmovw %edi, %k1
3928 ; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1}
3929 ; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0
3930 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
3932 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
3933 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
3934 %res2 = fadd <16 x float> %res, %res1
3935 ret <16 x float> %res2
3938 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
3940 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3941 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
3943 ; CHECK-NEXT: movzbl %dil, %eax
3944 ; CHECK-NEXT: kmovw %eax, %k1
3945 ; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
3946 ; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
3947 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3949 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3950 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
3951 %res2 = add <8 x i32> %res, %res1
3955 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
3957 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3958 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
3960 ; CHECK-NEXT: kmovw %edi, %k1
3961 ; CHECK-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
3962 ; CHECK-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
3963 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3965 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
3966 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
3967 %res2 = add <16 x i32> %res, %res1
3968 ret <16 x i32> %res2
3971 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
3973 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3974 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
3976 ; CHECK-NEXT: kmovw %edi, %k1
3977 ; CHECK-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
3978 ; CHECK-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
3979 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3981 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
3982 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
3983 %res2 = add <16 x i32> %res, %res1
3984 ret <16 x i32> %res2
; Scalar scalef (vscalefss): masked call with default rounding (i32 4) and
; unmasked call with {rn-sae} rounding (i32 8).
3988 declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
3989 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ss
3992 ; CHECK: vscalefss {{.*}}{%k1}
3993 ; CHECK: vscalefss {rn-sae}
3994 define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
3995 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
3996 %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
3997 %res2 = fadd <4 x float> %res, %res1
3998 ret <4 x float> %res2
; Scalar double scalef (vscalefsd): masked/default-rounding call plus
; unmasked {rn-sae} call; mirrors the ss variant above.
4001 declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
4002 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_sd
4005 ; CHECK: vscalefsd {{.*}}{%k1}
4006 ; CHECK: vscalefsd {rn-sae}
4007 define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
4008 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
4009 %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
4010 %res2 = fadd <2 x double> %res, %res1
4011 ret <2 x double> %res2
; vgetexpss in four flavors: masked with passthru (default rounding), masked
; with {sae}, zero-masked with {sae}, and unmasked with {sae}; all four
; results are folded together with fadd so each call stays live.
4014 declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
4016 define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
4017 ; CHECK-LABEL: test_getexp_ss:
4019 ; CHECK-NEXT: andl $1, %edi
4020 ; CHECK-NEXT: kmovw %edi, %k1
4021 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
4022 ; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
4023 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
4024 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
4025 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0
4026 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
4027 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
4028 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4030 %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
4031 %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
4032 %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
4033 %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
4035 %res.1 = fadd <4 x float> %res0, %res1
4036 %res.2 = fadd <4 x float> %res2, %res3
4037 %res = fadd <4 x float> %res.1, %res.2
4038 ret <4 x float> %res
; vgetexpsd counterpart of test_getexp_ss: masked/default, masked/{sae},
; zero-masked/{sae}, and unmasked/default-rounding variants combined via fadd.
4041 declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
4043 define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
4044 ; CHECK-LABEL: test_getexp_sd:
4046 ; CHECK-NEXT: andl $1, %edi
4047 ; CHECK-NEXT: kmovw %edi, %k1
4048 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
4049 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
4050 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
4051 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
4052 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
4053 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
4054 ; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
4055 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
4057 %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
4058 %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
4059 %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
4060 %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
4062 %res.1 = fadd <2 x double> %res0, %res1
4063 %res.2 = fadd <2 x double> %res2, %res3
4064 %res = fadd <2 x double> %res.1, %res.2
4065 ret <2 x double> %res
; Masked scalar double compare: predicate 5 (nlt) under {sae}; the k-register
; result bit is moved to %eax and sign-extended across the byte via shl/sar.
4068 declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
4070 define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
4071 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd:
4073 ; CHECK-NEXT: andl $1, %edi
4074 ; CHECK-NEXT: kmovw %edi, %k1
4075 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
4076 ; CHECK-NEXT: kmovw %k0, %eax
4077 ; CHECK-NEXT: shlb $7, %al
4078 ; CHECK-NEXT: sarb $7, %al
4081 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
; Exercises cmp.sd with four predicates (2,3,4,5), mixing default rounding
; and {sae}, masked (%x3) and unmasked (-1); the i8 results are or'ed together.
4085 define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
4086 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all:
4088 ; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0
4089 ; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1
4090 ; CHECK-NEXT: korw %k0, %k1, %k0
4091 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1
4092 ; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2
4093 ; CHECK-NEXT: korw %k1, %k2, %k1
4094 ; CHECK-NEXT: andl $1, %edi
4095 ; CHECK-NEXT: kmovw %edi, %k2
4096 ; CHECK-NEXT: kandw %k2, %k1, %k1
4097 ; CHECK-NEXT: korw %k1, %k0, %k0
4098 ; CHECK-NEXT: kmovw %k0, %eax
4099 ; CHECK-NEXT: shlb $7, %al
4100 ; CHECK-NEXT: sarb $7, %al
4103 %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
4104 %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
4105 %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
4106 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
4108 %res11 = or i8 %res1, %res2
4109 %res12 = or i8 %res3, %res4
4110 %res13 = or i8 %res11, %res12
; Masked scalar float compare: predicate 3 (unord) with default rounding;
; the mask bit is extracted to %al and sign-extended via shl/sar.
4114 declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
4116 define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
4117 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
4119 ; CHECK-NEXT: andl $1, %edi
4120 ; CHECK-NEXT: kmovw %edi, %k1
4121 ; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
4122 ; CHECK-NEXT: kmovw %k0, %eax
4123 ; CHECK-NEXT: shlb $7, %al
4124 ; CHECK-NEXT: sarb $7, %al
4127 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
; cmp.ss with predicates 2,3,4,5, default rounding and {sae}, masked and
; unmasked; unlike the sd_all test, partial results are combined with 'and'.
4132 define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
4133 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all:
4135 ; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1
4136 ; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1}
4137 ; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k1
4138 ; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
4139 ; CHECK-NEXT: andl $1, %edi
4140 ; CHECK-NEXT: kmovw %edi, %k2
4141 ; CHECK-NEXT: kandw %k2, %k1, %k1
4142 ; CHECK-NEXT: kandw %k1, %k0, %k0
4143 ; CHECK-NEXT: kmovw %k0, %eax
4144 ; CHECK-NEXT: shlb $7, %al
4145 ; CHECK-NEXT: sarb $7, %al
4147 %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
4148 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
4149 %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
4150 %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)
4152 %res11 = and i8 %res1, %res2
4153 %res12 = and i8 %res3, %res4
4154 %res13 = and i8 %res11, %res12
; 128-bit-lane float shuffle (vshuff32x4, imm 22): masked and unmasked forms;
; the expected element permutation is asserted in the "## zmm..." comments.
4158 declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
4160 define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
4161 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
4163 ; CHECK-NEXT: kmovw %edi, %k1
4164 ; CHECK-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1}
4165 ; CHECK-NEXT: ## zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
4166 ; CHECK-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm0
4167 ; CHECK-NEXT: ## zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
4168 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4170 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
4171 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
4172 %res2 = fadd <16 x float> %res, %res1
4173 ret <16 x float> %res2
; 128-bit-lane double shuffle (vshuff64x2, imm 22): masked, zero-masked and
; unmasked forms; the i8 mask is zero-extended via movzbl before kmovw.
4176 declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)
4178 define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
4179 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
4181 ; CHECK-NEXT: movzbl %dil, %eax
4182 ; CHECK-NEXT: kmovw %eax, %k1
4183 ; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1}
4184 ; CHECK-NEXT: ## zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4185 ; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm3 {%k1} {z}
4186 ; CHECK-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4187 ; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm0
4188 ; CHECK-NEXT: ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4189 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4190 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
4192 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
4193 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
4194 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
4196 %res3 = fadd <8 x double> %res, %res1
4197 %res4 = fadd <8 x double> %res3, %res2
4198 ret <8 x double> %res4
; Integer counterpart of shuf_f32x4 (vshufi32x4, imm 22): masked vs. unmasked,
; combined with vpaddd.
4201 declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
4203 define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
4204 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
4206 ; CHECK-NEXT: kmovw %edi, %k1
4207 ; CHECK-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1}
4208 ; CHECK-NEXT: ## zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
4209 ; CHECK-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm0
4210 ; CHECK-NEXT: ## zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
4211 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4213 %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
4214 %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
4215 %res2 = add <16 x i32> %res, %res1
4216 ret <16 x i32> %res2
; Integer counterpart of shuf_f64x2 (vshufi64x2, imm 22): masked vs. unmasked,
; combined with vpaddq.
4219 declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
4221 define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
4222 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
4224 ; CHECK-NEXT: movzbl %dil, %eax
4225 ; CHECK-NEXT: kmovw %eax, %k1
4226 ; CHECK-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1}
4227 ; CHECK-NEXT: ## zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4228 ; CHECK-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm0
4229 ; CHECK-NEXT: ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4230 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4232 %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
4233 %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
4234 %res2 = add <8 x i64> %res, %res1
; vgetmantpd $11: masked call with default rounding and unmasked call with
; {sae}; results are summed to keep both live.
4238 declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
4240 define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
4241 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_512:
4243 ; CHECK-NEXT: movzbl %dil, %eax
4244 ; CHECK-NEXT: kmovw %eax, %k1
4245 ; CHECK-NEXT: vgetmantpd $11, %zmm0, %zmm1 {%k1}
4246 ; CHECK-NEXT: vgetmantpd $11,{sae}, %zmm0, %zmm0
4247 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
4249 %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
4250 %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8)
4251 %res2 = fadd <8 x double> %res, %res1
4252 ret <8 x double> %res2
; vgetmantps $11: masked/default-rounding call vs. unmasked/{sae} call.
4255 declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
4257 define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
4258 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_512:
4260 ; CHECK-NEXT: kmovw %edi, %k1
4261 ; CHECK-NEXT: vgetmantps $11, %zmm0, %zmm1 {%k1}
4262 ; CHECK-NEXT: vgetmantps $11,{sae}, %zmm0, %zmm0
4263 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4265 %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
4266 %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8)
4267 %res2 = fadd <16 x float> %res, %res1
4268 ret <16 x float> %res2
; vgetmantsd $11 in four flavors: masked with passthru, zero-masked, masked
; with {sae}, and unmasked; all four results are folded together with fadd.
4271 declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32)
4273 define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
4274 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd:
4276 ; CHECK-NEXT: andl $1, %edi
4277 ; CHECK-NEXT: kmovw %edi, %k1
4278 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
4279 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
4280 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
4281 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
4282 ; CHECK-NEXT: vgetmantsd $11,{sae}, %xmm1, %xmm0, %xmm2 {%k1}
4283 ; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
4284 ; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
4285 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
4287 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
4288 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4)
4289 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 8)
4290 %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 -1, i32 4)
4291 %res11 = fadd <2 x double> %res, %res1
4292 %res12 = fadd <2 x double> %res2, %res3
4293 %res13 = fadd <2 x double> %res11, %res12
4294 ret <2 x double> %res13
; vgetmantss $11 in four flavors: masked with passthru, zero-masked,
; unmasked/{sae}, and unmasked/default rounding; results folded with fadd.
4297 declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)
4299 define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
4300 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss:
4302 ; CHECK-NEXT: andl $1, %edi
4303 ; CHECK-NEXT: kmovw %edi, %k1
4304 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
4305 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z}
4306 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4
4307 ; CHECK-NEXT: vgetmantss $11,{sae}, %xmm1, %xmm0, %xmm0
4308 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
4309 ; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
4310 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4312 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
4313 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4)
4314 %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 8)
4315 %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 4)
4316 %res11 = fadd <4 x float> %res, %res1
4317 %res12 = fadd <4 x float> %res2, %res3
4318 %res13 = fadd <4 x float> %res11, %res12
4319 ret <4 x float> %res13
; vshufpd $22 on zmm: masked-with-passthru, zero-masked and unmasked forms,
; combined with vaddpd.
4322 declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)
4324 define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
4325 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
4327 ; CHECK-NEXT: movzbl %dil, %eax
4328 ; CHECK-NEXT: kmovw %eax, %k1
4329 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1}
4330 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm3 {%k1} {z}
4331 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0
4332 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4333 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
4335 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
4336 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
4337 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
4339 %res3 = fadd <8 x double> %res, %res1
4340 %res4 = fadd <8 x double> %res3, %res2
4341 ret <8 x double> %res4
; vshufps $22 on zmm: masked vs. unmasked, combined with vaddps.
4344 declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)
4346 define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
4347 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
4349 ; CHECK-NEXT: kmovw %edi, %k1
4350 ; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm2 {%k1}
4351 ; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm0
4352 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4354 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
4355 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
4356 %res2 = fadd <16 x float> %res, %res1
4357 ret <16 x float> %res2
; Immediate-form vpermilpd ($22) on zmm: masked, zero-masked and unmasked
; variants combined with vaddpd.
4360 declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)
4362 define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
4363 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
4365 ; CHECK-NEXT: movzbl %dil, %eax
4366 ; CHECK-NEXT: kmovw %eax, %k1
4367 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm1 {%k1}
4368 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm2 {%k1} {z}
4369 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm0
4370 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
4371 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
4373 %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
4374 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
4375 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
4376 %res3 = fadd <8 x double> %res, %res1
4377 %res4 = fadd <8 x double> %res3, %res2
4378 ret <8 x double> %res4
; Immediate-form vpermilps ($22) on zmm: masked, zero-masked and unmasked
; variants combined with vaddps.
4381 declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)
4383 define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
4384 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
4386 ; CHECK-NEXT: kmovw %edi, %k1
4387 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm1 {%k1}
4388 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm2 {%k1} {z}
4389 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm0
4390 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
4391 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4393 %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
4394 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
4395 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
4396 %res3 = fadd <16 x float> %res, %res1
4397 %res4 = fadd <16 x float> %res3, %res2
4398 ret <16 x float> %res4
; Variable-form vpermilpd (control in %zmm1): masked, zero-masked and
; unmasked variants combined with vaddpd.
4401 declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
4403 define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
4404 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
4406 ; CHECK-NEXT: movzbl %dil, %eax
4407 ; CHECK-NEXT: kmovw %eax, %k1
4408 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
4409 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z}
4410 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0
4411 ; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
4412 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
4414 %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
4415 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
4416 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
4417 %res3 = fadd <8 x double> %res, %res1
4418 %res4 = fadd <8 x double> %res2, %res3
4419 ret <8 x double> %res4
; Variable-form vpermilps (control in %zmm1): masked, zero-masked and
; unmasked variants combined with vaddps.
4422 declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
4424 define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
4425 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
4427 ; CHECK-NEXT: kmovw %edi, %k1
4428 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1}
4429 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z}
4430 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0
4431 ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
4432 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
4434 %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
4435 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
4436 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
4437 %res3 = fadd <16 x float> %res, %res1
4438 %res4 = fadd <16 x float> %res2, %res3
4439 ret <16 x float> %res4
; vinsertf32x4 $1 (insert xmm into zmm lane 1): masked, zero-masked and
; unmasked variants combined with vaddps.
4442 declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i8)
4444 define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i8 %x4) {
4445 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
4447 ; CHECK-NEXT: kmovw %edi, %k1
4448 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
4449 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
4450 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
4451 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4452 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
4454 %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 %x4)
4455 %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 -1)
4456 %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i8 %x4)
4457 %res3 = fadd <16 x float> %res, %res1
4458 %res4 = fadd <16 x float> %res2, %res3
4459 ret <16 x float> %res4
; vinserti32x4 $1 (integer insert into zmm lane 1): masked, zero-masked and
; unmasked variants combined with vpaddd.
4462 declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i8)
4464 define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i8 %x4) {
4465 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
4467 ; CHECK-NEXT: kmovw %edi, %k1
4468 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
4469 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
4470 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
4471 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4472 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
4474 %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 %x4)
4475 %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 -1)
4476 %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i8 %x4)
4477 %res3 = add <16 x i32> %res, %res1
4478 %res4 = add <16 x i32> %res2, %res3
4479 ret <16 x i32> %res4
; vinsertf64x4 $1 (insert ymm into upper half of zmm): masked, zero-masked
; and unmasked variants combined with vaddpd.
4482 declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)
4484 define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
4485 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
4487 ; CHECK-NEXT: movzbl %dil, %eax
4488 ; CHECK-NEXT: kmovw %eax, %k1
4489 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
4490 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
4491 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
4492 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4493 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
4495 %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
4496 %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
4497 %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
4498 %res3 = fadd <8 x double> %res, %res1
4499 %res4 = fadd <8 x double> %res2, %res3
4500 ret <8 x double> %res4
; vinserti64x4 $1 (integer insert of ymm into upper half of zmm): masked,
; zero-masked and unmasked variants combined with vpaddq.
4503 declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)
4505 define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
4506 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
4508 ; CHECK-NEXT: movzbl %dil, %eax
4509 ; CHECK-NEXT: kmovw %eax, %k1
4510 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
4511 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
4512 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
4513 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4514 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4516 %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
4517 %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
4518 %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
4519 %res3 = add <8 x i64> %res, %res1
4520 %res4 = add <8 x i64> %res2, %res3
; Scalar float->double convert (vcvtss2sd): masked call with default rounding
; and unmasked call with {sae}; the two <2 x double> results are combined
; with fadd, which lowers to vaddpd.
; NOTE: the third CHECK-NEXT line had lost its mnemonic ("%xmm0, %xmm2, %xmm0"
; alone cannot match any instruction); restored the expected vaddpd, matching
; the vaddpd lines checked by the other <2 x double> tests in this file.
4524 declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float>, <4 x float>, <2 x double>, i8, i32)
4526 define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<4 x float> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) {
4527 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round:
4528 ; CHECK: kmovw %edi, %k1
4529 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1}
4530 ; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0
4531 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
4533 %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4)
4534 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8)
4535 %res2 = fadd <2 x double> %res, %res1
4536 ret <2 x double> %res2
; Scalar double->float convert (vcvtsd2ss) with explicit rounding modes:
; masked {rz-sae} (i32 3) and unmasked {rn-sae} (i32 8); sum keeps both live.
4539 declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double>, <2 x double>, <4 x float>, i8, i32)
4541 define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<2 x double> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) {
4542 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round:
4543 ; CHECK: kmovw %edi, %k1
4544 ; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
4545 ; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
4546 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
4548 %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3)
4549 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
4550 %res2 = fadd <4 x float> %res, %res1
4551 ret <4 x float> %res2
; vpternlogd $33: masked call (original %zmm0 copied to %zmm3 as passthru)
; vs. unmasked call; results combined with vpaddd.
4554 declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
4556 define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
4557 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
4559 ; CHECK-NEXT: kmovw %edi, %k1
4560 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4561 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1}
4562 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
4563 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
4565 %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
4566 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
4567 %res2 = add <16 x i32> %res, %res1
4568 ret <16 x i32> %res2
; Zero-masking variant of pternlog_d: masked call uses {z} so unselected
; elements are zeroed instead of taking a passthru; unmasked call for contrast.
4571 declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
4573 define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
4574 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
4576 ; CHECK-NEXT: kmovw %edi, %k1
4577 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4578 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
4579 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
4580 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
4582 %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
4583 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
4584 %res2 = add <16 x i32> %res, %res1
4585 ret <16 x i32> %res2
; vpternlogq $33 (qword version of pternlog_d): masked vs. unmasked calls
; combined with vpaddq; i8 mask is zero-extended via movzbl before kmovw.
4588 declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
4590 define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
4591 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
4593 ; CHECK-NEXT: movzbl %dil, %eax
4594 ; CHECK-NEXT: kmovw %eax, %k1
4595 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4596 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1}
4597 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
4598 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4600 %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
4601 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
4602 %res2 = add <8 x i64> %res, %res1
4606 declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
4608 define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
4609 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
4611 ; CHECK-NEXT: movzbl %dil, %eax
4612 ; CHECK-NEXT: kmovw %eax, %k1
4613 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4614 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
4615 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
4616 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4618 %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
4619 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
4620 %res2 = add <8 x i64> %res, %res1