1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=SKX
; Attribute group #0: marks every function that references it as nounwind.
5 attributes #0 = { nounwind }
; Truncates <16 x i32> to <16 x i8>. Both RUN configurations share the
; expectation of vpmovdb from %zmm0 into %xmm0.
7 define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 {
8 ; ALL-LABEL: trunc_16x32_to_16x8:
10 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
12 %x = trunc <16 x i32> %i to <16 x i8>
; Truncates <8 x i64> to <8 x i16>. Both RUN configurations share the
; expectation of vpmovqw from %zmm0 into %xmm0.
16 define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 {
17 ; ALL-LABEL: trunc_8x64_to_8x16:
19 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
21 %x = trunc <8 x i64> %i to <8 x i16>
; Truncates <16 x i32> to <16 x i16>. Both RUN configurations share the
; expectation of vpmovdw from %zmm0 into %ymm0.
25 define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
26 ; ALL-LABEL: trunc_v16i32_to_v16i16:
28 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
30 %1 = trunc <16 x i32> %x to <16 x i16>
; Truncates <8 x i64> to <8 x i8> (qword to byte, 512-bit source).
; NOTE(review): the shared expectation is vpmovqw (qword to word), not a
; byte-narrowing instruction; presumably the <8 x i8> result is held in
; 16-bit lanes here. Confirm against current llc output.
34 define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
35 ; ALL-LABEL: trunc_qb_512:
37 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
39 %x = trunc <8 x i64> %i to <8 x i8>
; Truncates <8 x i64> to <8 x i8> and stores the result through %res.
; Both RUN configurations expect the memory-destination form
; vpmovqb %zmm0, (%rdi).
43 define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 {
44 ; ALL-LABEL: trunc_qb_512_mem:
46 ; ALL-NEXT: vpmovqb %zmm0, (%rdi)
48 %x = trunc <8 x i64> %i to <8 x i8>
49 store <8 x i8> %x, <8 x i8>* %res
; Truncates <4 x i64> to <4 x i8> (256-bit source).
; The avx512f-only run expects vpmovqd on the zmm/ymm registers; the run
; that also enables avx512vl/bw/dq expects vpmovqd on ymm/xmm.
; NOTE(review): the expected instruction narrows to dwords, not bytes;
; presumably the <4 x i8> result is held in 32-bit lanes. Confirm.
53 define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
54 ; KNL-LABEL: trunc_qb_256:
56 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
59 ; SKX-LABEL: trunc_qb_256:
61 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
63 %x = trunc <4 x i64> %i to <4 x i8>
; Truncates <4 x i64> to <4 x i8> and stores the result through %res.
; The avx512f-only run expects a three-step sequence: vpmovqd zmm to ymm,
; vpshufb gathering bytes 0,4,8,12, then a vmovd store to (%rdi).
; The avx512vl/bw/dq run expects vpmovqb %ymm0, (%rdi).
67 define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
68 ; KNL-LABEL: trunc_qb_256_mem:
70 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
71 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
72 ; KNL-NEXT: vmovd %xmm0, (%rdi)
75 ; SKX-LABEL: trunc_qb_256_mem:
77 ; SKX-NEXT: vpmovqb %ymm0, (%rdi)
79 %x = trunc <4 x i64> %i to <4 x i8>
80 store <4 x i8> %x, <4 x i8>* %res
; Truncates <2 x i64> to <2 x i8> (128-bit source).
; Only the function label is checked in the lines visible here; no
; instruction expectation is present for either RUN configuration.
84 define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
85 ; ALL-LABEL: trunc_qb_128:
88 %x = trunc <2 x i64> %i to <2 x i8>
; Truncates <2 x i64> to <2 x i8> and stores the result through %res.
; The avx512f-only run expects vpshufb gathering bytes 0 and 8, a vmovd
; into %eax, and a 16-bit movw store of %ax to (%rdi).
; The avx512vl/bw/dq run expects vpmovqb %xmm0, (%rdi).
92 define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
93 ; KNL-LABEL: trunc_qb_128_mem:
95 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
96 ; KNL-NEXT: vmovd %xmm0, %eax
97 ; KNL-NEXT: movw %ax, (%rdi)
100 ; SKX-LABEL: trunc_qb_128_mem:
102 ; SKX-NEXT: vpmovqb %xmm0, (%rdi)
104 %x = trunc <2 x i64> %i to <2 x i8>
105 store <2 x i8> %x, <2 x i8>* %res
; Truncates <8 x i64> to <8 x i16> (qword to word, 512-bit source).
; Both RUN configurations expect vpmovqw from %zmm0 into %xmm0.
109 define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 {
110 ; ALL-LABEL: trunc_qw_512:
112 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
114 %x = trunc <8 x i64> %i to <8 x i16>
; Truncates <8 x i64> to <8 x i16> and stores the result through %res.
; Both RUN configurations expect the memory-destination form
; vpmovqw %zmm0, (%rdi).
118 define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 {
119 ; ALL-LABEL: trunc_qw_512_mem:
121 ; ALL-NEXT: vpmovqw %zmm0, (%rdi)
123 %x = trunc <8 x i64> %i to <8 x i16>
124 store <8 x i16> %x, <8 x i16>* %res
; Truncates <4 x i64> to <4 x i16> (256-bit source).
; The avx512f-only run expects vpmovqd on zmm/ymm; the avx512vl/bw/dq run
; expects vpmovqd on ymm/xmm.
; NOTE(review): the expected instruction narrows to dwords, not words;
; presumably the <4 x i16> result is held in 32-bit lanes. Confirm.
128 define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
129 ; KNL-LABEL: trunc_qw_256:
131 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
134 ; SKX-LABEL: trunc_qw_256:
136 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
138 %x = trunc <4 x i64> %i to <4 x i16>
; Truncates <4 x i64> to <4 x i16> and stores the result through %res.
; The avx512f-only run expects vpmovqd zmm to ymm, a vpshufb that packs
; the low word of each dword into the low 8 bytes, and a vmovq store.
; The avx512vl/bw/dq run expects vpmovqw %ymm0, (%rdi).
142 define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
143 ; KNL-LABEL: trunc_qw_256_mem:
145 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
146 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
147 ; KNL-NEXT: vmovq %xmm0, (%rdi)
150 ; SKX-LABEL: trunc_qw_256_mem:
152 ; SKX-NEXT: vpmovqw %ymm0, (%rdi)
154 %x = trunc <4 x i64> %i to <4 x i16>
155 store <4 x i16> %x, <4 x i16>* %res
; Truncates <2 x i64> to <2 x i16> (128-bit source).
; Only the function label is checked in the lines visible here; no
; instruction expectation is present for either RUN configuration.
159 define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
160 ; ALL-LABEL: trunc_qw_128:
163 %x = trunc <2 x i64> %i to <2 x i16>
; Truncates <2 x i64> to <2 x i16> and stores the result through %res.
; The avx512f-only run expects vpshufd [0,2,2,3], then vpshuflw
; [0,2,2,3,...], then a vmovd store to (%rdi).
; The avx512vl/bw/dq run expects vpmovqw %xmm0, (%rdi).
167 define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 {
168 ; KNL-LABEL: trunc_qw_128_mem:
170 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
171 ; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
172 ; KNL-NEXT: vmovd %xmm0, (%rdi)
175 ; SKX-LABEL: trunc_qw_128_mem:
177 ; SKX-NEXT: vpmovqw %xmm0, (%rdi)
179 %x = trunc <2 x i64> %i to <2 x i16>
180 store <2 x i16> %x, <2 x i16>* %res
; Truncates <8 x i64> to <8 x i32> (qword to dword, 512-bit source).
; Both RUN configurations expect vpmovqd from %zmm0 into %ymm0.
184 define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 {
185 ; ALL-LABEL: trunc_qd_512:
187 ; ALL-NEXT: vpmovqd %zmm0, %ymm0
189 %x = trunc <8 x i64> %i to <8 x i32>
; Truncates <8 x i64> to <8 x i32> and stores the result through %res.
; Both RUN configurations expect the memory-destination form
; vpmovqd %zmm0, (%rdi).
193 define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 {
194 ; ALL-LABEL: trunc_qd_512_mem:
196 ; ALL-NEXT: vpmovqd %zmm0, (%rdi)
198 %x = trunc <8 x i64> %i to <8 x i32>
199 store <8 x i32> %x, <8 x i32>* %res
; Truncates <4 x i64> to <4 x i32> (256-bit source).
; The avx512f-only run expects vpmovqd on the wider zmm/ymm registers;
; the avx512vl/bw/dq run expects vpmovqd on ymm/xmm.
203 define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
204 ; KNL-LABEL: trunc_qd_256:
206 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
209 ; SKX-LABEL: trunc_qd_256:
211 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
213 %x = trunc <4 x i64> %i to <4 x i32>
; Truncates <4 x i64> to <4 x i32> and stores the result through %res.
; The avx512f-only run expects vpmovqd zmm to ymm followed by a separate
; vmovaps store of %xmm0; the avx512vl/bw/dq run expects
; vpmovqd %ymm0, (%rdi).
217 define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
218 ; KNL-LABEL: trunc_qd_256_mem:
220 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
221 ; KNL-NEXT: vmovaps %xmm0, (%rdi)
224 ; SKX-LABEL: trunc_qd_256_mem:
226 ; SKX-NEXT: vpmovqd %ymm0, (%rdi)
228 %x = trunc <4 x i64> %i to <4 x i32>
229 store <4 x i32> %x, <4 x i32>* %res
; Truncates <2 x i64> to <2 x i32> (128-bit source).
; Only the function label is checked in the lines visible here; no
; instruction expectation is present for either RUN configuration.
233 define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
234 ; ALL-LABEL: trunc_qd_128:
237 %x = trunc <2 x i64> %i to <2 x i32>
; Truncates <2 x i64> to <2 x i32> and stores the result through %res.
; The avx512f-only run expects vpshufd [0,2,2,3] followed by a vmovq
; store; the avx512vl/bw/dq run expects vpmovqd %xmm0, (%rdi).
241 define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
242 ; KNL-LABEL: trunc_qd_128_mem:
244 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
245 ; KNL-NEXT: vmovq %xmm0, (%rdi)
248 ; SKX-LABEL: trunc_qd_128_mem:
250 ; SKX-NEXT: vpmovqd %xmm0, (%rdi)
252 %x = trunc <2 x i64> %i to <2 x i32>
253 store <2 x i32> %x, <2 x i32>* %res
; Truncates <16 x i32> to <16 x i8> (dword to byte, 512-bit source).
; Both RUN configurations expect vpmovdb from %zmm0 into %xmm0.
257 define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 {
258 ; ALL-LABEL: trunc_db_512:
260 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
262 %x = trunc <16 x i32> %i to <16 x i8>
; Truncates <16 x i32> to <16 x i8> and stores the result through %res.
; Both RUN configurations expect the memory-destination form
; vpmovdb %zmm0, (%rdi).
266 define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 {
267 ; ALL-LABEL: trunc_db_512_mem:
269 ; ALL-NEXT: vpmovdb %zmm0, (%rdi)
271 %x = trunc <16 x i32> %i to <16 x i8>
272 store <16 x i8> %x, <16 x i8>* %res
; Truncates <8 x i32> to <8 x i8> (256-bit source).
; The avx512f-only run expects vpmovdw on zmm/ymm; the avx512vl/bw/dq run
; expects vpmovdw on ymm/xmm.
; NOTE(review): the expected instruction narrows to words, not bytes;
; presumably the <8 x i8> result is held in 16-bit lanes. Confirm.
276 define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
277 ; KNL-LABEL: trunc_db_256:
279 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
282 ; SKX-LABEL: trunc_db_256:
284 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
286 %x = trunc <8 x i32> %i to <8 x i8>
; Truncates <8 x i32> to <8 x i8> and stores the result through %res.
; The avx512f-only run expects vpmovdw zmm to ymm, a vpshufb gathering
; the even-numbered bytes, and a vmovq store to (%rdi).
; The avx512vl/bw/dq run expects vpmovdb %ymm0, (%rdi).
290 define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
291 ; KNL-LABEL: trunc_db_256_mem:
293 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
294 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
295 ; KNL-NEXT: vmovq %xmm0, (%rdi)
298 ; SKX-LABEL: trunc_db_256_mem:
300 ; SKX-NEXT: vpmovdb %ymm0, (%rdi)
302 %x = trunc <8 x i32> %i to <8 x i8>
303 store <8 x i8> %x, <8 x i8>* %res
; Truncates <4 x i32> to <4 x i8> (128-bit source).
; Only the function label is checked in the lines visible here; no
; instruction expectation is present for either RUN configuration.
307 define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
308 ; ALL-LABEL: trunc_db_128:
311 %x = trunc <4 x i32> %i to <4 x i8>
; Truncates <4 x i32> to <4 x i8> and stores the result through %res.
; The avx512f-only run expects vpshufb gathering bytes 0,4,8,12 followed
; by a vmovd store; the avx512vl/bw/dq run expects vpmovdb %xmm0, (%rdi).
315 define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 {
316 ; KNL-LABEL: trunc_db_128_mem:
318 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
319 ; KNL-NEXT: vmovd %xmm0, (%rdi)
322 ; SKX-LABEL: trunc_db_128_mem:
324 ; SKX-NEXT: vpmovdb %xmm0, (%rdi)
326 %x = trunc <4 x i32> %i to <4 x i8>
327 store <4 x i8> %x, <4 x i8>* %res
; Truncates <16 x i32> to <16 x i16> (dword to word, 512-bit source).
; Both RUN configurations expect vpmovdw from %zmm0 into %ymm0.
331 define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 {
332 ; ALL-LABEL: trunc_dw_512:
334 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
336 %x = trunc <16 x i32> %i to <16 x i16>
; Truncates <16 x i32> to <16 x i16> and stores the result through %res.
; Both RUN configurations expect the memory-destination form
; vpmovdw %zmm0, (%rdi).
340 define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 {
341 ; ALL-LABEL: trunc_dw_512_mem:
343 ; ALL-NEXT: vpmovdw %zmm0, (%rdi)
345 %x = trunc <16 x i32> %i to <16 x i16>
346 store <16 x i16> %x, <16 x i16>* %res
; Truncates <8 x i32> to <8 x i16> (256-bit source).
; The avx512f-only run expects vpmovdw on the wider zmm/ymm registers;
; the avx512vl/bw/dq run expects vpmovdw on ymm/xmm.
350 define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
351 ; KNL-LABEL: trunc_dw_256:
353 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
356 ; SKX-LABEL: trunc_dw_256:
358 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
360 %x = trunc <8 x i32> %i to <8 x i16>
; Truncates <8 x i32> to <8 x i16> and stores the result through %res.
; The avx512f-only run expects vpmovdw zmm to ymm followed by a separate
; vmovaps store of %xmm0; the avx512vl/bw/dq run expects
; vpmovdw %ymm0, (%rdi).
364 define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
365 ; KNL-LABEL: trunc_dw_256_mem:
367 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
368 ; KNL-NEXT: vmovaps %xmm0, (%rdi)
371 ; SKX-LABEL: trunc_dw_256_mem:
373 ; SKX-NEXT: vpmovdw %ymm0, (%rdi)
375 %x = trunc <8 x i32> %i to <8 x i16>
376 store <8 x i16> %x, <8 x i16>* %res
; Truncates <4 x i32> to <4 x i16> and stores the result through %res.
; The avx512f-only run expects a vpshufb that packs the low word of each
; dword into the low 8 bytes, followed by a vmovq store.
; The avx512vl/bw/dq run expects vpmovdw %xmm0, (%rdi).
380 define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
381 ; KNL-LABEL: trunc_dw_128_mem:
383 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
384 ; KNL-NEXT: vmovq %xmm0, (%rdi)
387 ; SKX-LABEL: trunc_dw_128_mem:
389 ; SKX-NEXT: vpmovdw %xmm0, (%rdi)
391 %x = trunc <4 x i32> %i to <4 x i16>
392 store <4 x i16> %x, <4 x i16>* %res
; Truncates <32 x i16> to <32 x i8> (word to byte, 512-bit source).
; The avx512f-only run expects each of %ymm0 and %ymm1 to be sign-extended
; to dwords (vpmovsxwd) and narrowed to bytes (vpmovdb), with the two
; 128-bit results joined by vinsertf128.
; The run that enables avx512bw expects vpmovwb %zmm0, %ymm0.
396 define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
397 ; KNL-LABEL: trunc_wb_512:
399 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
400 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
401 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
402 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
403 ; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
406 ; SKX-LABEL: trunc_wb_512:
408 ; SKX-NEXT: vpmovwb %zmm0, %ymm0
410 %x = trunc <32 x i16> %i to <32 x i8>
; Truncates <32 x i16> to <32 x i8> and stores the result through %res.
; The avx512f-only run expects the same per-half vpmovsxwd + vpmovdb
; sequence joined by vinsertf128, then a vmovaps store of %ymm0.
; The run that enables avx512bw expects vpmovwb %zmm0, (%rdi).
414 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
415 ; KNL-LABEL: trunc_wb_512_mem:
417 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
418 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
419 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
420 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
421 ; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
422 ; KNL-NEXT: vmovaps %ymm0, (%rdi)
425 ; SKX-LABEL: trunc_wb_512_mem:
427 ; SKX-NEXT: vpmovwb %zmm0, (%rdi)
429 %x = trunc <32 x i16> %i to <32 x i8>
430 store <32 x i8> %x, <32 x i8>* %res
; Truncates <16 x i16> to <16 x i8> (256-bit source).
; The avx512f-only run expects a sign-extension to dwords (vpmovsxwd)
; followed by vpmovdb; the run that enables avx512bw/vl expects
; vpmovwb %ymm0, %xmm0.
434 define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
435 ; KNL-LABEL: trunc_wb_256:
437 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
438 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
441 ; SKX-LABEL: trunc_wb_256:
443 ; SKX-NEXT: vpmovwb %ymm0, %xmm0
445 %x = trunc <16 x i16> %i to <16 x i8>
; Truncates <16 x i16> to <16 x i8> and stores the result through %res.
; The avx512f-only run expects vpmovsxwd, vpmovdb, then a vmovaps store
; of %xmm0; the run that enables avx512bw/vl expects
; vpmovwb %ymm0, (%rdi).
449 define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
450 ; KNL-LABEL: trunc_wb_256_mem:
452 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
453 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
454 ; KNL-NEXT: vmovaps %xmm0, (%rdi)
457 ; SKX-LABEL: trunc_wb_256_mem:
459 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
461 %x = trunc <16 x i16> %i to <16 x i8>
462 store <16 x i8> %x, <16 x i8>* %res
; Truncates <8 x i16> to <8 x i8> (128-bit source).
; Only the function label is checked in the lines visible here; no
; instruction expectation is present for either RUN configuration.
466 define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
467 ; ALL-LABEL: trunc_wb_128:
470 %x = trunc <8 x i16> %i to <8 x i8>
; Truncates <8 x i16> to <8 x i8> and stores the result through %res.
; The avx512f-only run expects a vpshufb gathering the even-numbered bytes
; followed by a vmovq store; the run that enables avx512bw/vl expects
; vpmovwb %xmm0, (%rdi).
474 define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
475 ; KNL-LABEL: trunc_wb_128_mem:
477 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
478 ; KNL-NEXT: vmovq %xmm0, (%rdi)
481 ; SKX-LABEL: trunc_wb_128_mem:
483 ; SKX-NEXT: vpmovwb %xmm0, (%rdi)
485 %x = trunc <8 x i16> %i to <8 x i8>
486 store <8 x i8> %x, <8 x i8>* %res