1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
; Truncates <16 x i32> to <16 x i8>. Only the KNL label directive is visible in
; this view; any instruction checks for this function are not shown here.
5 ; KNL-LABEL: trunc_16x32_to_16x8
8 define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone {
9 %x = trunc <16 x i32> %i to <16 x i8>
; Truncates <8 x i64> to <8 x i16>. Only the KNL label directive is visible in
; this view; any instruction checks for this function are not shown here.
13 ; KNL-LABEL: trunc_8x64_to_8x16
16 define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone {
17 %x = trunc <8 x i64> %i to <8 x i16>
; Zero-extends <8 x i8> loaded from %i to <8 x i16>; lanes whose %mask bit is
; false are zeroed by the select. The SKX run expects the mask to be moved into
; %k1 and a zero-masked vpmovzxbw with a memory operand.
; Declared readonly (not readnone) because the body loads through %i.
21 ;SKX-LABEL: zext_8x8mem_to_8x16:
23 ;SKX-NEXT: vpmovw2m %xmm0, %k1
24 ;SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z}
26 define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
27 %a = load <8 x i8>,<8 x i8> *%i,align 1
28 %x = zext <8 x i8> %a to <8 x i16>
29 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; Sign-extends <8 x i8> loaded from %i to <8 x i16>; lanes whose %mask bit is
; false are zeroed by the select. The SKX run expects the mask in %k1 and a
; zero-masked vpmovsxbw with a memory operand.
; Declared readonly (not readnone) because the body loads through %i.
33 ;SKX-LABEL: sext_8x8mem_to_8x16:
35 ;SKX-NEXT: vpmovw2m %xmm0, %k1
36 ;SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z}
38 define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
39 %a = load <8 x i8>,<8 x i8> *%i,align 1
40 %x = sext <8 x i8> %a to <8 x i16>
41 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; Zero-extends <16 x i8> loaded from %i to <16 x i16>, zeroing lanes whose
; %mask bit is false. The SKX run expects vpmovb2m into %k1 followed by a
; zero-masked vpmovzxbw from memory into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
45 ;SKX-LABEL: zext_16x8mem_to_16x16:
47 ;SKX-NEXT: vpmovb2m %xmm0, %k1
48 ;SKX-NEXT: vpmovzxbw (%rdi), %ymm0 {%k1} {z}
50 define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readonly {
51 %a = load <16 x i8>,<16 x i8> *%i,align 1
52 %x = zext <16 x i8> %a to <16 x i16>
53 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Sign-extends <16 x i8> loaded from %i to <16 x i16>, zeroing lanes whose
; %mask bit is false. The SKX run expects vpmovb2m into %k1 followed by a
; zero-masked vpmovsxbw from memory into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
57 ;SKX-LABEL: sext_16x8mem_to_16x16:
59 ;SKX-NEXT: vpmovb2m %xmm0, %k1
60 ;SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z}
62 define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readonly {
63 %a = load <16 x i8>,<16 x i8> *%i,align 1
64 %x = sext <16 x i8> %a to <16 x i16>
65 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Unmasked zero-extension of a <16 x i8> register argument to <16 x i16>.
; The SKX run expects vpmovzxbw %xmm0 -> %ymm0.
69 ;SKX-LABEL: zext_16x8_to_16x16:
71 ;SKX-NEXT: vpmovzxbw %xmm0, %ymm0
73 define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
74 %x = zext <16 x i8> %a to <16 x i16>
; Zero-extends a <16 x i8> register argument to <16 x i16> and zeroes lanes
; whose %mask bit is false. The SKX run expects the mask (second vector
; argument, %xmm1) moved to %k1 and a zero-masked vpmovzxbw.
78 ;SKX-LABEL: zext_16x8_to_16x16_mask:
80 ;SKX-NEXT: vpmovb2m %xmm1, %k1
81 ;SKX-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z}
83 define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
84 %x = zext <16 x i8> %a to <16 x i16>
85 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Unmasked sign-extension of a <16 x i8> register argument to <16 x i16>.
; The SKX run expects vpmovsxbw %xmm0 -> %ymm0.
89 ;SKX-LABEL: sext_16x8_to_16x16:
91 ;SKX-NEXT: vpmovsxbw %xmm0, %ymm0
93 define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
94 %x = sext <16 x i8> %a to <16 x i16>
; Sign-extends a <16 x i8> register argument to <16 x i16> and zeroes lanes
; whose %mask bit is false. The SKX run expects the mask in %xmm1 moved to %k1
; and a zero-masked vpmovsxbw.
98 ;SKX-LABEL: sext_16x8_to_16x16_mask:
100 ;SKX-NEXT: vpmovb2m %xmm1, %k1
101 ;SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z}
103 define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
104 %x = sext <16 x i8> %a to <16 x i16>
105 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Zero-extends <32 x i8> loaded from %i to <32 x i16>, zeroing lanes whose
; %mask bit is false. The SKX run expects vpmovb2m from %ymm0 into %k1 and a
; zero-masked vpmovzxbw from memory into %zmm0.
; Declared readonly (not readnone) because the body loads through %i.
109 ;SKX-LABEL: zext_32x8mem_to_32x16:
111 ;SKX-NEXT: vpmovb2m %ymm0, %k1
112 ;SKX-NEXT: vpmovzxbw (%rdi), %zmm0 {%k1} {z}
114 define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readonly {
115 %a = load <32 x i8>,<32 x i8> *%i,align 1
116 %x = zext <32 x i8> %a to <32 x i16>
117 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Sign-extends <32 x i8> loaded from %i to <32 x i16>, zeroing lanes whose
; %mask bit is false. The SKX run expects vpmovb2m from %ymm0 into %k1 and a
; zero-masked vpmovsxbw from memory into %zmm0.
; Declared readonly (not readnone) because the body loads through %i.
121 ;SKX-LABEL: sext_32x8mem_to_32x16:
123 ;SKX-NEXT: vpmovb2m %ymm0, %k1
124 ;SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z}
126 define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readonly {
127 %a = load <32 x i8>,<32 x i8> *%i,align 1
128 %x = sext <32 x i8> %a to <32 x i16>
129 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Unmasked zero-extension of a <32 x i8> register argument to <32 x i16>.
; The SKX run expects vpmovzxbw %ymm0 -> %zmm0.
133 ;SKX-LABEL: zext_32x8_to_32x16:
135 ;SKX-NEXT: vpmovzxbw %ymm0, %zmm0
137 define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
138 %x = zext <32 x i8> %a to <32 x i16>
; Zero-extends a <32 x i8> register argument to <32 x i16> and zeroes lanes
; whose %mask bit is false. The SKX run expects the mask in %ymm1 moved to %k1
; and a zero-masked vpmovzxbw.
142 ;SKX-LABEL: zext_32x8_to_32x16_mask:
144 ;SKX-NEXT: vpmovb2m %ymm1, %k1
145 ;SKX-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z}
147 define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
148 %x = zext <32 x i8> %a to <32 x i16>
149 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Unmasked sign-extension of a <32 x i8> register argument to <32 x i16>.
; The SKX run expects vpmovsxbw %ymm0 -> %zmm0.
153 ;SKX-LABEL: sext_32x8_to_32x16:
155 ;SKX-NEXT: vpmovsxbw %ymm0, %zmm0
157 define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
158 %x = sext <32 x i8> %a to <32 x i16>
; Sign-extends a <32 x i8> register argument to <32 x i16> and zeroes lanes
; whose %mask bit is false. The SKX run expects the mask in %ymm1 moved to %k1
; and a zero-masked vpmovsxbw.
162 ;SKX-LABEL: sext_32x8_to_32x16_mask:
164 ;SKX-NEXT: vpmovb2m %ymm1, %k1
165 ;SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z}
167 define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
168 %x = sext <32 x i8> %a to <32 x i16>
169 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Zero-extends <4 x i8> loaded from %i to <4 x i32>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovd2m into %k1 and a zero-masked
; vpmovzxbd from memory into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
173 ;SKX-LABEL: zext_4x8mem_to_4x32:
175 ;SKX-NEXT: vpmovd2m %xmm0, %k1
176 ;SKX-NEXT: vpmovzxbd (%rdi), %xmm0 {%k1} {z}
178 define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
179 %a = load <4 x i8>,<4 x i8> *%i,align 1
180 %x = zext <4 x i8> %a to <4 x i32>
181 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Sign-extends <4 x i8> loaded from %i to <4 x i32>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovd2m into %k1 and a zero-masked
; vpmovsxbd from memory into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
185 ;SKX-LABEL: sext_4x8mem_to_4x32:
187 ;SKX-NEXT: vpmovd2m %xmm0, %k1
188 ;SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
190 define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
191 %a = load <4 x i8>,<4 x i8> *%i,align 1
192 %x = sext <4 x i8> %a to <4 x i32>
193 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Zero-extends <8 x i8> loaded from %i to <8 x i32>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovw2m into %k1 and a zero-masked
; vpmovzxbd from memory into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
197 ;SKX-LABEL: zext_8x8mem_to_8x32:
199 ;SKX-NEXT: vpmovw2m %xmm0, %k1
200 ;SKX-NEXT: vpmovzxbd (%rdi), %ymm0 {%k1} {z}
202 define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
203 %a = load <8 x i8>,<8 x i8> *%i,align 1
204 %x = zext <8 x i8> %a to <8 x i32>
205 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Sign-extends <8 x i8> loaded from %i to <8 x i32>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovw2m into %k1 and a zero-masked
; vpmovsxbd from memory into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
209 ;SKX-LABEL: sext_8x8mem_to_8x32:
211 ;SKX-NEXT: vpmovw2m %xmm0, %k1
212 ;SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
214 define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
215 %a = load <8 x i8>,<8 x i8> *%i,align 1
216 %x = sext <8 x i8> %a to <8 x i32>
217 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Zero-extends <16 x i8> loaded from %i to <16 x i32>, zeroing lanes whose
; %mask bit is false. The KNL run expects a zero-masked vpmovzxbd from memory
; into %zmm0.
; Declared readonly (not readnone) because the body loads through %i.
221 ;KNL-LABEL: zext_16x8mem_to_16x32:
222 ;KNL: vpmovzxbd (%rdi), %zmm0 {%k1} {z}
224 define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readonly {
225 %a = load <16 x i8>,<16 x i8> *%i,align 1
226 %x = zext <16 x i8> %a to <16 x i32>
227 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Sign-extends <16 x i8> loaded from %i to <16 x i32>, zeroing lanes whose
; %mask bit is false. The KNL run expects a zero-masked vpmovsxbd from memory
; into %zmm0.
; Declared readonly (not readnone) because the body loads through %i.
231 ;KNL-LABEL: sext_16x8mem_to_16x32:
232 ;KNL: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
234 define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readonly {
235 %a = load <16 x i8>,<16 x i8> *%i,align 1
236 %x = sext <16 x i8> %a to <16 x i32>
237 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Zero-extends a <16 x i8> register argument to <16 x i32> and zeroes lanes
; whose %mask bit is false. The KNL run expects a zero-masked vpmovzxbd
; %xmm0 -> %zmm0.
241 ;KNL-LABEL: zext_16x8_to_16x32_mask:
242 ;KNL: vpmovzxbd %xmm0, %zmm0 {%k1} {z}
244 define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
245 %x = zext <16 x i8> %a to <16 x i32>
246 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Sign-extends a <16 x i8> register argument to <16 x i32> and zeroes lanes
; whose %mask bit is false. The KNL run expects a zero-masked vpmovsxbd
; %xmm0 -> %zmm0.
250 ;KNL-LABEL: sext_16x8_to_16x32_mask:
251 ;KNL: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
253 define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
254 %x = sext <16 x i8> %a to <16 x i32>
255 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Unmasked zero-extension of <16 x i8> to <16 x i32>. The KNL run only requires
; a vpmovzxbd whose operands end in a %zmm register (the pattern is loose).
259 ; KNL-LABEL: zext_16x8_to_16x32
260 ; KNL: vpmovzxbd {{.*}}%zmm
262 define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
263 %x = zext <16 x i8> %i to <16 x i32>
; Unmasked sign-extension of <16 x i8> to <16 x i32>. The KNL run only requires
; a vpmovsxbd whose operands end in a %zmm register (the pattern is loose).
267 ; KNL-LABEL: sext_16x8_to_16x32
268 ; KNL: vpmovsxbd {{.*}}%zmm
270 define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
271 %x = sext <16 x i8> %i to <16 x i32>
; Zero-extends <2 x i8> loaded from %i to <2 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovq2m into %k1 and a zero-masked
; vpmovzxbq from memory into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
275 ;SKX-LABEL: zext_2x8mem_to_2x64:
277 ;SKX-NEXT: vpmovq2m %xmm0, %k1
278 ;SKX-NEXT: vpmovzxbq (%rdi), %xmm0 {%k1} {z}
280 define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readonly {
281 %a = load <2 x i8>,<2 x i8> *%i,align 1
282 %x = zext <2 x i8> %a to <2 x i64>
283 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Sign-extends <2 x i8> loaded from %i to <2 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovq2m into %k1 and a zero-masked
; vpmovsxbq from memory into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
286 ;SKX-LABEL: sext_2x8mem_to_2x64mask:
288 ;SKX-NEXT: vpmovq2m %xmm0, %k1
289 ;SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
291 define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readonly {
292 %a = load <2 x i8>,<2 x i8> *%i,align 1
293 %x = sext <2 x i8> %a to <2 x i64>
294 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Unmasked sign-extension of <2 x i8> loaded from %i to <2 x i64>. The SKX run
; expects vpmovsxbq with a memory operand into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
297 ;SKX-LABEL: sext_2x8mem_to_2x64:
299 ;SKX-NEXT: vpmovsxbq (%rdi), %xmm0
301 define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readonly {
302 %a = load <2 x i8>,<2 x i8> *%i,align 1
303 %x = sext <2 x i8> %a to <2 x i64>
; Zero-extends <4 x i8> loaded from %i to <4 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovd2m into %k1 and a zero-masked
; vpmovzxbq from memory into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
307 ;SKX-LABEL: zext_4x8mem_to_4x64:
309 ;SKX-NEXT: vpmovd2m %xmm0, %k1
310 ;SKX-NEXT: vpmovzxbq (%rdi), %ymm0 {%k1} {z}
312 define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
313 %a = load <4 x i8>,<4 x i8> *%i,align 1
314 %x = zext <4 x i8> %a to <4 x i64>
315 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Sign-extends <4 x i8> loaded from %i to <4 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovd2m into %k1 and a zero-masked
; vpmovsxbq from memory into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
319 ;SKX-LABEL: sext_4x8mem_to_4x64mask:
321 ;SKX-NEXT: vpmovd2m %xmm0, %k1
322 ;SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
324 define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
325 %a = load <4 x i8>,<4 x i8> *%i,align 1
326 %x = sext <4 x i8> %a to <4 x i64>
327 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Unmasked sign-extension of <4 x i8> loaded from %i to <4 x i64>. The SKX run
; expects vpmovsxbq with a memory operand into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
331 ;SKX-LABEL: sext_4x8mem_to_4x64:
333 ;SKX-NEXT: vpmovsxbq (%rdi), %ymm0
335 define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readonly {
336 %a = load <4 x i8>,<4 x i8> *%i,align 1
337 %x = sext <4 x i8> %a to <4 x i64>
; Zero-extends <8 x i8> loaded from %i to <8 x i64>, zeroing lanes whose %mask
; bit is false. The KNL run expects a zero-masked vpmovzxbq from memory into
; %zmm0.
; Declared readonly (not readnone) because the body loads through %i.
341 ;KNL-LABEL: zext_8x8mem_to_8x64:
342 ;KNL: vpmovzxbq (%rdi), %zmm0 {%k1} {z}
344 define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
345 %a = load <8 x i8>,<8 x i8> *%i,align 1
346 %x = zext <8 x i8> %a to <8 x i64>
347 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Sign-extends <8 x i8> loaded from %i to <8 x i64>, zeroing lanes whose %mask
; bit is false. The KNL run expects a zero-masked vpmovsxbq from memory into
; %zmm0.
; Declared readonly (not readnone) because the body loads through %i.
351 ;KNL-LABEL: sext_8x8mem_to_8x64mask:
352 ;KNL: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
354 define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
355 %a = load <8 x i8>,<8 x i8> *%i,align 1
356 %x = sext <8 x i8> %a to <8 x i64>
357 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked sign-extension of <8 x i8> loaded from %i to <8 x i64>. The KNL run
; expects vpmovsxbq with a memory operand into %zmm0.
; Declared readonly (not readnone) because the body loads through %i.
361 ;KNL-LABEL: sext_8x8mem_to_8x64:
362 ;KNL: vpmovsxbq (%rdi), %zmm0
364 define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readonly {
365 %a = load <8 x i8>,<8 x i8> *%i,align 1
366 %x = sext <8 x i8> %a to <8 x i64>
; Zero-extends <4 x i16> loaded from %i to <4 x i32>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovd2m into %k1 and a zero-masked
; vpmovzxwd from memory into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
370 ;SKX-LABEL: zext_4x16mem_to_4x32:
372 ;SKX-NEXT: vpmovd2m %xmm0, %k1
373 ;SKX-NEXT: vpmovzxwd (%rdi), %xmm0 {%k1} {z}
375 define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
376 %a = load <4 x i16>,<4 x i16> *%i,align 1
377 %x = zext <4 x i16> %a to <4 x i32>
378 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Sign-extends <4 x i16> loaded from %i to <4 x i32>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovd2m into %k1 and a zero-masked
; vpmovsxwd from memory into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
382 ;SKX-LABEL: sext_4x16mem_to_4x32mask:
384 ;SKX-NEXT: vpmovd2m %xmm0, %k1
385 ;SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
387 define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
388 %a = load <4 x i16>,<4 x i16> *%i,align 1
389 %x = sext <4 x i16> %a to <4 x i32>
390 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Unmasked sign-extension of <4 x i16> loaded from %i to <4 x i32>. The SKX run
; expects vpmovsxwd with a memory operand into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
394 ;SKX-LABEL: sext_4x16mem_to_4x32:
396 ;SKX-NEXT: vpmovsxwd (%rdi), %xmm0
398 define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readonly {
399 %a = load <4 x i16>,<4 x i16> *%i,align 1
400 %x = sext <4 x i16> %a to <4 x i32>
; Zero-extends <8 x i16> loaded from %i to <8 x i32>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovw2m into %k1 and a zero-masked
; vpmovzxwd from memory into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
405 ;SKX-LABEL: zext_8x16mem_to_8x32:
407 ;SKX-NEXT: vpmovw2m %xmm0, %k1
408 ;SKX-NEXT: vpmovzxwd (%rdi), %ymm0 {%k1} {z}
410 define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
411 %a = load <8 x i16>,<8 x i16> *%i,align 1
412 %x = zext <8 x i16> %a to <8 x i32>
413 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Sign-extends <8 x i16> loaded from %i to <8 x i32>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovw2m into %k1 and a zero-masked
; vpmovsxwd from memory into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
417 ;SKX-LABEL: sext_8x16mem_to_8x32mask:
419 ;SKX-NEXT: vpmovw2m %xmm0, %k1
420 ;SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
422 define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
423 %a = load <8 x i16>,<8 x i16> *%i,align 1
424 %x = sext <8 x i16> %a to <8 x i32>
425 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Unmasked sign-extension of <8 x i16> loaded from %i to <8 x i32>. The SKX run
; expects vpmovsxwd with a memory operand into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
429 ;SKX-LABEL: sext_8x16mem_to_8x32:
431 ;SKX-NEXT: vpmovsxwd (%rdi), %ymm0
433 define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readonly {
434 %a = load <8 x i16>,<8 x i16> *%i,align 1
435 %x = sext <8 x i16> %a to <8 x i32>
; Zero-extends a <8 x i16> register argument to <8 x i32> and zeroes lanes
; whose %mask bit is false. The SKX run expects the mask in %xmm1 moved to %k1
; and a zero-masked vpmovzxwd %xmm0 -> %ymm0.
439 ;SKX-LABEL: zext_8x16_to_8x32mask:
441 ;SKX-NEXT: vpmovw2m %xmm1, %k1
442 ;SKX-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z}
444 define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
445 %x = zext <8 x i16> %a to <8 x i32>
446 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Unmasked zero-extension of a <8 x i16> register argument to <8 x i32>.
; The SKX run expects vpmovzxwd %xmm0 -> %ymm0.
450 ;SKX-LABEL: zext_8x16_to_8x32:
452 ;SKX-NEXT: vpmovzxwd %xmm0, %ymm0
454 define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
455 %x = zext <8 x i16> %a to <8 x i32>
; Zero-extends <16 x i16> loaded from %i to <16 x i32>, zeroing lanes whose
; %mask bit is false. Checked in both runs: each expects a zero-masked
; vpmovzxwd from memory into %zmm0; the SKX run additionally expects vpmovb2m
; into %k1 immediately before it.
; Declared readonly (not readnone) because the body loads through %i.
459 ;SKX-LABEL: zext_16x16mem_to_16x32:
460 ;KNL-LABEL: zext_16x16mem_to_16x32:
462 ;SKX-NEXT: vpmovb2m %xmm0, %k1
463 ;SKX-NEXT: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
464 ;KNL: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
466 define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readonly {
467 %a = load <16 x i16>,<16 x i16> *%i,align 1
468 %x = zext <16 x i16> %a to <16 x i32>
469 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Sign-extends <16 x i16> loaded from %i to <16 x i32>, zeroing lanes whose
; %mask bit is false. Checked in both runs: each expects a zero-masked
; vpmovsxwd from memory into %zmm0; the SKX run additionally expects vpmovb2m
; into %k1 immediately before it.
; Declared readonly (not readnone) because the body loads through %i.
473 ;SKX-LABEL: sext_16x16mem_to_16x32mask:
474 ;KNL-LABEL: sext_16x16mem_to_16x32mask:
476 ;SKX-NEXT: vpmovb2m %xmm0, %k1
477 ;SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
478 ;KNL: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
480 define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readonly {
481 %a = load <16 x i16>,<16 x i16> *%i,align 1
482 %x = sext <16 x i16> %a to <16 x i32>
483 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Unmasked sign-extension of <16 x i16> loaded from %i to <16 x i32>. Both runs
; expect vpmovsxwd with a memory operand into %zmm0.
; Declared readonly (not readnone) because the body loads through %i.
487 ;SKX-LABEL: sext_16x16mem_to_16x32:
488 ;KNL-LABEL: sext_16x16mem_to_16x32:
490 ;SKX-NEXT: vpmovsxwd (%rdi), %zmm0
491 ;KNL: vpmovsxwd (%rdi), %zmm0
493 define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readonly {
494 %a = load <16 x i16>,<16 x i16> *%i,align 1
495 %x = sext <16 x i16> %a to <16 x i32>
; Zero-extends a <16 x i16> register argument to <16 x i32> and zeroes lanes
; whose %mask bit is false. Both runs expect a zero-masked vpmovzxwd
; %ymm0 -> %zmm0; the SKX run also expects vpmovb2m %xmm1 -> %k1 just before.
498 ;SKX-LABEL: zext_16x16_to_16x32mask:
499 ;KNL-LABEL: zext_16x16_to_16x32mask:
501 ;SKX-NEXT: vpmovb2m %xmm1, %k1
502 ;SKX-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
503 ;KNL: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
505 define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
506 %x = zext <16 x i16> %a to <16 x i32>
507 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Unmasked zero-extension of a <16 x i16> register argument to <16 x i32>.
; Both runs expect vpmovzxwd %ymm0 -> %zmm0.
511 ;SKX-LABEL: zext_16x16_to_16x32:
512 ;KNL-LABEL: zext_16x16_to_16x32:
514 ;SKX-NEXT: vpmovzxwd %ymm0, %zmm0
515 ;KNL: vpmovzxwd %ymm0, %zmm0
517 define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
518 %x = zext <16 x i16> %a to <16 x i32>
; Zero-extends <2 x i16> loaded from %i to <2 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovq2m into %k1 and a zero-masked
; vpmovzxwq from memory into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
522 ;SKX-LABEL: zext_2x16mem_to_2x64:
524 ;SKX-NEXT: vpmovq2m %xmm0, %k1
525 ;SKX-NEXT: vpmovzxwq (%rdi), %xmm0 {%k1} {z}
527 define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readonly {
528 %a = load <2 x i16>,<2 x i16> *%i,align 1
529 %x = zext <2 x i16> %a to <2 x i64>
530 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Sign-extends <2 x i16> loaded from %i to <2 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovq2m into %k1 and a zero-masked
; vpmovsxwq from memory into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
534 ;SKX-LABEL: sext_2x16mem_to_2x64mask:
536 ;SKX-NEXT: vpmovq2m %xmm0, %k1
537 ;SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
539 define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readonly {
540 %a = load <2 x i16>,<2 x i16> *%i,align 1
541 %x = sext <2 x i16> %a to <2 x i64>
542 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Unmasked sign-extension of <2 x i16> loaded from %i to <2 x i64>. The SKX run
; expects vpmovsxwq with a memory operand into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
546 ;SKX-LABEL: sext_2x16mem_to_2x64:
548 ;SKX-NEXT: vpmovsxwq (%rdi), %xmm0
550 define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readonly {
551 %a = load <2 x i16>,<2 x i16> *%i,align 1
552 %x = sext <2 x i16> %a to <2 x i64>
; Zero-extends <4 x i16> loaded from %i to <4 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovd2m into %k1 and a zero-masked
; vpmovzxwq from memory into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
556 ;SKX-LABEL: zext_4x16mem_to_4x64:
558 ;SKX-NEXT: vpmovd2m %xmm0, %k1
559 ;SKX-NEXT: vpmovzxwq (%rdi), %ymm0 {%k1} {z}
561 define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
562 %a = load <4 x i16>,<4 x i16> *%i,align 1
563 %x = zext <4 x i16> %a to <4 x i64>
564 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Sign-extends <4 x i16> loaded from %i to <4 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovd2m into %k1 and a zero-masked
; vpmovsxwq from memory into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
568 ;SKX-LABEL: sext_4x16mem_to_4x64mask:
570 ;SKX-NEXT: vpmovd2m %xmm0, %k1
571 ;SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
573 define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
574 %a = load <4 x i16>,<4 x i16> *%i,align 1
575 %x = sext <4 x i16> %a to <4 x i64>
576 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Unmasked sign-extension of <4 x i16> loaded from %i to <4 x i64>. The SKX run
; expects vpmovsxwq with a memory operand into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
580 ;SKX-LABEL: sext_4x16mem_to_4x64:
582 ;SKX-NEXT: vpmovsxwq (%rdi), %ymm0
584 define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readonly {
585 %a = load <4 x i16>,<4 x i16> *%i,align 1
586 %x = sext <4 x i16> %a to <4 x i64>
; Zero-extends <8 x i16> loaded from %i to <8 x i64>, zeroing lanes whose %mask
; bit is false. Both runs expect a zero-masked vpmovzxwq from memory into
; %zmm0; the SKX run also expects vpmovw2m into %k1 immediately before it.
; Declared readonly (not readnone) because the body loads through %i.
590 ;SKX-LABEL: zext_8x16mem_to_8x64:
591 ;KNL-LABEL: zext_8x16mem_to_8x64:
593 ;SKX-NEXT: vpmovw2m %xmm0, %k1
594 ;SKX-NEXT: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
595 ;KNL: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
597 define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
598 %a = load <8 x i16>,<8 x i16> *%i,align 1
599 %x = zext <8 x i16> %a to <8 x i64>
600 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Sign-extends <8 x i16> loaded from %i to <8 x i64>, zeroing lanes whose %mask
; bit is false. Both runs expect a zero-masked vpmovsxwq from memory into
; %zmm0; the SKX run also expects vpmovw2m into %k1 immediately before it.
; Declared readonly (not readnone) because the body loads through %i.
604 ;SKX-LABEL: sext_8x16mem_to_8x64mask:
605 ;KNL-LABEL: sext_8x16mem_to_8x64mask:
607 ;SKX-NEXT: vpmovw2m %xmm0, %k1
608 ;SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
609 ;KNL: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
611 define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
612 %a = load <8 x i16>,<8 x i16> *%i,align 1
613 %x = sext <8 x i16> %a to <8 x i64>
614 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked sign-extension of <8 x i16> loaded from %i to <8 x i64>. Both runs
; expect vpmovsxwq with a memory operand into %zmm0.
; Declared readonly (not readnone) because the body loads through %i.
618 ;SKX-LABEL: sext_8x16mem_to_8x64:
619 ;KNL-LABEL: sext_8x16mem_to_8x64:
621 ;SKX-NEXT: vpmovsxwq (%rdi), %zmm0
622 ;KNL: vpmovsxwq (%rdi), %zmm0
624 define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readonly {
625 %a = load <8 x i16>,<8 x i16> *%i,align 1
626 %x = sext <8 x i16> %a to <8 x i64>
; Zero-extends a <8 x i16> register argument to <8 x i64> and zeroes lanes
; whose %mask bit is false. Both runs expect a zero-masked vpmovzxwq
; %xmm0 -> %zmm0; the SKX run also expects vpmovw2m %xmm1 -> %k1 just before.
630 ;SKX-LABEL: zext_8x16_to_8x64mask:
631 ;KNL-LABEL: zext_8x16_to_8x64mask:
633 ;SKX-NEXT: vpmovw2m %xmm1, %k1
634 ;SKX-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
635 ;KNL: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
637 define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
638 %x = zext <8 x i16> %a to <8 x i64>
639 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked zero-extension of a <8 x i16> register argument to <8 x i64>.
; Both runs expect vpmovzxwq %xmm0 -> %zmm0. Note the result value here is
; named %ret directly (there is no intermediate %x as in sibling tests).
643 ;SKX-LABEL: zext_8x16_to_8x64:
644 ;KNL-LABEL: zext_8x16_to_8x64:
646 ;SKX-NEXT: vpmovzxwq %xmm0, %zmm0
647 ;KNL: vpmovzxwq %xmm0, %zmm0
650 define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
651 %ret = zext <8 x i16> %a to <8 x i64>
; Zero-extends <2 x i32> loaded from %i to <2 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovq2m into %k1 and a zero-masked
; vpmovzxdq from memory into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
655 ;SKX-LABEL: zext_2x32mem_to_2x64:
657 ;SKX-NEXT: vpmovq2m %xmm0, %k1
658 ;SKX-NEXT: vpmovzxdq (%rdi), %xmm0 {%k1} {z}
660 define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readonly {
661 %a = load <2 x i32>,<2 x i32> *%i,align 1
662 %x = zext <2 x i32> %a to <2 x i64>
663 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Sign-extends <2 x i32> loaded from %i to <2 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovq2m into %k1 and a zero-masked
; vpmovsxdq from memory into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
667 ;SKX-LABEL: sext_2x32mem_to_2x64mask:
669 ;SKX-NEXT: vpmovq2m %xmm0, %k1
670 ;SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
672 define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readonly {
673 %a = load <2 x i32>,<2 x i32> *%i,align 1
674 %x = sext <2 x i32> %a to <2 x i64>
675 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Unmasked sign-extension of <2 x i32> loaded from %i to <2 x i64>. The SKX run
; expects vpmovsxdq with a memory operand into %xmm0.
; Declared readonly (not readnone) because the body loads through %i.
679 ;SKX-LABEL: sext_2x32mem_to_2x64:
681 ;SKX-NEXT: vpmovsxdq (%rdi), %xmm0
683 define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readonly {
684 %a = load <2 x i32>,<2 x i32> *%i,align 1
685 %x = sext <2 x i32> %a to <2 x i64>
; Zero-extends <4 x i32> loaded from %i to <4 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovd2m into %k1 and a zero-masked
; vpmovzxdq from memory into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
689 ;SKX-LABEL: zext_4x32mem_to_4x64:
691 ;SKX-NEXT: vpmovd2m %xmm0, %k1
692 ;SKX-NEXT: vpmovzxdq (%rdi), %ymm0 {%k1} {z}
694 define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readonly {
695 %a = load <4 x i32>,<4 x i32> *%i,align 1
696 %x = zext <4 x i32> %a to <4 x i64>
697 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Sign-extends <4 x i32> loaded from %i to <4 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovd2m into %k1 and a zero-masked
; vpmovsxdq from memory into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
701 ;SKX-LABEL: sext_4x32mem_to_4x64mask:
703 ;SKX-NEXT: vpmovd2m %xmm0, %k1
704 ;SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
706 define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readonly {
707 %a = load <4 x i32>,<4 x i32> *%i,align 1
708 %x = sext <4 x i32> %a to <4 x i64>
709 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Unmasked sign-extension of <4 x i32> loaded from %i to <4 x i64>. The SKX run
; expects vpmovsxdq with a memory operand into %ymm0.
; Declared readonly (not readnone) because the body loads through %i.
713 ;SKX-LABEL: sext_4x32mem_to_4x64:
715 ;SKX-NEXT: vpmovsxdq (%rdi), %ymm0
717 define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readonly {
718 %a = load <4 x i32>,<4 x i32> *%i,align 1
719 %x = sext <4 x i32> %a to <4 x i64>
; Unmasked sign-extension of a <4 x i32> register argument to <4 x i64>.
; The SKX run expects vpmovsxdq %xmm0 -> %ymm0.
723 ;SKX-LABEL: sext_4x32_to_4x64:
725 ;SKX-NEXT: vpmovsxdq %xmm0, %ymm0
727 define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
728 %x = sext <4 x i32> %a to <4 x i64>
; Zero-extends a <4 x i32> register argument to <4 x i64> and zeroes lanes
; whose %mask bit is false. The SKX run expects vpmovd2m %xmm1 -> %k1 and a
; zero-masked vpmovzxdq %xmm0 -> %ymm0.
732 ;SKX-LABEL: zext_4x32_to_4x64mask:
734 ;SKX-NEXT: vpmovd2m %xmm1, %k1
735 ;SKX-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z}
737 define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
738 %x = zext <4 x i32> %a to <4 x i64>
739 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Zero-extends <8 x i32> loaded from %i to <8 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovw2m into %k1 and a zero-masked
; vpmovzxdq from memory into %zmm0.
; Declared readonly (not readnone) because the body loads through %i.
743 ;SKX-LABEL: zext_8x32mem_to_8x64:
745 ;SKX-NEXT: vpmovw2m %xmm0, %k1
746 ;SKX-NEXT: vpmovzxdq (%rdi), %zmm0 {%k1} {z}
748 define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readonly {
749 %a = load <8 x i32>,<8 x i32> *%i,align 1
750 %x = zext <8 x i32> %a to <8 x i64>
751 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Sign-extends <8 x i32> loaded from %i to <8 x i64>, zeroing lanes whose %mask
; bit is false. The SKX run expects vpmovw2m into %k1 and a zero-masked
; vpmovsxdq from memory into %zmm0.
; Declared readonly (not readnone) because the body loads through %i.
755 ;SKX-LABEL: sext_8x32mem_to_8x64mask:
757 ;SKX-NEXT: vpmovw2m %xmm0, %k1
758 ;SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
760 define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readonly {
761 %a = load <8 x i32>,<8 x i32> *%i,align 1
762 %x = sext <8 x i32> %a to <8 x i64>
763 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked sign-extension of <8 x i32> loaded from %i to <8 x i64>. Both runs
; expect vpmovsxdq with a memory operand into %zmm0.
; Declared readonly (not readnone) because the body loads through %i.
767 ;SKX-LABEL: sext_8x32mem_to_8x64:
768 ;KNL-LABEL: sext_8x32mem_to_8x64:
770 ;SKX-NEXT: vpmovsxdq (%rdi), %zmm0
771 ;KNL: vpmovsxdq (%rdi), %zmm0
773 define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readonly {
774 %a = load <8 x i32>,<8 x i32> *%i,align 1
775 %x = sext <8 x i32> %a to <8 x i64>
; Unmasked sign-extension of a <8 x i32> register argument to <8 x i64>.
; Both runs expect vpmovsxdq %ymm0 -> %zmm0.
779 ;SKX-LABEL: sext_8x32_to_8x64:
780 ;KNL-LABEL: sext_8x32_to_8x64:
782 ;SKX-NEXT: vpmovsxdq %ymm0, %zmm0
783 ;KNL: vpmovsxdq %ymm0, %zmm0
785 define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
786 %x = sext <8 x i32> %a to <8 x i64>
; Zero-extends a <8 x i32> register argument to <8 x i64> and zeroes lanes
; whose %mask bit is false. Both runs expect a zero-masked vpmovzxdq
; %ymm0 -> %zmm0; the SKX run also expects vpmovw2m %xmm1 -> %k1 just before.
790 ;SKX-LABEL: zext_8x32_to_8x64mask:
791 ;KNL-LABEL: zext_8x32_to_8x64mask:
793 ;SKX-NEXT: vpmovw2m %xmm1, %k1
794 ;SKX-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
795 ;KNL: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
797 define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
798 %x = zext <8 x i32> %a to <8 x i64>
799 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Floating-point truncation <8 x double> -> <8 x float>. The KNL run requires a
; vcvtpd2ps whose operand text contains a %zmm register.
802 ;KNL-LABEL: fptrunc_test
803 ;KNL: vcvtpd2ps {{.*}}%zmm
805 define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
806 %b = fptrunc <8 x double> %a to <8 x float>
; Floating-point extension <8 x float> -> <8 x double>. The KNL run requires a
; vcvtps2pd whose operand text contains a %zmm register.
810 ;KNL-LABEL: fpext_test
811 ;KNL: vcvtps2pd {{.*}}%zmm
813 define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
814 %b = fpext <8 x float> %a to <8 x double>
; Reinterprets an i16 as a <16 x i1> mask and zero-extends it to <16 x i32>.
; The KNL run expects a zero-masked vpbroadcastd from an LCP-prefixed
; rip-relative symbol into %zmm0.
818 ; KNL-LABEL: zext_16i1_to_16xi32
819 ; KNL: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z}
821 define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
822 %a = bitcast i16 %b to <16 x i1>
823 %c = zext <16 x i1> %a to <16 x i32>
; Reinterprets an i8 as a <8 x i1> mask and zero-extends it to <8 x i64>.
; The KNL run expects a zero-masked vpbroadcastq from an LCP-prefixed
; rip-relative symbol into %zmm0.
827 ; KNL-LABEL: zext_8i1_to_8xi64
828 ; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
830 define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
831 %a = bitcast i8 %b to <8 x i1>
832 %c = zext <8 x i1> %a to <8 x i64>
; Truncates <16 x i8> to a <16 x i1> mask and bitcasts the mask to i16.
; Only the label directives for the two runs are visible in this view; any
; instruction checks are not shown here.
836 ; KNL-LABEL: trunc_16i8_to_16i1
841 ; SKX-LABEL: trunc_16i8_to_16i1
843 define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
844 %mask_b = trunc <16 x i8>%a to <16 x i1>
845 %mask = bitcast <16 x i1> %mask_b to i16
; Truncates <16 x i32> to a <16 x i1> mask and bitcasts the mask to i16.
; Only the label directives for the two runs are visible in this view; any
; instruction checks are not shown here.
849 ; KNL-LABEL: trunc_16i32_to_16i1
853 ; SKX-LABEL: trunc_16i32_to_16i1
855 define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
856 %mask_b = trunc <16 x i32>%a to <16 x i1>
857 %mask = bitcast <16 x i1> %mask_b to i16
; Truncates both <4 x i32> inputs to <4 x i1>, ANDs the two masks, and
; sign-extends the combined mask back to <4 x i32>. Only the SKX label
; directive is visible in this view.
861 ; SKX-LABEL: trunc_4i32_to_4i1
865 define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
866 %mask_a = trunc <4 x i32>%a to <4 x i1>
867 %mask_b = trunc <4 x i32>%b to <4 x i1>
868 %a_and_b = and <4 x i1>%mask_a, %mask_b
869 %res = sext <4 x i1>%a_and_b to <4 x i32>
; Truncates <8 x i16> to a <8 x i1> mask and bitcasts the mask to i8.
; The KNL run expects a vpandq with a {1to8} broadcast memory operand taken
; from an LCP-prefixed rip-relative symbol. For the SKX run only the label
; directive is visible in this view.
873 ; KNL-LABEL: trunc_8i16_to_8i1
875 ; KNL: vpandq LCP{{.*}}(%rip){1to8}
879 ; SKX-LABEL: trunc_8i16_to_8i1
881 define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
882 %mask_b = trunc <8 x i16>%a to <8 x i1>
883 %mask = bitcast <8 x i1> %mask_b to i8
; Compares %a1 < %a2 (signed) per lane, inverts the <8 x i1> result by xor
; with an all-true vector, and sign-extends it to <8 x i32>. The KNL run
; expects a zero-masked vpbroadcastq from an LCP-prefixed rip-relative symbol
; into %zmm0.
887 ; KNL-LABEL: sext_8i1_8i32
888 ; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
891 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
892 %x = icmp slt <8 x i32> %a1, %a2
893 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
894 %y = sext <8 x i1> %x1 to <8 x i32>
; Truncates <16 x i32> to <16 x i16>. Only the KNL label directive is visible
; in this view; any instruction checks are not shown here.
898 ; KNL-LABEL: trunc_v16i32_to_v16i16
901 define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) {
902 %1 = trunc <16 x i32> %x to <16 x i16>
; Truncates scalar %a to i1 and inserts it at lane 0 of a constant <16 x i1>
; (lane 1 is false, every other lane true), then bitcasts the vector to i16.
; The KNL run expects a kmovw from %eax into %k1.
906 ; KNL-LABEL: trunc_i32_to_i1
908 ; KNL: kmovw %eax, %k1
910 define i16 @trunc_i32_to_i1(i32 %a) {
911 %a_i = trunc i32 %a to i1
912 %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
913 %res = bitcast <16 x i1> %maskv to i16
; Signed less-than compare of two <8 x i32> vectors, with the <8 x i1> result
; sign-extended to <8 x i16>. Only the KNL label directive is visible in this
; view.
917 ; KNL-LABEL: sext_8i1_8i16
920 define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
921 %x = icmp slt <8 x i32> %a1, %a2
922 %y = sext <8 x i1> %x to <8 x i16>
; Signed less-than compare of two <16 x i32> vectors, with the <16 x i1>
; result sign-extended to <16 x i32>. Only the KNL label directive is visible
; in this view.
926 ; KNL-LABEL: sext_16i1_16i32
929 define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
930 %x = icmp slt <16 x i32> %a1, %a2
931 %y = sext <16 x i1> %x to <16 x i32>
; Signed less-than compare of two <8 x i32> vectors, with the <8 x i1> result
; sign-extended to <8 x i64>. Only the KNL label directive is visible in this
; view.
935 ; KNL-LABEL: sext_8i1_8i64
938 define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
939 %x = icmp slt <8 x i32> %a1, %a2
940 %y = sext <8 x i1> %x to <8 x i64>
; Loads <8 x i8> from %a, sign-extends it to <8 x i64>, and stores the result
; to %res. The label pattern includes the leading '@', unlike the other label
; directives in this file. Only the label directive is visible in this view.
944 ; KNL-LABEL: @extload_v8i64
946 define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
947 %sign_load = load <8 x i8>, <8 x i8>* %a
948 %c = sext <8 x i8> %sign_load to <8 x i64>
949 store <8 x i64> %c, <8 x i64>* %res
; Per-lane select on <64 x i16>: keeps %x where %mask is true, zero otherwise.
; The SKX run expects a masked vmovdqu16 %zmm0 -> %zmm3, then (as consecutive
; -NEXT checks) kshiftrq $32 on %k1 and a second masked vmovdqu16
; %zmm1 -> %zmm2.
; NOTE(review): no label directive for test21 is visible in this view - confirm
; one exists so the first check is anchored to this function.
954 ;SKX: vmovdqu16 %zmm0, %zmm3 {%k1}
955 ;SKX-NEXT: kshiftrq $32, %k1, %k1
956 ;SKX-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1}
957 define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
958 %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer