; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; Zero-extend the low 8 bytes of a <16 x i8> to <8 x i16>.
; SSE2/SSSE3 interleave with a zero register; SSE4.1/AVX use pmovzxbw.
define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_8i16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i16>
  ret <8 x i16> %C
}
; Full-width zero-extend <16 x i8> -> <16 x i16> (two XMM halves pre-AVX2,
; one ymm pmovzxbw on AVX2).
define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
; SSE2-LABEL: zext_16i8_to_16i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_16i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_16i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_16i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_16i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: retq
entry:
  %B = zext <16 x i8> %A to <16 x i16>
  ret <16 x i16> %B
}
; Zero-extend the low 4 bytes of a <16 x i8> to <4 x i32>.
define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i32>
  ret <4 x i32> %C
}
; Zero-extend the low 8 bytes of a <16 x i8> to <8 x i32> (two XMM halves
; pre-AVX2, one ymm pmovzxbd + mask on AVX2).
define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i32>
  ret <8 x i32> %C
}
; Zero-extend the low 2 bytes of a <16 x i8> to <2 x i64>.
define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i8> %B to <2 x i64>
  ret <2 x i64> %C
}
; Zero-extend the low 4 bytes of a <16 x i8> to <4 x i64>.
define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,5,6,7]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i64>
  ret <4 x i64> %C
}
; Zero-extend the low 4 words of a <8 x i16> to <4 x i32>.
define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_8i16_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i32>
  ret <4 x i32> %C
}
; Full-width zero-extend <8 x i16> -> <8 x i32>.
define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
entry:
  %B = zext <8 x i16> %A to <8 x i32>
  ret <8 x i32> %B
}
; Zero-extend the low 2 words of a <8 x i16> to <2 x i64>.
define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_8i16_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i16> %B to <2 x i64>
  ret <2 x i64> %C
}
; Zero-extend the low 4 words of a <8 x i16> to <4 x i64>.
define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,2,1]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[2,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,6,7]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4,5],zero,zero,zero,zero,zero,zero,xmm1[6,7],zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,2,3,4,5,6,7,6,7,10,11,4,5,6,7]
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i16_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i16_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
; AVX2-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i64>
  ret <4 x i64> %C
}
; Zero-extend the low 2 dwords of a <4 x i32> to <2 x i64>.
define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_4i32_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_4i32_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_4i32_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i32> %B to <2 x i64>
  ret <2 x i64> %C
}
; Full-width zero-extend <4 x i32> -> <4 x i64>.
define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: retq
entry:
  %B = zext <4 x i32> %A to <4 x i64>
  ret <4 x i64> %B
}
; Load <2 x i8> from memory and zero-extend to <2 x i64>;
; SSE4.1/AVX fold the load into pmovzxbq.
define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
; SSE2-LABEL: load_zext_2i8_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movzwl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_2i8_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movzwl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_2i8_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_2i8_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
entry:
  %X = load <2 x i8>, <2 x i8>* %ptr
  %Y = zext <2 x i8> %X to <2 x i64>
  ret <2 x i64> %Y
}
; Load <4 x i8> from memory and zero-extend to <4 x i32>.
define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
; SSE2-LABEL: load_zext_4i8_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i8_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i8_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_4i8_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT: retq
entry:
  %X = load <4 x i8>, <4 x i8>* %ptr
  %Y = zext <4 x i8> %X to <4 x i32>
  ret <4 x i32> %Y
}
; Load <4 x i8> from memory and zero-extend to <4 x i64>.
define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
; SSE2-LABEL: load_zext_4i8_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i8_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8],zero,zero,zero,zero,zero,zero,zero,xmm1[12],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i8_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_4i8_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_4i8_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
entry:
  %X = load <4 x i8>, <4 x i8>* %ptr
  %Y = zext <4 x i8> %X to <4 x i64>
  ret <4 x i64> %Y
}
; Load <8 x i8> from memory and zero-extend to <8 x i16>.
define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_8i8_to_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_8i8_to_8i16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX-NEXT: retq
entry:
  %X = load <8 x i8>, <8 x i8>* %ptr
  %Y = zext <8 x i8> %X to <8 x i16>
  ret <8 x i16> %Y
}
; Load <8 x i8> from memory and zero-extend to <8 x i32>.
define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[6],zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[14],zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_8i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT: retq
entry:
  %X = load <8 x i8>, <8 x i8>* %ptr
  %Y = zext <8 x i8> %X to <8 x i32>
  ret <8 x i32> %Y
}
; Load <16 x i8> from memory and zero-extend to <16 x i16>.
define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
; SSE2-LABEL: load_zext_16i8_to_16i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_16i8_to_16i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_16i8_to_16i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_16i8_to_16i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_16i8_to_16i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX2-NEXT: retq
entry:
  %X = load <16 x i8>, <16 x i8>* %ptr
  %Y = zext <16 x i8> %X to <16 x i16>
  ret <16 x i16> %Y
}
; Zero-extending load: a <2 x i16> is loaded from memory and widened to
; <2 x i64>. SSE4.1 and AVX should fold the load into a single
; (v)pmovzxwq; SSE2/SSSE3 have no pmovzx, so they widen in two interleave
; steps (punpcklwd then punpckldq) against a zeroed %xmm1.
; NOTE(review): the trailing `retq` CHECK lines, `entry:` label, `ret`
; instruction and closing brace appear to be elided from this excerpt of
; the listing (the embedded line numbers skip) — confirm against the full
; test file before editing.
690 define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
691 ; SSE2-LABEL: load_zext_2i16_to_2i64:
692 ; SSE2: # BB#0: # %entry
693 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
694 ; SSE2-NEXT: pxor %xmm1, %xmm1
695 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
696 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
699 ; SSSE3-LABEL: load_zext_2i16_to_2i64:
700 ; SSSE3: # BB#0: # %entry
701 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
702 ; SSSE3-NEXT: pxor %xmm1, %xmm1
703 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
704 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
707 ; SSE41-LABEL: load_zext_2i16_to_2i64:
708 ; SSE41: # BB#0: # %entry
709 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
712 ; AVX-LABEL: load_zext_2i16_to_2i64:
713 ; AVX: # BB#0: # %entry
714 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
717 %X = load <2 x i16>, <2 x i16>* %ptr
718 %Y = zext <2 x i16> %X to <2 x i64>
; Zero-extending load: <4 x i16> from memory widened to <4 x i32>.
; SSE4.1/AVX fold the load into one (v)pmovzxwd; SSE2/SSSE3 do a movq
; load of the low 64 bits and a single punpcklwd against a zeroed %xmm1.
722 define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
723 ; SSE2-LABEL: load_zext_4i16_to_4i32:
724 ; SSE2: # BB#0: # %entry
725 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
726 ; SSE2-NEXT: pxor %xmm1, %xmm1
727 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
730 ; SSSE3-LABEL: load_zext_4i16_to_4i32:
731 ; SSSE3: # BB#0: # %entry
732 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
733 ; SSSE3-NEXT: pxor %xmm1, %xmm1
734 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
737 ; SSE41-LABEL: load_zext_4i16_to_4i32:
738 ; SSE41: # BB#0: # %entry
739 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
742 ; AVX-LABEL: load_zext_4i16_to_4i32:
743 ; AVX: # BB#0: # %entry
744 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
747 %X = load <4 x i16>, <4 x i16>* %ptr
748 %Y = zext <4 x i16> %X to <4 x i32>
; Zero-extending load: <4 x i16> widened all the way to <4 x i64>
; (a 256-bit result, two xmm registers pre-AVX2).
; SSE2 interleaves against an uninitialized register and then masks the
; unwanted lanes with a pand constant; SSSE3 uses two pshufb byte
; shuffles with zeroing lanes; SSE4.1/AVX1 use a pair of (v)pmovzxwq
; loads (AVX1 joining them with vinsertf128); AVX2 does a single
; ymm vpmovzxwq from memory.
752 define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
753 ; SSE2-LABEL: load_zext_4i16_to_4i64:
754 ; SSE2: # BB#0: # %entry
755 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
756 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
757 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
758 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0,65535,0,0,0]
759 ; SSE2-NEXT: pand %xmm2, %xmm0
760 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
761 ; SSE2-NEXT: pand %xmm2, %xmm1
764 ; SSSE3-LABEL: load_zext_4i16_to_4i64:
765 ; SSSE3: # BB#0: # %entry
766 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
767 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
768 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
769 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero
770 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9],zero,zero,zero,zero,zero,zero,xmm1[12,13],zero,zero,zero,zero,zero,zero
773 ; SSE41-LABEL: load_zext_4i16_to_4i64:
774 ; SSE41: # BB#0: # %entry
775 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
776 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
779 ; AVX1-LABEL: load_zext_4i16_to_4i64:
780 ; AVX1: # BB#0: # %entry
781 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
782 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
783 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
786 ; AVX2-LABEL: load_zext_4i16_to_4i64:
787 ; AVX2: # BB#0: # %entry
788 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
791 %X = load <4 x i16>, <4 x i16>* %ptr
792 %Y = zext <4 x i16> %X to <4 x i64>
; Zero-extending load: a full xmm <8 x i16> widened to <8 x i32>.
; SSE2/SSSE3 load once and split with punpcklwd/punpckhwd against a
; zeroed %xmm2; SSE4.1/AVX1 use two (v)pmovzxwd loads (AVX1 merging via
; vinsertf128); AVX2 emits a single ymm vpmovzxwd from memory.
796 define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
797 ; SSE2-LABEL: load_zext_8i16_to_8i32:
798 ; SSE2: # BB#0: # %entry
799 ; SSE2-NEXT: movdqa (%rdi), %xmm1
800 ; SSE2-NEXT: pxor %xmm2, %xmm2
801 ; SSE2-NEXT: movdqa %xmm1, %xmm0
802 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
803 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
806 ; SSSE3-LABEL: load_zext_8i16_to_8i32:
807 ; SSSE3: # BB#0: # %entry
808 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
809 ; SSSE3-NEXT: pxor %xmm2, %xmm2
810 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
811 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
812 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
815 ; SSE41-LABEL: load_zext_8i16_to_8i32:
816 ; SSE41: # BB#0: # %entry
817 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
818 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
821 ; AVX1-LABEL: load_zext_8i16_to_8i32:
822 ; AVX1: # BB#0: # %entry
823 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
824 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
825 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
828 ; AVX2-LABEL: load_zext_8i16_to_8i32:
829 ; AVX2: # BB#0: # %entry
830 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
833 %X = load <8 x i16>, <8 x i16>* %ptr
834 %Y = zext <8 x i16> %X to <8 x i32>
; Zero-extending load: <2 x i32> from memory widened to <2 x i64>.
; SSE4.1/AVX fold the load into one (v)pmovzxdq; SSE2/SSSE3 movq-load
; the low 64 bits and interleave with a zeroed %xmm1 via punpckldq.
838 define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
839 ; SSE2-LABEL: load_zext_2i32_to_2i64:
840 ; SSE2: # BB#0: # %entry
841 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
842 ; SSE2-NEXT: pxor %xmm1, %xmm1
843 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
846 ; SSSE3-LABEL: load_zext_2i32_to_2i64:
847 ; SSSE3: # BB#0: # %entry
848 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
849 ; SSSE3-NEXT: pxor %xmm1, %xmm1
850 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
853 ; SSE41-LABEL: load_zext_2i32_to_2i64:
854 ; SSE41: # BB#0: # %entry
855 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
858 ; AVX-LABEL: load_zext_2i32_to_2i64:
859 ; AVX: # BB#0: # %entry
860 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
863 %X = load <2 x i32>, <2 x i32>* %ptr
864 %Y = zext <2 x i32> %X to <2 x i64>
; Zero-extending load: a full xmm <4 x i32> widened to <4 x i64>.
; SSE2/SSSE3 load once and split with punpckldq/punpckhdq against a
; zeroed %xmm2; SSE4.1/AVX1 use two (v)pmovzxdq loads (AVX1 merging via
; vinsertf128); AVX2 emits a single ymm vpmovzxdq from memory.
868 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
869 ; SSE2-LABEL: load_zext_4i32_to_4i64:
870 ; SSE2: # BB#0: # %entry
871 ; SSE2-NEXT: movdqa (%rdi), %xmm1
872 ; SSE2-NEXT: pxor %xmm2, %xmm2
873 ; SSE2-NEXT: movdqa %xmm1, %xmm0
874 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
875 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
878 ; SSSE3-LABEL: load_zext_4i32_to_4i64:
879 ; SSSE3: # BB#0: # %entry
880 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
881 ; SSSE3-NEXT: pxor %xmm2, %xmm2
882 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
883 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
884 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
887 ; SSE41-LABEL: load_zext_4i32_to_4i64:
888 ; SSE41: # BB#0: # %entry
889 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
890 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
893 ; AVX1-LABEL: load_zext_4i32_to_4i64:
894 ; AVX1: # BB#0: # %entry
895 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
896 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
897 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
900 ; AVX2-LABEL: load_zext_4i32_to_4i64:
901 ; AVX2: # BB#0: # %entry
902 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
905 %X = load <4 x i32>, <4 x i32>* %ptr
906 %Y = zext <4 x i32> %X to <4 x i64>
; Zero-extend a <8 x i8> register argument to <8 x i32>. The <8 x i8>
; arrives widened in an xmm register, so every target first masks with a
; pand/vpand constant pool load to clear the high bytes of each lane,
; then widens word lanes to dwords (punpckl/hwd or (v)pmovzxwd; AVX2
; uses a single ymm vpmovzxwd).
910 define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
911 ; SSE2-LABEL: zext_8i8_to_8i32:
912 ; SSE2: # BB#0: # %entry
913 ; SSE2-NEXT: movdqa %xmm0, %xmm1
914 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
915 ; SSE2-NEXT: pxor %xmm2, %xmm2
916 ; SSE2-NEXT: movdqa %xmm1, %xmm0
917 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
918 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
921 ; SSSE3-LABEL: zext_8i8_to_8i32:
922 ; SSSE3: # BB#0: # %entry
923 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
924 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
925 ; SSSE3-NEXT: pxor %xmm2, %xmm2
926 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
927 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
928 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
931 ; SSE41-LABEL: zext_8i8_to_8i32:
932 ; SSE41: # BB#0: # %entry
933 ; SSE41-NEXT: movdqa %xmm0, %xmm1
934 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
935 ; SSE41-NEXT: pxor %xmm2, %xmm2
936 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
937 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
940 ; AVX1-LABEL: zext_8i8_to_8i32:
941 ; AVX1: # BB#0: # %entry
942 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
943 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
944 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
945 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
946 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
949 ; AVX2-LABEL: zext_8i8_to_8i32:
950 ; AVX2: # BB#0: # %entry
951 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
952 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
955 %t = zext <8 x i8> %z to <8 x i32>
; A shufflevector that interleaves %A with zeroinitializer lanes,
; bitcast from <16 x i16> to <8 x i32>, is equivalent to a zext of
; <8 x i16> to <8 x i32> — this checks the backend recognizes the
; pattern: punpckl/hwd vs zero on SSE2/SSSE3, pmovzxwd + punpckhwd on
; SSE4.1/AVX1, and a single ymm vpmovzxwd on AVX2.
959 define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
960 ; SSE2-LABEL: shuf_zext_8i16_to_8i32:
961 ; SSE2: # BB#0: # %entry
962 ; SSE2-NEXT: movdqa %xmm0, %xmm1
963 ; SSE2-NEXT: pxor %xmm2, %xmm2
964 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
965 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
968 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
969 ; SSSE3: # BB#0: # %entry
970 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
971 ; SSSE3-NEXT: pxor %xmm2, %xmm2
972 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
973 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
976 ; SSE41-LABEL: shuf_zext_8i16_to_8i32:
977 ; SSE41: # BB#0: # %entry
978 ; SSE41-NEXT: movdqa %xmm0, %xmm1
979 ; SSE41-NEXT: pxor %xmm2, %xmm2
980 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
981 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
984 ; AVX1-LABEL: shuf_zext_8i16_to_8i32:
985 ; AVX1: # BB#0: # %entry
986 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
987 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
988 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
989 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
992 ; AVX2-LABEL: shuf_zext_8i16_to_8i32:
993 ; AVX2: # BB#0: # %entry
994 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
997 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
998 %Z = bitcast <16 x i16> %B to <8 x i32>
; Shuffle-with-zero interleave bitcast from <8 x i32> to <4 x i64> —
; i.e. a disguised zext of <4 x i32> to <4 x i64>. SSE2/SSSE3 use
; punpckldq/punpckhdq vs a zeroed register; SSE4.1 mixes pmovzxdq with
; punpckhdq; AVX2 collapses it to a single ymm vpmovzxdq. The AVX1
; sequence (vinsertps/vxorpd/vblendpd/vpermilps + vinsertf128) shows
; this pattern is not yet matched as a zext there.
1002 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
1003 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
1004 ; SSE2: # BB#0: # %entry
1005 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1006 ; SSE2-NEXT: pxor %xmm2, %xmm2
1007 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1008 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1011 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
1012 ; SSSE3: # BB#0: # %entry
1013 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1014 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1015 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1016 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1019 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
1020 ; SSE41: # BB#0: # %entry
1021 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1022 ; SSE41-NEXT: pxor %xmm2, %xmm2
1023 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
1024 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1027 ; AVX1-LABEL: shuf_zext_4i32_to_4i64:
1028 ; AVX1: # BB#0: # %entry
1029 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
1030 ; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1031 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1032 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
1033 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1036 ; AVX2-LABEL: shuf_zext_4i32_to_4i64:
1037 ; AVX2: # BB#0: # %entry
1038 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1041 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
1042 %Z = bitcast <8 x i32> %B to <4 x i64>
1046 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
1047 ; SSE2-LABEL: shuf_zext_8i8_to_8i32:
1048 ; SSE2: # BB#0: # %entry
1049 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1050 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
1051 ; SSE2-NEXT: packuswb %xmm1, %xmm1
1052 ; SSE2-NEXT: pxor %xmm2, %xmm2
1053 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1054 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1055 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1056 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1057 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
1058 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
1061 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
1062 ; SSSE3: # BB#0: # %entry
1063 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1064 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1065 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1066 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1067 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1068 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1069 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
1072 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
1073 ; SSE41: # BB#0: # %entry
1074 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1075 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1076 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1077 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
1080 ; AVX1-LABEL: shuf_zext_8i8_to_8i32:
1081 ; AVX1: # BB#0: # %entry
1082 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1083 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1084 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1085 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1088 ; AVX2-LABEL: shuf_zext_8i8_to_8i32:
1089 ; AVX2: # BB#0: # %entry
1090 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1091 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1094 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
1095 %Z = bitcast <32 x i8> %B to <8 x i32>