1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
7 define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
8 ; SSE2-LABEL: zext_16i8_to_8i16:
9 ; SSE2: # BB#0: # %entry
10 ; SSE2-NEXT: pxor %xmm1, %xmm1
11 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
14 ; SSSE3-LABEL: zext_16i8_to_8i16:
15 ; SSSE3: # BB#0: # %entry
16 ; SSSE3-NEXT: pxor %xmm1, %xmm1
17 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
20 ; SSE41-LABEL: zext_16i8_to_8i16:
21 ; SSE41: # BB#0: # %entry
22 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
25 ; AVX-LABEL: zext_16i8_to_8i16:
26 ; AVX: # BB#0: # %entry
27 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
30 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
31 %C = zext <8 x i8> %B to <8 x i16>
36 define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
37 ; SSE2-LABEL: zext_16i8_to_16i16:
38 ; SSE2: # BB#0: # %entry
39 ; SSE2-NEXT: movdqa %xmm0, %xmm1
40 ; SSE2-NEXT: pxor %xmm2, %xmm2
41 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
42 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
43 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
46 ; SSSE3-LABEL: zext_16i8_to_16i16:
47 ; SSSE3: # BB#0: # %entry
48 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
49 ; SSSE3-NEXT: pxor %xmm2, %xmm2
50 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
51 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
52 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
55 ; SSE41-LABEL: zext_16i8_to_16i16:
56 ; SSE41: # BB#0: # %entry
57 ; SSE41-NEXT: movdqa %xmm0, %xmm1
58 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
59 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
60 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
63 ; AVX1-LABEL: zext_16i8_to_16i16:
64 ; AVX1: # BB#0: # %entry
65 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
66 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
67 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
68 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
71 ; AVX2-LABEL: zext_16i8_to_16i16:
72 ; AVX2: # BB#0: # %entry
73 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
76 %B = zext <16 x i8> %A to <16 x i16>
80 define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
81 ; SSE2-LABEL: zext_16i8_to_4i32:
82 ; SSE2: # BB#0: # %entry
83 ; SSE2-NEXT: pxor %xmm1, %xmm1
84 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
85 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
88 ; SSSE3-LABEL: zext_16i8_to_4i32:
89 ; SSSE3: # BB#0: # %entry
90 ; SSSE3-NEXT: pxor %xmm1, %xmm1
91 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
92 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
95 ; SSE41-LABEL: zext_16i8_to_4i32:
96 ; SSE41: # BB#0: # %entry
97 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
100 ; AVX-LABEL: zext_16i8_to_4i32:
101 ; AVX: # BB#0: # %entry
102 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
105 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
106 %C = zext <4 x i8> %B to <4 x i32>
110 define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
111 ; SSE2-LABEL: zext_16i8_to_8i32:
112 ; SSE2: # BB#0: # %entry
113 ; SSE2-NEXT: movdqa %xmm0, %xmm1
114 ; SSE2-NEXT: pxor %xmm2, %xmm2
115 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
116 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
117 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
118 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
119 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
122 ; SSSE3-LABEL: zext_16i8_to_8i32:
123 ; SSSE3: # BB#0: # %entry
124 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
125 ; SSSE3-NEXT: pxor %xmm2, %xmm2
126 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
127 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
128 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
129 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
130 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
133 ; SSE41-LABEL: zext_16i8_to_8i32:
134 ; SSE41: # BB#0: # %entry
135 ; SSE41-NEXT: movdqa %xmm0, %xmm1
136 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
137 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
138 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
139 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
142 ; AVX1-LABEL: zext_16i8_to_8i32:
143 ; AVX1: # BB#0: # %entry
144 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
145 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
146 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
147 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
148 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
151 ; AVX2-LABEL: zext_16i8_to_8i32:
152 ; AVX2: # BB#0: # %entry
153 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
154 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
155 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
158 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
159 %C = zext <8 x i8> %B to <8 x i32>
163 define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
164 ; SSE2-LABEL: zext_16i8_to_2i64:
165 ; SSE2: # BB#0: # %entry
166 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
167 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
168 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1]
169 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
172 ; SSSE3-LABEL: zext_16i8_to_2i64:
173 ; SSSE3: # BB#0: # %entry
174 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
175 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
178 ; SSE41-LABEL: zext_16i8_to_2i64:
179 ; SSE41: # BB#0: # %entry
180 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
181 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
184 ; AVX-LABEL: zext_16i8_to_2i64:
185 ; AVX: # BB#0: # %entry
186 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
187 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
190 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
191 %C = zext <2 x i8> %B to <2 x i64>
195 define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
196 ; SSE2-LABEL: zext_16i8_to_4i64:
197 ; SSE2: # BB#0: # %entry
198 ; SSE2-NEXT: movdqa %xmm0, %xmm2
199 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
200 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
201 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
202 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255]
203 ; SSE2-NEXT: pand %xmm3, %xmm2
204 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
205 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
206 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
207 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,5,6,7]
208 ; SSE2-NEXT: pand %xmm3, %xmm1
209 ; SSE2-NEXT: movdqa %xmm2, %xmm0
212 ; SSSE3-LABEL: zext_16i8_to_4i64:
213 ; SSSE3: # BB#0: # %entry
214 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
215 ; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
216 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255]
217 ; SSSE3-NEXT: pand %xmm1, %xmm2
218 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,1,1,2,2,3,3,3,3,5,5,2,2,3,3]
219 ; SSSE3-NEXT: pand %xmm0, %xmm1
220 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
223 ; SSE41-LABEL: zext_16i8_to_4i64:
224 ; SSE41: # BB#0: # %entry
225 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
226 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255]
227 ; SSE41-NEXT: pand %xmm1, %xmm2
228 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,1,1,2,2,3,3,3,3,5,5,2,2,3,3]
229 ; SSE41-NEXT: pand %xmm0, %xmm1
230 ; SSE41-NEXT: movdqa %xmm2, %xmm0
233 ; AVX1-LABEL: zext_16i8_to_4i64:
234 ; AVX1: # BB#0: # %entry
235 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
236 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
237 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
238 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
239 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
242 ; AVX2-LABEL: zext_16i8_to_4i64:
243 ; AVX2: # BB#0: # %entry
244 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
245 ; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
246 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
249 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
250 %C = zext <4 x i8> %B to <4 x i64>
254 define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
255 ; SSE2-LABEL: zext_8i16_to_4i32:
256 ; SSE2: # BB#0: # %entry
257 ; SSE2-NEXT: pxor %xmm1, %xmm1
258 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
261 ; SSSE3-LABEL: zext_8i16_to_4i32:
262 ; SSSE3: # BB#0: # %entry
263 ; SSSE3-NEXT: pxor %xmm1, %xmm1
264 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
267 ; SSE41-LABEL: zext_8i16_to_4i32:
268 ; SSE41: # BB#0: # %entry
269 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
272 ; AVX-LABEL: zext_8i16_to_4i32:
273 ; AVX: # BB#0: # %entry
274 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
277 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
278 %C = zext <4 x i16> %B to <4 x i32>
282 define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
283 ; SSE2-LABEL: zext_8i16_to_8i32:
284 ; SSE2: # BB#0: # %entry
285 ; SSE2-NEXT: movdqa %xmm0, %xmm1
286 ; SSE2-NEXT: pxor %xmm2, %xmm2
287 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
288 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
289 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
292 ; SSSE3-LABEL: zext_8i16_to_8i32:
293 ; SSSE3: # BB#0: # %entry
294 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
295 ; SSSE3-NEXT: pxor %xmm2, %xmm2
296 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
297 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
298 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
301 ; SSE41-LABEL: zext_8i16_to_8i32:
302 ; SSE41: # BB#0: # %entry
303 ; SSE41-NEXT: movdqa %xmm0, %xmm1
304 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
305 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
306 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
309 ; AVX1-LABEL: zext_8i16_to_8i32:
310 ; AVX1: # BB#0: # %entry
311 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
312 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
313 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
314 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
317 ; AVX2-LABEL: zext_8i16_to_8i32:
318 ; AVX2: # BB#0: # %entry
319 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
322 %B = zext <8 x i16> %A to <8 x i32>
326 define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
327 ; SSE2-LABEL: zext_8i16_to_2i64:
328 ; SSE2: # BB#0: # %entry
329 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
330 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
331 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
334 ; SSSE3-LABEL: zext_8i16_to_2i64:
335 ; SSSE3: # BB#0: # %entry
336 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
337 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
338 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
341 ; SSE41-LABEL: zext_8i16_to_2i64:
342 ; SSE41: # BB#0: # %entry
343 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
344 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
347 ; AVX-LABEL: zext_8i16_to_2i64:
348 ; AVX: # BB#0: # %entry
349 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
350 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
353 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
354 %C = zext <2 x i16> %B to <2 x i64>
358 define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
359 ; SSE2-LABEL: zext_8i16_to_4i64:
360 ; SSE2: # BB#0: # %entry
361 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,3]
362 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,5,5,6,7]
363 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535]
364 ; SSE2-NEXT: pand %xmm3, %xmm2
365 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
366 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
367 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,5,6,7]
368 ; SSE2-NEXT: pand %xmm3, %xmm1
369 ; SSE2-NEXT: movdqa %xmm2, %xmm0
372 ; SSSE3-LABEL: zext_8i16_to_4i64:
373 ; SSSE3: # BB#0: # %entry
374 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
375 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
376 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4,5,2,3,4,5,6,7,6,7,10,11,4,5,6,7]
377 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535]
378 ; SSSE3-NEXT: pand %xmm2, %xmm1
379 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
380 ; SSSE3-NEXT: pand %xmm2, %xmm0
383 ; SSE41-LABEL: zext_8i16_to_4i64:
384 ; SSE41: # BB#0: # %entry
385 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
386 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535]
387 ; SSE41-NEXT: pand %xmm1, %xmm2
388 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,2,3,4,5,6,7,6,7,10,11,4,5,6,7]
389 ; SSE41-NEXT: pand %xmm0, %xmm1
390 ; SSE41-NEXT: movdqa %xmm2, %xmm0
393 ; AVX1-LABEL: zext_8i16_to_4i64:
394 ; AVX1: # BB#0: # %entry
395 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
396 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
397 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
398 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
399 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
402 ; AVX2-LABEL: zext_8i16_to_4i64:
403 ; AVX2: # BB#0: # %entry
404 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
405 ; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
406 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
409 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
410 %C = zext <4 x i16> %B to <4 x i64>
414 define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
415 ; SSE2-LABEL: zext_4i32_to_2i64:
416 ; SSE2: # BB#0: # %entry
417 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
418 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
421 ; SSSE3-LABEL: zext_4i32_to_2i64:
422 ; SSSE3: # BB#0: # %entry
423 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
424 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
427 ; SSE41-LABEL: zext_4i32_to_2i64:
428 ; SSE41: # BB#0: # %entry
429 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
430 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
433 ; AVX-LABEL: zext_4i32_to_2i64:
434 ; AVX: # BB#0: # %entry
435 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
436 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
439 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
440 %C = zext <2 x i32> %B to <2 x i64>
444 define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
445 ; SSE2-LABEL: zext_4i32_to_4i64:
446 ; SSE2: # BB#0: # %entry
447 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
448 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
449 ; SSE2-NEXT: pand %xmm3, %xmm2
450 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
451 ; SSE2-NEXT: pand %xmm3, %xmm1
452 ; SSE2-NEXT: movdqa %xmm2, %xmm0
455 ; SSSE3-LABEL: zext_4i32_to_4i64:
456 ; SSSE3: # BB#0: # %entry
457 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
458 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
459 ; SSSE3-NEXT: pand %xmm3, %xmm2
460 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
461 ; SSSE3-NEXT: pand %xmm3, %xmm1
462 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
465 ; SSE41-LABEL: zext_4i32_to_4i64:
466 ; SSE41: # BB#0: # %entry
467 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
468 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
469 ; SSE41-NEXT: pand %xmm3, %xmm2
470 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
471 ; SSE41-NEXT: pand %xmm3, %xmm1
472 ; SSE41-NEXT: movdqa %xmm2, %xmm0
475 ; AVX1-LABEL: zext_4i32_to_4i64:
476 ; AVX1: # BB#0: # %entry
477 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
478 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
479 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
480 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
483 ; AVX2-LABEL: zext_4i32_to_4i64:
484 ; AVX2: # BB#0: # %entry
485 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
488 %B = zext <4 x i32> %A to <4 x i64>
492 define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
493 ; SSE2-LABEL: load_zext_2i8_to_2i64:
494 ; SSE2: # BB#0: # %entry
495 ; SSE2-NEXT: movzwl (%rdi), %eax
496 ; SSE2-NEXT: movd %eax, %xmm0
497 ; SSE2-NEXT: pxor %xmm1, %xmm1
498 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
499 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
500 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
503 ; SSSE3-LABEL: load_zext_2i8_to_2i64:
504 ; SSSE3: # BB#0: # %entry
505 ; SSSE3-NEXT: movzwl (%rdi), %eax
506 ; SSSE3-NEXT: movd %eax, %xmm0
507 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
510 ; SSE41-LABEL: load_zext_2i8_to_2i64:
511 ; SSE41: # BB#0: # %entry
512 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
515 ; AVX-LABEL: load_zext_2i8_to_2i64:
516 ; AVX: # BB#0: # %entry
517 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
520 %X = load <2 x i8>, <2 x i8>* %ptr
521 %Y = zext <2 x i8> %X to <2 x i64>
525 define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
526 ; SSE2-LABEL: load_zext_4i8_to_4i32:
527 ; SSE2: # BB#0: # %entry
528 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
529 ; SSE2-NEXT: pxor %xmm1, %xmm1
530 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
531 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
534 ; SSSE3-LABEL: load_zext_4i8_to_4i32:
535 ; SSSE3: # BB#0: # %entry
536 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
537 ; SSSE3-NEXT: pxor %xmm1, %xmm1
538 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
539 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
542 ; SSE41-LABEL: load_zext_4i8_to_4i32:
543 ; SSE41: # BB#0: # %entry
544 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
547 ; AVX-LABEL: load_zext_4i8_to_4i32:
548 ; AVX: # BB#0: # %entry
549 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
552 %X = load <4 x i8>, <4 x i8>* %ptr
553 %Y = zext <4 x i8> %X to <4 x i32>
557 define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
558 ; SSE2-LABEL: load_zext_4i8_to_4i64:
559 ; SSE2: # BB#0: # %entry
560 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
561 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
562 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
563 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
564 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255]
565 ; SSE2-NEXT: pand %xmm2, %xmm0
566 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
567 ; SSE2-NEXT: pand %xmm2, %xmm1
570 ; SSSE3-LABEL: load_zext_4i8_to_4i64:
571 ; SSSE3: # BB#0: # %entry
572 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
573 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
574 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
575 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
576 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255]
577 ; SSSE3-NEXT: pand %xmm2, %xmm0
578 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
579 ; SSSE3-NEXT: pand %xmm2, %xmm1
582 ; SSE41-LABEL: load_zext_4i8_to_4i64:
583 ; SSE41: # BB#0: # %entry
584 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
585 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
588 ; AVX1-LABEL: load_zext_4i8_to_4i64:
589 ; AVX1: # BB#0: # %entry
590 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
591 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
592 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
595 ; AVX2-LABEL: load_zext_4i8_to_4i64:
596 ; AVX2: # BB#0: # %entry
597 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
600 %X = load <4 x i8>, <4 x i8>* %ptr
601 %Y = zext <4 x i8> %X to <4 x i64>
605 define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
606 ; SSE2-LABEL: load_zext_8i8_to_8i16:
607 ; SSE2: # BB#0: # %entry
608 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
609 ; SSE2-NEXT: pxor %xmm1, %xmm1
610 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
613 ; SSSE3-LABEL: load_zext_8i8_to_8i16:
614 ; SSSE3: # BB#0: # %entry
615 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
616 ; SSSE3-NEXT: pxor %xmm1, %xmm1
617 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
620 ; SSE41-LABEL: load_zext_8i8_to_8i16:
621 ; SSE41: # BB#0: # %entry
622 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
625 ; AVX-LABEL: load_zext_8i8_to_8i16:
626 ; AVX: # BB#0: # %entry
627 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
630 %X = load <8 x i8>, <8 x i8>* %ptr
631 %Y = zext <8 x i8> %X to <8 x i16>
635 define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
636 ; SSE2-LABEL: load_zext_8i8_to_8i32:
637 ; SSE2: # BB#0: # %entry
638 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
639 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
640 ; SSE2-NEXT: movdqa %xmm1, %xmm0
641 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
642 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
643 ; SSE2-NEXT: pand %xmm2, %xmm0
644 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
645 ; SSE2-NEXT: pand %xmm2, %xmm1
648 ; SSSE3-LABEL: load_zext_8i8_to_8i32:
649 ; SSSE3: # BB#0: # %entry
650 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
651 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
652 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
653 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
654 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
655 ; SSSE3-NEXT: pand %xmm2, %xmm0
656 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
657 ; SSSE3-NEXT: pand %xmm2, %xmm1
660 ; SSE41-LABEL: load_zext_8i8_to_8i32:
661 ; SSE41: # BB#0: # %entry
662 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
663 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
666 ; AVX1-LABEL: load_zext_8i8_to_8i32:
667 ; AVX1: # BB#0: # %entry
668 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
669 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
670 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
673 ; AVX2-LABEL: load_zext_8i8_to_8i32:
674 ; AVX2: # BB#0: # %entry
675 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
678 %X = load <8 x i8>, <8 x i8>* %ptr
679 %Y = zext <8 x i8> %X to <8 x i32>
683 define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
684 ; SSE2-LABEL: load_zext_16i8_to_16i16:
685 ; SSE2: # BB#0: # %entry
686 ; SSE2-NEXT: movdqa (%rdi), %xmm1
687 ; SSE2-NEXT: pxor %xmm2, %xmm2
688 ; SSE2-NEXT: movdqa %xmm1, %xmm0
689 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
690 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
691 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
694 ; SSSE3-LABEL: load_zext_16i8_to_16i16:
695 ; SSSE3: # BB#0: # %entry
696 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
697 ; SSSE3-NEXT: pxor %xmm2, %xmm2
698 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
699 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
700 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
701 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
704 ; SSE41-LABEL: load_zext_16i8_to_16i16:
705 ; SSE41: # BB#0: # %entry
706 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
707 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
710 ; AVX1-LABEL: load_zext_16i8_to_16i16:
711 ; AVX1: # BB#0: # %entry
712 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
713 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
714 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
717 ; AVX2-LABEL: load_zext_16i8_to_16i16:
718 ; AVX2: # BB#0: # %entry
719 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
722 %X = load <16 x i8>, <16 x i8>* %ptr
723 %Y = zext <16 x i8> %X to <16 x i16>
; Zero-extending load of <2 x i16> to <2 x i64>: pre-SSE4.1 interleaves with a
; zero register (pxor + punpck), SSE4.1/AVX fold the load into pmovzxwq.
define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
; SSE2-LABEL: load_zext_2i16_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_2i16_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_2i16_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_2i16_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX-NEXT: retq
entry:
  %X = load <2 x i16>, <2 x i16>* %ptr
  %Y = zext <2 x i16> %X to <2 x i64>
  ret <2 x i64> %Y
}
; Zero-extending load of <4 x i16> to <4 x i32>: pre-SSE4.1 uses pxor +
; punpcklwd, SSE4.1/AVX fold the load into pmovzxwd.
define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
; SSE2-LABEL: load_zext_4i16_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i16_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i16_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_4i16_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX-NEXT: retq
entry:
  %X = load <4 x i16>, <4 x i16>* %ptr
  %Y = zext <4 x i16> %X to <4 x i32>
  ret <4 x i32> %Y
}
; Zero-extending load of <4 x i16> to <4 x i64> (256-bit result): pre-SSE4.1
; shuffles then masks with [65535,65535]; SSE4.1/AVX1 use two pmovzxwq halves
; (AVX1 combining with vinsertf128); AVX2 uses a single ymm vpmovzxwq.
define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
; SSE2-LABEL: load_zext_4i16_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i16_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535]
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; SSSE3-NEXT: pand %xmm2, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i16_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_4i16_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_4i16_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX2-NEXT: retq
entry:
  %X = load <4 x i16>, <4 x i16>* %ptr
  %Y = zext <4 x i16> %X to <4 x i64>
  ret <4 x i64> %Y
}
; Zero-extending load of <8 x i16> to <8 x i32> (256-bit result): pre-SSE4.1
; splits into low/high punpck halves with a pand mask on the high half;
; SSE4.1/AVX1 use two pmovzxwd; AVX2 uses a single ymm vpmovzxwd.
define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
; SSE2-LABEL: load_zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX2-NEXT: retq
entry:
  %X = load <8 x i16>, <8 x i16>* %ptr
  %Y = zext <8 x i16> %X to <8 x i32>
  ret <8 x i32> %Y
}
; Zero-extending load of <2 x i32> to <2 x i64>: pre-SSE4.1 interleaves with a
; zero register via punpckldq, SSE4.1/AVX fold the load into pmovzxdq.
define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
; SSE2-LABEL: load_zext_2i32_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_2i32_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_2i32_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_2i32_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX-NEXT: retq
entry:
  %X = load <2 x i32>, <2 x i32>* %ptr
  %Y = zext <2 x i32> %X to <2 x i64>
  ret <2 x i64> %Y
}
; Zero-extending load of <4 x i32> to <4 x i64> (256-bit result): pre-SSE4.1
; uses pshufd + pand with a [4294967295,4294967295] mask; SSE4.1/AVX1 use two
; pmovzxdq halves; AVX2 uses a single ymm vpmovzxdq.
define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
; SSE2-LABEL: load_zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; SSSE3-NEXT: pand %xmm2, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX2-NEXT: retq
entry:
  %X = load <4 x i32>, <4 x i32>* %ptr
  %Y = zext <4 x i32> %X to <4 x i64>
  ret <4 x i64> %Y
}
; Zero extension of an in-register <8 x i8> (stored as words) to <8 x i32>:
; all targets widen to dwords and then mask each lane with 255.
define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
; SSE2-LABEL: zext_8i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSSE3-NEXT: pand %xmm0, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE41-NEXT: pand %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %t = zext <8 x i8> %z to <8 x i32>
  ret <8 x i32> %t
}
; A shuffle that interleaves <8 x i16> with zeros, bitcast to <8 x i32>:
; should be recognized as a zero extension (pmovzxwd / punpck with zero).
define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}
; A shuffle that interleaves <4 x i32> with zeros, bitcast to <4 x i64>:
; should be recognized as a zero extension (pmovzxdq / punpck with zero).
define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
  %Z = bitcast <8 x i32> %B to <4 x i64>
  ret <4 x i64> %Z
}
1092 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
1093 ; SSE2-LABEL: shuf_zext_8i8_to_8i32:
1094 ; SSE2: # BB#0: # %entry
1095 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1096 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
1097 ; SSE2-NEXT: packuswb %xmm1, %xmm1
1098 ; SSE2-NEXT: pxor %xmm2, %xmm2
1099 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1100 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1101 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1102 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1103 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
1104 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
1107 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
1108 ; SSSE3: # BB#0: # %entry
1109 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1110 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1111 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1112 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1113 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1114 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1115 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
1118 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
1119 ; SSE41: # BB#0: # %entry
1120 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1121 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1122 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1123 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
1126 ; AVX1-LABEL: shuf_zext_8i8_to_8i32:
1127 ; AVX1: # BB#0: # %entry
1128 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1129 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1130 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1131 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1134 ; AVX2-LABEL: shuf_zext_8i8_to_8i32:
1135 ; AVX2: # BB#0: # %entry
1136 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1137 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1140 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
1141 %Z = bitcast <32 x i8> %B to <8 x i32>