1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; Zero-extends the low eight bytes of a <16 x i8> argument (picked out by the
; shufflevector with indices 0-7) to <8 x i16>.
; Expected lowering: SSE2 and SSSE3 zero a scratch register with pxor and
; interleave with punpcklbw; SSE4.1 and AVX use a single pmovzxbw / vpmovzxbw.
7 define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
8 ; SSE2-LABEL: zext_16i8_to_8i16:
9 ; SSE2: # BB#0: # %entry
10 ; SSE2-NEXT: pxor %xmm1, %xmm1
11 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
14 ; SSSE3-LABEL: zext_16i8_to_8i16:
15 ; SSSE3: # BB#0: # %entry
16 ; SSSE3-NEXT: pxor %xmm1, %xmm1
17 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
20 ; SSE41-LABEL: zext_16i8_to_8i16:
21 ; SSE41: # BB#0: # %entry
22 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
25 ; AVX-LABEL: zext_16i8_to_8i16:
26 ; AVX: # BB#0: # %entry
27 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
30 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
31 %C = zext <8 x i8> %B to <8 x i16>
; Zero-extends all sixteen bytes of a <16 x i8> argument to <16 x i16>, so the
; result spans two xmm registers (SSE) or one ymm register (AVX).
; Expected lowering: SSE2 and SSSE3 copy the input, zero xmm2, and unpack the
; low half (punpcklbw) and high half (punpckhbw) against it. SSE4.1 uses
; pmovzxbw for the low half but still punpckhbw for the high half. AVX1 builds
; both halves (vpunpckhbw, vpmovzxbw) and joins them with vinsertf128. AVX2
; expects a single 256-bit vpmovzxbw.
36 define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
37 ; SSE2-LABEL: zext_16i8_to_16i16:
38 ; SSE2: # BB#0: # %entry
39 ; SSE2-NEXT: movdqa %xmm0, %xmm1
40 ; SSE2-NEXT: pxor %xmm2, %xmm2
41 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
42 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
45 ; SSSE3-LABEL: zext_16i8_to_16i16:
46 ; SSSE3: # BB#0: # %entry
47 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
48 ; SSSE3-NEXT: pxor %xmm2, %xmm2
49 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
50 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
53 ; SSE41-LABEL: zext_16i8_to_16i16:
54 ; SSE41: # BB#0: # %entry
55 ; SSE41-NEXT: movdqa %xmm0, %xmm1
56 ; SSE41-NEXT: pxor %xmm2, %xmm2
57 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
58 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
61 ; AVX1-LABEL: zext_16i8_to_16i16:
62 ; AVX1: # BB#0: # %entry
63 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
64 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
65 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
66 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
69 ; AVX2-LABEL: zext_16i8_to_16i16:
70 ; AVX2: # BB#0: # %entry
71 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
74 %B = zext <16 x i8> %A to <16 x i16>
; Zero-extends the low four bytes of a <16 x i8> argument (shufflevector
; indices 0-3) to <4 x i32>.
; Expected lowering: SSE2 and SSSE3 widen in two steps against a zeroed
; register (punpcklbw, then punpcklwd); SSE4.1 and AVX use a single
; pmovzxbd / vpmovzxbd.
78 define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
79 ; SSE2-LABEL: zext_16i8_to_4i32:
80 ; SSE2: # BB#0: # %entry
81 ; SSE2-NEXT: pxor %xmm1, %xmm1
82 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
83 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
86 ; SSSE3-LABEL: zext_16i8_to_4i32:
87 ; SSSE3: # BB#0: # %entry
88 ; SSSE3-NEXT: pxor %xmm1, %xmm1
89 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
90 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
93 ; SSE41-LABEL: zext_16i8_to_4i32:
94 ; SSE41: # BB#0: # %entry
95 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
98 ; AVX-LABEL: zext_16i8_to_4i32:
99 ; AVX: # BB#0: # %entry
100 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
103 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
104 %C = zext <4 x i8> %B to <4 x i32>
; Zero-extends the low eight bytes of a <16 x i8> argument (shufflevector
; indices 0-7) to <8 x i32>, a 256-bit result.
; Expected lowering: SSE2 and SSSE3 widen bytes to words with punpcklbw, then
; split into two dword halves with punpcklwd / punpckhwd against zero. SSE4.1
; uses two pmovzxbd with a pshufd in between to bring the next four bytes down.
; AVX1 builds each half (vpmovzxbw + vpunpckhwd, and vpmovzxbd), masks both
; with a [255,0,0,0,...] constant via vpand, and joins them with vinsertf128.
; AVX2 expects a 256-bit vpmovzxbd followed by a vpand with a rip-relative
; constant.
108 define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
109 ; SSE2-LABEL: zext_16i8_to_8i32:
110 ; SSE2: # BB#0: # %entry
111 ; SSE2-NEXT: movdqa %xmm0, %xmm1
112 ; SSE2-NEXT: pxor %xmm2, %xmm2
113 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
114 ; SSE2-NEXT: movdqa %xmm1, %xmm0
115 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
116 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
119 ; SSSE3-LABEL: zext_16i8_to_8i32:
120 ; SSSE3: # BB#0: # %entry
121 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
122 ; SSSE3-NEXT: pxor %xmm2, %xmm2
123 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
124 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
125 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
126 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
129 ; SSE41-LABEL: zext_16i8_to_8i32:
130 ; SSE41: # BB#0: # %entry
131 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
132 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
133 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
134 ; SSE41-NEXT: movdqa %xmm2, %xmm0
137 ; AVX1-LABEL: zext_16i8_to_8i32:
138 ; AVX1: # BB#0: # %entry
139 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
140 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
141 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
142 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
143 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
144 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
145 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
148 ; AVX2-LABEL: zext_16i8_to_8i32:
149 ; AVX2: # BB#0: # %entry
150 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
151 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
154 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
155 %C = zext <8 x i8> %B to <8 x i32>
; Zero-extends the low two bytes of a <16 x i8> argument (shufflevector
; indices 0-1) to <2 x i64>.
; Expected lowering: SSE2 needs three unpack steps against a zeroed register
; (punpcklbw, punpcklwd, punpckldq); SSSE3 does it with one pshufb; SSE4.1 and
; AVX use a single pmovzxbq / vpmovzxbq.
159 define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
160 ; SSE2-LABEL: zext_16i8_to_2i64:
161 ; SSE2: # BB#0: # %entry
162 ; SSE2-NEXT: pxor %xmm1, %xmm1
163 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
164 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
165 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
168 ; SSSE3-LABEL: zext_16i8_to_2i64:
169 ; SSSE3: # BB#0: # %entry
170 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
173 ; SSE41-LABEL: zext_16i8_to_2i64:
174 ; SSE41: # BB#0: # %entry
175 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
178 ; AVX-LABEL: zext_16i8_to_2i64:
179 ; AVX: # BB#0: # %entry
180 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
183 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
184 %C = zext <2 x i8> %B to <2 x i64>
; Zero-extends the low four bytes of a <16 x i8> argument (shufflevector
; indices 0-3) to <4 x i64>, a 256-bit result.
; Expected lowering: SSE2 widens with punpcklbw and punpcklwd, then splits into
; two qword halves with punpckldq / punpckhdq against zero. SSSE3 uses one
; pshufb per half. SSE4.1 uses two pmovzxbq with a psrld $16 in between to
; shift bytes 2-3 into position. AVX1 builds each half (vpmovzxbd + vpshufd,
; and vpmovzxbq), masks both with a [255,0,...] constant via vpand, and joins
; them with vinsertf128. AVX2 expects a 256-bit vpmovzxbq followed by a vpand
; with a rip-relative constant.
188 define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
189 ; SSE2-LABEL: zext_16i8_to_4i64:
190 ; SSE2: # BB#0: # %entry
191 ; SSE2-NEXT: movdqa %xmm0, %xmm1
192 ; SSE2-NEXT: pxor %xmm2, %xmm2
193 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
194 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
195 ; SSE2-NEXT: movdqa %xmm1, %xmm0
196 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
197 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
200 ; SSSE3-LABEL: zext_16i8_to_4i64:
201 ; SSSE3: # BB#0: # %entry
202 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
203 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
204 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
207 ; SSE41-LABEL: zext_16i8_to_4i64:
208 ; SSE41: # BB#0: # %entry
209 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
210 ; SSE41-NEXT: psrld $16, %xmm0
211 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
212 ; SSE41-NEXT: movdqa %xmm2, %xmm0
215 ; AVX1-LABEL: zext_16i8_to_4i64:
216 ; AVX1: # BB#0: # %entry
217 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
218 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
219 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
220 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
221 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
222 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
223 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
226 ; AVX2-LABEL: zext_16i8_to_4i64:
227 ; AVX2: # BB#0: # %entry
228 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
229 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
232 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
233 %C = zext <4 x i8> %B to <4 x i64>
; Zero-extends the low four words of an <8 x i16> argument (shufflevector
; indices 0-3) to <4 x i32>.
; Expected lowering: SSE2 and SSSE3 zero a scratch register with pxor and
; interleave with punpcklwd; SSE4.1 and AVX use a single pmovzxwd / vpmovzxwd.
237 define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
238 ; SSE2-LABEL: zext_8i16_to_4i32:
239 ; SSE2: # BB#0: # %entry
240 ; SSE2-NEXT: pxor %xmm1, %xmm1
241 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
244 ; SSSE3-LABEL: zext_8i16_to_4i32:
245 ; SSSE3: # BB#0: # %entry
246 ; SSSE3-NEXT: pxor %xmm1, %xmm1
247 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
250 ; SSE41-LABEL: zext_8i16_to_4i32:
251 ; SSE41: # BB#0: # %entry
252 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
255 ; AVX-LABEL: zext_8i16_to_4i32:
256 ; AVX: # BB#0: # %entry
257 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
260 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
261 %C = zext <4 x i16> %B to <4 x i32>
; Zero-extends all eight words of an <8 x i16> argument to <8 x i32>, a
; 256-bit result.
; Expected lowering: SSE2 and SSSE3 copy the input, zero xmm2, and unpack the
; low half (punpcklwd) and high half (punpckhwd) against it. SSE4.1 uses
; pmovzxwd for the low half but still punpckhwd for the high half. AVX1 builds
; both halves (vpunpckhwd, vpmovzxwd) and joins them with vinsertf128. AVX2
; expects a single 256-bit vpmovzxwd.
265 define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
266 ; SSE2-LABEL: zext_8i16_to_8i32:
267 ; SSE2: # BB#0: # %entry
268 ; SSE2-NEXT: movdqa %xmm0, %xmm1
269 ; SSE2-NEXT: pxor %xmm2, %xmm2
270 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
271 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
274 ; SSSE3-LABEL: zext_8i16_to_8i32:
275 ; SSSE3: # BB#0: # %entry
276 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
277 ; SSSE3-NEXT: pxor %xmm2, %xmm2
278 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
279 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
282 ; SSE41-LABEL: zext_8i16_to_8i32:
283 ; SSE41: # BB#0: # %entry
284 ; SSE41-NEXT: movdqa %xmm0, %xmm1
285 ; SSE41-NEXT: pxor %xmm2, %xmm2
286 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
287 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
290 ; AVX1-LABEL: zext_8i16_to_8i32:
291 ; AVX1: # BB#0: # %entry
292 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
293 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
294 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
295 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
298 ; AVX2-LABEL: zext_8i16_to_8i32:
299 ; AVX2: # BB#0: # %entry
300 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
303 %B = zext <8 x i16> %A to <8 x i32>
; Zero-extends the low two words of an <8 x i16> argument (shufflevector
; indices 0-1) to <2 x i64>.
; Expected lowering: SSE2 and SSSE3 widen in two steps against a zeroed
; register (punpcklwd, then punpckldq); SSE4.1 and AVX use a single
; pmovzxwq / vpmovzxwq.
307 define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
308 ; SSE2-LABEL: zext_8i16_to_2i64:
309 ; SSE2: # BB#0: # %entry
310 ; SSE2-NEXT: pxor %xmm1, %xmm1
311 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
312 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
315 ; SSSE3-LABEL: zext_8i16_to_2i64:
316 ; SSSE3: # BB#0: # %entry
317 ; SSSE3-NEXT: pxor %xmm1, %xmm1
318 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
319 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
322 ; SSE41-LABEL: zext_8i16_to_2i64:
323 ; SSE41: # BB#0: # %entry
324 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
327 ; AVX-LABEL: zext_8i16_to_2i64:
328 ; AVX: # BB#0: # %entry
329 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
332 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
333 %C = zext <2 x i16> %B to <2 x i64>
; Zero-extends the low four words of an <8 x i16> argument (shufflevector
; indices 0-3) to <4 x i64>, a 256-bit result.
; Expected lowering: SSE2 and SSSE3 widen words to dwords with punpcklwd, then
; split into two qword halves with punpckldq / punpckhdq against zero. SSE4.1
; uses two pmovzxwq with a pshufd in between to bring words 2-3 down. AVX1
; builds each half (vpmovzxwd + vpshufd, and vpmovzxwq), clears the upper words
; of each qword by blending with a zeroed register (vpblendw), and joins the
; halves with vinsertf128. AVX2 expects a 256-bit vpmovzxwq followed by
; vpxor + vpblendw against zero.
337 define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
338 ; SSE2-LABEL: zext_8i16_to_4i64:
339 ; SSE2: # BB#0: # %entry
340 ; SSE2-NEXT: movdqa %xmm0, %xmm1
341 ; SSE2-NEXT: pxor %xmm2, %xmm2
342 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
343 ; SSE2-NEXT: movdqa %xmm1, %xmm0
344 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
345 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
348 ; SSSE3-LABEL: zext_8i16_to_4i64:
349 ; SSSE3: # BB#0: # %entry
350 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
351 ; SSSE3-NEXT: pxor %xmm2, %xmm2
352 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
353 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
354 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
355 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
358 ; SSE41-LABEL: zext_8i16_to_4i64:
359 ; SSE41: # BB#0: # %entry
360 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
361 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
362 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
363 ; SSE41-NEXT: movdqa %xmm2, %xmm0
366 ; AVX1-LABEL: zext_8i16_to_4i64:
367 ; AVX1: # BB#0: # %entry
368 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
369 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
370 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
371 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
372 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
373 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
374 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
377 ; AVX2-LABEL: zext_8i16_to_4i64:
378 ; AVX2: # BB#0: # %entry
379 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
380 ; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
381 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
384 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
385 %C = zext <4 x i16> %B to <4 x i64>
; Zero-extends the low two dwords of a <4 x i32> argument (shufflevector
; indices 0-1) to <2 x i64>.
; Expected lowering: SSE2 and SSSE3 zero a scratch register with pxor and
; interleave with punpckldq; SSE4.1 and AVX use a single pmovzxdq / vpmovzxdq.
389 define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
390 ; SSE2-LABEL: zext_4i32_to_2i64:
391 ; SSE2: # BB#0: # %entry
392 ; SSE2-NEXT: pxor %xmm1, %xmm1
393 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
396 ; SSSE3-LABEL: zext_4i32_to_2i64:
397 ; SSSE3: # BB#0: # %entry
398 ; SSSE3-NEXT: pxor %xmm1, %xmm1
399 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
402 ; SSE41-LABEL: zext_4i32_to_2i64:
403 ; SSE41: # BB#0: # %entry
404 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
407 ; AVX-LABEL: zext_4i32_to_2i64:
408 ; AVX: # BB#0: # %entry
409 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
412 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
413 %C = zext <2 x i32> %B to <2 x i64>
; Zero-extends all four dwords of a <4 x i32> argument to <4 x i64>, a
; 256-bit result.
; Expected lowering: SSE2 and SSSE3 copy the input, zero xmm2, and unpack the
; low half (punpckldq) and high half (punpckhdq) against it. SSE4.1 uses
; pmovzxdq for the low half but still punpckhdq for the high half. AVX1 builds
; both halves (vpunpckhdq, vpmovzxdq) and joins them with vinsertf128. AVX2
; expects a single 256-bit vpmovzxdq.
417 define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
418 ; SSE2-LABEL: zext_4i32_to_4i64:
419 ; SSE2: # BB#0: # %entry
420 ; SSE2-NEXT: movdqa %xmm0, %xmm1
421 ; SSE2-NEXT: pxor %xmm2, %xmm2
422 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
423 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
426 ; SSSE3-LABEL: zext_4i32_to_4i64:
427 ; SSSE3: # BB#0: # %entry
428 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
429 ; SSSE3-NEXT: pxor %xmm2, %xmm2
430 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
431 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
434 ; SSE41-LABEL: zext_4i32_to_4i64:
435 ; SSE41: # BB#0: # %entry
436 ; SSE41-NEXT: movdqa %xmm0, %xmm1
437 ; SSE41-NEXT: pxor %xmm2, %xmm2
438 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
439 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
442 ; AVX1-LABEL: zext_4i32_to_4i64:
443 ; AVX1: # BB#0: # %entry
444 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
445 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
446 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
447 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
450 ; AVX2-LABEL: zext_4i32_to_4i64:
451 ; AVX2: # BB#0: # %entry
452 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
455 %B = zext <4 x i32> %A to <4 x i64>
; Loads a <2 x i8> from %ptr and zero-extends it to <2 x i64>; checks that the
; load is folded into the extend where the target allows it.
; Expected lowering: SSE2 and SSSE3 load the two bytes through a GPR
; (movzwl + movd); SSE2 then needs three unpack steps against zero, while
; SSSE3 uses one pshufb. SSE4.1 and AVX expect pmovzxbq / vpmovzxbq reading
; directly from memory.
459 define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
460 ; SSE2-LABEL: load_zext_2i8_to_2i64:
461 ; SSE2: # BB#0: # %entry
462 ; SSE2-NEXT: movzwl (%rdi), %eax
463 ; SSE2-NEXT: movd %eax, %xmm0
464 ; SSE2-NEXT: pxor %xmm1, %xmm1
465 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
466 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
467 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
470 ; SSSE3-LABEL: load_zext_2i8_to_2i64:
471 ; SSSE3: # BB#0: # %entry
472 ; SSSE3-NEXT: movzwl (%rdi), %eax
473 ; SSSE3-NEXT: movd %eax, %xmm0
474 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
477 ; SSE41-LABEL: load_zext_2i8_to_2i64:
478 ; SSE41: # BB#0: # %entry
479 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
482 ; AVX-LABEL: load_zext_2i8_to_2i64:
483 ; AVX: # BB#0: # %entry
484 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
487 %X = load <2 x i8>, <2 x i8>* %ptr
488 %Y = zext <2 x i8> %X to <2 x i64>
; Loads a <4 x i8> from %ptr and zero-extends it to <4 x i32>.
; Expected lowering: SSE2 and SSSE3 load 32 bits with movd, then widen in two
; steps against a zeroed register (punpcklbw, punpcklwd). SSE4.1 and AVX expect
; pmovzxbd / vpmovzxbd reading directly from memory.
492 define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
493 ; SSE2-LABEL: load_zext_4i8_to_4i32:
494 ; SSE2: # BB#0: # %entry
495 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
496 ; SSE2-NEXT: pxor %xmm1, %xmm1
497 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
498 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
501 ; SSSE3-LABEL: load_zext_4i8_to_4i32:
502 ; SSSE3: # BB#0: # %entry
503 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
504 ; SSSE3-NEXT: pxor %xmm1, %xmm1
505 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
506 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
509 ; SSE41-LABEL: load_zext_4i8_to_4i32:
510 ; SSE41: # BB#0: # %entry
511 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
514 ; AVX-LABEL: load_zext_4i8_to_4i32:
515 ; AVX: # BB#0: # %entry
516 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
519 %X = load <4 x i8>, <4 x i8>* %ptr
520 %Y = zext <4 x i8> %X to <4 x i64>
; Loads a <4 x i8> from %ptr and zero-extends it to <4 x i64>, a 256-bit
; result.
; Expected lowering: SSE2 and SSSE3 load with movd and widen with punpcklbw /
; punpcklwd whose second operand is xmm0, a register the checked sequence
; never initializes. Those filler lanes are cleared afterwards, by pshufd +
; pand with a [255,0,...] mask on SSE2 and by two zeroing pshufb on SSSE3.
; SSE4.1 expects two pmovzxbq from memory. AVX1 expects two vpmovzxbq from
; memory joined with vinsertf128. AVX2 expects a single 256-bit vpmovzxbq
; from memory.
524 define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
525 ; SSE2-LABEL: load_zext_4i8_to_4i64:
526 ; SSE2: # BB#0: # %entry
527 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
528 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
529 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
530 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
531 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
532 ; SSE2-NEXT: pand %xmm2, %xmm0
533 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
534 ; SSE2-NEXT: pand %xmm2, %xmm1
537 ; SSSE3-LABEL: load_zext_4i8_to_4i64:
538 ; SSSE3: # BB#0: # %entry
539 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
540 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
541 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
542 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
543 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero
544 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8],zero,zero,zero,zero,zero,zero,zero,xmm1[12],zero,zero,zero,zero,zero,zero,zero
547 ; SSE41-LABEL: load_zext_4i8_to_4i64:
548 ; SSE41: # BB#0: # %entry
549 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
550 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
553 ; AVX1-LABEL: load_zext_4i8_to_4i64:
554 ; AVX1: # BB#0: # %entry
555 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
556 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
557 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
560 ; AVX2-LABEL: load_zext_4i8_to_4i64:
561 ; AVX2: # BB#0: # %entry
562 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
565 %X = load <4 x i8>, <4 x i8>* %ptr
566 %Y = zext <4 x i8> %X to <4 x i64>
; Loads an <8 x i8> from %ptr and zero-extends it to <8 x i16>.
; Expected lowering: SSE2 and SSSE3 load 64 bits with movq, zero a scratch
; register, and interleave with punpcklbw. SSE4.1 and AVX expect
; pmovzxbw / vpmovzxbw reading directly from memory.
570 define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
571 ; SSE2-LABEL: load_zext_8i8_to_8i16:
572 ; SSE2: # BB#0: # %entry
573 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
574 ; SSE2-NEXT: pxor %xmm1, %xmm1
575 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
578 ; SSSE3-LABEL: load_zext_8i8_to_8i16:
579 ; SSSE3: # BB#0: # %entry
580 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
581 ; SSSE3-NEXT: pxor %xmm1, %xmm1
582 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
585 ; SSE41-LABEL: load_zext_8i8_to_8i16:
586 ; SSE41: # BB#0: # %entry
587 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
590 ; AVX-LABEL: load_zext_8i8_to_8i16:
591 ; AVX: # BB#0: # %entry
592 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
595 %X = load <8 x i8>, <8 x i8>* %ptr
596 %Y = zext <8 x i8> %X to <8 x i16>
; Loads an <8 x i8> from %ptr and zero-extends it to <8 x i32>, a 256-bit
; result.
; Expected lowering: SSE2 and SSSE3 load with movq and widen with a punpcklbw
; whose second operand is xmm0, a register the checked sequence has not
; initialized at that point. The filler lanes are cleared afterwards, by
; punpcklwd / punpckhwd + pand with a [255,0,0,0,...] mask on SSE2 and by two
; zeroing pshufb on SSSE3. SSE4.1 expects two pmovzxbd from memory. AVX1
; expects two vpmovzxbd from memory joined with vinsertf128. AVX2 expects a
; single 256-bit vpmovzxbd from memory.
600 define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
601 ; SSE2-LABEL: load_zext_8i8_to_8i32:
602 ; SSE2: # BB#0: # %entry
603 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
604 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
605 ; SSE2-NEXT: movdqa %xmm1, %xmm0
606 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
607 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
608 ; SSE2-NEXT: pand %xmm2, %xmm0
609 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
610 ; SSE2-NEXT: pand %xmm2, %xmm1
613 ; SSSE3-LABEL: load_zext_8i8_to_8i32:
614 ; SSSE3: # BB#0: # %entry
615 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
616 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
617 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
618 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[6],zero,zero,zero
619 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[14],zero,zero,zero
622 ; SSE41-LABEL: load_zext_8i8_to_8i32:
623 ; SSE41: # BB#0: # %entry
624 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
625 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
628 ; AVX1-LABEL: load_zext_8i8_to_8i32:
629 ; AVX1: # BB#0: # %entry
630 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
631 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
632 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
635 ; AVX2-LABEL: load_zext_8i8_to_8i32:
636 ; AVX2: # BB#0: # %entry
637 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
640 %X = load <8 x i8>, <8 x i8>* %ptr
641 %Y = zext <8 x i8> %X to <8 x i32>
; Loads a <16 x i8> from %ptr and zero-extends it to <16 x i16>, a 256-bit
; result.
; Expected lowering: SSE2 and SSSE3 load the full vector with movdqa, zero
; xmm2, and unpack the low half (punpcklbw) and high half (punpckhbw) against
; it. SSE4.1 expects two pmovzxbw from memory. AVX1 expects two vpmovzxbw from
; memory joined with vinsertf128. AVX2 expects a single 256-bit vpmovzxbw
; from memory.
645 define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
646 ; SSE2-LABEL: load_zext_16i8_to_16i16:
647 ; SSE2: # BB#0: # %entry
648 ; SSE2-NEXT: movdqa (%rdi), %xmm1
649 ; SSE2-NEXT: pxor %xmm2, %xmm2
650 ; SSE2-NEXT: movdqa %xmm1, %xmm0
651 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
652 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
655 ; SSSE3-LABEL: load_zext_16i8_to_16i16:
656 ; SSSE3: # BB#0: # %entry
657 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
658 ; SSSE3-NEXT: pxor %xmm2, %xmm2
659 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
660 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
661 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
664 ; SSE41-LABEL: load_zext_16i8_to_16i16:
665 ; SSE41: # BB#0: # %entry
666 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
667 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
670 ; AVX1-LABEL: load_zext_16i8_to_16i16:
671 ; AVX1: # BB#0: # %entry
672 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
673 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
674 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
677 ; AVX2-LABEL: load_zext_16i8_to_16i16:
678 ; AVX2: # BB#0: # %entry
679 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
682 %X = load <16 x i8>, <16 x i8>* %ptr
683 %Y = zext <16 x i8> %X to <16 x i16>
; Loads a <2 x i16> from %ptr and zero-extends it to <2 x i64>.
; Expected lowering: SSE2 and SSSE3 load 32 bits with movd, then widen in two
; steps against a zeroed register (punpcklwd, punpckldq). SSE4.1 and AVX expect
; pmovzxwq / vpmovzxwq reading directly from memory.
687 define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
688 ; SSE2-LABEL: load_zext_2i16_to_2i64:
689 ; SSE2: # BB#0: # %entry
690 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
691 ; SSE2-NEXT: pxor %xmm1, %xmm1
692 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
693 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
696 ; SSSE3-LABEL: load_zext_2i16_to_2i64:
697 ; SSSE3: # BB#0: # %entry
698 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
699 ; SSSE3-NEXT: pxor %xmm1, %xmm1
700 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
701 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
704 ; SSE41-LABEL: load_zext_2i16_to_2i64:
705 ; SSE41: # BB#0: # %entry
706 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
709 ; AVX-LABEL: load_zext_2i16_to_2i64:
710 ; AVX: # BB#0: # %entry
711 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
714 %X = load <2 x i16>, <2 x i16>* %ptr
715 %Y = zext <2 x i16> %X to <2 x i64>
; Loads a <4 x i16> vector from %ptr and zero-extends it to <4 x i32>.
; Expected code: the SSE2 and SSSE3 runs load 64 bits with movq and interleave
; with a zeroed register (punpcklwd); the SSE4.1 and AVX runs use a single
; (v)pmovzxwd with a memory operand.
719 define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
720 ; SSE2-LABEL: load_zext_4i16_to_4i32:
721 ; SSE2: # BB#0: # %entry
722 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
723 ; SSE2-NEXT: pxor %xmm1, %xmm1
724 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
727 ; SSSE3-LABEL: load_zext_4i16_to_4i32:
728 ; SSSE3: # BB#0: # %entry
729 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
730 ; SSSE3-NEXT: pxor %xmm1, %xmm1
731 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
734 ; SSE41-LABEL: load_zext_4i16_to_4i32:
735 ; SSE41: # BB#0: # %entry
736 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
739 ; AVX-LABEL: load_zext_4i16_to_4i32:
740 ; AVX: # BB#0: # %entry
741 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
744 %X = load <4 x i16>, <4 x i16>* %ptr
745 %Y = zext <4 x i16> %X to <4 x i32>
; Loads a <4 x i16> vector from %ptr and zero-extends it to <4 x i64>.
; The result is 256 bits wide: the SSE runs produce it in xmm0 and xmm1, the
; AVX1 run builds two 128-bit halves and joins them with vinsertf128, and the
; AVX2 run uses one ymm-wide vpmovzxwq with a memory operand.
; NOTE(review): in the SSE2 and SSSE3 sequences punpcklwd interleaves with
; xmm0 before this function writes xmm0; the interleaved lanes appear to be
; cleared afterwards by the pand mask / pshufb zeroing -- confirm.
749 define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
750 ; SSE2-LABEL: load_zext_4i16_to_4i64:
751 ; SSE2: # BB#0: # %entry
752 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
753 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
754 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
755 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0,65535,0,0,0]
756 ; SSE2-NEXT: pand %xmm2, %xmm0
757 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
758 ; SSE2-NEXT: pand %xmm2, %xmm1
761 ; SSSE3-LABEL: load_zext_4i16_to_4i64:
762 ; SSSE3: # BB#0: # %entry
763 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
764 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
765 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
766 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero
767 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9],zero,zero,zero,zero,zero,zero,xmm1[12,13],zero,zero,zero,zero,zero,zero
770 ; SSE41-LABEL: load_zext_4i16_to_4i64:
771 ; SSE41: # BB#0: # %entry
772 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
773 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
776 ; AVX1-LABEL: load_zext_4i16_to_4i64:
777 ; AVX1: # BB#0: # %entry
778 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
779 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
780 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
783 ; AVX2-LABEL: load_zext_4i16_to_4i64:
784 ; AVX2: # BB#0: # %entry
785 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
788 %X = load <4 x i16>, <4 x i16>* %ptr
789 %Y = zext <4 x i16> %X to <4 x i64>
; Loads an <8 x i16> vector from %ptr and zero-extends it to <8 x i32>.
; Expected code: the SSE2 and SSSE3 runs load the full vector with movdqa and
; split it with punpcklwd/punpckhwd against a zeroed register; the SSE4.1 run
; uses two pmovzxwd from memory; the AVX1 run joins two vpmovzxwd halves with
; vinsertf128; the AVX2 run uses one ymm-wide vpmovzxwd.
793 define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
794 ; SSE2-LABEL: load_zext_8i16_to_8i32:
795 ; SSE2: # BB#0: # %entry
796 ; SSE2-NEXT: movdqa (%rdi), %xmm1
797 ; SSE2-NEXT: pxor %xmm2, %xmm2
798 ; SSE2-NEXT: movdqa %xmm1, %xmm0
799 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
800 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
803 ; SSSE3-LABEL: load_zext_8i16_to_8i32:
804 ; SSSE3: # BB#0: # %entry
805 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
806 ; SSSE3-NEXT: pxor %xmm2, %xmm2
807 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
808 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
809 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
812 ; SSE41-LABEL: load_zext_8i16_to_8i32:
813 ; SSE41: # BB#0: # %entry
814 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
815 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
818 ; AVX1-LABEL: load_zext_8i16_to_8i32:
819 ; AVX1: # BB#0: # %entry
820 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
821 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
822 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
825 ; AVX2-LABEL: load_zext_8i16_to_8i32:
826 ; AVX2: # BB#0: # %entry
827 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
830 %X = load <8 x i16>, <8 x i16>* %ptr
831 %Y = zext <8 x i16> %X to <8 x i32>
; Loads a <2 x i32> vector from %ptr and zero-extends it to <2 x i64>.
; Expected code: the SSE2 and SSSE3 runs load 64 bits with movq and interleave
; with a zeroed register (punpckldq); the SSE4.1 and AVX runs use a single
; (v)pmovzxdq with a memory operand.
835 define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
836 ; SSE2-LABEL: load_zext_2i32_to_2i64:
837 ; SSE2: # BB#0: # %entry
838 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
839 ; SSE2-NEXT: pxor %xmm1, %xmm1
840 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
843 ; SSSE3-LABEL: load_zext_2i32_to_2i64:
844 ; SSSE3: # BB#0: # %entry
845 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
846 ; SSSE3-NEXT: pxor %xmm1, %xmm1
847 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
850 ; SSE41-LABEL: load_zext_2i32_to_2i64:
851 ; SSE41: # BB#0: # %entry
852 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
855 ; AVX-LABEL: load_zext_2i32_to_2i64:
856 ; AVX: # BB#0: # %entry
857 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
860 %X = load <2 x i32>, <2 x i32>* %ptr
861 %Y = zext <2 x i32> %X to <2 x i64>
; Loads a <4 x i32> vector from %ptr and zero-extends it to <4 x i64>.
; Expected code: the SSE2 and SSSE3 runs load with movdqa and split the vector
; with punpckldq/punpckhdq against a zeroed register; the SSE4.1 run uses two
; pmovzxdq from memory; the AVX1 run joins two vpmovzxdq halves with
; vinsertf128; the AVX2 run uses one ymm-wide vpmovzxdq.
865 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
866 ; SSE2-LABEL: load_zext_4i32_to_4i64:
867 ; SSE2: # BB#0: # %entry
868 ; SSE2-NEXT: movdqa (%rdi), %xmm1
869 ; SSE2-NEXT: pxor %xmm2, %xmm2
870 ; SSE2-NEXT: movdqa %xmm1, %xmm0
871 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
872 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
875 ; SSSE3-LABEL: load_zext_4i32_to_4i64:
876 ; SSSE3: # BB#0: # %entry
877 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
878 ; SSSE3-NEXT: pxor %xmm2, %xmm2
879 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
880 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
881 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
884 ; SSE41-LABEL: load_zext_4i32_to_4i64:
885 ; SSE41: # BB#0: # %entry
886 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
887 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
890 ; AVX1-LABEL: load_zext_4i32_to_4i64:
891 ; AVX1: # BB#0: # %entry
892 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
893 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
894 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
897 ; AVX2-LABEL: load_zext_4i32_to_4i64:
898 ; AVX2: # BB#0: # %entry
899 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
902 %X = load <4 x i32>, <4 x i32>* %ptr
903 %Y = zext <4 x i32> %X to <4 x i64>
; Zero-extends a <8 x i8> argument %z directly to <8 x i32>.
; Every configuration first applies (v)pand with a RIP-relative constant
; operand and then widens 16-bit lanes to 32 bits ((v)pmovzxwd and/or
; punpck[lh]wd against zero).
; NOTE(review): this presumably reflects the <8 x i8> argument arriving in
; 16-bit lanes with undefined high bytes -- confirm against the type
; legalization rules for this LLVM revision.
907 define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
908 ; SSE2-LABEL: zext_8i8_to_8i32:
909 ; SSE2: # BB#0: # %entry
910 ; SSE2-NEXT: movdqa %xmm0, %xmm1
911 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
912 ; SSE2-NEXT: pxor %xmm2, %xmm2
913 ; SSE2-NEXT: movdqa %xmm1, %xmm0
914 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
915 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
918 ; SSSE3-LABEL: zext_8i8_to_8i32:
919 ; SSSE3: # BB#0: # %entry
920 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
921 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
922 ; SSSE3-NEXT: pxor %xmm2, %xmm2
923 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
924 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
925 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
928 ; SSE41-LABEL: zext_8i8_to_8i32:
929 ; SSE41: # BB#0: # %entry
930 ; SSE41-NEXT: movdqa %xmm0, %xmm1
931 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
932 ; SSE41-NEXT: pxor %xmm2, %xmm2
933 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
934 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
937 ; AVX1-LABEL: zext_8i8_to_8i32:
938 ; AVX1: # BB#0: # %entry
939 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
940 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
941 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
942 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
943 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
946 ; AVX2-LABEL: zext_8i8_to_8i32:
947 ; AVX2: # BB#0: # %entry
948 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
949 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
952 %t = zext <8 x i8> %z to <8 x i32>
; Zero extension written as a shuffle: each of the eight i16 elements of %A is
; followed by an element of the zeroinitializer operand (mask index 8), and
; the <16 x i16> result is bitcast to <8 x i32>.
; The expected code matches a plain zero extend: unpack against zero on the
; SSE2/SSSE3 runs, (v)pmovzxwd for the low half on the SSE4.1/AVX1 runs, and a
; single ymm-wide vpmovzxwd on the AVX2 run.
956 define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
957 ; SSE2-LABEL: shuf_zext_8i16_to_8i32:
958 ; SSE2: # BB#0: # %entry
959 ; SSE2-NEXT: movdqa %xmm0, %xmm1
960 ; SSE2-NEXT: pxor %xmm2, %xmm2
961 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
962 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
965 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
966 ; SSSE3: # BB#0: # %entry
967 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
968 ; SSSE3-NEXT: pxor %xmm2, %xmm2
969 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
970 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
973 ; SSE41-LABEL: shuf_zext_8i16_to_8i32:
974 ; SSE41: # BB#0: # %entry
975 ; SSE41-NEXT: movdqa %xmm0, %xmm1
976 ; SSE41-NEXT: pxor %xmm2, %xmm2
977 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
978 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
981 ; AVX1-LABEL: shuf_zext_8i16_to_8i32:
982 ; AVX1: # BB#0: # %entry
983 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
984 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
985 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
986 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
989 ; AVX2-LABEL: shuf_zext_8i16_to_8i32:
990 ; AVX2: # BB#0: # %entry
991 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
994 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
995 %Z = bitcast <16 x i16> %B to <8 x i32>
; Zero extension written as a shuffle: each of the four i32 elements of %A is
; followed by an element of the zeroinitializer operand (mask index 4), and
; the <8 x i32> result is bitcast to <4 x i64>.
; The AVX1 expectation is a floating-point-domain sequence (vinsertps,
; vxorpd, vblendpd, vpermilps, vinsertf128) rather than vpmovzxdq; the AVX2
; run expects a single ymm-wide vpmovzxdq.
999 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
1000 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
1001 ; SSE2: # BB#0: # %entry
1002 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1003 ; SSE2-NEXT: pxor %xmm2, %xmm2
1004 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1005 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1008 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
1009 ; SSSE3: # BB#0: # %entry
1010 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1011 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1012 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1013 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1016 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
1017 ; SSE41: # BB#0: # %entry
1018 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1019 ; SSE41-NEXT: pxor %xmm2, %xmm2
1020 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
1021 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1024 ; AVX1-LABEL: shuf_zext_4i32_to_4i64:
1025 ; AVX1: # BB#0: # %entry
1026 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
1027 ; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1028 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1029 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
1030 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1033 ; AVX2-LABEL: shuf_zext_4i32_to_4i64:
1034 ; AVX2: # BB#0: # %entry
1035 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1038 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
1039 %Z = bitcast <8 x i32> %B to <4 x i64>
; Zero extension written as a shuffle: each of the eight i8 elements of %A is
; followed by three elements of the zeroinitializer operand (mask index 8),
; and the <32 x i8> result is bitcast to <8 x i32>.
; Every expectation first compacts the argument to the low eight bytes
; (pand + packuswb on the SSE2 run, (v)pshufb of the even bytes elsewhere)
; before widening.
; NOTE(review): the compaction presumably exists because the <8 x i8> argument
; arrives in 16-bit lanes -- confirm.
1043 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
1044 ; SSE2-LABEL: shuf_zext_8i8_to_8i32:
1045 ; SSE2: # BB#0: # %entry
1046 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1047 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
1048 ; SSE2-NEXT: packuswb %xmm1, %xmm1
1049 ; SSE2-NEXT: pxor %xmm2, %xmm2
1050 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1051 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1052 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1053 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1056 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
1057 ; SSSE3: # BB#0: # %entry
1058 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1059 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1060 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1061 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1062 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1063 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1064 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1067 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
1068 ; SSE41: # BB#0: # %entry
1069 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1070 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1071 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1072 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1073 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1076 ; AVX1-LABEL: shuf_zext_8i8_to_8i32:
1077 ; AVX1: # BB#0: # %entry
1078 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1079 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1080 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1081 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1082 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1085 ; AVX2-LABEL: shuf_zext_8i8_to_8i32:
1086 ; AVX2: # BB#0: # %entry
1087 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1088 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1091 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
1092 %Z = bitcast <32 x i8> %B to <8 x i32>
; Zero extension from a non-zero starting element: the shuffle takes bytes 6
; and 7 of %A, each followed by seven bytes of the zeroinitializer operand
; (mask index 16), and the <16 x i8> result is bitcast to <2 x i64>.
; Expected code: a chain of unpacks against zero on the SSE2 run, a single
; pshufb on the SSSE3 run, and a 48-bit right shift ((v)psrlq) followed by
; (v)pmovzxbq on the SSE4.1 and AVX runs.
1096 define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
1097 ; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
1098 ; SSE2: # BB#0: # %entry
1099 ; SSE2-NEXT: pxor %xmm1, %xmm1
1100 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1101 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1102 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1105 ; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
1106 ; SSSE3: # BB#0: # %entry
1107 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1110 ; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
1111 ; SSE41: # BB#0: # %entry
1112 ; SSE41-NEXT: psrlq $48, %xmm0
1113 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1116 ; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6:
1117 ; AVX: # BB#0: # %entry
1118 ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
1119 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1122 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1123 %Z = bitcast <16 x i8> %B to <2 x i64>
; Zero extension from a non-zero starting element: the shuffle takes bytes 11
; through 14 of %A, each followed by seven bytes of the zeroinitializer
; operand (mask index 16), and the <32 x i8> result is bitcast to <4 x i64>.
; Expected code: a byte shift plus unpacks against zero on the SSE2 run, two
; pshufb on the SSSE3 run, and byte shifts ((v)psrldq) feeding (v)pmovzxbq on
; the SSE4.1, AVX1 and AVX2 runs.
1127 define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
1128 ; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1129 ; SSE2: # BB#0: # %entry
1130 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1131 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
1132 ; SSE2-NEXT: pxor %xmm2, %xmm2
1133 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1134 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1135 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1136 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1137 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1138 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1141 ; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
1142 ; SSSE3: # BB#0: # %entry
1143 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1144 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
1145 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
1148 ; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
1149 ; SSE41: # BB#0: # %entry
1150 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1151 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1152 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1153 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1154 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1155 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1158 ; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
1159 ; AVX1: # BB#0: # %entry
1160 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1161 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1162 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1163 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1164 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1167 ; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1168 ; AVX2: # BB#0: # %entry
1169 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1170 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1173 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1174 %Z = bitcast <32 x i8> %B to <4 x i64>
; Zero extension from a non-zero starting element: the shuffle takes i16
; elements 3 and 4 of %A, each followed by three elements of the
; zeroinitializer operand (mask index 8), and the <8 x i16> result is bitcast
; to <2 x i64>.
; NOTE(review): the "offset6" in the name presumably refers to the byte offset
; of element 3 (the SSE4.1/AVX expectations shift right by 6 bytes) rather
; than an element index -- confirm.
1178 define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
1179 ; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
1180 ; SSE2: # BB#0: # %entry
1181 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1182 ; SSE2-NEXT: pxor %xmm1, %xmm1
1183 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1184 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1187 ; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
1188 ; SSSE3: # BB#0: # %entry
1189 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1190 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1191 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1192 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1195 ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
1196 ; SSE41: # BB#0: # %entry
1197 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1198 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1201 ; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6:
1202 ; AVX: # BB#0: # %entry
1203 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1204 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1207 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
1208 %Z = bitcast <8 x i16> %B to <2 x i64>
; Zero extension from a non-zero starting element: the shuffle takes i16
; elements 2 through 5 of %A, each followed by three elements of the
; zeroinitializer operand (mask index 8), and the <16 x i16> result is bitcast
; to <4 x i64>.
; Expected code: unpacks against zero on the SSE2/SSSE3 runs; (v)pshufd to
; position the source words followed by (v)pmovzxwq on the SSE4.1, AVX1 and
; AVX2 runs.
1212 define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
1213 ; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1214 ; SSE2: # BB#0: # %entry
1215 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1216 ; SSE2-NEXT: pxor %xmm2, %xmm2
1217 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1218 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1219 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1220 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1223 ; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
1224 ; SSSE3: # BB#0: # %entry
1225 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1226 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1227 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1228 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1229 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1230 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1233 ; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
1234 ; SSE41: # BB#0: # %entry
1235 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1236 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1237 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1238 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1239 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1242 ; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
1243 ; AVX1: # BB#0: # %entry
1244 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1245 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1246 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1247 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1248 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1251 ; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1252 ; AVX2: # BB#0: # %entry
1253 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,2,3,5,6,6,7]
1254 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1257 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
1258 %Z = bitcast <16 x i16> %B to <4 x i64>
; Zero extension from a non-zero starting element: the shuffle takes i16
; elements 1 through 4 of %A, each followed by an element of the
; zeroinitializer operand (mask index 8), and the <8 x i16> result is bitcast
; to <4 x i32>.
; All SSE runs share one expectation and all AVX runs share another: shift
; right by two bytes ((v)psrldq), then interleave with a zeroed register
; ((v)punpcklwd).
1262 define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
1263 ; SSE-LABEL: shuf_zext_8i16_to_4i32_offset1:
1264 ; SSE: # BB#0: # %entry
1265 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1266 ; SSE-NEXT: pxor %xmm1, %xmm1
1267 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1270 ; AVX-LABEL: shuf_zext_8i16_to_4i32_offset1:
1271 ; AVX: # BB#0: # %entry
1272 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1273 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1274 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1277 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
1278 %Z = bitcast <8 x i16> %B to <4 x i32>
; Zero extension from a non-zero starting element with undef lanes: the
; shuffle takes i16 elements 3 through 7 of %A followed by three undef
; elements, each paired with an element of the zeroinitializer operand (mask
; index 8), and the <16 x i16> result is bitcast to <8 x i32>.
; Expected code: byte shifts ((v)psrldq / (v)pslldq) combined with unpacks
; against zero on the SSE and AVX1 runs; a ymm-wide vpsrldq followed by
; vpmovzxwd on the AVX2 run.
1282 define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
1283 ; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1284 ; SSE2: # BB#0: # %entry
1285 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1286 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1287 ; SSE2-NEXT: pxor %xmm2, %xmm2
1288 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1289 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1292 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
1293 ; SSSE3: # BB#0: # %entry
1294 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1295 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1296 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1297 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1298 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1301 ; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
1302 ; SSE41: # BB#0: # %entry
1303 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1304 ; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1305 ; SSE41-NEXT: pxor %xmm2, %xmm2
1306 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1307 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1310 ; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
1311 ; AVX1: # BB#0: # %entry
1312 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1313 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1314 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1315 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1316 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1319 ; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1320 ; AVX2: # BB#0: # %entry
1321 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
1322 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1325 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
1326 %Z = bitcast <16 x i16> %B to <8 x i32>
; Zero extension of the upper half of a 256-bit input, with undef lanes: the
; shuffle takes i16 elements 8, 9, 10, 11, 12, undef, 14, undef of %A, each
; paired with an element of the zeroinitializer operand (mask index 16), and
; the <16 x i16> result is bitcast to <8 x i32>.
; Expected code: the SSE runs read only xmm1; the AVX1 run starts with
; vextractf128 of the high lane; the AVX2 run swaps lanes with vperm2i128 and
; then uses a ymm-wide vpmovzxwd.
1330 define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
1331 ; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
1332 ; SSE2: # BB#0: # %entry
1333 ; SSE2-NEXT: pxor %xmm2, %xmm2
1334 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1335 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1336 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1339 ; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
1340 ; SSSE3: # BB#0: # %entry
1341 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1342 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1343 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1344 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1347 ; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
1348 ; SSE41: # BB#0: # %entry
1349 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1350 ; SSE41-NEXT: pxor %xmm2, %xmm2
1351 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
1352 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
1353 ; SSE41-NEXT: movdqa %xmm2, %xmm1
1356 ; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
1357 ; AVX1: # BB#0: # %entry
1358 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1359 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
1360 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1361 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
1362 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1363 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1366 ; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
1367 ; AVX2: # BB#0: # %entry
1368 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1369 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1372 %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
1373 %Z = bitcast <16 x i16> %B to <8 x i32>
; Zero extension from a non-zero starting element: the shuffle takes i32
; elements 2 and 3 of %A, each followed by an element of the zeroinitializer
; operand (mask index 4), and the <4 x i32> result is bitcast to <2 x i64>.
; All SSE runs share one expectation and all AVX runs share another: zero a
; register and interleave the high halves with (v)punpckhdq.
1377 define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
1378 ; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
1379 ; SSE: # BB#0: # %entry
1380 ; SSE-NEXT: pxor %xmm1, %xmm1
1381 ; SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1384 ; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
1385 ; AVX: # BB#0: # %entry
1386 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1387 ; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1390 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
1391 %Z = bitcast <4 x i32> %B to <2 x i64>
; Shuffle-as-zext test with undef lanes: elements of %A are interleaved with
; zeros by a shufflevector that widens to eight i32 lanes, then the result is
; viewed as <4 x i64>. Only the two middle i64 lanes have a defined low half
; (A[2] and A[3]); the first and last lanes have an undef low half.
; NOTE(review): the "offset1" in the name presumably refers to a source
; window starting at element 1 (elements 1..4, with element 1 and the
; out-of-range element 4 left undef) - confirm against the sibling tests.
; Each check prefix pins a different expected lowering: a constant-mask pand
; plus psrldq for SSE2 and SSSE3, pxor/pblendw plus psrldq for SSE41,
; vinsertps/vblendps/vinsertf128 for AVX1, and vpshufd + vpmovzxdq for AVX2.
1395 define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
1396 ; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
1397 ; SSE2: # BB#0: # %entry
1398 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1399 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
1400 ; SSE2-NEXT: pand %xmm1, %xmm0
1401 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1404 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
1405 ; SSSE3: # BB#0: # %entry
1406 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1407 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
1408 ; SSSE3-NEXT: pand %xmm1, %xmm0
1409 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1412 ; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
1413 ; SSE41: # BB#0: # %entry
1414 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1415 ; SSE41-NEXT: pxor %xmm0, %xmm0
1416 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1417 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1420 ; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
1421 ; AVX1: # BB#0: # %entry
1422 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[3],zero,zero,zero
1423 ; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
1424 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
1425 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1428 ; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
1429 ; AVX2: # BB#0: # %entry
1430 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,3,5,6,7,7]
1431 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; Even result positions select <undef, A[2], A[3], undef>; every odd position
; selects index 4, i.e. element 0 of the zeroinitializer operand.
1434 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
; Viewing the eight i32 lanes as four i64 lanes turns each (value, 0) pair
; into the value zero-extended to 64 bits on this little-endian target.
1435 %Z = bitcast <8 x i32> %B to <4 x i64>