; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; Zero-extend the low eight i8 lanes of a <16 x i8> to <8 x i16>.
define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_8i16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i16>
  ret <8 x i16> %C
}

; Zero-extend all sixteen i8 lanes to i16 (256-bit result).
define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
; SSE2-LABEL: zext_16i8_to_16i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_16i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_16i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_16i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_16i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: retq
entry:
  %B = zext <16 x i8> %A to <16 x i16>
  ret <16 x i16> %B
}

; Zero-extend the low four i8 lanes of a <16 x i8> to <4 x i32>.
define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i32>
  ret <4 x i32> %C
}

; Zero-extend the low eight i8 lanes of a <16 x i8> to <8 x i32> (256-bit result).
define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i32>
  ret <8 x i32> %C
}

; Zero-extend the low two i8 lanes of a <16 x i8> to <2 x i64>.
define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i8> %B to <2 x i64>
  ret <2 x i64> %C
}

; Zero-extend the low four i8 lanes of a <16 x i8> to <4 x i64> (256-bit result).
define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i64>
  ret <4 x i64> %C
}

; Zero-extend the low four i16 lanes of an <8 x i16> to <4 x i32>.
define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_8i16_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i32>
  ret <4 x i32> %C
}

; Zero-extend all eight i16 lanes to i32 (256-bit result).
define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
entry:
  %B = zext <8 x i16> %A to <8 x i32>
  ret <8 x i32> %B
}

; Zero-extend the low two i16 lanes of an <8 x i16> to <2 x i64>.
define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_8i16_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i16> %B to <2 x i64>
  ret <2 x i64> %C
}

; Zero-extend the low four i16 lanes of an <8 x i16> to <4 x i64> (256-bit result).
define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i16_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i16_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
; AVX2-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i64>
  ret <4 x i64> %C
}

; Zero-extend the low two i32 lanes of a <4 x i32> to <2 x i64>.
define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_4i32_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_4i32_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_4i32_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i32> %B to <2 x i64>
  ret <2 x i64> %C
}

; Zero-extend all four i32 lanes to i64 (256-bit result).
define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: retq
entry:
  %B = zext <4 x i32> %A to <4 x i64>
  ret <4 x i64> %B
}

; Load a <2 x i8> from memory and zero-extend it to <2 x i64>.
define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
; SSE2-LABEL: load_zext_2i8_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movzwl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_2i8_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movzwl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_2i8_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_2i8_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
entry:
  %X = load <2 x i8>, <2 x i8>* %ptr
  %Y = zext <2 x i8> %X to <2 x i64>
  ret <2 x i64> %Y
}

; Load a <4 x i8> from memory and zero-extend it to <4 x i32>.
define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
; SSE2-LABEL: load_zext_4i8_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i8_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i8_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_4i8_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT: retq
entry:
  %X = load <4 x i8>, <4 x i8>* %ptr
  %Y = zext <4 x i8> %X to <4 x i32>
  ret <4 x i32> %Y
}

; Load a <4 x i8> from memory and zero-extend it to <4 x i64> (256-bit result).
define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
; SSE2-LABEL: load_zext_4i8_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i8_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8],zero,zero,zero,zero,zero,zero,zero,xmm1[12],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i8_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_4i8_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_4i8_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
entry:
  %X = load <4 x i8>, <4 x i8>* %ptr
  %Y = zext <4 x i8> %X to <4 x i64>
  ret <4 x i64> %Y
}

; Load an <8 x i8> from memory and zero-extend it to <8 x i16>.
define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_8i8_to_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_8i8_to_8i16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX-NEXT: retq
entry:
  %X = load <8 x i8>, <8 x i8>* %ptr
  %Y = zext <8 x i8> %X to <8 x i16>
  ret <8 x i16> %Y
}

; Load an <8 x i8> from memory and zero-extend it to <8 x i32> (256-bit result).
define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[6],zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[14],zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_8i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT: retq
entry:
  %X = load <8 x i8>, <8 x i8>* %ptr
  %Y = zext <8 x i8> %X to <8 x i32>
  ret <8 x i32> %Y
}

; Load a <16 x i8> from memory and zero-extend it to <16 x i16> (256-bit result).
define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
; SSE2-LABEL: load_zext_16i8_to_16i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_16i8_to_16i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_16i8_to_16i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_16i8_to_16i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_16i8_to_16i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX2-NEXT: retq
entry:
  %X = load <16 x i8>, <16 x i8>* %ptr
  %Y = zext <16 x i8> %X to <16 x i16>
  ret <16 x i16> %Y
}

; Load a <2 x i16> from memory and zero-extend it to <2 x i64>.
define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
; SSE2-LABEL: load_zext_2i16_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_2i16_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_2i16_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_2i16_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX-NEXT: retq
entry:
  %X = load <2 x i16>, <2 x i16>* %ptr
  %Y = zext <2 x i16> %X to <2 x i64>
  ret <2 x i64> %Y
}

720 define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
; Zero-extend a loaded <4 x i16> to <4 x i32>. SSE2/SSSE3: movq load then
; punpcklwd with zero; SSE4.1/AVX: single (v)pmovzxwd from memory.
; Autogenerated CHECK lines — regenerate with update_llc_test_checks.py.
721 ; SSE2-LABEL: load_zext_4i16_to_4i32:
722 ; SSE2: # BB#0: # %entry
723 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
724 ; SSE2-NEXT: pxor %xmm1, %xmm1
725 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
728 ; SSSE3-LABEL: load_zext_4i16_to_4i32:
729 ; SSSE3: # BB#0: # %entry
730 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
731 ; SSSE3-NEXT: pxor %xmm1, %xmm1
732 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
735 ; SSE41-LABEL: load_zext_4i16_to_4i32:
736 ; SSE41: # BB#0: # %entry
737 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
740 ; AVX-LABEL: load_zext_4i16_to_4i32:
741 ; AVX: # BB#0: # %entry
742 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
745 %X = load <4 x i16>, <4 x i16>* %ptr
746 %Y = zext <4 x i16> %X to <4 x i32>
750 define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
; Zero-extend a loaded <4 x i16> to <4 x i64> (256-bit result, two xmm halves
; pre-AVX2). SSE4.1/AVX1 use two pmovzxwq loads; AVX2 uses one ymm vpmovzxwq.
; NOTE(review): the SSE2 lowering still goes through shuffles + pand masks —
; kept as the script generated it. Regenerate CHECK lines; don't hand-edit.
751 ; SSE2-LABEL: load_zext_4i16_to_4i64:
752 ; SSE2: # BB#0: # %entry
753 ; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
754 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
755 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
756 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0,65535,0,0,0]
757 ; SSE2-NEXT: pand %xmm2, %xmm0
758 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
759 ; SSE2-NEXT: pand %xmm2, %xmm1
762 ; SSSE3-LABEL: load_zext_4i16_to_4i64:
763 ; SSSE3: # BB#0: # %entry
764 ; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
765 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
766 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
767 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero
768 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9],zero,zero,zero,zero,zero,zero,xmm1[12,13],zero,zero,zero,zero,zero,zero
771 ; SSE41-LABEL: load_zext_4i16_to_4i64:
772 ; SSE41: # BB#0: # %entry
773 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
774 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
777 ; AVX1-LABEL: load_zext_4i16_to_4i64:
778 ; AVX1: # BB#0: # %entry
779 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
780 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
781 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
784 ; AVX2-LABEL: load_zext_4i16_to_4i64:
785 ; AVX2: # BB#0: # %entry
786 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
789 %X = load <4 x i16>, <4 x i16>* %ptr
790 %Y = zext <4 x i16> %X to <4 x i64>
794 define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
; Zero-extend a loaded <8 x i16> to <8 x i32>. SSE2/SSSE3 split into
; punpcklwd/punpckhwd against zero; SSE4.1/AVX1 use two pmovzxwd loads;
; AVX2 uses one ymm vpmovzxwd. Autogenerated CHECK lines — regenerate only.
795 ; SSE2-LABEL: load_zext_8i16_to_8i32:
796 ; SSE2: # BB#0: # %entry
797 ; SSE2-NEXT: movdqa (%rdi), %xmm1
798 ; SSE2-NEXT: pxor %xmm2, %xmm2
799 ; SSE2-NEXT: movdqa %xmm1, %xmm0
800 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
801 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
804 ; SSSE3-LABEL: load_zext_8i16_to_8i32:
805 ; SSSE3: # BB#0: # %entry
806 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
807 ; SSSE3-NEXT: pxor %xmm2, %xmm2
808 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
809 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
810 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
813 ; SSE41-LABEL: load_zext_8i16_to_8i32:
814 ; SSE41: # BB#0: # %entry
815 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
816 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
819 ; AVX1-LABEL: load_zext_8i16_to_8i32:
820 ; AVX1: # BB#0: # %entry
821 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
822 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
823 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
826 ; AVX2-LABEL: load_zext_8i16_to_8i32:
827 ; AVX2: # BB#0: # %entry
828 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
831 %X = load <8 x i16>, <8 x i16>* %ptr
832 %Y = zext <8 x i16> %X to <8 x i32>
836 define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
; Zero-extend a loaded <2 x i32> to <2 x i64>. SSE2/SSSE3: movq + punpckldq
; with zero; SSE4.1/AVX: single (v)pmovzxdq folded with the load.
; Autogenerated CHECK lines — regenerate, don't hand-edit.
837 ; SSE2-LABEL: load_zext_2i32_to_2i64:
838 ; SSE2: # BB#0: # %entry
839 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
840 ; SSE2-NEXT: pxor %xmm1, %xmm1
841 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
844 ; SSSE3-LABEL: load_zext_2i32_to_2i64:
845 ; SSSE3: # BB#0: # %entry
846 ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
847 ; SSSE3-NEXT: pxor %xmm1, %xmm1
848 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
851 ; SSE41-LABEL: load_zext_2i32_to_2i64:
852 ; SSE41: # BB#0: # %entry
853 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
856 ; AVX-LABEL: load_zext_2i32_to_2i64:
857 ; AVX: # BB#0: # %entry
858 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
861 %X = load <2 x i32>, <2 x i32>* %ptr
862 %Y = zext <2 x i32> %X to <2 x i64>
866 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
; Zero-extend a loaded <4 x i32> to <4 x i64>. SSE2/SSSE3 split via
; punpckldq/punpckhdq with zero; SSE4.1/AVX1 use two pmovzxdq loads;
; AVX2 uses one ymm vpmovzxdq. Autogenerated CHECK lines — regenerate only.
867 ; SSE2-LABEL: load_zext_4i32_to_4i64:
868 ; SSE2: # BB#0: # %entry
869 ; SSE2-NEXT: movdqa (%rdi), %xmm1
870 ; SSE2-NEXT: pxor %xmm2, %xmm2
871 ; SSE2-NEXT: movdqa %xmm1, %xmm0
872 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
873 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
876 ; SSSE3-LABEL: load_zext_4i32_to_4i64:
877 ; SSSE3: # BB#0: # %entry
878 ; SSSE3-NEXT: movdqa (%rdi), %xmm1
879 ; SSSE3-NEXT: pxor %xmm2, %xmm2
880 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
881 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
882 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
885 ; SSE41-LABEL: load_zext_4i32_to_4i64:
886 ; SSE41: # BB#0: # %entry
887 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
888 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
891 ; AVX1-LABEL: load_zext_4i32_to_4i64:
892 ; AVX1: # BB#0: # %entry
893 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
894 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
895 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
898 ; AVX2-LABEL: load_zext_4i32_to_4i64:
899 ; AVX2: # BB#0: # %entry
900 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
903 %X = load <4 x i32>, <4 x i32>* %ptr
904 %Y = zext <4 x i32> %X to <4 x i64>
908 define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
; Zero-extend an in-register <8 x i8> to <8 x i32>. All targets first mask
; the i8 lanes with a pand/vpand of a constant-pool value (the <8 x i8> is
; stored as the low bytes of 16-bit lanes), then widen. Autogenerated CHECK
; lines — regenerate with update_llc_test_checks.py.
909 ; SSE2-LABEL: zext_8i8_to_8i32:
910 ; SSE2: # BB#0: # %entry
911 ; SSE2-NEXT: movdqa %xmm0, %xmm1
912 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
913 ; SSE2-NEXT: pxor %xmm2, %xmm2
914 ; SSE2-NEXT: movdqa %xmm1, %xmm0
915 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
916 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
919 ; SSSE3-LABEL: zext_8i8_to_8i32:
920 ; SSSE3: # BB#0: # %entry
921 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
922 ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
923 ; SSSE3-NEXT: pxor %xmm2, %xmm2
924 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
925 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
926 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
929 ; SSE41-LABEL: zext_8i8_to_8i32:
930 ; SSE41: # BB#0: # %entry
931 ; SSE41-NEXT: movdqa %xmm0, %xmm1
932 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
933 ; SSE41-NEXT: pxor %xmm2, %xmm2
934 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
935 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
938 ; AVX1-LABEL: zext_8i8_to_8i32:
939 ; AVX1: # BB#0: # %entry
940 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
941 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
942 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
943 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
944 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
947 ; AVX2-LABEL: zext_8i8_to_8i32:
948 ; AVX2: # BB#0: # %entry
949 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
950 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
953 %t = zext <8 x i8> %z to <8 x i32>
957 define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; Shuffle-with-zero that is equivalent to zext <8 x i16> -> <8 x i32> (each
; element interleaved with lane 8 of a zeroinitializer, then bitcast).
; Verifies the shuffle is recognized and lowered like a real zext:
; unpck-with-zero on SSE2/SSSE3, pmovzxwd on SSE4.1+, ymm vpmovzxwd on AVX2.
; Autogenerated CHECK lines — regenerate, don't hand-edit.
958 ; SSE2-LABEL: shuf_zext_8i16_to_8i32:
959 ; SSE2: # BB#0: # %entry
960 ; SSE2-NEXT: movdqa %xmm0, %xmm1
961 ; SSE2-NEXT: pxor %xmm2, %xmm2
962 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
963 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
966 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
967 ; SSSE3: # BB#0: # %entry
968 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
969 ; SSSE3-NEXT: pxor %xmm2, %xmm2
970 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
971 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
974 ; SSE41-LABEL: shuf_zext_8i16_to_8i32:
975 ; SSE41: # BB#0: # %entry
976 ; SSE41-NEXT: movdqa %xmm0, %xmm1
977 ; SSE41-NEXT: pxor %xmm2, %xmm2
978 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
979 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
982 ; AVX1-LABEL: shuf_zext_8i16_to_8i32:
983 ; AVX1: # BB#0: # %entry
984 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
985 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
986 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
987 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
990 ; AVX2-LABEL: shuf_zext_8i16_to_8i32:
991 ; AVX2: # BB#0: # %entry
992 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
995 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
996 %Z = bitcast <16 x i16> %B to <8 x i32>
1000 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; Shuffle-with-zero equivalent to zext <4 x i32> -> <4 x i64> (bitcast of an
; interleave with a zero vector). AVX2 matches it to a single ymm vpmovzxdq.
; NOTE(review): the AVX1 sequence (insertps/blendpd/permilps) is what the
; script captured, not an ideal lowering. Regenerate CHECK lines; don't hand-edit.
1001 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
1002 ; SSE2: # BB#0: # %entry
1003 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1004 ; SSE2-NEXT: pxor %xmm2, %xmm2
1005 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1006 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1009 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
1010 ; SSSE3: # BB#0: # %entry
1011 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1012 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1013 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1014 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1017 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
1018 ; SSE41: # BB#0: # %entry
1019 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1020 ; SSE41-NEXT: pxor %xmm2, %xmm2
1021 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
1022 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1025 ; AVX1-LABEL: shuf_zext_4i32_to_4i64:
1026 ; AVX1: # BB#0: # %entry
1027 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
1028 ; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1029 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1030 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
1031 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1034 ; AVX2-LABEL: shuf_zext_4i32_to_4i64:
1035 ; AVX2: # BB#0: # %entry
1036 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1039 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
1040 %Z = bitcast <8 x i32> %B to <4 x i64>
1044 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
; Shuffle-with-zero equivalent to zext <8 x i8> -> <8 x i32>. Targets first
; compact the byte elements (packuswb on SSE2, pshufb elsewhere) and then
; widen with unpack-with-zero or pmovzxbd. Autogenerated CHECK lines —
; regenerate with update_llc_test_checks.py; don't hand-edit.
1045 ; SSE2-LABEL: shuf_zext_8i8_to_8i32:
1046 ; SSE2: # BB#0: # %entry
1047 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1048 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
1049 ; SSE2-NEXT: packuswb %xmm1, %xmm1
1050 ; SSE2-NEXT: pxor %xmm2, %xmm2
1051 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1052 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1053 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1054 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1057 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
1058 ; SSSE3: # BB#0: # %entry
1059 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1060 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1061 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1062 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1063 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1064 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1065 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1068 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
1069 ; SSE41: # BB#0: # %entry
1070 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1071 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1072 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1073 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1074 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1077 ; AVX1-LABEL: shuf_zext_8i8_to_8i32:
1078 ; AVX1: # BB#0: # %entry
1079 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1080 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1081 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1082 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1083 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1086 ; AVX2-LABEL: shuf_zext_8i8_to_8i32:
1087 ; AVX2: # BB#0: # %entry
1088 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1089 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1092 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
1093 %Z = bitcast <32 x i8> %B to <8 x i32>
1097 define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
; Shuffle-with-zero equivalent to zext of bytes 6..7 (a non-zero offset) to
; <2 x i64>. SSSE3 uses one pshufb; SSE4.1/AVX shift the source down with
; psrlq $48 then pmovzxbq. Autogenerated CHECK lines — regenerate only.
1098 ; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
1099 ; SSE2: # BB#0: # %entry
1100 ; SSE2-NEXT: pxor %xmm1, %xmm1
1101 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1102 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1103 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1106 ; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
1107 ; SSSE3: # BB#0: # %entry
1108 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1111 ; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
1112 ; SSE41: # BB#0: # %entry
1113 ; SSE41-NEXT: psrlq $48, %xmm0
1114 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1117 ; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6:
1118 ; AVX: # BB#0: # %entry
1119 ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
1120 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1123 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1124 %Z = bitcast <16 x i8> %B to <2 x i64>
1128 define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
; Shuffle-with-zero equivalent to zext of bytes 11..14 to <4 x i64>.
; SSE4.1/AVX1 use psrldq byte-shifts to align each pair, then pmovzxbq;
; AVX2 does a ymm psrldq followed by one vpmovzxbq. Autogenerated CHECK
; lines — regenerate with update_llc_test_checks.py; don't hand-edit.
1129 ; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1130 ; SSE2: # BB#0: # %entry
1131 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1132 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
1133 ; SSE2-NEXT: pxor %xmm2, %xmm2
1134 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1135 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1136 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1137 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1138 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1139 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1142 ; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
1143 ; SSSE3: # BB#0: # %entry
1144 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1145 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
1146 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
1149 ; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
1150 ; SSE41: # BB#0: # %entry
1151 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1152 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1153 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1154 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1155 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1156 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1159 ; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
1160 ; AVX1: # BB#0: # %entry
1161 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1162 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1163 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1164 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1165 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1168 ; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
1169 ; AVX2: # BB#0: # %entry
1170 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1171 ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1174 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1175 %Z = bitcast <32 x i8> %B to <4 x i64>
1179 define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
; Shuffle-with-zero equivalent to zext of words 3..4 to <2 x i64>. All
; targets byte-shift the source down (psrldq) and then widen — with
; unpack-with-zero on SSE2/SSSE3 and pmovzxwq on SSE4.1/AVX.
; Autogenerated CHECK lines — regenerate, don't hand-edit.
1180 ; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
1181 ; SSE2: # BB#0: # %entry
1182 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1183 ; SSE2-NEXT: pxor %xmm1, %xmm1
1184 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1185 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1188 ; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
1189 ; SSSE3: # BB#0: # %entry
1190 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1191 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1192 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1193 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1196 ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
1197 ; SSE41: # BB#0: # %entry
1198 ; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1199 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1202 ; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6:
1203 ; AVX: # BB#0: # %entry
1204 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1205 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1208 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
1209 %Z = bitcast <8 x i16> %B to <2 x i64>
1213 define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
; Shuffle-with-zero equivalent to zext of words 2..5 to <4 x i64>.
; SSE4.1/AVX1 reposition with pshufd then pmovzxwq per half; AVX2 uses a
; ymm pshufd plus one vpmovzxwq. Autogenerated CHECK lines — regenerate only.
1214 ; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1215 ; SSE2: # BB#0: # %entry
1216 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1217 ; SSE2-NEXT: pxor %xmm2, %xmm2
1218 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1219 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1220 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1221 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1224 ; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
1225 ; SSSE3: # BB#0: # %entry
1226 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1227 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1228 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1229 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1230 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1231 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1234 ; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
1235 ; SSE41: # BB#0: # %entry
1236 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1237 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1238 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1239 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1240 ; SSE41-NEXT: movdqa %xmm2, %xmm0
1243 ; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
1244 ; AVX1: # BB#0: # %entry
1245 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1246 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1247 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1248 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1249 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1252 ; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
1253 ; AVX2: # BB#0: # %entry
1254 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,2,3,5,6,6,7]
1255 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1258 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
1259 %Z = bitcast <16 x i16> %B to <4 x i64>
1263 define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
; Shuffle-with-zero equivalent to zext of words 1..4 to <4 x i32>. All SSE
; levels share one lowering (psrldq by 2 then punpcklwd with zero), hence the
; combined SSE prefix. Autogenerated CHECK lines — regenerate, don't hand-edit.
1264 ; SSE-LABEL: shuf_zext_8i16_to_4i32_offset1:
1265 ; SSE: # BB#0: # %entry
1266 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1267 ; SSE-NEXT: pxor %xmm1, %xmm1
1268 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1271 ; AVX-LABEL: shuf_zext_8i16_to_4i32_offset1:
1272 ; AVX: # BB#0: # %entry
1273 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1274 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1275 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1278 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
1279 %Z = bitcast <8 x i16> %B to <4 x i32>
1283 define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
; Shuffle-with-zero equivalent to zext of words starting at offset 3 to
; <8 x i32>; the mask's upper lanes are undef, which the lowerings exploit.
; Autogenerated CHECK lines — regenerate with update_llc_test_checks.py.
1284 ; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1285 ; SSE2: # BB#0: # %entry
1286 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1287 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1288 ; SSE2-NEXT: pxor %xmm2, %xmm2
1289 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1290 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1293 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
1294 ; SSSE3: # BB#0: # %entry
1295 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1296 ; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1297 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1298 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1299 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1302 ; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
1303 ; SSE41: # BB#0: # %entry
1304 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1305 ; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1306 ; SSE41-NEXT: pxor %xmm2, %xmm2
1307 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1308 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1311 ; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
1312 ; AVX1: # BB#0: # %entry
1313 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1314 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1315 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1316 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1317 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1320 ; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
1321 ; AVX2: # BB#0: # %entry
1322 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
1323 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1326 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
1327 %Z = bitcast <16 x i16> %B to <8 x i32>
1331 define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
; Shuffle-with-zero equivalent to zext of the high half (words 8..15, some
; undef) of a <16 x i16> to <8 x i32>. Pre-AVX the high half is already in
; %xmm1; AVX1 extracts it with vextractf128, AVX2 swaps 128-bit lanes with
; vperm2i128 then widens. Autogenerated CHECK lines — regenerate only.
1332 ; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
1333 ; SSE2: # BB#0: # %entry
1334 ; SSE2-NEXT: pxor %xmm2, %xmm2
1335 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1336 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1337 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1340 ; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
1341 ; SSSE3: # BB#0: # %entry
1342 ; SSSE3-NEXT: pxor %xmm2, %xmm2
1343 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1344 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1345 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1348 ; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
1349 ; SSE41: # BB#0: # %entry
1350 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1351 ; SSE41-NEXT: pxor %xmm2, %xmm2
1352 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
1353 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
1354 ; SSE41-NEXT: movdqa %xmm2, %xmm1
1357 ; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
1358 ; AVX1: # BB#0: # %entry
1359 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1360 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
1361 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1362 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
1363 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1364 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1367 ; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
1368 ; AVX2: # BB#0: # %entry
1369 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1370 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1373 %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
1374 %Z = bitcast <16 x i16> %B to <8 x i32>
1378 define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
; Shuffle-with-zero equivalent to zext of elements 2..3 to <2 x i64>: a
; single punpckhdq against zero suffices on every target.
; Autogenerated CHECK lines — regenerate, don't hand-edit.
1379 ; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
1380 ; SSE: # BB#0: # %entry
1381 ; SSE-NEXT: pxor %xmm1, %xmm1
1382 ; SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1385 ; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
1386 ; AVX: # BB#0: # %entry
1387 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1388 ; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1391 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
1392 %Z = bitcast <4 x i32> %B to <2 x i64>
1396 define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
1397 ; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
1398 ; SSE2: # BB#0: # %entry
1399 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1400 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
1401 ; SSE2-NEXT: pand %xmm1, %xmm0
1402 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1405 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
1406 ; SSSE3: # BB#0: # %entry
1407 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
1408 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
1409 ; SSSE3-NEXT: pand %xmm1, %xmm0
1410 ; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1413 ; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
1414 ; SSE41: # BB#0: # %entry
1415 ; SSE41-NEXT: movdqa %xmm0, %xmm1
1416 ; SSE41-NEXT: pxor %xmm0, %xmm0
1417 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1418 ; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1421 ; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
1422 ; AVX1: # BB#0: # %entry
1423 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[3],zero,zero,zero
1424 ; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
1425 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
1426 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1429 ; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
1430 ; AVX2: # BB#0: # %entry
1431 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,3,5,6,7,7]
1432 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1435 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
1436 %Z = bitcast <8 x i32> %B to <4 x i64>