1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
8 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
9 target triple = "x86_64-unknown-unknown"
; Single-input v8i16 shuffle with mask <0,1,0,1,2,3,2,3>. %b is passed as the
; second operand, but no mask index (all are below 8) selects from it.
; Both the SSE and AVX check groups expect a single dword shuffle
; (pshufd / vpshufd) with lanes [0,0,1,1].
11 define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
12 ; SSE-LABEL: shuffle_v8i16_01012323:
14 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
17 ; AVX-LABEL: shuffle_v8i16_01012323:
19 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
21 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
22 ret <8 x i16> %shuffle
; Single-input shuffle with mask <6,7,4,5,2,3,0,1>, which reverses the order of
; the four adjacent i16 pairs of %a. The SSE and AVX check groups each expect
; one dword shuffle (pshufd / vpshufd) with lanes [3,2,1,0].
24 define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
25 ; SSE-LABEL: shuffle_v8i16_67452301:
27 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
30 ; AVX-LABEL: shuffle_v8i16_67452301:
32 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
34 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
35 ret <8 x i16> %shuffle
; Two-input shuffle with mask <4..11>, i.e. elements 4-7 of %a followed by
; elements 0-3 of %b (indices 8-11 address the second operand).
; Expected lowering differs per feature level in the checks below.
;   - SSE2 checks expect one shufpd taking xmm0[1],xmm1[0].
;   - SSSE3 and SSE41 checks expect palignr into xmm1 plus a movdqa to xmm0.
;   - AVX checks expect one vpalignr writing xmm0 directly.
37 define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
38 ; SSE2-LABEL: shuffle_v8i16_456789AB:
40 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
43 ; SSSE3-LABEL: shuffle_v8i16_456789AB:
45 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
46 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
49 ; SSE41-LABEL: shuffle_v8i16_456789AB:
51 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
52 ; SSE41-NEXT: movdqa %xmm1, %xmm0
55 ; AVX-LABEL: shuffle_v8i16_456789AB:
57 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
59 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
60 ret <8 x i16> %shuffle
; Splat of element 0 of %a into all eight lanes (mask is all zeros).
;   - SSE2 checks expect a three-step sequence, pshufd [0,1,0,3] then pshuflw
;     and pshufhw, each broadcasting within its half.
;   - SSSE3, SSE41 and AVX1 checks expect one byte shuffle (pshufb / vpshufb)
;     repeating bytes 0,1.
;   - AVX2 checks expect a single vpbroadcastw.
63 define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
64 ; SSE2-LABEL: shuffle_v8i16_00000000:
66 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
67 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
68 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
71 ; SSSE3-LABEL: shuffle_v8i16_00000000:
73 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
76 ; SSE41-LABEL: shuffle_v8i16_00000000:
78 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
81 ; AVX1-LABEL: shuffle_v8i16_00000000:
83 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
86 ; AVX2-LABEL: shuffle_v8i16_00000000:
88 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
90 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
91 ret <8 x i16> %shuffle
; Single-input shuffle with mask <0,0,0,0,4,4,4,4>. Element 0 fills the low
; four lanes and element 4 fills the high four lanes, so no element crosses
; between halves. Both check groups expect pshuflw followed by pshufhw
; (v-prefixed forms under the AVX prefix).
93 define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
94 ; SSE-LABEL: shuffle_v8i16_00004444:
96 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
97 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
100 ; AVX-LABEL: shuffle_v8i16_00004444:
102 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
103 ; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
105 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
106 ret <8 x i16> %shuffle
; Mask <u,0,u,1,u,2,u,3>. Odd result lanes take elements 0-3 of %a and even
; lanes are undef. The checks expect the undef lanes to be filled so that the
; whole shuffle becomes a self-unpack, punpcklwd of xmm0 with itself
; ([0,0,1,1,2,2,3,3]).
108 define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
109 ; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
111 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
114 ; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
116 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
118 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
119 ret <8 x i16> %shuffle
; Mask <u,4,u,5,u,6,u,7>. Odd result lanes take elements 4-7 of %a and even
; lanes are undef. High-half counterpart of the u0u1u2u3 case. The checks
; expect a self-unpack, punpckhwd of xmm0 with itself ([4,4,5,5,6,6,7,7]).
121 define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
122 ; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
124 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
127 ; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
129 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
131 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
132 ret <8 x i16> %shuffle
; Single-input shuffle with mask <3,1,2,0,6,7,4,5>. The low half is permuted
; at word granularity and the high half swaps its two dwords. The checks
; expect pshuflw [3,1,2,0] for the low half and then pshufd [0,1,3,2] for the
; high-half dword swap (v-prefixed forms under the AVX prefix).
134 define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
135 ; SSE-LABEL: shuffle_v8i16_31206745:
137 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
138 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
141 ; AVX-LABEL: shuffle_v8i16_31206745:
143 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
144 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
146 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
147 ret <8 x i16> %shuffle
; Single-input shuffle with mask <4,4,4,4,0,0,0,0>. Element 4 is splatted into
; the low half and element 0 into the high half, so both elements cross halves.
;   - SSE2 checks expect pshufd [2,1,0,3] to exchange dwords 0 and 2, then
;     pshuflw / pshufhw to broadcast within each half.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
149 define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
150 ; SSE2-LABEL: shuffle_v8i16_44440000:
152 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
153 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
154 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
157 ; SSSE3-LABEL: shuffle_v8i16_44440000:
159 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
162 ; SSE41-LABEL: shuffle_v8i16_44440000:
164 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
167 ; AVX-LABEL: shuffle_v8i16_44440000:
169 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
171 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
172 ret <8 x i16> %shuffle
; Single-input shuffle with mask <2,3,0,1,6,7,4,5>, which swaps the two i16
; pairs inside each 64-bit half. The checks expect a single dword shuffle
; (pshufd / vpshufd) with lanes [1,0,3,2].
174 define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
175 ; SSE-LABEL: shuffle_v8i16_23016745:
177 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
180 ; AVX-LABEL: shuffle_v8i16_23016745:
182 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
184 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
185 ret <8 x i16> %shuffle
; Single-input shuffle with mask <2,3,0,2,6,7,4,5>. Compared with the 23016745
; case, lane 3 takes element 2 instead of 1, so the low half is no longer a
; pure dword permutation. The checks expect pshuflw [2,3,0,2] for the low
; half and then pshufd [0,1,3,2] to swap the two high dwords.
187 define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
188 ; SSE-LABEL: shuffle_v8i16_23026745:
190 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
191 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
194 ; AVX-LABEL: shuffle_v8i16_23026745:
196 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
197 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
199 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
200 ret <8 x i16> %shuffle
; Single-input shuffle with mask <2,3,0,1,6,7,4,7>. The low half is a dword
; swap, and the high half needs a word-granular permutation because lane 7
; takes element 7. The checks expect pshufd [1,0,2,3] for the low-half swap
; and then pshufhw [6,7,4,7] for the high half.
202 define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
203 ; SSE-LABEL: shuffle_v8i16_23016747:
205 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
206 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
209 ; AVX-LABEL: shuffle_v8i16_23016747:
211 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
212 ; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
214 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
215 ret <8 x i16> %shuffle
; Single-input shuffle with mask <7,5,6,4,3,1,2,0>. The low result half is
; built only from elements 4-7 and the high result half only from 0-3.
;   - SSE2 checks expect pshufd [2,3,0,1] to swap the 64-bit halves, then
;     pshuflw [3,1,2,0] and pshufhw [7,5,6,4] to permute within each half.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
217 define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
218 ; SSE2-LABEL: shuffle_v8i16_75643120:
220 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
221 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
222 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
225 ; SSSE3-LABEL: shuffle_v8i16_75643120:
227 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
230 ; SSE41-LABEL: shuffle_v8i16_75643120:
232 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
235 ; AVX-LABEL: shuffle_v8i16_75643120:
237 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
239 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
240 ret <8 x i16> %shuffle
; Single-input shuffle with mask <1,0,5,4,5,4,1,0>. Only elements 0, 1, 4 and
; 5 are used, i.e. dwords 0 and 2 of %a.
;   - SSE2 checks expect pshufd [0,2,2,0] to gather those dwords into both
;     halves, then pshuflw [1,0,3,2] and pshufhw [5,4,7,6] to swap the words
;     of each pair.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
243 define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
244 ; SSE2-LABEL: shuffle_v8i16_10545410:
246 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
247 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
248 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
251 ; SSSE3-LABEL: shuffle_v8i16_10545410:
253 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
256 ; SSE41-LABEL: shuffle_v8i16_10545410:
258 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
261 ; AVX-LABEL: shuffle_v8i16_10545410:
263 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
265 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
266 ret <8 x i16> %shuffle
; Single-input shuffle with mask <5,4,1,0,5,4,1,0>. Both result halves hold
; the same four words, taken from elements 0, 1, 4 and 5 of %a.
;   - SSE2 checks expect pshufd [0,2,2,0], then pshuflw [3,2,1,0] and
;     pshufhw [5,4,7,6] to order the words within each half.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
268 define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
269 ; SSE2-LABEL: shuffle_v8i16_54105410:
271 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
272 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
273 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
276 ; SSSE3-LABEL: shuffle_v8i16_54105410:
278 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
281 ; SSE41-LABEL: shuffle_v8i16_54105410:
283 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
286 ; AVX-LABEL: shuffle_v8i16_54105410:
288 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
290 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
291 ret <8 x i16> %shuffle
; Single-input shuffle with mask <5,4,1,0,1,0,5,4>. It uses the same four
; source elements as the 54105410 case, but the high half has its two pairs
; in the opposite order.
;   - SSE2 checks expect pshufd [0,2,2,0], then pshuflw [3,2,1,0] and
;     pshufhw [7,6,5,4].
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
293 define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
294 ; SSE2-LABEL: shuffle_v8i16_54101054:
296 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
297 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
298 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
301 ; SSSE3-LABEL: shuffle_v8i16_54101054:
303 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
306 ; SSE41-LABEL: shuffle_v8i16_54101054:
308 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
311 ; AVX-LABEL: shuffle_v8i16_54101054:
313 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
315 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
316 ret <8 x i16> %shuffle
; Single-input shuffle with mask <0,4,4,0,0,4,4,0>. Only elements 0 and 4 of
; %a are used, in the same pattern in both halves.
;   - SSE2 checks expect pshufd [0,2,2,0] to place both source dwords in each
;     half, then pshuflw [0,2,2,0] and pshufhw [6,4,4,6] to pick the words.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
318 define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
319 ; SSE2-LABEL: shuffle_v8i16_04400440:
321 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
322 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
323 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
326 ; SSSE3-LABEL: shuffle_v8i16_04400440:
328 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
331 ; SSE41-LABEL: shuffle_v8i16_04400440:
333 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
336 ; AVX-LABEL: shuffle_v8i16_04400440:
338 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
340 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
341 ret <8 x i16> %shuffle
; Single-input shuffle with mask <4,0,0,4,4,0,0,4>. It is the 04400440 pattern
; with the roles of elements 0 and 4 exchanged.
;   - SSE2 checks expect pshufd [0,2,2,0], then pshuflw [2,0,0,2] and
;     pshufhw [4,6,6,4].
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
343 define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
344 ; SSE2-LABEL: shuffle_v8i16_40044004:
346 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
347 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
348 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
351 ; SSSE3-LABEL: shuffle_v8i16_40044004:
353 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
356 ; SSE41-LABEL: shuffle_v8i16_40044004:
358 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
361 ; AVX-LABEL: shuffle_v8i16_40044004:
363 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
365 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
366 ret <8 x i16> %shuffle
; Single-input shuffle with mask <2,6,4,0,5,1,7,3>. Each result half draws two
; words from the low source half and two from the high source half.
;   - SSE2 checks expect five instructions. A pshuflw / pshufhw pair groups
;     words into movable dwords, pshufd [0,3,2,1] moves dwords across halves,
;     and a second pshuflw / pshufhw pair sets the final word order.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
369 define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
370 ; SSE2-LABEL: shuffle_v8i16_26405173:
372 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
373 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
374 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
375 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
376 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
379 ; SSSE3-LABEL: shuffle_v8i16_26405173:
381 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
384 ; SSE41-LABEL: shuffle_v8i16_26405173:
386 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
389 ; AVX-LABEL: shuffle_v8i16_26405173:
391 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
393 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
394 ret <8 x i16> %shuffle
; Single-input shuffle with mask <2,0,6,4,5,1,7,3>. It matches the 26405173
; case in the high result half and differs only in the order of the low four
; lanes.
;   - SSE2 checks expect the same five-instruction shape as that case, with
;     only the second pshuflw immediate differing ([1,0,2,3]).
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
396 define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
397 ; SSE2-LABEL: shuffle_v8i16_20645173:
399 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
400 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
401 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
402 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
403 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
406 ; SSSE3-LABEL: shuffle_v8i16_20645173:
408 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
411 ; SSE41-LABEL: shuffle_v8i16_20645173:
413 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
416 ; AVX-LABEL: shuffle_v8i16_20645173:
418 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
420 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
421 ret <8 x i16> %shuffle
; Single-input shuffle with mask <2,6,4,0,1,3,7,5>. It matches the 26405173
; case in the low result half, and the high half is <1,3,7,5>.
;   - SSE2 checks expect four instructions, pshuflw, pshufhw, pshufd
;     [0,3,1,2] and a final pshuflw. No trailing pshufhw is expected because
;     the pshufd already leaves the high half in its final order.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
423 define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
424 ; SSE2-LABEL: shuffle_v8i16_26401375:
426 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
427 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
428 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
429 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
432 ; SSSE3-LABEL: shuffle_v8i16_26401375:
434 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
437 ; SSE41-LABEL: shuffle_v8i16_26401375:
439 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
442 ; AVX-LABEL: shuffle_v8i16_26401375:
444 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
446 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
447 ret <8 x i16> %shuffle
; Single-input shuffle with mask <6,6,7,5,1,6,4,3>. The low result half comes
; entirely from the high source half, and the high result half mixes words
; from both source halves, with element 6 used three times overall.
;   - SSE2 checks expect five instructions, a pshuflw / pshufhw pair, then
;     pshufd [2,3,2,0], then a second pshuflw / pshufhw pair.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
450 define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
451 ; SSE2-LABEL: shuffle_v8i16_66751643:
453 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
454 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
455 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
456 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
457 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
460 ; SSSE3-LABEL: shuffle_v8i16_66751643:
462 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
465 ; SSE41-LABEL: shuffle_v8i16_66751643:
467 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
470 ; AVX-LABEL: shuffle_v8i16_66751643:
472 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
474 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
475 ret <8 x i16> %shuffle
; Single-input shuffle with mask <6,0,5,1,4,7,5,4>. The low result half mixes
; words from both source halves, and the high result half comes entirely
; from the high source half.
;   - SSE2 checks expect four instructions, pshufhw [6,5,4,7], pshufd
;     [0,2,2,3], pshuflw [2,0,3,1] and a final pshufhw [6,7,5,6].
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
; The second shufflevector operand is %b, matching the other single-input
; tests in this file. No mask index is 8 or above, so the second operand never
; contributes an element to the result.
478 define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
479 ; SSE2-LABEL: shuffle_v8i16_60514754:
481 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
482 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
483 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
484 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
487 ; SSSE3-LABEL: shuffle_v8i16_60514754:
489 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
492 ; SSE41-LABEL: shuffle_v8i16_60514754:
494 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
497 ; AVX-LABEL: shuffle_v8i16_60514754:
499 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
501 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
502 ret <8 x i16> %shuffle
; Single-input shuffle with mask <0,0,4,4,4,4,4,4>. Element 0 fills lanes 0-1
; and element 4 fills the remaining six lanes.
;   - SSE2 checks expect pshufd [0,2,2,3] to bring dword 2 into the low half,
;     then pshuflw [0,0,2,2] and pshufhw [4,4,4,4].
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
505 define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
506 ; SSE2-LABEL: shuffle_v8i16_00444444:
508 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
509 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
510 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
513 ; SSSE3-LABEL: shuffle_v8i16_00444444:
515 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
518 ; SSE41-LABEL: shuffle_v8i16_00444444:
520 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
523 ; AVX-LABEL: shuffle_v8i16_00444444:
525 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
527 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
528 ret <8 x i16> %shuffle
; Single-input shuffle with mask <4,4,0,0,4,4,4,4>. Element 0 fills lanes 2-3
; and element 4 fills the other six lanes.
;   - SSE2 checks expect pshufd [0,2,2,3], then pshuflw [2,2,0,0] and
;     pshufhw [4,4,4,4].
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
530 define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
531 ; SSE2-LABEL: shuffle_v8i16_44004444:
533 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
534 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
535 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
538 ; SSSE3-LABEL: shuffle_v8i16_44004444:
540 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
543 ; SSE41-LABEL: shuffle_v8i16_44004444:
545 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
548 ; AVX-LABEL: shuffle_v8i16_44004444:
550 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
552 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
553 ret <8 x i16> %shuffle
; Single-input shuffle with mask <0,4,4,0,4,4,4,4>. The low half alternates
; elements 0 and 4 as 0,4,4,0, and the high half is a splat of element 4.
;   - SSE2 checks expect pshufd [0,2,2,3], then pshuflw [0,2,2,0] and
;     pshufhw [4,4,4,4].
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
555 define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
556 ; SSE2-LABEL: shuffle_v8i16_04404444:
558 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
559 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
560 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
563 ; SSSE3-LABEL: shuffle_v8i16_04404444:
565 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
568 ; SSE41-LABEL: shuffle_v8i16_04404444:
570 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
573 ; AVX-LABEL: shuffle_v8i16_04404444:
575 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
577 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
578 ret <8 x i16> %shuffle
; Single-input shuffle with mask <0,4,4,0,0,0,0,0>. The low half is 0,4,4,0
; and the high half is a splat of element 0.
;   - SSE2 checks expect pshufd [0,2,0,3], which puts dword 2 in the low half
;     and dword 0 in the high half, then pshuflw [0,2,2,0] and
;     pshufhw [4,4,4,4].
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
580 define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
581 ; SSE2-LABEL: shuffle_v8i16_04400000:
583 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
584 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
585 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
588 ; SSSE3-LABEL: shuffle_v8i16_04400000:
590 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
593 ; SSE41-LABEL: shuffle_v8i16_04400000:
595 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
598 ; AVX-LABEL: shuffle_v8i16_04400000:
600 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
602 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
603 ret <8 x i16> %shuffle
; Single-input shuffle with mask <0,4,4,0,4,5,6,7>. The high half is left in
; place and the low half becomes 0,4,4,0. All check groups (no pshufb variant
; here) expect pshufd [0,2,2,3] to copy dword 2 into the low half while
; keeping the high half, then pshuflw [0,2,2,0].
605 define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
606 ; SSE-LABEL: shuffle_v8i16_04404567:
608 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
609 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
612 ; AVX-LABEL: shuffle_v8i16_04404567:
614 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
615 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
617 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
618 ret <8 x i16> %shuffle
; Variant of the 00444444 case with lane 1 undef (mask <0,u,4,4,4,4,4,4>).
; In the expected output the undef lane is left as the identity element, so
; pshuflw uses [0,1,2,2] and the byte shuffles keep bytes 2,3 in place.
;   - SSE2 checks expect pshufd [0,2,2,3], pshuflw [0,1,2,2] and
;     pshufhw [4,4,4,4].
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
621 define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
622 ; SSE2-LABEL: shuffle_v8i16_0X444444:
624 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
625 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
626 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
629 ; SSSE3-LABEL: shuffle_v8i16_0X444444:
631 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
634 ; SSE41-LABEL: shuffle_v8i16_0X444444:
636 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
639 ; AVX-LABEL: shuffle_v8i16_0X444444:
641 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
643 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
644 ret <8 x i16> %shuffle
; Variant of the 44004444 case with lane 2 undef (mask <4,4,u,0,4,4,4,4>).
; In the expected output the undef lane is filled with element 4, so pshuflw
; uses [2,2,2,0] and the byte shuffles use bytes 8,9 for that lane.
;   - SSE2 checks expect pshufd [0,2,2,3], pshuflw [2,2,2,0] and
;     pshufhw [4,4,4,4].
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
646 define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
647 ; SSE2-LABEL: shuffle_v8i16_44X04444:
649 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
650 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
651 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
654 ; SSSE3-LABEL: shuffle_v8i16_44X04444:
656 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
659 ; SSE41-LABEL: shuffle_v8i16_44X04444:
661 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
664 ; AVX-LABEL: shuffle_v8i16_44X04444:
666 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
668 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
669 ret <8 x i16> %shuffle
; Variant of the 04404444 case with lane 0 undef (mask <u,4,4,0,4,4,4,4>).
; The expected instruction sequences are identical to those of 04404444, so
; the undef lane ends up holding element 0.
;   - SSE2 checks expect pshufd [0,2,2,3], pshuflw [0,2,2,0] and
;     pshufhw [4,4,4,4].
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
671 define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
672 ; SSE2-LABEL: shuffle_v8i16_X4404444:
674 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
675 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
676 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
679 ; SSSE3-LABEL: shuffle_v8i16_X4404444:
681 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
684 ; SSE41-LABEL: shuffle_v8i16_X4404444:
686 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
689 ; AVX-LABEL: shuffle_v8i16_X4404444:
691 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
693 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
694 ret <8 x i16> %shuffle
; Single-input shuffle with mask <0,1,2,7,u,u,u,u>. Only the low half is
; defined, and lane 3 takes element 7 from the high source half.
;   - SSE2 checks expect pshufd [0,2,1,3] to move dword 1 into the high half,
;     pshufhw [4,7,6,7] to pair elements 2 and 7, and pshufd [0,2,2,3] to
;     bring that pair back to dword 1.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
;     The bytes chosen for the undef lanes mirror what the SSE2 sequence
;     leaves there.
697 define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
698 ; SSE2-LABEL: shuffle_v8i16_0127XXXX:
700 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
701 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
702 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
705 ; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
707 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
710 ; SSE41-LABEL: shuffle_v8i16_0127XXXX:
712 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
715 ; AVX-LABEL: shuffle_v8i16_0127XXXX:
717 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
719 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
720 ret <8 x i16> %shuffle
; Single-input shuffle with mask <u,u,u,u,4,5,6,3>. Only the high half is
; defined, and lane 7 takes element 3 from the low source half.
;   - SSE2 checks expect pshufd [3,1,2,0] to move dword 3 into the low half,
;     pshuflw [0,3,2,3] to pair elements 6 and 3, and pshufd [0,1,2,0] to
;     copy that pair into dword 3.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
723 define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
724 ; SSE2-LABEL: shuffle_v8i16_XXXX4563:
726 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
727 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
728 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
731 ; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
733 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
736 ; SSE41-LABEL: shuffle_v8i16_XXXX4563:
738 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
741 ; AVX-LABEL: shuffle_v8i16_XXXX4563:
743 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
745 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
746 ret <8 x i16> %shuffle
; Single-input shuffle with mask <4,5,6,3,u,u,u,u>. It asks for the same four
; elements as the XXXX4563 case, but places them in the low result half.
;   - SSE2 checks expect the same first two instructions as that case
;     (pshufd [3,1,2,0], pshuflw [0,3,2,3]), followed by pshufd [2,0,2,3] to
;     move the two built dwords into the low half.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
749 define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
750 ; SSE2-LABEL: shuffle_v8i16_4563XXXX:
752 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
753 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
754 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
757 ; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
759 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
762 ; SSE41-LABEL: shuffle_v8i16_4563XXXX:
764 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
767 ; AVX-LABEL: shuffle_v8i16_4563XXXX:
769 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
771 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
772 ret <8 x i16> %shuffle
; Single-input shuffle with mask <0,1,2,7,4,5,6,3>, i.e. the identity with
; elements 3 and 7 exchanged.
;   - SSE2 checks expect pshufd [0,2,1,3] to bring dwords 1 and 3 together in
;     the high half, pshufhw [6,5,4,7] to exchange the two words, and pshufd
;     [0,3,1,2] to put the dwords in their final positions.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
775 define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
776 ; SSE2-LABEL: shuffle_v8i16_01274563:
778 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
779 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
780 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
783 ; SSSE3-LABEL: shuffle_v8i16_01274563:
785 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
788 ; SSE41-LABEL: shuffle_v8i16_01274563:
790 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
793 ; AVX-LABEL: shuffle_v8i16_01274563:
795 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
797 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
798 ret <8 x i16> %shuffle
; Single-input shuffle with mask <4,5,6,3,0,1,2,7>, i.e. the two 64-bit halves
; swapped, with elements 3 and 7 kept in their original lanes.
;   - SSE2 checks expect pshufd [3,1,2,0] to bring dwords 3 and 1 together in
;     the low half, pshuflw [0,3,2,1] to exchange the two words, and pshufd
;     [2,0,3,1] to put the dwords in their final positions.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
801 define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
802 ; SSE2-LABEL: shuffle_v8i16_45630127:
804 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
805 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
806 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
809 ; SSSE3-LABEL: shuffle_v8i16_45630127:
811 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
814 ; SSE41-LABEL: shuffle_v8i16_45630127:
816 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
819 ; AVX-LABEL: shuffle_v8i16_45630127:
821 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
823 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
824 ret <8 x i16> %shuffle
; Single-input shuffle with mask <3,7,1,0,2,7,3,5>. Elements 3 and 7 each
; appear twice, and both result halves mix words from both source halves.
;   - SSE2 checks expect six instructions, the longest SSE2 sequence in this
;     part of the file. They are pshufhw, pshufd, pshufhw, pshufd, then a
;     final pshuflw / pshufhw pair.
;   - SSSE3, SSE41 and AVX checks expect one byte shuffle (pshufb / vpshufb).
827 define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
828 ; SSE2-LABEL: shuffle_v8i16_37102735:
830 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
831 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
832 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
833 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
834 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
835 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
838 ; SSSE3-LABEL: shuffle_v8i16_37102735:
840 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
843 ; SSE41-LABEL: shuffle_v8i16_37102735:
845 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
848 ; AVX-LABEL: shuffle_v8i16_37102735:
850 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
852 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
853 ret <8 x i16> %shuffle
; Two-input shuffle with mask <0,8,1,9,2,10,3,11>. It interleaves elements 0-3
; of %a with elements 0-3 of %b (indices 8-11 address %b). The checks expect
; exactly one low unpack, punpcklwd / vpunpcklwd of xmm0 with xmm1.
856 define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
857 ; SSE-LABEL: shuffle_v8i16_08192a3b:
859 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
862 ; AVX-LABEL: shuffle_v8i16_08192a3b:
864 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
866 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
867 ret <8 x i16> %shuffle
; Two-input shuffle with mask <0,12,1,13,2,14,3,15>. It interleaves elements
; 0-3 of %a with elements 4-7 of %b (indices 12-15). The checks expect pshufd
; [2,3,0,1] on xmm1 to swap its halves so the wanted %b words sit in the low
; half, followed by a low unpack (punpcklwd / vpunpcklwd).
870 define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
871 ; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
873 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
874 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
877 ; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
879 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
880 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
882 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
883 ret <8 x i16> %shuffle
; Two-input shuffle with mask <4,12,5,13,6,14,7,15>. It interleaves elements
; 4-7 of %a with elements 4-7 of %b (indices 12-15). The checks expect exactly
; one high unpack, punpckhwd / vpunpckhwd of xmm0 with xmm1.
886 define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
887 ; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
889 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
892 ; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
894 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
896 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
897 ret <8 x i16> %shuffle
; Two-input shuffle with mask <4,8,5,9,6,10,7,11>. It interleaves elements 4-7
; of %a with elements 0-3 of %b (indices 8-11). Mirror image of the 0c1d2e3f
; case. Here the checks expect pshufd [2,3,0,1] on xmm0 (the %a side) to swap
; its halves, followed by a low unpack (punpcklwd / vpunpcklwd).
900 define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
901 ; SSE-LABEL: shuffle_v8i16_48596a7b:
903 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
904 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
907 ; AVX-LABEL: shuffle_v8i16_48596a7b:
909 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
910 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
912 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
913 ret <8 x i16> %shuffle
; Two-input shuffle with mask <0,8,1,9,6,14,7,15>. It interleaves dword 0
; (elements 0,1) and dword 3 (elements 6,7) of %a with the same dwords of %b.
; The checks expect pshufd [0,3,2,3] on both xmm1 and xmm0 to pack dwords 0
; and 3 into the low half of each register, followed by one low unpack
; (punpcklwd / vpunpcklwd).
916 define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
917 ; SSE-LABEL: shuffle_v8i16_08196e7f:
919 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
920 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
921 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
924 ; AVX-LABEL: shuffle_v8i16_08196e7f:
926 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
927 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
928 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
930 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
931 ret <8 x i16> %shuffle
; Two-input shuffle with mask <0,12,1,13,6,8,7,9>. Elements 0,1,6,7 of %a are
; interleaved with elements 4,5,0,1 of %b (indices 12,13,8,9). The checks
; expect pshufd [2,0,2,3] on xmm1 to put %b's dwords 2 and 0 in its low half,
; and pshufd [0,3,2,3] on xmm0 to put %a's dwords 0 and 3 in its low half.
; One low unpack (punpcklwd / vpunpcklwd) follows.
934 define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
935 ; SSE-LABEL: shuffle_v8i16_0c1d6879:
937 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
938 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
939 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
942 ; AVX-LABEL: shuffle_v8i16_0c1d6879:
944 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
945 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
946 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
948 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
949 ret <8 x i16> %shuffle
; Result is <a1,a0,b1,b0,a3,a2,b3,b2>: the low four lanes of both inputs,
; grouped in swapped pairs.
; Expected lowering: punpcklwd to interleave the low halves, then pshuflw and
; pshufhw to reorder the words inside each 64-bit half.
952 define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
953 ; SSE-LABEL: shuffle_v8i16_109832ba:
955 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
956 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
957 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
960 ; AVX-LABEL: shuffle_v8i16_109832ba:
962 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
963 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
964 ; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
966 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
967 ret <8 x i16> %shuffle
; Result is <b0,a0,b1,a1,b2,a2,b3,a3>: low halves interleaved with %b first
; (the commuted form of a plain unpack-low).
; Expected lowering: one punpcklwd with swapped operands. The two-operand SSE
; form writes xmm1 and needs a movdqa copy into xmm0; the three-operand AVX
; form writes xmm0 directly.
970 define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
971 ; SSE-LABEL: shuffle_v8i16_8091a2b3:
973 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
974 ; SSE-NEXT: movdqa %xmm1, %xmm0
977 ; AVX-LABEL: shuffle_v8i16_8091a2b3:
979 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
981 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
982 ret <8 x i16> %shuffle
; Result is <b4,a4,b5,a5,b6,a6,b7,a7>: high halves interleaved with %b first
; (the commuted form of a plain unpack-high).
; Expected lowering: one punpckhwd with swapped operands. The two-operand SSE
; form writes xmm1 and needs a movdqa copy into xmm0; the three-operand AVX
; form writes xmm0 directly.
984 define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
985 ; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
987 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
988 ; SSE-NEXT: movdqa %xmm1, %xmm0
991 ; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
993 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
995 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
996 ret <8 x i16> %shuffle
; Result is <a0,a2,a1,a3,b4,b6,b5,b7>: the low half of %a and the high half of
; %b, each with its two middle words swapped.
; Expected lowering: pshuflw on xmm0; pshufd + pshuflw on xmm1 to bring its
; high half down and permute it; then punpcklqdq to join the two low halves.
999 define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
1000 ; SSE-LABEL: shuffle_v8i16_0213cedf:
1002 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1003 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1004 ; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
1005 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1008 ; AVX-LABEL: shuffle_v8i16_0213cedf:
1010 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1011 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1012 ; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
1013 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1015 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
1016 ret <8 x i16> %shuffle
; Result lanes 0-3 are <a4,a4,a3,b2>; lanes 4-7 are undef.
; Expected lowering differs per feature level:
;  - the SSE2 run blends word 2 of xmm1 into xmm0 with a constant mask
;    (pand/pandn/por), then permutes with pshufd + pshuflw;
;  - the SSSE3 run uses one pshufb per input and ors the results;
;  - the SSE4.1 and AVX runs blend with pblendw, then use pshufd + pshuflw.
1019 define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
1020 ; SSE2-LABEL: shuffle_v8i16_443aXXXX:
1022 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
1023 ; SSE2-NEXT: pand %xmm2, %xmm0
1024 ; SSE2-NEXT: pandn %xmm1, %xmm2
1025 ; SSE2-NEXT: por %xmm0, %xmm2
1026 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
1027 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1030 ; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
1032 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
1033 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1034 ; SSSE3-NEXT: por %xmm1, %xmm0
1037 ; SSE41-LABEL: shuffle_v8i16_443aXXXX:
1039 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1040 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1041 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1044 ; AVX-LABEL: shuffle_v8i16_443aXXXX:
1046 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1047 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1048 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1050 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
1051 ret <8 x i16> %shuffle
; Result lanes 0-3 are <a0,a3,a2,b5>; lanes 4-7 are undef.
; Expected lowering differs per feature level:
;  - the SSE2 run merges halves with movsd and then needs a chain of
;    pshufd/pshufhw/pshuflw permutes;
;  - the SSSE3 run uses one pshufb per input and ors the results;
;  - the SSE4.1 and AVX1 runs blend halves with pblendw, then one pshufb;
;  - the AVX2 run uses the dword blend vpblendd instead, then one pshufb.
1054 define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
1055 ; SSE2-LABEL: shuffle_v8i16_032dXXXX:
1057 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1058 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0]
1059 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
1060 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1061 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1064 ; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
1066 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1067 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1068 ; SSSE3-NEXT: por %xmm1, %xmm0
1071 ; SSE41-LABEL: shuffle_v8i16_032dXXXX:
1073 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1074 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1077 ; AVX1-LABEL: shuffle_v8i16_032dXXXX:
1079 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1080 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1083 ; AVX2-LABEL: shuffle_v8i16_032dXXXX:
1085 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1086 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1088 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1089 ret <8 x i16> %shuffle
; Only result lane 3 is defined (b5, mask index 13); every other lane is undef.
; Expected lowering: a single pshufd reading xmm1, whose [2,2,3,3] pattern
; places words 4,5 of xmm1 in result words 2,3.
1091 define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
1092 ; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
1094 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1097 ; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
1099 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1101 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1102 ret <8 x i16> %shuffle
; Result lanes 0-3 are <a0,a1,a2,b5>; lanes 4-7 are undef.
; Expected lowering differs per feature level:
;  - the SSE2 run moves b5 into word 3 with pshufd and blends it in with a
;    constant mask (pand/pandn/por);
;  - the SSSE3 run uses one pshufb per input and ors the results;
;  - the SSE4.1 and AVX runs use pshufd on xmm1 followed by a pblendw.
1105 define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
1106 ; SSE2-LABEL: shuffle_v8i16_012dXXXX:
1108 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1109 ; SSE2-NEXT: pand %xmm2, %xmm0
1110 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1111 ; SSE2-NEXT: pandn %xmm1, %xmm2
1112 ; SSE2-NEXT: por %xmm2, %xmm0
1115 ; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
1117 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1118 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1119 ; SSSE3-NEXT: por %xmm1, %xmm0
1122 ; SSE41-LABEL: shuffle_v8i16_012dXXXX:
1124 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1125 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1128 ; AVX-LABEL: shuffle_v8i16_012dXXXX:
1130 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1131 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1133 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1134 ret <8 x i16> %shuffle
; Result lanes 4-7 are <b4,b5,b6,a3>; lanes 0-3 are undef.
; Expected lowering differs per feature level:
;  - the SSE2 run copies the low half of xmm0 upward with pshufd and blends
;    word 7 in with a constant mask (pand/pandn/por), then copies to xmm0;
;  - the SSSE3 run uses one pshufb per input and ors the results;
;  - the SSE4.1 and AVX1 runs use pshufd on xmm0 followed by a pblendw;
;  - the AVX2 run replaces that pshufd with vpbroadcastq.
1137 define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
1138 ; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
1140 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
1141 ; SSE2-NEXT: pand %xmm2, %xmm1
1142 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1143 ; SSE2-NEXT: pandn %xmm0, %xmm2
1144 ; SSE2-NEXT: por %xmm1, %xmm2
1145 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1148 ; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
1150 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
1151 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
1152 ; SSSE3-NEXT: por %xmm1, %xmm0
1155 ; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
1157 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1158 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1161 ; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
1163 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1164 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1167 ; AVX2-LABEL: shuffle_v8i16_XXXXcde3:
1169 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1170 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1172 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
1173 ret <8 x i16> %shuffle
; Result lanes 0-3 are <b4,b5,b6,a3>; lanes 4-7 are undef.
; Expected lowering differs per feature level:
;  - the SSE2 run swaps the halves of xmm1 with pshufd and blends word 3 of
;    xmm0 in with a constant mask (pand/pandn/por), then copies to xmm0;
;  - the SSSE3 run uses one pshufb per input and ors the results;
;  - the SSE4.1 and AVX runs use pshufd on xmm1 followed by a pblendw.
1176 define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
1177 ; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
1179 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1180 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1181 ; SSE2-NEXT: pand %xmm2, %xmm1
1182 ; SSE2-NEXT: pandn %xmm0, %xmm2
1183 ; SSE2-NEXT: por %xmm1, %xmm2
1184 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1187 ; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
1189 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
1190 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
1191 ; SSSE3-NEXT: por %xmm1, %xmm0
1194 ; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
1196 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1197 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1200 ; AVX-LABEL: shuffle_v8i16_cde3XXXX:
1202 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1203 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1205 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1206 ret <8 x i16> %shuffle
; Result is <a0,a1,a2,b5,b4,b5,b6,a3>: every lane is defined and the two
; inputs cross between the halves (b5 in the low half, a3 in the high half).
; Expected lowering differs per feature level:
;  - the SSE2 run merges halves with movsd and then needs a long chain of
;    pshufd/pshuflw/pshufhw permutes;
;  - the SSSE3 run uses one pshufb per input and ors the results;
;  - the SSE4.1 and AVX1 runs blend halves with pblendw, then one pshufb;
;  - the AVX2 run uses the dword blend vpblendd instead, then one pshufb.
1209 define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
1210 ; SSE2-LABEL: shuffle_v8i16_012dcde3:
1212 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1213 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,2,1]
1214 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
1215 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
1216 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1217 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
1218 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
1219 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
1222 ; SSSE3-LABEL: shuffle_v8i16_012dcde3:
1224 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
1225 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
1226 ; SSSE3-NEXT: por %xmm1, %xmm0
1229 ; SSE41-LABEL: shuffle_v8i16_012dcde3:
1231 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1232 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1235 ; AVX1-LABEL: shuffle_v8i16_012dcde3:
1237 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1238 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1241 ; AVX2-LABEL: shuffle_v8i16_012dcde3:
1243 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1244 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1246 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
1247 ret <8 x i16> %shuffle
; Result is <a0,b1,a2,a3,b4,b5,b6,a7>: every lane i comes from lane i of one
; of the inputs, so this is a pure per-lane blend with no element movement.
; Expected lowering: the SSE2 and SSSE3 runs blend through a constant mask
; (andps/andnps/orps); the SSE4.1 and AVX runs use a single pblendw.
1250 define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) {
1251 ; SSE2-LABEL: shuffle_v8i16_0923cde7:
1253 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1254 ; SSE2-NEXT: andps %xmm2, %xmm0
1255 ; SSE2-NEXT: andnps %xmm1, %xmm2
1256 ; SSE2-NEXT: orps %xmm2, %xmm0
1259 ; SSSE3-LABEL: shuffle_v8i16_0923cde7:
1261 ; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1262 ; SSSE3-NEXT: andps %xmm2, %xmm0
1263 ; SSSE3-NEXT: andnps %xmm1, %xmm2
1264 ; SSSE3-NEXT: orps %xmm2, %xmm0
1267 ; SSE41-LABEL: shuffle_v8i16_0923cde7:
1269 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1272 ; AVX-LABEL: shuffle_v8i16_0923cde7:
1274 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1276 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
1277 ret <8 x i16> %shuffle
; Defined result lanes: 3 = a1, 5 = a5, 6 = a7, 7 = b1; lanes 0-2 and 4 are
; undef.
; Expected lowering differs per feature level:
;  - the SSE2 run permutes xmm0 with pshuflw/pshufhw, moves the %b element up
;    with pshufd, and blends word 7 through a constant mask (pand/pandn/por);
;  - the SSSE3 run uses one pshufb per input and ors the results;
;  - the SSE4.1 and AVX1 runs use pshufd + pshuflw + pshufhw, then pblendw;
;  - the AVX2 run replaces the pshufd on xmm1 with vpbroadcastd.
1280 define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
1281 ; SSE2-LABEL: shuffle_v8i16_XXX1X579:
1283 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0]
1284 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
1285 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1286 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1287 ; SSE2-NEXT: pand %xmm1, %xmm0
1288 ; SSE2-NEXT: pandn %xmm2, %xmm1
1289 ; SSE2-NEXT: por %xmm0, %xmm1
1290 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1293 ; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
1295 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
1296 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
1297 ; SSSE3-NEXT: por %xmm1, %xmm0
1300 ; SSE41-LABEL: shuffle_v8i16_XXX1X579:
1302 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1303 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1304 ; SSE41-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1305 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1308 ; AVX1-LABEL: shuffle_v8i16_XXX1X579:
1310 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1311 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1312 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1313 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1316 ; AVX2-LABEL: shuffle_v8i16_XXX1X579:
1318 ; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
1319 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1320 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1321 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1323 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
1324 ret <8 x i16> %shuffle
; Defined result lanes: 2 = a4, 4 = b0, 5 = b2, 6 = b4; lanes 0, 1, 3 and 7
; are undef.
; Expected lowering differs per feature level:
;  - the SSE2 run permutes each input separately (pshufd on xmm0;
;    pshuflw/pshufd/pshufhw on xmm1) and merges the halves with movsd;
;  - the SSSE3 run uses one pshufb per input and ors the results;
;  - the SSE4.1 and AVX1 runs use pshufb on xmm1, pshufd on xmm0, then pblendw;
;  - the AVX2 run uses the dword blend vpblendd for the final merge.
1327 define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
1328 ; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
1330 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
1331 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
1332 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
1333 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
1334 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1337 ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
1339 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
1340 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
1341 ; SSSE3-NEXT: por %xmm1, %xmm0
1344 ; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
1346 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1347 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1348 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1351 ; AVX1-LABEL: shuffle_v8i16_XX4X8acX:
1353 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1354 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1355 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1358 ; AVX2-LABEL: shuffle_v8i16_XX4X8acX:
1360 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1361 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1362 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1364 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
1365 ret <8 x i16> %shuffle
; Scalar %i is inserted at element 0 of an undef vector, then shuffled against
; zeroinitializer (the first shuffle operand): mask index 8 puts %i in lane 0
; and indices 1-7 fill the remaining lanes with zeros.
; Expected lowering: zero-extend the 16-bit argument (movzwl) and move it into
; the vector register with movd; no separate zeroing instruction is expected.
1368 define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
1369 ; SSE-LABEL: shuffle_v8i16_8zzzzzzz:
1371 ; SSE-NEXT: movzwl %di, %eax
1372 ; SSE-NEXT: movd %eax, %xmm0
1375 ; AVX-LABEL: shuffle_v8i16_8zzzzzzz:
1377 ; AVX-NEXT: movzwl %di, %eax
1378 ; AVX-NEXT: vmovd %eax, %xmm0
1380 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1381 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1382 ret <8 x i16> %shuffle
; Scalar %i (element 0 of %a) lands in result lane 1 via mask index 8; every
; other mask index is below 8 and therefore selects a zero from the
; zeroinitializer operand, in scrambled order.
; Expected lowering: zero the register (pxor) and insert the scalar with
; pinsrw $1.
1385 define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
1386 ; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
1388 ; SSE-NEXT: pxor %xmm0, %xmm0
1389 ; SSE-NEXT: pinsrw $1, %edi, %xmm0
1392 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1394 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1395 ; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
1397 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1398 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
1399 ret <8 x i16> %shuffle
; Scalar %i (element 0 of %a) lands in result lane 5 via mask index 8; all
; other lanes use index 0, which selects a zero from the zeroinitializer
; operand.
; Expected lowering: zero the register (pxor) and insert the scalar with
; pinsrw $5.
1402 define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
1403 ; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
1405 ; SSE-NEXT: pxor %xmm0, %xmm0
1406 ; SSE-NEXT: pinsrw $5, %edi, %xmm0
1409 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1411 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1412 ; AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
1414 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1415 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
1416 ret <8 x i16> %shuffle
; Scalar %i (element 0 of %a) lands in result lane 7 via mask index 8; lanes
; 0, 3 and 6 select zeros from the zeroinitializer operand and lanes 1, 2, 4
; and 5 are undef.
; Expected lowering: zero the register (pxor) and insert the scalar with
; pinsrw $7, i.e. the undef lanes are simply left as zero.
1419 define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
1420 ; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
1422 ; SSE-NEXT: pxor %xmm0, %xmm0
1423 ; SSE-NEXT: pinsrw $7, %edi, %xmm0
1426 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1428 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1429 ; AVX-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0
1431 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1432 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
1433 ret <8 x i16> %shuffle
; Unlike the neighbouring insertion tests, %i is inserted at element 3 of %a
; (not element 0). Mask index 11 selects that element into result lane 2; all
; other lanes select zeros from the zeroinitializer operand.
; Expected lowering: zero the register (pxor) and insert the scalar with
; pinsrw $2.
1436 define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
1437 ; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
1439 ; SSE-NEXT: pxor %xmm0, %xmm0
1440 ; SSE-NEXT: pinsrw $2, %edi, %xmm0
1443 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1445 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1446 ; AVX-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0
1448 %a = insertelement <8 x i16> undef, i16 %i, i32 3
1449 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
1450 ret <8 x i16> %shuffle
; Result is <b5,b6,b7,a0,a1,a2,a3,a4>: the top three lanes of %b followed by
; the bottom five lanes of %a (a contiguous window across the two inputs).
; Expected lowering: the SSE2 run emulates it with psrldq on xmm1, pslldq on
; xmm0 and por; the SSSE3, SSE4.1 and AVX runs use a single palignr.
1453 define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
1454 ; SSE2-LABEL: shuffle_v8i16_def01234:
1456 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1457 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1458 ; SSE2-NEXT: por %xmm1, %xmm0
1461 ; SSSE3-LABEL: shuffle_v8i16_def01234:
1463 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1466 ; SSE41-LABEL: shuffle_v8i16_def01234:
1468 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1471 ; AVX-LABEL: shuffle_v8i16_def01234:
1473 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1475 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
1476 ret <8 x i16> %shuffle
; Partially-undef variant of the <13,14,15,0,1,2,3,4> window: only lane 1 (b6)
; and lanes 4-6 (a1,a2,a3) are defined, and each agrees with that full mask.
; Expected lowering is therefore the full-window one: psrldq/pslldq/por in
; the SSE2 run, a single palignr in the SSSE3, SSE4.1 and AVX runs.
1479 define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
1480 ; SSE2-LABEL: shuffle_v8i16_ueuu123u:
1482 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1483 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1484 ; SSE2-NEXT: por %xmm1, %xmm0
1487 ; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
1489 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1492 ; SSE41-LABEL: shuffle_v8i16_ueuu123u:
1494 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1497 ; AVX-LABEL: shuffle_v8i16_ueuu123u:
1499 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1501 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1502 ret <8 x i16> %shuffle
; Result is <a5,a6,a7,a0,a1,a2,a3,a4>: a rotation of %a alone; %b is not
; referenced by the mask.
; Expected lowering: the SSE2 run copies xmm0 to xmm1, then uses psrldq,
; pslldq and por; the SSSE3, SSE4.1 and AVX runs use a single palignr of xmm0
; with itself.
1505 define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
1506 ; SSE2-LABEL: shuffle_v8i16_56701234:
1508 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1509 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1510 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1511 ; SSE2-NEXT: por %xmm1, %xmm0
1514 ; SSSE3-LABEL: shuffle_v8i16_56701234:
1516 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1519 ; SSE41-LABEL: shuffle_v8i16_56701234:
1521 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1524 ; AVX-LABEL: shuffle_v8i16_56701234:
1526 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1528 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
1529 ret <8 x i16> %shuffle
; Partially-undef variant of the single-input rotation <5,6,7,0,1,2,3,4>: only
; lane 1 (a6) and lanes 4-6 (a1,a2,a3) are defined; %b is not referenced.
; Expected lowering matches the full rotation: movdqa + psrldq/pslldq/por in
; the SSE2 run, a single palignr of xmm0 with itself in the SSSE3, SSE4.1 and
; AVX runs.
1532 define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
1533 ; SSE2-LABEL: shuffle_v8i16_u6uu123u:
1535 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1536 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1537 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1538 ; SSE2-NEXT: por %xmm1, %xmm0
1541 ; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
1543 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1546 ; SSE41-LABEL: shuffle_v8i16_u6uu123u:
1548 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1551 ; AVX-LABEL: shuffle_v8i16_u6uu123u:
1553 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1555 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1556 ret <8 x i16> %shuffle
; Only lanes 4-6 are defined (a1,a2,a3); lanes 0-3 and 7 are undef and %b is
; not referenced.
; Expected lowering: a single whole-register byte shift (pslldq) that moves
; xmm0 up by six bytes, with no second shift or por needed.
1559 define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
1560 ; SSE-LABEL: shuffle_v8i16_uuuu123u:
1562 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1565 ; AVX-LABEL: shuffle_v8i16_uuuu123u:
1567 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1569 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1570 ret <8 x i16> %shuffle
; Result is <b3,b4,b5,b6,b7,a0,a1,a2>: the top five lanes of %b followed by
; the bottom three lanes of %a (a contiguous window across the two inputs).
; Expected lowering: the SSE2 run emulates it with psrldq on xmm1, pslldq on
; xmm0 and por; the SSSE3, SSE4.1 and AVX runs use a single palignr.
1573 define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
1574 ; SSE2-LABEL: shuffle_v8i16_bcdef012:
1576 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1577 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1578 ; SSE2-NEXT: por %xmm1, %xmm0
1581 ; SSSE3-LABEL: shuffle_v8i16_bcdef012:
1583 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1586 ; SSE41-LABEL: shuffle_v8i16_bcdef012:
1588 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1591 ; AVX-LABEL: shuffle_v8i16_bcdef012:
1593 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1595 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
1596 ret <8 x i16> %shuffle
; Partially-undef variant of the <11,12,13,14,15,0,1,2> window: only lanes 1-3
; (b4,b5,b6) and lane 6 (a1) are defined, and each agrees with that full mask.
; Expected lowering is therefore the full-window one: psrldq/pslldq/por in
; the SSE2 run, a single palignr in the SSSE3, SSE4.1 and AVX runs.
1599 define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
1600 ; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
1602 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1603 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1604 ; SSE2-NEXT: por %xmm1, %xmm0
1607 ; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
1609 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1612 ; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
1614 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1617 ; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
1619 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1621 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
1622 ret <8 x i16> %shuffle
; Result is <a3,a4,a5,a6,a7,a0,a1,a2>: a rotation of %a alone; %b is not
; referenced by the mask.
; Expected lowering: the SSE2 run copies xmm0 to xmm1, then uses psrldq,
; pslldq and por; the SSSE3, SSE4.1 and AVX runs use a single palignr of xmm0
; with itself.
1625 define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
1626 ; SSE2-LABEL: shuffle_v8i16_34567012:
1628 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1629 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1630 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1631 ; SSE2-NEXT: por %xmm1, %xmm0
1634 ; SSSE3-LABEL: shuffle_v8i16_34567012:
1636 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1639 ; SSE41-LABEL: shuffle_v8i16_34567012:
1641 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1644 ; AVX-LABEL: shuffle_v8i16_34567012:
1646 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1648 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
1649 ret <8 x i16> %shuffle
; Partially-undef variant of the single-input rotation <3,4,5,6,7,0,1,2>: only
; lanes 1-3 (a4,a5,a6) and lane 6 (a1) are defined; %b is not referenced.
; Expected lowering matches the full rotation: movdqa + psrldq/pslldq/por in
; the SSE2 run, a single palignr of xmm0 with itself in the SSSE3, SSE4.1 and
; AVX runs.
1652 define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
1653 ; SSE2-LABEL: shuffle_v8i16_u456uu1u:
1655 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1656 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1657 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1658 ; SSE2-NEXT: por %xmm1, %xmm0
1661 ; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
1663 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1666 ; SSE41-LABEL: shuffle_v8i16_u456uu1u:
1668 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1671 ; AVX-LABEL: shuffle_v8i16_u456uu1u:
1673 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1675 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
1676 ret <8 x i16> %shuffle
; Only lanes 1-3 are defined (a4,a5,a6); lanes 0 and 4-7 are undef and %b is
; not referenced.
; Expected lowering: a single whole-register byte shift (psrldq) that moves
; xmm0 down by six bytes, with no second shift or por needed.
1679 define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
1680 ; SSE-LABEL: shuffle_v8i16_u456uuuu:
1682 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1685 ; AVX-LABEL: shuffle_v8i16_u456uuuu:
1687 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1689 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
1690 ret <8 x i16> %shuffle
; Result is <a3,a4,a5,a6,a7,b0,b1,b2>: the top five lanes of %a followed by
; the bottom three lanes of %b (a contiguous window across the two inputs).
; Expected lowering: the SSE2 run uses psrldq on xmm0, pslldq on xmm1 and por.
; The SSSE3 and SSE4.1 runs use palignr, which writes xmm1 and so needs a
; movdqa copy into xmm0; the three-operand AVX form writes xmm0 directly.
1693 define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
1694 ; SSE2-LABEL: shuffle_v8i16_3456789a:
1696 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1697 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1698 ; SSE2-NEXT: por %xmm1, %xmm0
1701 ; SSSE3-LABEL: shuffle_v8i16_3456789a:
1703 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1704 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1707 ; SSE41-LABEL: shuffle_v8i16_3456789a:
1709 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1710 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1713 ; AVX-LABEL: shuffle_v8i16_3456789a:
1715 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1717 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
1718 ret <8 x i16> %shuffle
; Partially-undef variant of the <3,4,5,6,7,8,9,10> window: only lanes 1-3
; (a4,a5,a6) and lane 6 (b1) are defined, and each agrees with that full mask.
; Expected lowering is therefore the full-window one: psrldq/pslldq/por in
; the SSE2 run; palignr + movdqa in the SSSE3 and SSE4.1 runs; a single
; vpalignr in the AVX run.
1721 define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
1722 ; SSE2-LABEL: shuffle_v8i16_u456uu9u:
1724 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1725 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1726 ; SSE2-NEXT: por %xmm1, %xmm0
1729 ; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
1731 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1732 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1735 ; SSE41-LABEL: shuffle_v8i16_u456uu9u:
1737 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1738 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1741 ; AVX-LABEL: shuffle_v8i16_u456uu9u:
1743 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1745 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
1746 ret <8 x i16> %shuffle
; Result is <a5,a6,a7,b0,b1,b2,b3,b4>: the top three lanes of %a followed by
; the bottom five lanes of %b (a contiguous window across the two inputs).
; Expected lowering: the SSE2 run uses psrldq on xmm0, pslldq on xmm1 and por.
; The SSSE3 and SSE4.1 runs use palignr, which writes xmm1 and so needs a
; movdqa copy into xmm0; the three-operand AVX form writes xmm0 directly.
1749 define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
1750 ; SSE2-LABEL: shuffle_v8i16_56789abc:
1752 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1753 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1754 ; SSE2-NEXT: por %xmm1, %xmm0
1757 ; SSSE3-LABEL: shuffle_v8i16_56789abc:
1759 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1760 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1763 ; SSE41-LABEL: shuffle_v8i16_56789abc:
1765 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1766 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1769 ; AVX-LABEL: shuffle_v8i16_56789abc:
1771 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1773 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
1774 ret <8 x i16> %shuffle
; Partially-undef variant of the <5,6,7,8,9,10,11,12> window: only lane 1 (a6)
; and lanes 4-6 (b1,b2,b3) are defined, and each agrees with that full mask.
; Expected lowering is therefore the full-window one: psrldq/pslldq/por in
; the SSE2 run; palignr + movdqa in the SSSE3 and SSE4.1 runs; a single
; vpalignr in the AVX run.
1777 define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
1778 ; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
1780 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1781 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1782 ; SSE2-NEXT: por %xmm1, %xmm0
1785 ; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
1787 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1788 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1791 ; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
1793 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1794 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1797 ; AVX-LABEL: shuffle_v8i16_u6uu9abu:
1799 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1801 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
1802 ret <8 x i16> %shuffle
; Single-input test: lane 0 = a0 and lane 4 = a1; every other lane is undef.
; The second shuffle operand is zeroinitializer but no mask index selects it.
; Expected lowering: the SSE2 and SSSE3 runs use pshufd + pshufhw; the SSE4.1
; and AVX runs use a single pmovzxwq (word-to-qword zero extension), which
; satisfies the undef lanes with zeros.
1805 define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
1806 ; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
1808 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
1809 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
1812 ; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
1814 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
1815 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
1818 ; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
1820 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1823 ; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
1825 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1827 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
1828 ret <8 x i16> %shuffle
; Lanes 0 and 4 take elements 0 and 1 of %a; the other lanes read the
; zeroinitializer operand (indices 9-11 and 13-15), i.e. an i16 -> i64 zero
; extension of the two low elements.
; Expected lowering: pmovzxwq / vpmovzxwq from SSE4.1 on; before that a pxor
; followed by punpcklwd and punpckldq against the zeroed register.
1831 define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
1832 ; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
1834 ; SSE2-NEXT: pxor %xmm1, %xmm1
1835 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1836 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1839 ; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
1841 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1842 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1843 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1846 ; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
1848 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1851 ; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
1853 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1855 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1856 ret <8 x i16> %shuffle
; Even lanes take elements 0..3 of %a; odd lanes are undef.
; Expected lowering: punpcklwd of xmm0 with itself on SSE2 and SSSE3, and a
; word-to-dword zero extension (pmovzxwd / vpmovzxwd) from SSE4.1 on.
1859 define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
1860 ; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
1862 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1865 ; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
1867 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1870 ; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
1872 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1875 ; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
1877 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1879 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
1880 ret <8 x i16> %shuffle
; Even lanes take elements 0..3 of %a; odd lanes read the zeroinitializer
; operand (indices 9, 11, 13, 15), i.e. an i16 -> i32 zero extension.
; Expected lowering: pxor + punpcklwd against the zeroed register on SSE2 and
; SSSE3, and pmovzxwd / vpmovzxwd from SSE4.1 on.
1883 define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
1884 ; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
1886 ; SSE2-NEXT: pxor %xmm1, %xmm1
1887 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1890 ; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
1892 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1893 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1896 ; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
1898 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1901 ; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
1903 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1905 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1906 ret <8 x i16> %shuffle
1910 ; Shuffle to logical bit shifts
; Odd lanes take elements 0, 2, 4, 6 of %a; even lanes are zero (index 8).
; Expected to be matched as a 16-bit left shift of every 32-bit element
; (pslld / vpslld $16).
1912 define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
1913 ; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
1915 ; SSE-NEXT: pslld $16, %xmm0
1918 ; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
1920 ; AVX-NEXT: vpslld $16, %xmm0, %xmm0
1922 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
1923 ret <8 x i16> %shuffle
; Lanes 3 and 7 take elements 0 and 4 of %a; all other lanes are zero.
; Expected to be matched as a 48-bit left shift of each 64-bit half
; (psllq / vpsllq $48).
1926 define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
1927 ; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
1929 ; SSE-NEXT: psllq $48, %xmm0
1932 ; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
1934 ; AVX-NEXT: vpsllq $48, %xmm0, %xmm0
1936 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
1937 ret <8 x i16> %shuffle
; Lanes 2, 3 and 6 take elements 0, 1 and 4 of %a; lanes 0, 1 and 4 are zero;
; lanes 5 and 7 are undef and may hold whatever the shift leaves there.
; Expected to be matched as a 32-bit left shift of each 64-bit half
; (psllq / vpsllq $32).
1940 define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
1941 ; SSE-LABEL: shuffle_v8i16_zz01zX4X:
1943 ; SSE-NEXT: psllq $32, %xmm0
1946 ; AVX-LABEL: shuffle_v8i16_zz01zX4X:
1948 ; AVX-NEXT: vpsllq $32, %xmm0, %xmm0
1950 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
1951 ret <8 x i16> %shuffle
; Lanes 0 and 4 are zero; lanes 1, 3, 5, 6, 7 take elements 0, 2, 4, 5, 6 of
; %a; lane 2 is undef.
; Expected to be matched as a 16-bit left shift of each 64-bit half
; (psllq / vpsllq $16).
1954 define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
1955 ; SSE-LABEL: shuffle_v8i16_z0X2z456:
1957 ; SSE-NEXT: psllq $16, %xmm0
1960 ; AVX-LABEL: shuffle_v8i16_z0X2z456:
1962 ; AVX-NEXT: vpsllq $16, %xmm0, %xmm0
1964 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
1965 ret <8 x i16> %shuffle
; Even lanes take elements 1, 3 and 7 of %a (lane 4 is undef); odd lanes are
; zero (index 8).
; Expected to be matched as a 16-bit logical right shift of every 32-bit
; element (psrld / vpsrld $16).
1968 define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
1969 ; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
1971 ; SSE-NEXT: psrld $16, %xmm0
1974 ; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
1976 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
1978 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
1979 ret <8 x i16> %shuffle
; Lanes 0, 2, 4, 5, 6 take elements 1, 3, 5, 6, 7 of %a; lane 1 is undef;
; lanes 3 and 7 are zero (index 8).
; Expected to be matched as a 16-bit logical right shift of each 64-bit half
; (psrlq / vpsrlq $16).
1982 define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
1983 ; SSE-LABEL: shuffle_v8i16_1X3z567z:
1985 ; SSE-NEXT: psrlq $16, %xmm0
1988 ; AVX-LABEL: shuffle_v8i16_1X3z567z:
1990 ; AVX-NEXT: vpsrlq $16, %xmm0, %xmm0
1992 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
1993 ret <8 x i16> %shuffle
; Lanes 0, 1 take elements 2, 3 and lanes 4, 5 take elements 6, 7 of %a; the
; remaining lanes are zero (index 8).
; Expected to be matched as a 32-bit logical right shift of each 64-bit half
; (psrlq / vpsrlq $32).
1996 define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
1997 ; SSE-LABEL: shuffle_v8i16_23zz67zz:
1999 ; SSE-NEXT: psrlq $32, %xmm0
2002 ; AVX-LABEL: shuffle_v8i16_23zz67zz:
2004 ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
2006 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
2007 ret <8 x i16> %shuffle
; Lane 0 takes element 3 of %a; lanes 1, 5, 6, 7 are zero (index 8); lanes
; 2, 3 and 4 are undef.
; Expected to be matched as a 48-bit logical right shift of each 64-bit half
; (psrlq / vpsrlq $48).
2010 define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
2011 ; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
2013 ; SSE-NEXT: psrlq $48, %xmm0
2016 ; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
2018 ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
2020 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
2021 ret <8 x i16> %shuffle
; Low four lanes keep elements 0, 1, 3 of %a in place (lane 2 is undef); the
; high four lanes are zero or undef.
; Expected lowering: a single movq / vmovq that keeps the low 64 bits and
; zeroes the upper half.
2024 define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
2025 ; SSE-LABEL: shuffle_v8i16_01u3zzuz:
2027 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2030 ; AVX-LABEL: shuffle_v8i16_01u3zzuz:
2032 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2034 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
2035 ret <8 x i16> %shuffle
; Identity shuffle of %a except lane 1, which reads the zeroinitializer
; operand (index 9).
; Expected lowering: an andps with a constant-pool mask on SSE2 and SSSE3, and
; a pxor plus word blend (pblendw / vpblendw) from SSE4.1 on.
2038 define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
2039 ; SSE2-LABEL: shuffle_v8i16_0z234567:
2041 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2044 ; SSSE3-LABEL: shuffle_v8i16_0z234567:
2046 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2049 ; SSE41-LABEL: shuffle_v8i16_0z234567:
2051 ; SSE41-NEXT: pxor %xmm1, %xmm1
2052 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2055 ; AVX-LABEL: shuffle_v8i16_0z234567:
2057 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2058 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2060 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2061 ret <8 x i16> %shuffle
; Lanes 0, 5 and 7 keep the matching elements of %a; lanes 1-4 and 6 are zero
; (index 8).
; Expected lowering: an andps with a constant-pool mask on SSE2 and SSSE3, and
; a pxor plus word blend (pblendw / vpblendw) from SSE4.1 on.
2064 define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
2065 ; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
2067 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2070 ; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
2072 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2075 ; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
2077 ; SSE41-NEXT: pxor %xmm1, %xmm1
2078 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2081 ; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
2083 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2084 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2086 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
2087 ret <8 x i16> %shuffle
; Identity shuffle of %a except lane 7, which reads the zeroinitializer
; operand (index 15), i.e. the 0123456z pattern the function name describes.
; Expected lowering: an andps with a constant-pool mask on SSE2 and SSSE3, and
; a pxor plus word blend (pblendw / vpblendw) from SSE4.1 on.
; NOTE(review): the mask previously also zeroed lane 1 (index 9), which
; repeated the 0z234567 case instead of testing the named pattern. The blend
; assertions were adjusted by hand to match; re-run
; utils/update_llc_test_checks.py to confirm them.
2090 define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
2091 ; SSE2-LABEL: shuffle_v8i16_0123456z:
2093 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2096 ; SSSE3-LABEL: shuffle_v8i16_0123456z:
2098 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2101 ; SSE41-LABEL: shuffle_v8i16_0123456z:
2103 ; SSE41-NEXT: pxor %xmm1, %xmm1
2104 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2107 ; AVX-LABEL: shuffle_v8i16_0123456z:
2109 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2110 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2112 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
2113 ret <8 x i16> %shuffle
; Two-input shuffle with undef lanes: lane 0 takes %b element 7 (index 15),
; lanes 4 and 5 both take %b element 4 (index 12), and lanes 2 and 6 take %a
; elements 3 and 5.
; Expected lowering: a 2-byte pslldq of %a, a pshufhw of %b, and a punpckhdq
; that interleaves the high dwords of the two results.
2116 define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
2117 ; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
2119 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2120 ; SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2121 ; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2122 ; SSE-NEXT: movdqa %xmm1, %xmm0
2125 ; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
2127 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2128 ; AVX-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2129 ; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2131 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
2132 ret <8 x i16> %shuffle
; Lane 0 is zero (index 8), lanes 1..6 take elements 0..5 of %a, and lane 7
; is undef.
; Expected to be matched as a whole-register left shift by two bytes
; (pslldq / vpslldq).
2135 define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
2136 ; SSE-LABEL: shuffle_v8i16_8012345u:
2138 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2141 ; AVX-LABEL: shuffle_v8i16_8012345u:
2143 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2145 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>
2147 ret <8 x i16> %shuffle
; Loads an i32, inserts it into lane 0 of a zero <4 x i32>, bitcasts that to
; <8 x i16> and splats i16 element 0 (the low half of the loaded value under
; the little-endian datalayout).
; Expected lowering: with AVX2 the load folds into vpbroadcastw (%rdi); the
; other configurations load with movd / vmovd and then shuffle (pshufb from
; SSSE3 on, pshufd + pshuflw + pshufhw on SSE2).
2150 define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
2151 ; SSE2-LABEL: insert_dup_mem_v8i16_i32:
2153 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2154 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2155 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2156 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2159 ; SSSE3-LABEL: insert_dup_mem_v8i16_i32:
2161 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2162 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2165 ; SSE41-LABEL: insert_dup_mem_v8i16_i32:
2167 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2168 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2171 ; AVX1-LABEL: insert_dup_mem_v8i16_i32:
2173 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2174 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2177 ; AVX2-LABEL: insert_dup_mem_v8i16_i32:
2179 ; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
2181 %tmp = load i32, i32* %ptr, align 4
2182 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2183 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2184 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
; Loads an i16, sign-extends it to i32, inserts that into lane 0 of a zero
; <4 x i32>, bitcasts to <8 x i16> and splats i16 element 0.
; Expected lowering: every configuration starts with a sign-extending load
; (movswl) and a movd / vmovd into xmm0; AVX2 then broadcasts from the
; register with vpbroadcastw, the others shuffle (pshufb from SSSE3 on,
; pshufd + pshuflw + pshufhw on SSE2).
2188 define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
2189 ; SSE2-LABEL: insert_dup_mem_v8i16_sext_i16:
2191 ; SSE2-NEXT: movswl (%rdi), %eax
2192 ; SSE2-NEXT: movd %eax, %xmm0
2193 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2194 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2195 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2198 ; SSSE3-LABEL: insert_dup_mem_v8i16_sext_i16:
2200 ; SSSE3-NEXT: movswl (%rdi), %eax
2201 ; SSSE3-NEXT: movd %eax, %xmm0
2202 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2205 ; SSE41-LABEL: insert_dup_mem_v8i16_sext_i16:
2207 ; SSE41-NEXT: movswl (%rdi), %eax
2208 ; SSE41-NEXT: movd %eax, %xmm0
2209 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2212 ; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
2214 ; AVX1-NEXT: movswl (%rdi), %eax
2215 ; AVX1-NEXT: vmovd %eax, %xmm0
2216 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2219 ; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
2221 ; AVX2-NEXT: movswl (%rdi), %eax
2222 ; AVX2-NEXT: vmovd %eax, %xmm0
2223 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2225 %tmp = load i16, i16* %ptr, align 2
2226 %tmp1 = sext i16 %tmp to i32
2227 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2228 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2229 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
; Loads an i32, inserts it into lane 0 of a zero <4 x i32>, bitcasts that to
; <8 x i16> and splats i16 element 1 (the high half of the loaded value under
; the little-endian datalayout).
; Expected lowering: with AVX2 the load folds into a broadcast from offset 2,
; vpbroadcastw 2(%rdi); the other configurations load with movd / vmovd and
; then shuffle bytes 2-3 (pshufb) or word 1 (pshuflw / pshufhw on SSE2).
2233 define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
2234 ; SSE2-LABEL: insert_dup_elt1_mem_v8i16_i32:
2236 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2237 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2238 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2239 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2242 ; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_i32:
2244 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2245 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2248 ; SSE41-LABEL: insert_dup_elt1_mem_v8i16_i32:
2250 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2251 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2254 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
2256 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2257 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2260 ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i32:
2262 ; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
2264 %tmp = load i32, i32* %ptr, align 4
2265 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2266 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2267 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads an i32, inserts it into lane 1 of a zero <4 x i32>, bitcasts that to
; <8 x i16> and splats i16 element 3. Because the value sits in dword lane 1,
; element 3 is again the high half of the loaded value.
; Expected lowering: with AVX2 the load folds into vpbroadcastw 2(%rdi); the
; other configurations load with movd / vmovd into dword 0 and then shuffle
; (pshufb of bytes 2-3 from SSSE3 on, pshufd + pshuflw + pshufhw on SSE2).
2271 define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
2272 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
2274 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2275 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
2276 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
2277 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
2280 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
2282 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2283 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2286 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
2288 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2289 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2292 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
2294 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2295 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2298 ; AVX2-LABEL: insert_dup_elt3_mem_v8i16_i32:
2300 ; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
2302 %tmp = load i32, i32* %ptr, align 4
2303 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
2304 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2305 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Loads an i16, sign-extends it to i32, inserts that into lane 0 of a zero
; <4 x i32>, bitcasts to <8 x i16> and splats i16 element 1, i.e. the high
; half of the sign-extended value.
; Expected lowering: every configuration starts with movswl; AVX2 then shifts
; the high half down (shrl $16) before vmovd + vpbroadcastw, while the others
; move the full dword into xmm0 and shuffle (pshufb of bytes 2-3 from SSSE3
; on, pshufd + pshuflw + pshufhw on SSE2).
2309 define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
2310 ; SSE2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2312 ; SSE2-NEXT: movswl (%rdi), %eax
2313 ; SSE2-NEXT: movd %eax, %xmm0
2314 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2315 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2316 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2319 ; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2321 ; SSSE3-NEXT: movswl (%rdi), %eax
2322 ; SSSE3-NEXT: movd %eax, %xmm0
2323 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2326 ; SSE41-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2328 ; SSE41-NEXT: movswl (%rdi), %eax
2329 ; SSE41-NEXT: movd %eax, %xmm0
2330 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2333 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2335 ; AVX1-NEXT: movswl (%rdi), %eax
2336 ; AVX1-NEXT: vmovd %eax, %xmm0
2337 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2340 ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2342 ; AVX2-NEXT: movswl (%rdi), %eax
2343 ; AVX2-NEXT: shrl $16, %eax
2344 ; AVX2-NEXT: vmovd %eax, %xmm0
2345 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2347 %tmp = load i16, i16* %ptr, align 2
2348 %tmp1 = sext i16 %tmp to i32
2349 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2350 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2351 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads an i16, sign-extends it to i32, inserts that into lane 1 of a zero
; <4 x i32>, bitcasts to <8 x i16> and splats i16 element 3. Because the
; value sits in dword lane 1, element 3 is the high half of the sign-extended
; value.
; Expected lowering: every configuration starts with movswl; AVX2 then shifts
; the high half down (shrl $16) before vmovd + vpbroadcastw, while the others
; move the dword into xmm0 and shuffle (pshufb of bytes 2-3 from SSSE3 on,
; pshufd + pshuflw + pshufhw on SSE2).
2355 define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
2356 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2358 ; SSE2-NEXT: movswl (%rdi), %eax
2359 ; SSE2-NEXT: movd %eax, %xmm0
2360 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
2361 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
2362 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
2365 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2367 ; SSSE3-NEXT: movswl (%rdi), %eax
2368 ; SSSE3-NEXT: movd %eax, %xmm0
2369 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2372 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2374 ; SSE41-NEXT: movswl (%rdi), %eax
2375 ; SSE41-NEXT: movd %eax, %xmm0
2376 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2379 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2381 ; AVX1-NEXT: movswl (%rdi), %eax
2382 ; AVX1-NEXT: vmovd %eax, %xmm0
2383 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2386 ; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2388 ; AVX2-NEXT: movswl (%rdi), %eax
2389 ; AVX2-NEXT: shrl $16, %eax
2390 ; AVX2-NEXT: vmovd %eax, %xmm0
2391 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2393 %tmp = load i16, i16* %ptr, align 2
2394 %tmp1 = sext i16 %tmp to i32
2395 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
2396 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2397 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>