1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; Module-level target description. The triple below is the same one passed via
; -mtriple on every llc invocation at the top of this file.
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10 target triple = "x86_64-unknown-unknown"
; Single-input v4i32 shuffle with mask <0,0,0,1>; only %a is referenced.
; The checks expect one pshufd on the SSE runs and one vpshufd on the AVX runs.
12 define <4 x i32> @shuffle_v4i32_0001(<4 x i32> %a, <4 x i32> %b) {
13 ; SSE-LABEL: shuffle_v4i32_0001:
15 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
18 ; AVX-LABEL: shuffle_v4i32_0001:
20 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
22 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
23 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <0,0,2,0>; only %a is referenced.
; The checks expect one pshufd on the SSE runs and one vpshufd on the AVX runs.
25 define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) {
26 ; SSE-LABEL: shuffle_v4i32_0020:
28 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
31 ; AVX-LABEL: shuffle_v4i32_0020:
33 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
35 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
36 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <0,1,1,2>; only %a is referenced.
; The checks expect one pshufd on the SSE runs and one vpshufd on the AVX runs.
38 define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) {
39 ; SSE-LABEL: shuffle_v4i32_0112:
41 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
44 ; AVX-LABEL: shuffle_v4i32_0112:
46 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
48 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
49 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <0,3,0,0>; only %a is referenced.
; The checks expect one pshufd on the SSE runs and one vpshufd on the AVX runs.
51 define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) {
52 ; SSE-LABEL: shuffle_v4i32_0300:
54 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
57 ; AVX-LABEL: shuffle_v4i32_0300:
59 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
61 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
62 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <1,0,0,0>; only %a is referenced.
; The checks expect one pshufd on the SSE runs and one vpshufd on the AVX runs.
64 define <4 x i32> @shuffle_v4i32_1000(<4 x i32> %a, <4 x i32> %b) {
65 ; SSE-LABEL: shuffle_v4i32_1000:
67 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
70 ; AVX-LABEL: shuffle_v4i32_1000:
72 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
74 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
75 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <2,2,0,0>; only %a is referenced.
; The checks expect one pshufd on the SSE runs and one vpshufd on the AVX runs.
77 define <4 x i32> @shuffle_v4i32_2200(<4 x i32> %a, <4 x i32> %b) {
78 ; SSE-LABEL: shuffle_v4i32_2200:
80 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
83 ; AVX-LABEL: shuffle_v4i32_2200:
85 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
87 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
88 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <3,3,3,0>; only %a is referenced.
; The checks expect one pshufd on the SSE runs and one vpshufd on the AVX runs.
90 define <4 x i32> @shuffle_v4i32_3330(<4 x i32> %a, <4 x i32> %b) {
91 ; SSE-LABEL: shuffle_v4i32_3330:
93 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
96 ; AVX-LABEL: shuffle_v4i32_3330:
98 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
100 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
101 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <3,2,1,0> (full lane reversal of %a).
; The checks expect one pshufd on the SSE runs and one vpshufd on the AVX runs.
103 define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) {
104 ; SSE-LABEL: shuffle_v4i32_3210:
106 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
109 ; AVX-LABEL: shuffle_v4i32_3210:
111 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
113 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
114 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <2,1,2,1>; only %a is referenced.
; The checks expect one pshufd on the SSE runs and one vpshufd on the AVX runs.
117 define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) {
118 ; SSE-LABEL: shuffle_v4i32_2121:
120 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
123 ; AVX-LABEL: shuffle_v4i32_2121:
125 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
127 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
128 ret <4 x i32> %shuffle
; Single-input v4f32 shuffle with mask <0,0,0,1>; only %a is referenced.
; The checks expect one shufps on the SSE runs and one vpermilps on the AVX runs.
131 define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
132 ; SSE-LABEL: shuffle_v4f32_0001:
134 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
137 ; AVX-LABEL: shuffle_v4f32_0001:
139 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1]
141 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
142 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <0,0,2,0>; only %a is referenced.
; The checks expect one shufps on the SSE runs and one vpermilps on the AVX runs.
144 define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
145 ; SSE-LABEL: shuffle_v4f32_0020:
147 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
150 ; AVX-LABEL: shuffle_v4f32_0020:
152 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0]
154 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
155 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <0,3,0,0>; only %a is referenced.
; The checks expect one shufps on the SSE runs and one vpermilps on the AVX runs.
157 define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
158 ; SSE-LABEL: shuffle_v4f32_0300:
160 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
163 ; AVX-LABEL: shuffle_v4f32_0300:
165 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0]
167 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
168 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <1,0,0,0>; only %a is referenced.
; The checks expect one shufps on the SSE runs and one vpermilps on the AVX runs.
170 define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
171 ; SSE-LABEL: shuffle_v4f32_1000:
173 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
176 ; AVX-LABEL: shuffle_v4f32_1000:
178 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
180 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
181 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <2,2,0,0>; only %a is referenced.
; The checks expect one shufps on the SSE runs and one vpermilps on the AVX runs.
183 define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
184 ; SSE-LABEL: shuffle_v4f32_2200:
186 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
189 ; AVX-LABEL: shuffle_v4f32_2200:
191 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0]
193 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
194 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <3,3,3,0>; only %a is referenced.
; The checks expect one shufps on the SSE runs and one vpermilps on the AVX runs.
196 define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
197 ; SSE-LABEL: shuffle_v4f32_3330:
199 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
202 ; AVX-LABEL: shuffle_v4f32_3330:
204 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0]
206 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
207 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <3,2,1,0> (full lane reversal of %a).
; The checks expect one shufps on the SSE runs and one vpermilps on the AVX runs.
209 define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
210 ; SSE-LABEL: shuffle_v4f32_3210:
212 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
215 ; AVX-LABEL: shuffle_v4f32_3210:
217 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
219 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
220 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <0,0,1,1> (each low lane of %a duplicated).
; The checks expect a self-unpack of the low half: unpcklps on the SSE runs and
; vunpcklps on the AVX runs.
222 define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
223 ; SSE-LABEL: shuffle_v4f32_0011:
225 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
228 ; AVX-LABEL: shuffle_v4f32_0011:
230 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
232 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
233 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <2,2,3,3> (each high lane of %a duplicated).
; The checks expect a self-unpack of the high half: unpckhps on the SSE runs and
; vunpckhps on the AVX runs.
235 define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
236 ; SSE-LABEL: shuffle_v4f32_2233:
238 ; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
241 ; AVX-LABEL: shuffle_v4f32_2233:
243 ; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
245 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
246 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <0,0,2,2> (even lanes of %a duplicated).
; The plain SSE2 run expects a generic shufps; the SSE3, SSSE3 and SSE4.1 runs
; expect movsldup, and the AVX runs expect vmovsldup.
248 define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
249 ; SSE2-LABEL: shuffle_v4f32_0022:
251 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
254 ; SSE3-LABEL: shuffle_v4f32_0022:
256 ; SSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
259 ; SSSE3-LABEL: shuffle_v4f32_0022:
261 ; SSSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
264 ; SSE41-LABEL: shuffle_v4f32_0022:
266 ; SSE41-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
269 ; AVX-LABEL: shuffle_v4f32_0022:
271 ; AVX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
273 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
274 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <1,1,3,3> (odd lanes of %a duplicated).
; The plain SSE2 run expects a generic shufps; the SSE3, SSSE3 and SSE4.1 runs
; expect movshdup, and the AVX runs expect vmovshdup.
276 define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
277 ; SSE2-LABEL: shuffle_v4f32_1133:
279 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
282 ; SSE3-LABEL: shuffle_v4f32_1133:
284 ; SSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
287 ; SSSE3-LABEL: shuffle_v4f32_1133:
289 ; SSSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
292 ; SSE41-LABEL: shuffle_v4f32_1133:
294 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
297 ; AVX-LABEL: shuffle_v4f32_1133:
299 ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
301 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
302 ret <4 x float> %shuffle
; Two-input v4i32 shuffle with mask <0,1,2,4>: lanes 0-2 of %a, then lane 0 of %b
; (mask indices 4-7 address the second operand).
; Expected lowering: two shufps on the SSE2/SSE3/SSSE3 runs, pshufd + pblendw on
; the SSE4.1 and AVX1 runs, and vpbroadcastd + vpblendd on the AVX2 run.
305 define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) {
306 ; SSE2-LABEL: shuffle_v4i32_0124:
308 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
309 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
312 ; SSE3-LABEL: shuffle_v4i32_0124:
314 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
315 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
318 ; SSSE3-LABEL: shuffle_v4i32_0124:
320 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
321 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
324 ; SSE41-LABEL: shuffle_v4i32_0124:
326 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
327 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
330 ; AVX1-LABEL: shuffle_v4i32_0124:
332 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
333 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
336 ; AVX2-LABEL: shuffle_v4i32_0124:
338 ; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
339 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
341 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
342 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <0,1,4,2>: %a[0], %a[1], %b[0], %a[2].
; Expected lowering: two shufps on the SSE2/SSE3/SSSE3 runs, two pshufd + pblendw
; on the SSE4.1 and AVX1 runs, and vpbroadcastq + vpshufd + vpblendd on the AVX2 run.
344 define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
345 ; SSE2-LABEL: shuffle_v4i32_0142:
347 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
348 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
351 ; SSE3-LABEL: shuffle_v4i32_0142:
353 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
354 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
357 ; SSSE3-LABEL: shuffle_v4i32_0142:
359 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
360 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
363 ; SSE41-LABEL: shuffle_v4i32_0142:
365 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
366 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
367 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
370 ; AVX1-LABEL: shuffle_v4i32_0142:
372 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
373 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
374 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
377 ; AVX2-LABEL: shuffle_v4i32_0142:
379 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
380 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
381 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
383 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
384 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <0,4,1,2>: %a[0], %b[0], %a[1], %a[2].
; Expected lowering: two shufps into xmm1 plus a movaps copy on the
; SSE2/SSE3/SSSE3 runs, two pshufd + pblendw on the SSE4.1 and AVX1 runs, and
; vpbroadcastd + vpshufd + vpblendd on the AVX2 run.
386 define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) {
387 ; SSE2-LABEL: shuffle_v4i32_0412:
389 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
390 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
391 ; SSE2-NEXT: movaps %xmm1, %xmm0
394 ; SSE3-LABEL: shuffle_v4i32_0412:
396 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
397 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
398 ; SSE3-NEXT: movaps %xmm1, %xmm0
401 ; SSSE3-LABEL: shuffle_v4i32_0412:
403 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
404 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
405 ; SSSE3-NEXT: movaps %xmm1, %xmm0
408 ; SSE41-LABEL: shuffle_v4i32_0412:
410 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
411 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
412 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
415 ; AVX1-LABEL: shuffle_v4i32_0412:
417 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
418 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
419 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
422 ; AVX2-LABEL: shuffle_v4i32_0412:
424 ; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
425 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
426 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
428 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
429 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <4,0,1,2>: %b[0] followed by lanes 0-2 of %a.
; Expected lowering: two shufps into xmm1 plus a movaps copy on the
; SSE2/SSE3/SSSE3 runs, pshufd + pblendw on the SSE4.1 and AVX1 runs, and
; vpshufd + vpblendd on the AVX2 run.
431 define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) {
432 ; SSE2-LABEL: shuffle_v4i32_4012:
434 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
435 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
436 ; SSE2-NEXT: movaps %xmm1, %xmm0
439 ; SSE3-LABEL: shuffle_v4i32_4012:
441 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
442 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
443 ; SSE3-NEXT: movaps %xmm1, %xmm0
446 ; SSSE3-LABEL: shuffle_v4i32_4012:
448 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
449 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
450 ; SSSE3-NEXT: movaps %xmm1, %xmm0
453 ; SSE41-LABEL: shuffle_v4i32_4012:
455 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
456 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
459 ; AVX1-LABEL: shuffle_v4i32_4012:
461 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
462 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
465 ; AVX2-LABEL: shuffle_v4i32_4012:
467 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
468 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
470 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
471 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <0,1,4,5>: low 64 bits of %a followed by the
; low 64 bits of %b. The checks expect a single punpcklqdq on the SSE runs and a
; single vpunpcklqdq on the AVX runs.
473 define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
474 ; SSE-LABEL: shuffle_v4i32_0145:
476 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
479 ; AVX-LABEL: shuffle_v4i32_0145:
481 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
483 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
484 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <0,4,5,1>: %a[0], %b[0], %b[1], %a[1].
; Expected lowering: punpckldq + pshufd on the SSE2/SSE3/SSSE3 runs, two pshufd +
; pblendw on the SSE4.1 and AVX1 runs, and vpshufd + vpbroadcastq + vpblendd on
; the AVX2 run.
486 define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
487 ; SSE2-LABEL: shuffle_v4i32_0451:
489 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
490 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
493 ; SSE3-LABEL: shuffle_v4i32_0451:
495 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
496 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
499 ; SSSE3-LABEL: shuffle_v4i32_0451:
501 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
502 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
505 ; SSE41-LABEL: shuffle_v4i32_0451:
507 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
508 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
509 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
512 ; AVX1-LABEL: shuffle_v4i32_0451:
514 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
515 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
516 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
519 ; AVX2-LABEL: shuffle_v4i32_0451:
521 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
522 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
523 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
525 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
526 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <4,5,0,1>: low 64 bits of %b followed by the
; low 64 bits of %a. The SSE runs expect punpcklqdq into xmm1 followed by a
; movdqa copy to xmm0; the AVX runs expect a single three-operand vpunpcklqdq.
528 define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
529 ; SSE-LABEL: shuffle_v4i32_4501:
531 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
532 ; SSE-NEXT: movdqa %xmm1, %xmm0
535 ; AVX-LABEL: shuffle_v4i32_4501:
537 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
539 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
540 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <4,0,1,5>: %b[0], %a[0], %a[1], %b[1].
; Expected lowering: punpckldq + pshufd on the SSE2/SSE3/SSSE3 runs, two pshufd +
; pblendw on the SSE4.1 and AVX1 runs, and vpbroadcastq + vpshufd + vpblendd on
; the AVX2 run.
542 define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) {
543 ; SSE2-LABEL: shuffle_v4i32_4015:
545 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
546 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
549 ; SSE3-LABEL: shuffle_v4i32_4015:
551 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
552 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
555 ; SSSE3-LABEL: shuffle_v4i32_4015:
557 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
558 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
561 ; SSE41-LABEL: shuffle_v4i32_4015:
563 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
564 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
565 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
568 ; AVX1-LABEL: shuffle_v4i32_4015:
570 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
571 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
572 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
575 ; AVX2-LABEL: shuffle_v4i32_4015:
577 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
578 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
579 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
581 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
582 ret <4 x i32> %shuffle
; v4f32 shuffle of a zero vector (first operand) with %a (second operand), mask
; <4,1,2,3>: lane 0 comes from %a[0], lanes 1-3 are zero.
; Expected lowering: xorps + movss + movaps on the SSE2/SSE3/SSSE3 runs, and a
; zeroing xor followed by blendps / vblendps on the SSE4.1 and AVX runs.
585 define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
586 ; SSE2-LABEL: shuffle_v4f32_4zzz:
588 ; SSE2-NEXT: xorps %xmm1, %xmm1
589 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
590 ; SSE2-NEXT: movaps %xmm1, %xmm0
593 ; SSE3-LABEL: shuffle_v4f32_4zzz:
595 ; SSE3-NEXT: xorps %xmm1, %xmm1
596 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
597 ; SSE3-NEXT: movaps %xmm1, %xmm0
600 ; SSSE3-LABEL: shuffle_v4f32_4zzz:
602 ; SSSE3-NEXT: xorps %xmm1, %xmm1
603 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
604 ; SSSE3-NEXT: movaps %xmm1, %xmm0
607 ; SSE41-LABEL: shuffle_v4f32_4zzz:
609 ; SSE41-NEXT: xorps %xmm1, %xmm1
610 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
613 ; AVX-LABEL: shuffle_v4f32_4zzz:
615 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
616 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
618 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
619 ret <4 x float> %shuffle
622 define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
623 ; SSE2-LABEL: shuffle_v4f32_z4zz:
625 ; SSE2-NEXT: xorps %xmm1, %xmm1
626 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
627 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
630 ; SSE3-LABEL: shuffle_v4f32_z4zz:
632 ; SSE3-NEXT: xorps %xmm1, %xmm1
633 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
634 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
637 ; SSSE3-LABEL: shuffle_v4f32_z4zz:
639 ; SSSE3-NEXT: xorps %xmm1, %xmm1
640 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
641 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
644 ; SSE41-LABEL: shuffle_v4f32_z4zz:
646 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
649 ; AVX-LABEL: shuffle_v4f32_z4zz:
651 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
653 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
654 ret <4 x float> %shuffle
; v4f32 shuffle of a zero vector (first operand) with %a, mask <0,0,4,0>: lane 2
; comes from %a[0]; every other lane reads an element of the zero vector.
; Expected lowering: xorps + two shufps + movaps on the SSE2/SSE3/SSSE3 runs, and
; a single insertps / vinsertps on the SSE4.1 and AVX runs.
657 define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
658 ; SSE2-LABEL: shuffle_v4f32_zz4z:
660 ; SSE2-NEXT: xorps %xmm1, %xmm1
661 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
662 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
663 ; SSE2-NEXT: movaps %xmm1, %xmm0
666 ; SSE3-LABEL: shuffle_v4f32_zz4z:
668 ; SSE3-NEXT: xorps %xmm1, %xmm1
669 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
670 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
671 ; SSE3-NEXT: movaps %xmm1, %xmm0
674 ; SSSE3-LABEL: shuffle_v4f32_zz4z:
676 ; SSSE3-NEXT: xorps %xmm1, %xmm1
677 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
678 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
679 ; SSSE3-NEXT: movaps %xmm1, %xmm0
682 ; SSE41-LABEL: shuffle_v4f32_zz4z:
684 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
687 ; AVX-LABEL: shuffle_v4f32_zz4z:
689 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
691 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
692 ret <4 x float> %shuffle
; v4f32 shuffle of a zero vector (first operand) with %a, mask
; <0,undef,undef,4>: lane 0 is zero, lanes 1-2 are undef, lane 3 is %a[0].
; Expected lowering: xorps + one shufps + movaps on the SSE2/SSE3/SSSE3 runs, and
; a single insertps / vinsertps that zeroes lanes 0-2 on the SSE4.1 and AVX runs.
695 define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) {
696 ; SSE2-LABEL: shuffle_v4f32_zuu4:
698 ; SSE2-NEXT: xorps %xmm1, %xmm1
699 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
700 ; SSE2-NEXT: movaps %xmm1, %xmm0
703 ; SSE3-LABEL: shuffle_v4f32_zuu4:
705 ; SSE3-NEXT: xorps %xmm1, %xmm1
706 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
707 ; SSE3-NEXT: movaps %xmm1, %xmm0
710 ; SSSE3-LABEL: shuffle_v4f32_zuu4:
712 ; SSSE3-NEXT: xorps %xmm1, %xmm1
713 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
714 ; SSSE3-NEXT: movaps %xmm1, %xmm0
717 ; SSE41-LABEL: shuffle_v4f32_zuu4:
719 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
722 ; AVX-LABEL: shuffle_v4f32_zuu4:
724 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
726 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
727 ret <4 x float> %shuffle
; v4f32 shuffle of a zero vector (first operand) with %a, mask <0,1,2,7>: lanes
; 0-2 are zero and lane 3 is %a[3].
; Expected lowering: xorps + two shufps + movaps on the SSE2/SSE3/SSSE3 runs, and
; a zeroing xor followed by blendps / vblendps on the SSE4.1 and AVX runs.
730 define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
731 ; SSE2-LABEL: shuffle_v4f32_zzz7:
733 ; SSE2-NEXT: xorps %xmm1, %xmm1
734 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
735 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
736 ; SSE2-NEXT: movaps %xmm1, %xmm0
739 ; SSE3-LABEL: shuffle_v4f32_zzz7:
741 ; SSE3-NEXT: xorps %xmm1, %xmm1
742 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
743 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
744 ; SSE3-NEXT: movaps %xmm1, %xmm0
747 ; SSSE3-LABEL: shuffle_v4f32_zzz7:
749 ; SSSE3-NEXT: xorps %xmm1, %xmm1
750 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
751 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
752 ; SSSE3-NEXT: movaps %xmm1, %xmm0
755 ; SSE41-LABEL: shuffle_v4f32_zzz7:
757 ; SSE41-NEXT: xorps %xmm1, %xmm1
758 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
761 ; AVX-LABEL: shuffle_v4f32_zzz7:
763 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
764 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
766 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
767 ret <4 x float> %shuffle
; v4f32 shuffle of a zero vector (first operand) with %a, mask <0,6,2,3>: lane 1
; is %a[2]; lanes 0, 2 and 3 are zero.
; Expected lowering: xorps + two shufps on the SSE2/SSE3/SSSE3 runs, and a single
; insertps / vinsertps taking xmm0[2] on the SSE4.1 and AVX runs.
770 define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
771 ; SSE2-LABEL: shuffle_v4f32_z6zz:
773 ; SSE2-NEXT: xorps %xmm1, %xmm1
774 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
775 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
778 ; SSE3-LABEL: shuffle_v4f32_z6zz:
780 ; SSE3-NEXT: xorps %xmm1, %xmm1
781 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
782 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
785 ; SSSE3-LABEL: shuffle_v4f32_z6zz:
787 ; SSSE3-NEXT: xorps %xmm1, %xmm1
788 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
789 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
792 ; SSE41-LABEL: shuffle_v4f32_z6zz:
794 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
797 ; AVX-LABEL: shuffle_v4f32_z6zz:
799 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
801 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
802 ret <4 x float> %shuffle
; v4f32 shuffle of %a (first operand) with a zero vector (second operand), mask
; <0,4,2,3>: lane 1 is replaced by zero, the other lanes of %a stay in place.
; Expected lowering: xorps + two shufps + movaps on the SSE2/SSE3/SSSE3 runs, and
; a zeroing xor followed by blendps / vblendps on the SSE4.1 and AVX runs.
805 define <4 x float> @shuffle_v4f32_0z23(<4 x float> %a) {
806 ; SSE2-LABEL: shuffle_v4f32_0z23:
808 ; SSE2-NEXT: xorps %xmm1, %xmm1
809 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
810 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
811 ; SSE2-NEXT: movaps %xmm1, %xmm0
814 ; SSE3-LABEL: shuffle_v4f32_0z23:
816 ; SSE3-NEXT: xorps %xmm1, %xmm1
817 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
818 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
819 ; SSE3-NEXT: movaps %xmm1, %xmm0
822 ; SSSE3-LABEL: shuffle_v4f32_0z23:
824 ; SSSE3-NEXT: xorps %xmm1, %xmm1
825 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
826 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
827 ; SSSE3-NEXT: movaps %xmm1, %xmm0
830 ; SSE41-LABEL: shuffle_v4f32_0z23:
832 ; SSE41-NEXT: xorps %xmm1, %xmm1
833 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
836 ; AVX-LABEL: shuffle_v4f32_0z23:
838 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
839 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
841 %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
842 ret <4 x float> %shuffle
; v4f32 shuffle of %a (first operand) with a zero vector (second operand), mask
; <0,1,4,3>: lane 2 is replaced by zero, the other lanes of %a stay in place.
; Expected lowering: xorps + two shufps on the SSE2/SSE3/SSSE3 runs, and a zeroing
; xor followed by blendps / vblendps on the SSE4.1 and AVX runs.
845 define <4 x float> @shuffle_v4f32_01z3(<4 x float> %a) {
846 ; SSE2-LABEL: shuffle_v4f32_01z3:
848 ; SSE2-NEXT: xorps %xmm1, %xmm1
849 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
850 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
853 ; SSE3-LABEL: shuffle_v4f32_01z3:
855 ; SSE3-NEXT: xorps %xmm1, %xmm1
856 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
857 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
860 ; SSSE3-LABEL: shuffle_v4f32_01z3:
862 ; SSSE3-NEXT: xorps %xmm1, %xmm1
863 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
864 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
867 ; SSE41-LABEL: shuffle_v4f32_01z3:
869 ; SSE41-NEXT: xorps %xmm1, %xmm1
870 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
873 ; AVX-LABEL: shuffle_v4f32_01z3:
875 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
876 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
878 %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
879 ret <4 x float> %shuffle
; v4f32 shuffle of %a (first operand) with a zero vector (second operand), mask
; <0,1,2,7>: lane 3 is replaced by zero, lanes 0-2 of %a stay in place.
; Expected lowering: xorps + two shufps on the SSE2/SSE3/SSSE3 runs, and a zeroing
; xor followed by blendps / vblendps on the SSE4.1 and AVX runs.
882 define <4 x float> @shuffle_v4f32_012z(<4 x float> %a) {
883 ; SSE2-LABEL: shuffle_v4f32_012z:
885 ; SSE2-NEXT: xorps %xmm1, %xmm1
886 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
887 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
890 ; SSE3-LABEL: shuffle_v4f32_012z:
892 ; SSE3-NEXT: xorps %xmm1, %xmm1
893 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
894 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
897 ; SSSE3-LABEL: shuffle_v4f32_012z:
899 ; SSSE3-NEXT: xorps %xmm1, %xmm1
900 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
901 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
904 ; SSE41-LABEL: shuffle_v4f32_012z:
906 ; SSE41-NEXT: xorps %xmm1, %xmm1
907 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
910 ; AVX-LABEL: shuffle_v4f32_012z:
912 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
913 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
915 %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
916 ret <4 x float> %shuffle
; v4f32 shuffle of %a (first operand) with a zero vector (second operand), mask
; <0,4,4,3>: lanes 1 and 2 are replaced by zero, lanes 0 and 3 of %a stay in place.
; Expected lowering: xorps + two shufps on the SSE2/SSE3/SSSE3 runs, and a zeroing
; xor followed by blendps / vblendps on the SSE4.1 and AVX runs.
919 define <4 x float> @shuffle_v4f32_0zz3(<4 x float> %a) {
920 ; SSE2-LABEL: shuffle_v4f32_0zz3:
922 ; SSE2-NEXT: xorps %xmm1, %xmm1
923 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
924 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
927 ; SSE3-LABEL: shuffle_v4f32_0zz3:
929 ; SSE3-NEXT: xorps %xmm1, %xmm1
930 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
931 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
934 ; SSSE3-LABEL: shuffle_v4f32_0zz3:
936 ; SSSE3-NEXT: xorps %xmm1, %xmm1
937 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
938 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
941 ; SSE41-LABEL: shuffle_v4f32_0zz3:
943 ; SSE41-NEXT: xorps %xmm1, %xmm1
944 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
947 ; AVX-LABEL: shuffle_v4f32_0zz3:
949 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
950 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
952 %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
953 ret <4 x float> %shuffle
; v4f32 shuffle of %v with the constant <0.0, undef, undef, undef>, mask
; <0,4,2,4>. Only the constant's defined lane 0 is read, so lanes 1 and 3 of the
; result are zero while lanes 0 and 2 of %v stay in place.
; Expected lowering: xorps + two shufps on the SSE2/SSE3/SSSE3 runs, and a zeroing
; xor followed by blendps / vblendps on the SSE4.1 and AVX runs.
956 define <4 x float> @shuffle_v4f32_0z2z(<4 x float> %v) {
957 ; SSE2-LABEL: shuffle_v4f32_0z2z:
959 ; SSE2-NEXT: xorps %xmm1, %xmm1
960 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
961 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
964 ; SSE3-LABEL: shuffle_v4f32_0z2z:
966 ; SSE3-NEXT: xorps %xmm1, %xmm1
967 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
968 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
971 ; SSSE3-LABEL: shuffle_v4f32_0z2z:
973 ; SSSE3-NEXT: xorps %xmm1, %xmm1
974 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
975 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
978 ; SSE41-LABEL: shuffle_v4f32_0z2z:
980 ; SSE41-NEXT: xorps %xmm1, %xmm1
981 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
984 ; AVX-LABEL: shuffle_v4f32_0z2z:
986 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
987 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
989 %shuffle = shufflevector <4 x float> %v, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
990 ret <4 x float> %shuffle
; Two-input v4f32 shuffle with mask <undef,0,5,1>: lane 0 is undef, then %a[0],
; %b[1], %a[1]. The checks expect the undef lane to be exploited so the whole
; shuffle becomes one low unpack of %b with %a (unpcklps + movaps on the SSE
; runs, a single vunpcklps on the AVX runs).
993 define <4 x float> @shuffle_v4f32_u051(<4 x float> %a, <4 x float> %b) {
994 ; SSE-LABEL: shuffle_v4f32_u051:
996 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
997 ; SSE-NEXT: movaps %xmm1, %xmm0
1000 ; AVX-LABEL: shuffle_v4f32_u051:
1002 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1004 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 undef, i32 0, i32 5, i32 1>
1005 ret <4 x float> %shuffle
; v4i32 shuffle of a zero vector (first operand) with %a (second operand), mask
; <4,1,2,3>: lane 0 comes from %a[0], lanes 1-3 are zero.
; Expected lowering: xorps + movss + movaps on the SSE2/SSE3/SSSE3 runs, pxor +
; pblendw on the SSE4.1 run, and vpxor + vpblendw on the AVX runs.
1008 define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) {
1009 ; SSE2-LABEL: shuffle_v4i32_4zzz:
1011 ; SSE2-NEXT: xorps %xmm1, %xmm1
1012 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1013 ; SSE2-NEXT: movaps %xmm1, %xmm0
1016 ; SSE3-LABEL: shuffle_v4i32_4zzz:
1018 ; SSE3-NEXT: xorps %xmm1, %xmm1
1019 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1020 ; SSE3-NEXT: movaps %xmm1, %xmm0
1023 ; SSSE3-LABEL: shuffle_v4i32_4zzz:
1025 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1026 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1027 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1030 ; SSE41-LABEL: shuffle_v4i32_4zzz:
1032 ; SSE41-NEXT: pxor %xmm1, %xmm1
1033 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1036 ; AVX-LABEL: shuffle_v4i32_4zzz:
1038 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1039 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1041 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1042 ret <4 x i32> %shuffle
; v4i32 shuffle of a zero vector (first operand) with %a, mask <2,4,3,0>: lane 1
; comes from %a[0]; every other lane reads an element of the zero vector.
; Every run expects the same two-step shape: first build [%a[0],0,0,0] (movss on
; the SSE2/SSE3/SSSE3 runs, pblendw / vpblendw on the SSE4.1 and AVX runs), then
; move the live element into lane 1 with pshufd / vpshufd [1,0,1,1].
1045 define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) {
1046 ; SSE2-LABEL: shuffle_v4i32_z4zz:
1048 ; SSE2-NEXT: xorps %xmm1, %xmm1
1049 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1050 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1053 ; SSE3-LABEL: shuffle_v4i32_z4zz:
1055 ; SSE3-NEXT: xorps %xmm1, %xmm1
1056 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1057 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1060 ; SSSE3-LABEL: shuffle_v4i32_z4zz:
1062 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1063 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1064 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1067 ; SSE41-LABEL: shuffle_v4i32_z4zz:
1069 ; SSE41-NEXT: pxor %xmm1, %xmm1
1070 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1071 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1074 ; AVX-LABEL: shuffle_v4i32_z4zz:
1076 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1077 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1078 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
1080 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
1081 ret <4 x i32> %shuffle
; Shuffles zeroinitializer with %a using mask <0,0,4,0>: lane 2 is %a[0],
; every other lane comes from the zero vector.
; All runs expect %a[0] to be merged into a zeroed register first and then
; moved to lane 2 with a pshufd/vpshufd.
1084 define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) {
1085 ; SSE2-LABEL: shuffle_v4i32_zz4z:
1087 ; SSE2-NEXT: xorps %xmm1, %xmm1
1088 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1089 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1092 ; SSE3-LABEL: shuffle_v4i32_zz4z:
1094 ; SSE3-NEXT: xorps %xmm1, %xmm1
1095 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1096 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1099 ; SSSE3-LABEL: shuffle_v4i32_zz4z:
1101 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1102 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1103 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1106 ; SSE41-LABEL: shuffle_v4i32_zz4z:
1108 ; SSE41-NEXT: pxor %xmm1, %xmm1
1109 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1110 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1113 ; AVX-LABEL: shuffle_v4i32_zz4z:
1115 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1116 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1117 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
1119 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
1120 ret <4 x i32> %shuffle
; Shuffles zeroinitializer with %a using mask <0,undef,undef,4>: lane 0 is
; zero, lanes 1-2 are undef, lane 3 is %a[0].
; Both the SSE and AVX runs expect a single whole-register byte shift left
; (pslldq/vpslldq) that moves the low 4 bytes to the top.
1123 define <4 x i32> @shuffle_v4i32_zuu4(<4 x i32> %a) {
1124 ; SSE-LABEL: shuffle_v4i32_zuu4:
1126 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
1129 ; AVX-LABEL: shuffle_v4i32_zuu4:
1131 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
1133 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
1134 ret <4 x i32> %shuffle
; Shuffles zeroinitializer with %a using mask <0,6,2,3>: lane 1 is %a[2],
; every other lane comes from the zero vector.
; Pre-SSE4.1 runs expect a two-shufps sequence against a zeroed register;
; SSE4.1 and AVX1 expect pshufd + word blend; AVX2 expects a dword blend.
1137 define <4 x i32> @shuffle_v4i32_z6zz(<4 x i32> %a) {
1138 ; SSE2-LABEL: shuffle_v4i32_z6zz:
1140 ; SSE2-NEXT: xorps %xmm1, %xmm1
1141 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
1142 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1145 ; SSE3-LABEL: shuffle_v4i32_z6zz:
1147 ; SSE3-NEXT: xorps %xmm1, %xmm1
1148 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
1149 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1152 ; SSSE3-LABEL: shuffle_v4i32_z6zz:
1154 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1155 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
1156 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1159 ; SSE41-LABEL: shuffle_v4i32_z6zz:
1161 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
1162 ; SSE41-NEXT: pxor %xmm0, %xmm0
1163 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1166 ; AVX1-LABEL: shuffle_v4i32_z6zz:
1168 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1169 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1170 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1173 ; AVX2-LABEL: shuffle_v4i32_z6zz:
1175 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1176 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1177 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1179 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
1180 ret <4 x i32> %shuffle
; Two-input rotation with mask <7,0,1,2>: lane 0 is %b[3], lanes 1-3 are
; %a[0], %a[1], %a[2].
; SSE2/SSE3 runs expect two shufps plus a register copy; SSSE3 and later
; expect a single palignr/vpalignr.
1183 define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) {
1184 ; SSE2-LABEL: shuffle_v4i32_7012:
1186 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
1187 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
1188 ; SSE2-NEXT: movaps %xmm1, %xmm0
1191 ; SSE3-LABEL: shuffle_v4i32_7012:
1193 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
1194 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
1195 ; SSE3-NEXT: movaps %xmm1, %xmm0
1198 ; SSSE3-LABEL: shuffle_v4i32_7012:
1200 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1203 ; SSE41-LABEL: shuffle_v4i32_7012:
1205 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1208 ; AVX-LABEL: shuffle_v4i32_7012:
1210 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1212 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
1213 ret <4 x i32> %shuffle
; Two-input rotation with mask <6,7,0,1>: lanes 0-1 are %b[2], %b[3] and
; lanes 2-3 are %a[0], %a[1].
; SSE2/SSE3 runs expect shufpd plus a register copy; SSSE3 and later expect
; a single palignr/vpalignr.
1216 define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) {
1217 ; SSE2-LABEL: shuffle_v4i32_6701:
1219 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
1220 ; SSE2-NEXT: movapd %xmm1, %xmm0
1223 ; SSE3-LABEL: shuffle_v4i32_6701:
1225 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
1226 ; SSE3-NEXT: movapd %xmm1, %xmm0
1229 ; SSSE3-LABEL: shuffle_v4i32_6701:
1231 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1234 ; SSE41-LABEL: shuffle_v4i32_6701:
1236 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1239 ; AVX-LABEL: shuffle_v4i32_6701:
1241 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1243 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
1244 ret <4 x i32> %shuffle
; Two-input rotation with mask <5,6,7,0>: lanes 0-2 are %b[1], %b[2], %b[3]
; and lane 3 is %a[0].
; SSE2/SSE3 runs expect two shufps plus a register copy; SSSE3 and later
; expect a single palignr/vpalignr.
1247 define <4 x i32> @shuffle_v4i32_5670(<4 x i32> %a, <4 x i32> %b) {
1248 ; SSE2-LABEL: shuffle_v4i32_5670:
1250 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
1251 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
1252 ; SSE2-NEXT: movaps %xmm1, %xmm0
1255 ; SSE3-LABEL: shuffle_v4i32_5670:
1257 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
1258 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
1259 ; SSE3-NEXT: movaps %xmm1, %xmm0
1262 ; SSSE3-LABEL: shuffle_v4i32_5670:
1264 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
1267 ; SSE41-LABEL: shuffle_v4i32_5670:
1269 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
1272 ; AVX-LABEL: shuffle_v4i32_5670:
1274 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
1276 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
1277 ret <4 x i32> %shuffle
; Two-input rotation with mask <1,2,3,4>: lanes 0-2 are %a[1], %a[2], %a[3]
; and lane 3 is %b[0].
; SSE2/SSE3 runs expect two shufps; SSSE3/SSE4.1 expect palignr plus a
; register copy; AVX expects a single vpalignr.
1280 define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) {
1281 ; SSE2-LABEL: shuffle_v4i32_1234:
1283 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
1284 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
1287 ; SSE3-LABEL: shuffle_v4i32_1234:
1289 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
1290 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
1293 ; SSSE3-LABEL: shuffle_v4i32_1234:
1295 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
1296 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1299 ; SSE41-LABEL: shuffle_v4i32_1234:
1301 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
1302 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1305 ; AVX-LABEL: shuffle_v4i32_1234:
1307 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
1309 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
1310 ret <4 x i32> %shuffle
; Two-input rotation with mask <2,3,4,5>: lanes 0-1 are %a[2], %a[3] and
; lanes 2-3 are %b[0], %b[1].
; SSE2/SSE3 runs expect a single shufpd; SSSE3/SSE4.1 expect palignr plus a
; register copy; AVX expects a single vpalignr.
1313 define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) {
1314 ; SSE2-LABEL: shuffle_v4i32_2345:
1316 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1319 ; SSE3-LABEL: shuffle_v4i32_2345:
1321 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1324 ; SSSE3-LABEL: shuffle_v4i32_2345:
1326 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
1327 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1330 ; SSE41-LABEL: shuffle_v4i32_2345:
1332 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
1333 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1336 ; AVX-LABEL: shuffle_v4i32_2345:
1338 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
1340 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1341 ret <4 x i32> %shuffle
; Two-input interleave with mask <4,0,undef,1>: lane 0 is %b[0], lane 1 is
; %a[0], lane 2 is undef, lane 3 is %a[1].
; All runs expect a low dword unpack with %b as the first source; the SSE
; runs additionally expect a register copy into xmm0.
1344 define <4 x i32> @shuffle_v4i32_40u1(<4 x i32> %a, <4 x i32> %b) {
1345 ; SSE-LABEL: shuffle_v4i32_40u1:
1347 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1348 ; SSE-NEXT: movdqa %xmm1, %xmm0
1351 ; AVX-LABEL: shuffle_v4i32_40u1:
1353 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1355 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 1>
1356 ret <4 x i32> %shuffle
; Two-input rotation with mask <3,4,5,6>: lane 0 is %a[3] and lanes 1-3 are
; %b[0], %b[1], %b[2].
; SSE2/SSE3 runs expect two shufps; SSSE3/SSE4.1 expect palignr plus a
; register copy; AVX expects a single vpalignr.
1359 define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
1360 ; SSE2-LABEL: shuffle_v4i32_3456:
1362 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
1363 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
1366 ; SSE3-LABEL: shuffle_v4i32_3456:
1368 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
1369 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
1372 ; SSSE3-LABEL: shuffle_v4i32_3456:
1374 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
1375 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1378 ; SSE41-LABEL: shuffle_v4i32_3456:
1380 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
1381 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1384 ; AVX-LABEL: shuffle_v4i32_3456:
1386 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
1388 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1389 ret <4 x i32> %shuffle
; Mask <0,undef,1,undef>: lane 0 is %a[0], lane 2 is %a[1], lanes 1 and 3 are
; undef. No mask index selects from %b.
; Pre-SSE4.1 runs expect a pshufd; SSE4.1 and AVX expect a dword-to-qword
; zero extension (pmovzxdq/vpmovzxdq), which fills the undef lanes with zero.
1392 define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
1393 ; SSE2-LABEL: shuffle_v4i32_0u1u:
1395 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1398 ; SSE3-LABEL: shuffle_v4i32_0u1u:
1400 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1403 ; SSSE3-LABEL: shuffle_v4i32_0u1u:
1405 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1408 ; SSE41-LABEL: shuffle_v4i32_0u1u:
1410 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1413 ; AVX-LABEL: shuffle_v4i32_0u1u:
1415 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1417 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
1418 ret <4 x i32> %shuffle
; Shuffles %a with zeroinitializer using mask <0,5,1,7>: lanes 0 and 2 are
; %a[0] and %a[1], lanes 1 and 3 are zero.
; Pre-SSE4.1 runs expect pxor + punpckldq; SSE4.1 and AVX expect a single
; dword-to-qword zero extension (pmovzxdq/vpmovzxdq).
1421 define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
1422 ; SSE2-LABEL: shuffle_v4i32_0z1z:
1424 ; SSE2-NEXT: pxor %xmm1, %xmm1
1425 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1428 ; SSE3-LABEL: shuffle_v4i32_0z1z:
1430 ; SSE3-NEXT: pxor %xmm1, %xmm1
1431 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1434 ; SSSE3-LABEL: shuffle_v4i32_0z1z:
1436 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1437 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1440 ; SSE41-LABEL: shuffle_v4i32_0z1z:
1442 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1445 ; AVX-LABEL: shuffle_v4i32_0z1z:
1447 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1449 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
1450 ret <4 x i32> %shuffle
; Shuffles %a with zeroinitializer using mask <0,1,7,undef>: lanes 0-1 are
; %a[0], %a[1], lane 2 is zero, lane 3 is undef.
; All runs expect a single movq/vmovq that keeps the low 64 bits and zeroes
; the upper half.
1453 define <4 x i32> @shuffle_v4i32_01zu(<4 x i32> %a) {
1454 ; SSE-LABEL: shuffle_v4i32_01zu:
1456 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1459 ; AVX-LABEL: shuffle_v4i32_01zu:
1461 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1463 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 7, i32 undef>
1464 ret <4 x i32> %shuffle
; Shuffles %a with zeroinitializer using mask <0,4,2,3>: lane 1 is zeroed,
; lanes 0, 2 and 3 pass through from %a.
; Pre-SSE4.1 runs expect an andps with a RIP-relative constant; SSE4.1 and
; AVX1 expect a zeroing xor + word blend; AVX2 expects a dword blend.
1467 define <4 x i32> @shuffle_v4i32_0z23(<4 x i32> %a) {
1468 ; SSE2-LABEL: shuffle_v4i32_0z23:
1470 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1473 ; SSE3-LABEL: shuffle_v4i32_0z23:
1475 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1478 ; SSSE3-LABEL: shuffle_v4i32_0z23:
1480 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1483 ; SSE41-LABEL: shuffle_v4i32_0z23:
1485 ; SSE41-NEXT: pxor %xmm1, %xmm1
1486 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1489 ; AVX1-LABEL: shuffle_v4i32_0z23:
1491 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1492 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1495 ; AVX2-LABEL: shuffle_v4i32_0z23:
1497 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1498 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
1500 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
1501 ret <4 x i32> %shuffle
; Shuffles %a with zeroinitializer using mask <0,1,4,3>: lane 2 is zeroed,
; lanes 0, 1 and 3 pass through from %a.
; Pre-SSE4.1 runs expect an andps with a RIP-relative constant; SSE4.1 and
; AVX1 expect a zeroing xor + word blend; AVX2 expects a dword blend.
1504 define <4 x i32> @shuffle_v4i32_01z3(<4 x i32> %a) {
1505 ; SSE2-LABEL: shuffle_v4i32_01z3:
1507 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1510 ; SSE3-LABEL: shuffle_v4i32_01z3:
1512 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1515 ; SSSE3-LABEL: shuffle_v4i32_01z3:
1517 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1520 ; SSE41-LABEL: shuffle_v4i32_01z3:
1522 ; SSE41-NEXT: pxor %xmm1, %xmm1
1523 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1526 ; AVX1-LABEL: shuffle_v4i32_01z3:
1528 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1529 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1532 ; AVX2-LABEL: shuffle_v4i32_01z3:
1534 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1535 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1537 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
1538 ret <4 x i32> %shuffle
; Shuffles %a with zeroinitializer using mask <0,1,2,7>: lane 3 is zeroed,
; lanes 0-2 pass through from %a.
; Pre-SSE4.1 runs expect an andps with a RIP-relative constant; SSE4.1 and
; AVX1 expect a zeroing xor + word blend; AVX2 expects a dword blend.
1541 define <4 x i32> @shuffle_v4i32_012z(<4 x i32> %a) {
1542 ; SSE2-LABEL: shuffle_v4i32_012z:
1544 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1547 ; SSE3-LABEL: shuffle_v4i32_012z:
1549 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1552 ; SSSE3-LABEL: shuffle_v4i32_012z:
1554 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1557 ; SSE41-LABEL: shuffle_v4i32_012z:
1559 ; SSE41-NEXT: pxor %xmm1, %xmm1
1560 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
1563 ; AVX1-LABEL: shuffle_v4i32_012z:
1565 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1566 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
1569 ; AVX2-LABEL: shuffle_v4i32_012z:
1571 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1572 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
1574 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
1575 ret <4 x i32> %shuffle
; Shuffles %a with zeroinitializer using mask <0,4,4,3>: lanes 1 and 2 are
; zeroed, lanes 0 and 3 pass through from %a.
; Pre-SSE4.1 runs expect an andps with a RIP-relative constant; SSE4.1 and
; AVX1 expect a zeroing xor + word blend; AVX2 expects a dword blend.
1578 define <4 x i32> @shuffle_v4i32_0zz3(<4 x i32> %a) {
1579 ; SSE2-LABEL: shuffle_v4i32_0zz3:
1581 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1584 ; SSE3-LABEL: shuffle_v4i32_0zz3:
1586 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1589 ; SSSE3-LABEL: shuffle_v4i32_0zz3:
1591 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1594 ; SSE41-LABEL: shuffle_v4i32_0zz3:
1596 ; SSE41-NEXT: pxor %xmm1, %xmm1
1597 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
1600 ; AVX1-LABEL: shuffle_v4i32_0zz3:
1602 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1603 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
1606 ; AVX2-LABEL: shuffle_v4i32_0zz3:
1608 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1609 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
1611 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
1612 ret <4 x i32> %shuffle
; Shuffle through a bitcast: a v4i32 shuffle <1,5,0,4> is reinterpreted as
; <2 x double>, its two 64-bit halves are swapped, and the result is cast back.
; The net effect is the v4i32 mask <0,4,1,5>; all runs expect the whole chain
; to fold into one low dword unpack (punpckldq/vpunpckldq).
1615 define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) {
1616 ; SSE-LABEL: shuffle_v4i32_bitcast_0415:
1618 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1621 ; AVX-LABEL: shuffle_v4i32_bitcast_0415:
1623 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1625 %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 4>
1626 %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double>
1627 %shuffle64 = shufflevector <2 x double> %bitcast64, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1628 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x i32>
1629 ret <4 x i32> %bitcast32
; Mixed int/float shuffle through bitcasts: %b is shuffled to <b0,b0,b1,b1>,
; cast to <2 x double>, and its low 64-bit element is combined with the low
; 64-bit element of %a (also cast to <2 x double>).
; Viewed as four 32-bit lanes the result is b0,b0,a0,a1. All runs expect a
; pshufd on %b followed by unpcklpd.
1632 define <4 x float> @shuffle_v4f32_bitcast_4401(<4 x float> %a, <4 x i32> %b) {
1633 ; SSE-LABEL: shuffle_v4f32_bitcast_4401:
1635 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1636 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1637 ; SSE-NEXT: movapd %xmm1, %xmm0
1640 ; AVX-LABEL: shuffle_v4f32_bitcast_4401:
1642 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1643 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1645 %1 = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
1646 %2 = bitcast <4 x i32> %1 to <2 x double>
1647 %3 = bitcast <4 x float> %a to <2 x double>
1648 %4 = shufflevector <2 x double> %2, <2 x double> %3, <2 x i32> <i32 0, i32 2>
1649 %5 = bitcast <2 x double> %4 to <4 x float>
; Chained shuffle with a bitcast operand: %a is first shuffled to
; <a0,a0,a1,a1>, then combined with %b (cast to <4 x float>) using mask
; <1,0,4,5>, giving a0,a0,b0,b1.
; All runs expect both shuffles to merge into a single shufps/vshufps.
1653 define <4 x float> @shuffle_v4f32_bitcast_0045(<4 x float> %a, <4 x i32> %b) {
1654 ; SSE-LABEL: shuffle_v4f32_bitcast_0045:
1656 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
1659 ; AVX-LABEL: shuffle_v4f32_bitcast_0045:
1661 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
1663 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
1664 %2 = bitcast <4 x i32> %b to <4 x float>
1665 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 1, i32 0, i32 4, i32 5>
; Inserts the scalar i32 %a into lane 0 of an undef vector, then shuffles with
; zeroinitializer using mask <0,5,6,7> so lanes 1-3 become zero.
; All runs expect a single movd/vmovd from the GPR.
1669 define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
1670 ; SSE-LABEL: insert_reg_and_zero_v4i32:
1672 ; SSE-NEXT: movd %edi, %xmm0
1675 ; AVX-LABEL: insert_reg_and_zero_v4i32:
1677 ; AVX-NEXT: vmovd %edi, %xmm0
1679 %v = insertelement <4 x i32> undef, i32 %a, i32 0
1680 %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1681 ret <4 x i32> %shuffle
; Loads an i32 from %ptr, inserts it into lane 0 of an undef vector, then
; shuffles with zeroinitializer using mask <0,5,6,7> so lanes 1-3 become zero.
; All runs expect the load to fold into a single movd/vmovd from memory.
1684 define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) {
1685 ; SSE-LABEL: insert_mem_and_zero_v4i32:
1687 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1690 ; AVX-LABEL: insert_mem_and_zero_v4i32:
1692 ; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1694 %a = load i32, i32* %ptr
1695 %v = insertelement <4 x i32> undef, i32 %a, i32 0
1696 %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1697 ret <4 x i32> %shuffle
; Inserts the scalar float %a into lane 0 of an undef vector, then shuffles
; with zeroinitializer using mask <0,5,6,7> so lanes 1-3 become zero.
; Pre-SSE4.1 runs expect xorps + movss + movaps; SSE4.1 expects xorps +
; blendps; AVX expects vxorps + vmovss.
1700 define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
1701 ; SSE2-LABEL: insert_reg_and_zero_v4f32:
1703 ; SSE2-NEXT: xorps %xmm1, %xmm1
1704 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1705 ; SSE2-NEXT: movaps %xmm1, %xmm0
1708 ; SSE3-LABEL: insert_reg_and_zero_v4f32:
1710 ; SSE3-NEXT: xorps %xmm1, %xmm1
1711 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1712 ; SSE3-NEXT: movaps %xmm1, %xmm0
1715 ; SSSE3-LABEL: insert_reg_and_zero_v4f32:
1717 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1718 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1719 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1722 ; SSE41-LABEL: insert_reg_and_zero_v4f32:
1724 ; SSE41-NEXT: xorps %xmm1, %xmm1
1725 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1728 ; AVX-LABEL: insert_reg_and_zero_v4f32:
1730 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
1731 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1733 %v = insertelement <4 x float> undef, float %a, i32 0
1734 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1735 ret <4 x float> %shuffle
; Loads a float from %ptr, inserts it into lane 0 of an undef vector, then
; shuffles with zeroinitializer using mask <0,5,6,7> so lanes 1-3 become zero.
; All runs expect the load to fold into a single movss/vmovss from memory.
1738 define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
1739 ; SSE-LABEL: insert_mem_and_zero_v4f32:
1741 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1744 ; AVX-LABEL: insert_mem_and_zero_v4f32:
1746 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1748 %a = load float, float* %ptr
1749 %v = insertelement <4 x float> undef, float %a, i32 0
1750 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1751 ret <4 x float> %shuffle
; Replaces the low 64 bits of %b with the i64 %a: %a is cast to <2 x i32>,
; widened to four lanes, and shuffled with %b using mask <0,1,6,7>
; (lanes 0-1 from %a, lanes 2-3 are %b[2], %b[3]).
; Pre-SSE4.1 runs expect a GPR-to-XMM move + movsd; SSE4.1 and AVX1 expect a
; word blend; AVX2 expects a dword blend.
1754 define <4 x i32> @insert_reg_lo_v4i32(i64 %a, <4 x i32> %b) {
1755 ; SSE2-LABEL: insert_reg_lo_v4i32:
1757 ; SSE2-NEXT: movd %rdi, %xmm1
1758 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1761 ; SSE3-LABEL: insert_reg_lo_v4i32:
1763 ; SSE3-NEXT: movd %rdi, %xmm1
1764 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1767 ; SSSE3-LABEL: insert_reg_lo_v4i32:
1769 ; SSSE3-NEXT: movd %rdi, %xmm1
1770 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1773 ; SSE41-LABEL: insert_reg_lo_v4i32:
1775 ; SSE41-NEXT: movd %rdi, %xmm1
1776 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1779 ; AVX1-LABEL: insert_reg_lo_v4i32:
1781 ; AVX1-NEXT: vmovq %rdi, %xmm1
1782 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1785 ; AVX2-LABEL: insert_reg_lo_v4i32:
1787 ; AVX2-NEXT: vmovq %rdi, %xmm1
1788 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1790 %a.cast = bitcast i64 %a to <2 x i32>
1791 %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1792 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1793 ret <4 x i32> %shuffle
; Replaces the low 64 bits of %b with a <2 x i32> loaded from %ptr: the loaded
; value is widened to four lanes and shuffled with %b using mask <0,1,6,7>.
; Pre-SSE4.1 runs expect a single movlpd from memory; SSE4.1 and AVX1 expect a
; 64-bit load + word blend; AVX2 expects a 64-bit load + dword blend.
1796 define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
1797 ; SSE2-LABEL: insert_mem_lo_v4i32:
1799 ; SSE2-NEXT: movlpd (%rdi), %xmm0
1802 ; SSE3-LABEL: insert_mem_lo_v4i32:
1804 ; SSE3-NEXT: movlpd (%rdi), %xmm0
1807 ; SSSE3-LABEL: insert_mem_lo_v4i32:
1809 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
1812 ; SSE41-LABEL: insert_mem_lo_v4i32:
1814 ; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1815 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1818 ; AVX1-LABEL: insert_mem_lo_v4i32:
1820 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1821 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1824 ; AVX2-LABEL: insert_mem_lo_v4i32:
1826 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1827 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1829 %a = load <2 x i32>, <2 x i32>* %ptr
1830 %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1831 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1832 ret <4 x i32> %shuffle
; Places the i64 %a in the high 64 bits of the result: %a is cast to
; <2 x i32>, widened to four lanes, and shuffled with %b using mask <4,5,0,1>
; (lanes 0-1 are %b[0], %b[1], lanes 2-3 come from %a).
; All runs expect a GPR-to-XMM move followed by a low qword unpack.
1835 define <4 x i32> @insert_reg_hi_v4i32(i64 %a, <4 x i32> %b) {
1836 ; SSE-LABEL: insert_reg_hi_v4i32:
1838 ; SSE-NEXT: movd %rdi, %xmm1
1839 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1842 ; AVX-LABEL: insert_reg_hi_v4i32:
1844 ; AVX-NEXT: vmovq %rdi, %xmm1
1845 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1847 %a.cast = bitcast i64 %a to <2 x i32>
1848 %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1849 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
1850 ret <4 x i32> %shuffle
; Places a <2 x i32> loaded from %ptr in the high 64 bits of the result: the
; loaded value is widened to four lanes and shuffled with %b using mask
; <4,5,0,1> (lanes 0-1 are %b[0], %b[1], lanes 2-3 come from memory).
; All runs expect a 64-bit load followed by a low qword unpack.
1853 define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
1854 ; SSE-LABEL: insert_mem_hi_v4i32:
1856 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1857 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1860 ; AVX-LABEL: insert_mem_hi_v4i32:
1862 ; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1863 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1865 %a = load <2 x i32>, <2 x i32>* %ptr
1866 %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1867 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
1868 ret <4 x i32> %shuffle
; Replaces the low 64 bits of %b with the double %a: %a is cast to
; <2 x float>, widened to four lanes, and shuffled with %b using mask
; <0,1,6,7> (lanes 0-1 from %a, lanes 2-3 are %b[2], %b[3]).
; The SSE runs expect movsd plus a register copy; AVX expects a single vmovsd.
1871 define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
1872 ; SSE-LABEL: insert_reg_lo_v4f32:
1874 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1875 ; SSE-NEXT: movapd %xmm1, %xmm0
1878 ; AVX-LABEL: insert_reg_lo_v4f32:
1880 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1882 %a.cast = bitcast double %a to <2 x float>
1883 %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1884 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1885 ret <4 x float> %shuffle
; Replaces the low 64 bits of %b with a <2 x float> loaded from %ptr: the
; loaded value is widened to four lanes and shuffled with %b using mask
; <0,1,6,7>.
; All runs expect the load to fold into a single movlpd/vmovlpd.
1888 define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
1889 ; SSE-LABEL: insert_mem_lo_v4f32:
1891 ; SSE-NEXT: movlpd (%rdi), %xmm0
1894 ; AVX-LABEL: insert_mem_lo_v4f32:
1896 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
1898 %a = load <2 x float>, <2 x float>* %ptr
1899 %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1900 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1901 ret <4 x float> %shuffle
; Places the double %a in the high 64 bits of the result: %a is cast to
; <2 x float>, widened to four lanes, and shuffled with %b using mask
; <4,5,0,1> (lanes 0-1 are %b[0], %b[1], lanes 2-3 come from %a).
; The SSE runs expect unpcklpd plus a register copy; AVX expects a single
; vunpcklpd.
1904 define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) {
1905 ; SSE-LABEL: insert_reg_hi_v4f32:
1907 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1908 ; SSE-NEXT: movapd %xmm1, %xmm0
1911 ; AVX-LABEL: insert_reg_hi_v4f32:
1913 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1915 %a.cast = bitcast double %a to <2 x float>
1916 %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1917 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
1918 ret <4 x float> %shuffle
; Places a <2 x float> loaded from %ptr in the high 64 bits of the result: the
; loaded value is widened to four lanes and shuffled with %b using mask
; <4,5,0,1> (lanes 0-1 are %b[0], %b[1], lanes 2-3 come from memory).
; All runs expect the load to fold into a single movhpd/vmovhpd.
1921 define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
1922 ; SSE-LABEL: insert_mem_hi_v4f32:
1924 ; SSE-NEXT: movhpd (%rdi), %xmm0
1927 ; AVX-LABEL: insert_mem_hi_v4f32:
1929 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1931 %a = load <2 x float>, <2 x float>* %ptr
1932 %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1933 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
1934 ret <4 x float> %shuffle
; Loads a <4 x float> from %ptr and reverses its lanes with mask <3,2,1,0>.
; The SSE runs expect a separate movaps load followed by shufps; AVX expects
; the load to fold into a single vpermilps with a memory operand.
1937 define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
1938 ; SSE-LABEL: shuffle_mem_v4f32_3210:
1940 ; SSE-NEXT: movaps (%rdi), %xmm0
1941 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1944 ; AVX-LABEL: shuffle_mem_v4f32_3210:
1946 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
1948 %a = load <4 x float>, <4 x float>* %ptr
1949 %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1950 ret <4 x float> %shuffle
; Loads an i32 from %ptr, inserts it into lane 0 of a zero vector, then splats
; lane 0 to all four lanes (the shuffle mask is zeroinitializer, i.e. <0,0,0,0>).
; The SSE runs expect a movd load followed by pshufd; AVX expects a single
; vbroadcastss from memory.
1953 define <4 x i32> @insert_dup_mem_v4i32(i32* %ptr) {
1954 ; SSE-LABEL: insert_dup_mem_v4i32:
1956 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1957 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1960 ; AVX-LABEL: insert_dup_mem_v4i32:
1962 ; AVX-NEXT: vbroadcastss (%rdi), %xmm0
1964 %tmp = load i32, i32* %ptr, align 4
1965 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
1966 %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
1971 ; Shuffle to logical bit shifts
; Shuffles %a with zeroinitializer using mask <4,0,4,undef>: lanes 0 and 2 are
; zero, lane 1 is %a[0], lane 3 is undef.
; All runs expect a 64-bit logical shift left by 32 bits (psllq/vpsllq).
1974 define <4 x i32> @shuffle_v4i32_z0zX(<4 x i32> %a) {
1975 ; SSE-LABEL: shuffle_v4i32_z0zX:
1977 ; SSE-NEXT: psllq $32, %xmm0
1980 ; AVX-LABEL: shuffle_v4i32_z0zX:
1982 ; AVX-NEXT: vpsllq $32, %xmm0, %xmm0
1984 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 undef>
1985 ret <4 x i32> %shuffle
; Shuffles %a with zeroinitializer using mask <1,4,3,4>: lanes 0 and 2 are
; %a[1] and %a[3], lanes 1 and 3 are zero.
; All runs expect a 64-bit logical shift right by 32 bits (psrlq/vpsrlq).
1988 define <4 x i32> @shuffle_v4i32_1z3z(<4 x i32> %a) {
1989 ; SSE-LABEL: shuffle_v4i32_1z3z:
1991 ; SSE-NEXT: psrlq $32, %xmm0
1994 ; AVX-LABEL: shuffle_v4i32_1z3z:
1996 ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
1998 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
1999 ret <4 x i32> %shuffle