1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; Module-level target settings. The triple below is the same one that every
; RUN line passes to llc via -mtriple.
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10 target triple = "x86_64-unknown-unknown"
; Single-input <4 x i32> shuffle with mask <0,0,0,1>: every index is below 4,
; so only %a is read and %b is unused. The checks expect one pshufd (vpshufd
; under the AVX prefix) with the same lane order.
12 define <4 x i32> @shuffle_v4i32_0001(<4 x i32> %a, <4 x i32> %b) {
13 ; SSE-LABEL: shuffle_v4i32_0001:
15 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
18 ; AVX-LABEL: shuffle_v4i32_0001:
20 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
22 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
23 ret <4 x i32> %shuffle
; Single-input <4 x i32> shuffle with mask <0,0,2,0>; %b is unused. The checks
; expect one pshufd (vpshufd under the AVX prefix) with the same lane order.
25 define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) {
26 ; SSE-LABEL: shuffle_v4i32_0020:
28 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
31 ; AVX-LABEL: shuffle_v4i32_0020:
33 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
35 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
36 ret <4 x i32> %shuffle
; Single-input <4 x i32> shuffle with mask <0,1,1,2>; %b is unused. The checks
; expect one pshufd (vpshufd under the AVX prefix) with the same lane order.
38 define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) {
39 ; SSE-LABEL: shuffle_v4i32_0112:
41 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
44 ; AVX-LABEL: shuffle_v4i32_0112:
46 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
48 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
49 ret <4 x i32> %shuffle
; Single-input <4 x i32> shuffle with mask <0,3,0,0>; %b is unused. The checks
; expect one pshufd (vpshufd under the AVX prefix) with the same lane order.
51 define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) {
52 ; SSE-LABEL: shuffle_v4i32_0300:
54 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
57 ; AVX-LABEL: shuffle_v4i32_0300:
59 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
61 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
62 ret <4 x i32> %shuffle
; Single-input <4 x i32> shuffle with mask <1,0,0,0>; %b is unused. The checks
; expect one pshufd (vpshufd under the AVX prefix) with the same lane order.
64 define <4 x i32> @shuffle_v4i32_1000(<4 x i32> %a, <4 x i32> %b) {
65 ; SSE-LABEL: shuffle_v4i32_1000:
67 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
70 ; AVX-LABEL: shuffle_v4i32_1000:
72 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
74 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
75 ret <4 x i32> %shuffle
; Single-input <4 x i32> shuffle with mask <2,2,0,0>; %b is unused. The checks
; expect one pshufd (vpshufd under the AVX prefix) with the same lane order.
77 define <4 x i32> @shuffle_v4i32_2200(<4 x i32> %a, <4 x i32> %b) {
78 ; SSE-LABEL: shuffle_v4i32_2200:
80 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
83 ; AVX-LABEL: shuffle_v4i32_2200:
85 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
87 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
88 ret <4 x i32> %shuffle
; Single-input <4 x i32> shuffle with mask <3,3,3,0>; %b is unused. The checks
; expect one pshufd (vpshufd under the AVX prefix) with the same lane order.
90 define <4 x i32> @shuffle_v4i32_3330(<4 x i32> %a, <4 x i32> %b) {
91 ; SSE-LABEL: shuffle_v4i32_3330:
93 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
96 ; AVX-LABEL: shuffle_v4i32_3330:
98 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
100 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
101 ret <4 x i32> %shuffle
; Single-input <4 x i32> lane reversal (mask <3,2,1,0>); %b is unused. The
; checks expect one pshufd (vpshufd under the AVX prefix) with the same order.
103 define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) {
104 ; SSE-LABEL: shuffle_v4i32_3210:
106 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
109 ; AVX-LABEL: shuffle_v4i32_3210:
111 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
113 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
114 ret <4 x i32> %shuffle
; Single-input <4 x i32> shuffle with mask <2,1,2,1>; %b is unused. The checks
; expect one pshufd (vpshufd under the AVX prefix) with the same lane order.
117 define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) {
118 ; SSE-LABEL: shuffle_v4i32_2121:
120 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
123 ; AVX-LABEL: shuffle_v4i32_2121:
125 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
127 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
128 ret <4 x i32> %shuffle
; Single-input <4 x float> shuffle with mask <0,0,0,1>; %b is unused. The
; checks expect one shufps of xmm0 with itself for the SSE runs and one
; vpermilps for the AVX runs, both with the same lane order.
131 define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
132 ; SSE-LABEL: shuffle_v4f32_0001:
134 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
137 ; AVX-LABEL: shuffle_v4f32_0001:
139 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1]
141 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
142 ret <4 x float> %shuffle
; Single-input <4 x float> shuffle with mask <0,0,2,0>; %b is unused. The
; checks expect one shufps for the SSE runs and one vpermilps for the AVX runs.
144 define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
145 ; SSE-LABEL: shuffle_v4f32_0020:
147 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
150 ; AVX-LABEL: shuffle_v4f32_0020:
152 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0]
154 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
155 ret <4 x float> %shuffle
; Single-input <4 x float> shuffle with mask <0,3,0,0>; %b is unused. The
; checks expect one shufps for the SSE runs and one vpermilps for the AVX runs.
157 define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
158 ; SSE-LABEL: shuffle_v4f32_0300:
160 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
163 ; AVX-LABEL: shuffle_v4f32_0300:
165 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0]
167 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
168 ret <4 x float> %shuffle
; Single-input <4 x float> shuffle with mask <1,0,0,0>; %b is unused. The
; checks expect one shufps for the SSE runs and one vpermilps for the AVX runs.
170 define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
171 ; SSE-LABEL: shuffle_v4f32_1000:
173 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
176 ; AVX-LABEL: shuffle_v4f32_1000:
178 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
180 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
181 ret <4 x float> %shuffle
; Single-input <4 x float> shuffle with mask <2,2,0,0>; %b is unused. The
; checks expect one shufps for the SSE runs and one vpermilps for the AVX runs.
183 define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
184 ; SSE-LABEL: shuffle_v4f32_2200:
186 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
189 ; AVX-LABEL: shuffle_v4f32_2200:
191 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0]
193 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
194 ret <4 x float> %shuffle
; Single-input <4 x float> shuffle with mask <3,3,3,0>; %b is unused. The
; checks expect one shufps for the SSE runs and one vpermilps for the AVX runs.
196 define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
197 ; SSE-LABEL: shuffle_v4f32_3330:
199 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
202 ; AVX-LABEL: shuffle_v4f32_3330:
204 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0]
206 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
207 ret <4 x float> %shuffle
; Single-input <4 x float> lane reversal (mask <3,2,1,0>); %b is unused. The
; checks expect one shufps for the SSE runs and one vpermilps for the AVX runs.
209 define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
210 ; SSE-LABEL: shuffle_v4f32_3210:
212 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
215 ; AVX-LABEL: shuffle_v4f32_3210:
217 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
219 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
220 ret <4 x float> %shuffle
; Single-input <4 x float> shuffle with mask <0,0,1,1>, duplicating each of
; the two low lanes of %a; %b is unused. The checks expect an unpack-low
; (unpcklps / vunpcklps) instead of a general shuffle.
222 define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
223 ; SSE-LABEL: shuffle_v4f32_0011:
225 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
228 ; AVX-LABEL: shuffle_v4f32_0011:
230 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
232 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
233 ret <4 x float> %shuffle
; Single-input <4 x float> shuffle with mask <2,2,3,3>, duplicating each of
; the two high lanes of %a; %b is unused. The checks expect an unpack-high
; (unpckhps / vunpckhps) instead of a general shuffle.
235 define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
236 ; SSE-LABEL: shuffle_v4f32_2233:
238 ; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
241 ; AVX-LABEL: shuffle_v4f32_2233:
243 ; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
245 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
246 ret <4 x float> %shuffle
; Single-input <4 x float> shuffle with mask <0,0,2,2> (duplicate the even
; lanes of %a); %b is unused. The plain SSE2 run is checked for a shufps,
; while the SSE3, SSSE3 and SSE4.1 runs are checked for movsldup and the AVX
; runs for vmovsldup.
248 define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
249 ; SSE2-LABEL: shuffle_v4f32_0022:
251 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
254 ; SSE3-LABEL: shuffle_v4f32_0022:
256 ; SSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
259 ; SSSE3-LABEL: shuffle_v4f32_0022:
261 ; SSSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
264 ; SSE41-LABEL: shuffle_v4f32_0022:
266 ; SSE41-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
269 ; AVX-LABEL: shuffle_v4f32_0022:
271 ; AVX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
273 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
274 ret <4 x float> %shuffle
; Single-input <4 x float> shuffle with mask <1,1,3,3> (duplicate the odd
; lanes of %a); %b is unused. The plain SSE2 run is checked for a shufps,
; while the SSE3, SSSE3 and SSE4.1 runs are checked for movshdup and the AVX
; runs for vmovshdup.
276 define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
277 ; SSE2-LABEL: shuffle_v4f32_1133:
279 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
282 ; SSE3-LABEL: shuffle_v4f32_1133:
284 ; SSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
287 ; SSSE3-LABEL: shuffle_v4f32_1133:
289 ; SSSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
292 ; SSE41-LABEL: shuffle_v4f32_1133:
294 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
297 ; AVX-LABEL: shuffle_v4f32_1133:
299 ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
301 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
302 ret <4 x float> %shuffle
; Two-input <4 x i32> shuffle with mask <0,1,2,4>: lanes 0-2 come from %a and
; lane 3 is %b[0] (index 4 is the first lane of the second operand).
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs use two shufps;
;   - SSE4.1 and AVX1 runs use a pshufd of %b followed by a pblendw;
;   - the AVX2 run uses vpbroadcastd of %b followed by a vpblendd.
305 define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) {
306 ; SSE2-LABEL: shuffle_v4i32_0124:
308 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
309 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
312 ; SSE3-LABEL: shuffle_v4i32_0124:
314 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
315 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
318 ; SSSE3-LABEL: shuffle_v4i32_0124:
320 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
321 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
324 ; SSE41-LABEL: shuffle_v4i32_0124:
326 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
327 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
330 ; AVX1-LABEL: shuffle_v4i32_0124:
332 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
333 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
336 ; AVX2-LABEL: shuffle_v4i32_0124:
338 ; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
339 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
341 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
342 ret <4 x i32> %shuffle
; Two-input <4 x i32> shuffle with mask <0,1,4,2>: result is
; a[0], a[1], b[0], a[2].
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs use two shufps;
;   - SSE4.1 and AVX1 runs shuffle each input with pshufd, then pblendw;
;   - the AVX2 run uses vpbroadcastq on %b, vpshufd on %a, then vpblendd.
344 define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
345 ; SSE2-LABEL: shuffle_v4i32_0142:
347 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
348 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
351 ; SSE3-LABEL: shuffle_v4i32_0142:
353 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
354 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
357 ; SSSE3-LABEL: shuffle_v4i32_0142:
359 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
360 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
363 ; SSE41-LABEL: shuffle_v4i32_0142:
365 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
366 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
367 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
370 ; AVX1-LABEL: shuffle_v4i32_0142:
372 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
373 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
374 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
377 ; AVX2-LABEL: shuffle_v4i32_0142:
379 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
380 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
381 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
383 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
384 ret <4 x i32> %shuffle
; Two-input <4 x i32> shuffle with mask <0,4,1,2>: result is
; a[0], b[0], a[1], a[2].
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs build the result in xmm1 with two shufps and
;     then copy it to xmm0 with movaps;
;   - SSE4.1 and AVX1 runs shuffle each input with pshufd, then pblendw;
;   - the AVX2 run uses vpbroadcastd on %b, vpshufd on %a, then vpblendd.
386 define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) {
387 ; SSE2-LABEL: shuffle_v4i32_0412:
389 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
390 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
391 ; SSE2-NEXT: movaps %xmm1, %xmm0
394 ; SSE3-LABEL: shuffle_v4i32_0412:
396 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
397 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
398 ; SSE3-NEXT: movaps %xmm1, %xmm0
401 ; SSSE3-LABEL: shuffle_v4i32_0412:
403 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
404 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
405 ; SSSE3-NEXT: movaps %xmm1, %xmm0
408 ; SSE41-LABEL: shuffle_v4i32_0412:
410 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
411 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
412 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
415 ; AVX1-LABEL: shuffle_v4i32_0412:
417 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
418 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
419 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
422 ; AVX2-LABEL: shuffle_v4i32_0412:
424 ; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
425 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
426 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
428 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
429 ret <4 x i32> %shuffle
; Two-input <4 x i32> shuffle with mask <4,0,1,2>: result is
; b[0], a[0], a[1], a[2].
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs build the result in xmm1 with two shufps and
;     then copy it to xmm0 with movaps;
;   - SSE4.1 and AVX1 runs use a pshufd of %a and a pblendw that takes the low
;     lane directly from %b;
;   - the AVX2 run uses the same vpshufd followed by a vpblendd.
431 define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) {
432 ; SSE2-LABEL: shuffle_v4i32_4012:
434 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
435 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
436 ; SSE2-NEXT: movaps %xmm1, %xmm0
439 ; SSE3-LABEL: shuffle_v4i32_4012:
441 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
442 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
443 ; SSE3-NEXT: movaps %xmm1, %xmm0
446 ; SSSE3-LABEL: shuffle_v4i32_4012:
448 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
449 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
450 ; SSSE3-NEXT: movaps %xmm1, %xmm0
453 ; SSE41-LABEL: shuffle_v4i32_4012:
455 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
456 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
459 ; AVX1-LABEL: shuffle_v4i32_4012:
461 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
462 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
465 ; AVX2-LABEL: shuffle_v4i32_4012:
467 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
468 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
470 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
471 ret <4 x i32> %shuffle
; Two-input <4 x i32> shuffle with mask <0,1,4,5>: the low two lanes of %a
; followed by the low two lanes of %b. The checks expect a single 64-bit
; unpack-low (punpcklqdq / vpunpcklqdq).
473 define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
474 ; SSE-LABEL: shuffle_v4i32_0145:
476 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
479 ; AVX-LABEL: shuffle_v4i32_0145:
481 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
483 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
484 ret <4 x i32> %shuffle
; Two-input <4 x i32> shuffle with mask <0,4,5,1>: result is
; a[0], b[0], b[1], a[1].
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs interleave the low lanes with punpckldq, then
;     swap the top two lanes with pshufd [0,1,3,2];
;   - SSE4.1 and AVX1 runs shuffle each input with pshufd, then pblendw;
;   - the AVX2 run uses vpshufd on %b, vpbroadcastq on %a, then vpblendd.
486 define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
487 ; SSE2-LABEL: shuffle_v4i32_0451:
489 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
490 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
493 ; SSE3-LABEL: shuffle_v4i32_0451:
495 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
496 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
499 ; SSSE3-LABEL: shuffle_v4i32_0451:
501 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
502 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
505 ; SSE41-LABEL: shuffle_v4i32_0451:
507 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
508 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
509 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
512 ; AVX1-LABEL: shuffle_v4i32_0451:
514 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
515 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
516 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
519 ; AVX2-LABEL: shuffle_v4i32_0451:
521 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
522 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
523 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
525 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
526 ret <4 x i32> %shuffle
; Two-input <4 x i32> shuffle with mask <4,5,0,1>: the low two lanes of %b
; followed by the low two lanes of %a. The checks expect a punpcklqdq with the
; operands swapped; the SSE form writes xmm1 and then needs a movdqa into
; xmm0, while the AVX form writes xmm0 directly.
528 define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
529 ; SSE-LABEL: shuffle_v4i32_4501:
531 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
532 ; SSE-NEXT: movdqa %xmm1, %xmm0
535 ; AVX-LABEL: shuffle_v4i32_4501:
537 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
539 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
540 ret <4 x i32> %shuffle
; Two-input <4 x i32> shuffle with mask <4,0,1,5>: result is
; b[0], a[0], a[1], b[1].
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs interleave the low lanes with punpckldq, then
;     swap the bottom two lanes with pshufd [1,0,2,3];
;   - SSE4.1 and AVX1 runs shuffle each input with pshufd, then pblendw;
;   - the AVX2 run uses vpbroadcastq on %b, vpshufd on %a, then vpblendd.
542 define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) {
543 ; SSE2-LABEL: shuffle_v4i32_4015:
545 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
546 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
549 ; SSE3-LABEL: shuffle_v4i32_4015:
551 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
552 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
555 ; SSSE3-LABEL: shuffle_v4i32_4015:
557 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
558 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
561 ; SSE41-LABEL: shuffle_v4i32_4015:
563 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
564 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
565 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
568 ; AVX1-LABEL: shuffle_v4i32_4015:
570 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
571 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
572 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
575 ; AVX2-LABEL: shuffle_v4i32_4015:
577 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
578 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
579 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
581 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
582 ret <4 x i32> %shuffle
; Shuffle of (zeroinitializer, %a) with mask <4,1,2,3>: index 4 selects %a[0]
; and indices 1-3 select zero lanes, so the result is a[0], 0, 0, 0.
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs zero xmm1, movss the low lane in, then movaps;
;   - the SSE4.1 and AVX runs zero xmm1 and use a single blendps / vblendps.
585 define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
586 ; SSE2-LABEL: shuffle_v4f32_4zzz:
588 ; SSE2-NEXT: xorps %xmm1, %xmm1
589 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
590 ; SSE2-NEXT: movaps %xmm1, %xmm0
593 ; SSE3-LABEL: shuffle_v4f32_4zzz:
595 ; SSE3-NEXT: xorps %xmm1, %xmm1
596 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
597 ; SSE3-NEXT: movaps %xmm1, %xmm0
600 ; SSSE3-LABEL: shuffle_v4f32_4zzz:
602 ; SSSE3-NEXT: xorps %xmm1, %xmm1
603 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
604 ; SSSE3-NEXT: movaps %xmm1, %xmm0
607 ; SSE41-LABEL: shuffle_v4f32_4zzz:
609 ; SSE41-NEXT: xorps %xmm1, %xmm1
610 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
613 ; AVX-LABEL: shuffle_v4f32_4zzz:
615 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
616 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
618 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
619 ret <4 x float> %shuffle
; Shuffle of (zeroinitializer, %a) with mask <2,4,3,0>: indices 2, 3 and 0
; all read zero lanes and index 4 reads %a[0], so the result is 0, a[0], 0, 0.
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs zero xmm1 and use two shufps;
;   - the SSE4.1 and AVX runs use a single insertps / vinsertps that also
;     zeroes the other three lanes.
622 define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
623 ; SSE2-LABEL: shuffle_v4f32_z4zz:
625 ; SSE2-NEXT: xorps %xmm1, %xmm1
626 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
627 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
630 ; SSE3-LABEL: shuffle_v4f32_z4zz:
632 ; SSE3-NEXT: xorps %xmm1, %xmm1
633 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
634 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
637 ; SSSE3-LABEL: shuffle_v4f32_z4zz:
639 ; SSSE3-NEXT: xorps %xmm1, %xmm1
640 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
641 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
644 ; SSE41-LABEL: shuffle_v4f32_z4zz:
646 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
649 ; AVX-LABEL: shuffle_v4f32_z4zz:
651 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
653 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
654 ret <4 x float> %shuffle
; Shuffle of (zeroinitializer, %a) with mask <0,0,4,0>: result is
; 0, 0, a[0], 0.
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs zero xmm1, use two shufps, then movaps to xmm0;
;   - the SSE4.1 and AVX runs use a single insertps / vinsertps.
657 define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
658 ; SSE2-LABEL: shuffle_v4f32_zz4z:
660 ; SSE2-NEXT: xorps %xmm1, %xmm1
661 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
662 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
663 ; SSE2-NEXT: movaps %xmm1, %xmm0
666 ; SSE3-LABEL: shuffle_v4f32_zz4z:
668 ; SSE3-NEXT: xorps %xmm1, %xmm1
669 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
670 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
671 ; SSE3-NEXT: movaps %xmm1, %xmm0
674 ; SSSE3-LABEL: shuffle_v4f32_zz4z:
676 ; SSSE3-NEXT: xorps %xmm1, %xmm1
677 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
678 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
679 ; SSSE3-NEXT: movaps %xmm1, %xmm0
682 ; SSE41-LABEL: shuffle_v4f32_zz4z:
684 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
687 ; AVX-LABEL: shuffle_v4f32_zz4z:
689 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
691 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
692 ret <4 x float> %shuffle
; Shuffle of (zeroinitializer, %a) with mask <0,undef,undef,4>: lane 0 is
; zero, lanes 1-2 are undefined, and lane 3 is a[0].
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs zero xmm1, use one shufps, then movaps to xmm0;
;   - the SSE4.1 and AVX runs use a single insertps / vinsertps, which fills
;     the undefined lanes with zero as well.
695 define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) {
696 ; SSE2-LABEL: shuffle_v4f32_zuu4:
698 ; SSE2-NEXT: xorps %xmm1, %xmm1
699 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
700 ; SSE2-NEXT: movaps %xmm1, %xmm0
703 ; SSE3-LABEL: shuffle_v4f32_zuu4:
705 ; SSE3-NEXT: xorps %xmm1, %xmm1
706 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
707 ; SSE3-NEXT: movaps %xmm1, %xmm0
710 ; SSSE3-LABEL: shuffle_v4f32_zuu4:
712 ; SSSE3-NEXT: xorps %xmm1, %xmm1
713 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
714 ; SSSE3-NEXT: movaps %xmm1, %xmm0
717 ; SSE41-LABEL: shuffle_v4f32_zuu4:
719 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
722 ; AVX-LABEL: shuffle_v4f32_zuu4:
724 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
726 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
727 ret <4 x float> %shuffle
; Shuffle of (zeroinitializer, %a) with mask <0,1,2,7>: lanes 0-2 are zero
; and lane 3 is a[3], i.e. every lane stays in place.
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs zero xmm1, use two shufps, then movaps to xmm0;
;   - the SSE4.1 and AVX runs zero xmm1 and use a single blendps / vblendps.
730 define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
731 ; SSE2-LABEL: shuffle_v4f32_zzz7:
733 ; SSE2-NEXT: xorps %xmm1, %xmm1
734 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
735 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
736 ; SSE2-NEXT: movaps %xmm1, %xmm0
739 ; SSE3-LABEL: shuffle_v4f32_zzz7:
741 ; SSE3-NEXT: xorps %xmm1, %xmm1
742 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
743 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
744 ; SSE3-NEXT: movaps %xmm1, %xmm0
747 ; SSSE3-LABEL: shuffle_v4f32_zzz7:
749 ; SSSE3-NEXT: xorps %xmm1, %xmm1
750 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
751 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
752 ; SSSE3-NEXT: movaps %xmm1, %xmm0
755 ; SSE41-LABEL: shuffle_v4f32_zzz7:
757 ; SSE41-NEXT: xorps %xmm1, %xmm1
758 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
761 ; AVX-LABEL: shuffle_v4f32_zzz7:
763 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
764 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
766 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
767 ret <4 x float> %shuffle
; Shuffle of (zeroinitializer, %a) with mask <0,6,2,3>: result is
; 0, a[2], 0, 0 (the selected lane of %a moves from position 2 to position 1).
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs zero xmm1 and use two shufps;
;   - the SSE4.1 and AVX runs use a single insertps / vinsertps.
770 define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
771 ; SSE2-LABEL: shuffle_v4f32_z6zz:
773 ; SSE2-NEXT: xorps %xmm1, %xmm1
774 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
775 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
778 ; SSE3-LABEL: shuffle_v4f32_z6zz:
780 ; SSE3-NEXT: xorps %xmm1, %xmm1
781 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
782 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
785 ; SSSE3-LABEL: shuffle_v4f32_z6zz:
787 ; SSSE3-NEXT: xorps %xmm1, %xmm1
788 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
789 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
792 ; SSE41-LABEL: shuffle_v4f32_z6zz:
794 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
797 ; AVX-LABEL: shuffle_v4f32_z6zz:
799 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
801 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
802 ret <4 x float> %shuffle
; Shuffle of (%a, zeroinitializer) with mask <0,4,2,3>: result is
; a[0], 0, a[2], a[3], i.e. lane 1 of %a is replaced with zero.
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs zero xmm1, use two shufps, then movaps to xmm0;
;   - the SSE4.1 and AVX runs zero xmm1 and use a single blendps / vblendps.
805 define <4 x float> @shuffle_v4f32_0z23(<4 x float> %a) {
806 ; SSE2-LABEL: shuffle_v4f32_0z23:
808 ; SSE2-NEXT: xorps %xmm1, %xmm1
809 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
810 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
811 ; SSE2-NEXT: movaps %xmm1, %xmm0
814 ; SSE3-LABEL: shuffle_v4f32_0z23:
816 ; SSE3-NEXT: xorps %xmm1, %xmm1
817 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
818 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
819 ; SSE3-NEXT: movaps %xmm1, %xmm0
822 ; SSSE3-LABEL: shuffle_v4f32_0z23:
824 ; SSSE3-NEXT: xorps %xmm1, %xmm1
825 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
826 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
827 ; SSSE3-NEXT: movaps %xmm1, %xmm0
830 ; SSE41-LABEL: shuffle_v4f32_0z23:
832 ; SSE41-NEXT: xorps %xmm1, %xmm1
833 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
836 ; AVX-LABEL: shuffle_v4f32_0z23:
838 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
839 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
841 %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
842 ret <4 x float> %shuffle
; Shuffle of (%a, zeroinitializer) with mask <0,1,4,3>: result is
; a[0], a[1], 0, a[3], i.e. lane 2 of %a is replaced with zero.
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs zero xmm1 and use two shufps;
;   - the SSE4.1 and AVX runs zero xmm1 and use a single blendps / vblendps.
845 define <4 x float> @shuffle_v4f32_01z3(<4 x float> %a) {
846 ; SSE2-LABEL: shuffle_v4f32_01z3:
848 ; SSE2-NEXT: xorps %xmm1, %xmm1
849 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
850 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
853 ; SSE3-LABEL: shuffle_v4f32_01z3:
855 ; SSE3-NEXT: xorps %xmm1, %xmm1
856 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
857 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
860 ; SSSE3-LABEL: shuffle_v4f32_01z3:
862 ; SSSE3-NEXT: xorps %xmm1, %xmm1
863 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
864 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
867 ; SSE41-LABEL: shuffle_v4f32_01z3:
869 ; SSE41-NEXT: xorps %xmm1, %xmm1
870 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
873 ; AVX-LABEL: shuffle_v4f32_01z3:
875 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
876 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
878 %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
879 ret <4 x float> %shuffle
; Shuffle of (%a, zeroinitializer) with mask <0,1,2,7>: result is
; a[0], a[1], a[2], 0, i.e. lane 3 of %a is replaced with zero.
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs zero xmm1 and use two shufps;
;   - the SSE4.1 and AVX runs zero xmm1 and use a single blendps / vblendps.
882 define <4 x float> @shuffle_v4f32_012z(<4 x float> %a) {
883 ; SSE2-LABEL: shuffle_v4f32_012z:
885 ; SSE2-NEXT: xorps %xmm1, %xmm1
886 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
887 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
890 ; SSE3-LABEL: shuffle_v4f32_012z:
892 ; SSE3-NEXT: xorps %xmm1, %xmm1
893 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
894 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
897 ; SSSE3-LABEL: shuffle_v4f32_012z:
899 ; SSSE3-NEXT: xorps %xmm1, %xmm1
900 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
901 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
904 ; SSE41-LABEL: shuffle_v4f32_012z:
906 ; SSE41-NEXT: xorps %xmm1, %xmm1
907 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
910 ; AVX-LABEL: shuffle_v4f32_012z:
912 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
913 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
915 %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
916 ret <4 x float> %shuffle
; Shuffle of (%a, zeroinitializer) with mask <0,4,4,3>: result is
; a[0], 0, 0, a[3], i.e. the two middle lanes of %a are replaced with zero.
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs zero xmm1 and use a two-input shufps followed by
;     a single-input shufps that reorders the lanes;
;   - the SSE4.1 and AVX runs zero xmm1 and use a single blendps / vblendps.
919 define <4 x float> @shuffle_v4f32_0zz3(<4 x float> %a) {
920 ; SSE2-LABEL: shuffle_v4f32_0zz3:
922 ; SSE2-NEXT: xorps %xmm1, %xmm1
923 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
924 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
927 ; SSE3-LABEL: shuffle_v4f32_0zz3:
929 ; SSE3-NEXT: xorps %xmm1, %xmm1
930 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
931 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
934 ; SSSE3-LABEL: shuffle_v4f32_0zz3:
936 ; SSSE3-NEXT: xorps %xmm1, %xmm1
937 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
938 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
941 ; SSE41-LABEL: shuffle_v4f32_0zz3:
943 ; SSE41-NEXT: xorps %xmm1, %xmm1
944 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
947 ; AVX-LABEL: shuffle_v4f32_0zz3:
949 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
950 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
952 %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
953 ret <4 x float> %shuffle
; Shuffle of %v with a second operand that is not zeroinitializer but
; <0.0, undef, undef, undef>. The mask <0,4,2,4> only ever reads that
; operand's lane 0 (the one defined lane), so the result is v[0], 0, v[2], 0.
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs zero xmm1 and use a two-input shufps followed by
;     a single-input shufps that reorders the lanes;
;   - the SSE4.1 and AVX runs zero xmm1 and use a single blendps / vblendps.
956 define <4 x float> @shuffle_v4f32_0z2z(<4 x float> %v) {
957 ; SSE2-LABEL: shuffle_v4f32_0z2z:
959 ; SSE2-NEXT: xorps %xmm1, %xmm1
960 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
961 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
964 ; SSE3-LABEL: shuffle_v4f32_0z2z:
966 ; SSE3-NEXT: xorps %xmm1, %xmm1
967 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
968 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
971 ; SSSE3-LABEL: shuffle_v4f32_0z2z:
973 ; SSSE3-NEXT: xorps %xmm1, %xmm1
974 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
975 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
978 ; SSE41-LABEL: shuffle_v4f32_0z2z:
980 ; SSE41-NEXT: xorps %xmm1, %xmm1
981 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
984 ; AVX-LABEL: shuffle_v4f32_0z2z:
986 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
987 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
989 %shuffle = shufflevector <4 x float> %v, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
990 ret <4 x float> %shuffle
; Two-input <4 x float> shuffle with mask <undef,0,5,1>: lane 0 is undefined
; and lanes 1-3 are a[0], b[1], a[1]. The checks expect a single unpack-low of
; (%b, %a), which produces b[0], a[0], b[1], a[1] and so fills the undefined
; lane with b[0]. The SSE form writes xmm1 and needs a movaps into xmm0; the
; AVX form writes xmm0 directly.
993 define <4 x float> @shuffle_v4f32_u051(<4 x float> %a, <4 x float> %b) {
994 ; SSE-LABEL: shuffle_v4f32_u051:
996 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
997 ; SSE-NEXT: movaps %xmm1, %xmm0
1000 ; AVX-LABEL: shuffle_v4f32_u051:
1002 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1004 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 undef, i32 0, i32 5, i32 1>
1005 ret <4 x float> %shuffle
; Chain of two shuffles whose combined result is a[0], 0, 0, b[0]:
;   1. (%b, zeroinitializer) with mask <undef,5,6,0> gives undef, 0, 0, b[0];
;   2. (%a, step 1) with mask <0,5,6,7> replaces the undefined lane 0 with
;      a[0] and keeps lanes 1-3 of step 1.
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs zero xmm2, use two shufps, a movss and a movaps;
;   - the SSE4.1 and AVX runs use an insertps on %b followed by a blendps.
1008 define <4 x float> @shuffle_v4f32_0zz4(<4 x float> %a, <4 x float> %b) {
1009 ; SSE2-LABEL: shuffle_v4f32_0zz4:
1011 ; SSE2-NEXT: xorps %xmm2, %xmm2
1012 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0]
1013 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
1014 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
1015 ; SSE2-NEXT: movaps %xmm2, %xmm0
1018 ; SSE3-LABEL: shuffle_v4f32_0zz4:
1020 ; SSE3-NEXT: xorps %xmm2, %xmm2
1021 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0]
1022 ; SSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
1023 ; SSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
1024 ; SSE3-NEXT: movaps %xmm2, %xmm0
1027 ; SSSE3-LABEL: shuffle_v4f32_0zz4:
1029 ; SSSE3-NEXT: xorps %xmm2, %xmm2
1030 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0]
1031 ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
1032 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
1033 ; SSSE3-NEXT: movaps %xmm2, %xmm0
1036 ; SSE41-LABEL: shuffle_v4f32_0zz4:
1038 ; SSE41-NEXT: insertps {{.*#+}} xmm1 = zero,zero,zero,xmm1[0]
1039 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1042 ; AVX-LABEL: shuffle_v4f32_0zz4:
1044 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = zero,zero,zero,xmm1[0]
1045 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1047 %shuffle = shufflevector <4 x float> %b, <4 x float> zeroinitializer, <4 x i32> <i32 undef, i32 5, i32 6, i32 0>
1048 %shuffle1 = shufflevector <4 x float> %a, <4 x float> %shuffle, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1049 ret <4 x float> %shuffle1
; Chain of two shuffles whose combined result is a[0], 0, 0, b[2]:
;   1. (%a, %b) with mask <0,undef,undef,6> gives a[0], undef, undef, b[2];
;   2. (zeroinitializer, step 1) with mask <4,1,2,7> keeps lanes 0 and 3 of
;      step 1 and fills lanes 1-2 with zero.
; Expected lowering per the checks below:
;   - SSE2, SSE3, SSSE3 runs use three shufps around an xorps, then a movaps;
;   - the SSE4.1 and AVX runs use an insertps that already yields
;     a[0], zero, zero, b[2], followed by an xorps and a blendps.
1052 define <4 x float> @shuffle_v4f32_0zz6(<4 x float> %a, <4 x float> %b) {
1053 ; SSE2-LABEL: shuffle_v4f32_0zz6:
1055 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
1056 ; SSE2-NEXT: xorps %xmm1, %xmm1
1057 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3]
1058 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0,1,3]
1059 ; SSE2-NEXT: movaps %xmm1, %xmm0
1062 ; SSE3-LABEL: shuffle_v4f32_0zz6:
1064 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
1065 ; SSE3-NEXT: xorps %xmm1, %xmm1
1066 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3]
1067 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0,1,3]
1068 ; SSE3-NEXT: movaps %xmm1, %xmm0
1071 ; SSSE3-LABEL: shuffle_v4f32_0zz6:
1073 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
1074 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1075 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3]
1076 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0,1,3]
1077 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1080 ; SSE41-LABEL: shuffle_v4f32_0zz6:
1082 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2]
1083 ; SSE41-NEXT: xorps %xmm1, %xmm1
1084 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
1087 ; AVX-LABEL: shuffle_v4f32_0zz6:
1089 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2]
1090 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
1091 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
1093 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 6>
1094 %shuffle1 = shufflevector <4 x float> zeroinitializer, <4 x float> %shuffle, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
1095 ret <4 x float> %shuffle1
; Two-step shuffle producing [a[0], 0, a[2], b[0]]. The first shuffle builds
; [a[0], undef, a[2], b[0]]; the second replaces lane 1 with a zero element.
1098 define <4 x float> @shuffle_v4f32_0z24(<4 x float> %a, <4 x float> %b) {
1099 ; SSE2-LABEL: shuffle_v4f32_0z24:
1101 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
1102 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
1103 ; SSE2-NEXT: xorps %xmm1, %xmm1
1104 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1105 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1106 ; SSE2-NEXT: movaps %xmm1, %xmm0
1109 ; SSE3-LABEL: shuffle_v4f32_0z24:
1111 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
1112 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
1113 ; SSE3-NEXT: xorps %xmm1, %xmm1
1114 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1115 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1116 ; SSE3-NEXT: movaps %xmm1, %xmm0
1119 ; SSSE3-LABEL: shuffle_v4f32_0z24:
1121 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
1122 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
1123 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1124 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1125 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1126 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1129 ; SSE41-LABEL: shuffle_v4f32_0z24:
1131 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
1132 ; SSE41-NEXT: xorps %xmm1, %xmm1
1133 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
1136 ; AVX-LABEL: shuffle_v4f32_0z24:
1138 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
1139 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
1140 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
1142 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 undef, i32 2, i32 4>
1143 %shuffle1 = shufflevector <4 x float> zeroinitializer, <4 x float> %shuffle, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
1144 ret <4 x float> %shuffle1
; Keeps a[0] in lane 0 and zeroes lanes 1-3: result is [a[0], 0, 0, 0].
; The zero vector is the first shuffle operand, so %a is addressed as index 4.
1147 define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) {
1148 ; SSE2-LABEL: shuffle_v4i32_4zzz:
1150 ; SSE2-NEXT: xorps %xmm1, %xmm1
1151 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1152 ; SSE2-NEXT: movaps %xmm1, %xmm0
1155 ; SSE3-LABEL: shuffle_v4i32_4zzz:
1157 ; SSE3-NEXT: xorps %xmm1, %xmm1
1158 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1159 ; SSE3-NEXT: movaps %xmm1, %xmm0
1162 ; SSSE3-LABEL: shuffle_v4i32_4zzz:
1164 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1165 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1166 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1169 ; SSE41-LABEL: shuffle_v4i32_4zzz:
1171 ; SSE41-NEXT: pxor %xmm1, %xmm1
1172 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1175 ; AVX-LABEL: shuffle_v4i32_4zzz:
1177 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1178 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1180 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1181 ret <4 x i32> %shuffle
; Moves a[0] into lane 1 and zeroes the other lanes: result is [0, a[0], 0, 0].
; Mask indices 0-3 select from the zero vector; index 4 selects a[0].
1184 define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) {
1185 ; SSE2-LABEL: shuffle_v4i32_z4zz:
1187 ; SSE2-NEXT: xorps %xmm1, %xmm1
1188 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1189 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1192 ; SSE3-LABEL: shuffle_v4i32_z4zz:
1194 ; SSE3-NEXT: xorps %xmm1, %xmm1
1195 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1196 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1199 ; SSSE3-LABEL: shuffle_v4i32_z4zz:
1201 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1202 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1203 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1206 ; SSE41-LABEL: shuffle_v4i32_z4zz:
1208 ; SSE41-NEXT: pxor %xmm1, %xmm1
1209 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1210 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
1213 ; AVX-LABEL: shuffle_v4i32_z4zz:
1215 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1216 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1217 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
1219 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
1220 ret <4 x i32> %shuffle
; Moves a[0] into lane 2 and zeroes the other lanes: result is [0, 0, a[0], 0].
1223 define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) {
1224 ; SSE2-LABEL: shuffle_v4i32_zz4z:
1226 ; SSE2-NEXT: xorps %xmm1, %xmm1
1227 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1228 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1231 ; SSE3-LABEL: shuffle_v4i32_zz4z:
1233 ; SSE3-NEXT: xorps %xmm1, %xmm1
1234 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1235 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1238 ; SSSE3-LABEL: shuffle_v4i32_zz4z:
1240 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1241 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1242 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1245 ; SSE41-LABEL: shuffle_v4i32_zz4z:
1247 ; SSE41-NEXT: pxor %xmm1, %xmm1
1248 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1249 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
1252 ; AVX-LABEL: shuffle_v4i32_zz4z:
1254 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1255 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1256 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
1258 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
1259 ret <4 x i32> %shuffle
; Result is [0, undef, undef, a[0]]. With lanes 1-2 undefined, the expected
; lowering is a single whole-register byte shift that moves a[0] to lane 3.
1262 define <4 x i32> @shuffle_v4i32_zuu4(<4 x i32> %a) {
1263 ; SSE-LABEL: shuffle_v4i32_zuu4:
1265 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
1268 ; AVX-LABEL: shuffle_v4i32_zuu4:
1270 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
1272 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
1273 ret <4 x i32> %shuffle
; Moves a[2] into lane 1 and zeroes the other lanes: result is [0, a[2], 0, 0].
; AVX1 and AVX2 are checked separately because the expected blend instruction
; differs (vpblendw vs. vpblendd).
1276 define <4 x i32> @shuffle_v4i32_z6zz(<4 x i32> %a) {
1277 ; SSE2-LABEL: shuffle_v4i32_z6zz:
1279 ; SSE2-NEXT: xorps %xmm1, %xmm1
1280 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
1281 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1284 ; SSE3-LABEL: shuffle_v4i32_z6zz:
1286 ; SSE3-NEXT: xorps %xmm1, %xmm1
1287 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
1288 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1291 ; SSSE3-LABEL: shuffle_v4i32_z6zz:
1293 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1294 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
1295 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1298 ; SSE41-LABEL: shuffle_v4i32_z6zz:
1300 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
1301 ; SSE41-NEXT: pxor %xmm0, %xmm0
1302 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1305 ; AVX1-LABEL: shuffle_v4i32_z6zz:
1307 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1308 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1309 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1312 ; AVX2-LABEL: shuffle_v4i32_z6zz:
1314 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1315 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1316 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1318 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
1319 ret <4 x i32> %shuffle
; Rotation across two inputs: result is [b[3], a[0], a[1], a[2]].
; Targets with SSSE3 or later are expected to use a single palignr.
1322 define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) {
1323 ; SSE2-LABEL: shuffle_v4i32_7012:
1325 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
1326 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
1327 ; SSE2-NEXT: movaps %xmm1, %xmm0
1330 ; SSE3-LABEL: shuffle_v4i32_7012:
1332 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
1333 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
1334 ; SSE3-NEXT: movaps %xmm1, %xmm0
1337 ; SSSE3-LABEL: shuffle_v4i32_7012:
1339 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1342 ; SSE41-LABEL: shuffle_v4i32_7012:
1344 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1347 ; AVX-LABEL: shuffle_v4i32_7012:
1349 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1351 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
1352 ret <4 x i32> %shuffle
; Rotation across two inputs: result is [b[2], b[3], a[0], a[1]].
; Targets with SSSE3 or later are expected to use a single palignr.
1355 define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) {
1356 ; SSE2-LABEL: shuffle_v4i32_6701:
1358 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
1359 ; SSE2-NEXT: movapd %xmm1, %xmm0
1362 ; SSE3-LABEL: shuffle_v4i32_6701:
1364 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
1365 ; SSE3-NEXT: movapd %xmm1, %xmm0
1368 ; SSSE3-LABEL: shuffle_v4i32_6701:
1370 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1373 ; SSE41-LABEL: shuffle_v4i32_6701:
1375 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1378 ; AVX-LABEL: shuffle_v4i32_6701:
1380 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1382 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
1383 ret <4 x i32> %shuffle
; Rotation across two inputs: result is [b[1], b[2], b[3], a[0]].
; Targets with SSSE3 or later are expected to use a single palignr.
1386 define <4 x i32> @shuffle_v4i32_5670(<4 x i32> %a, <4 x i32> %b) {
1387 ; SSE2-LABEL: shuffle_v4i32_5670:
1389 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
1390 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
1391 ; SSE2-NEXT: movaps %xmm1, %xmm0
1394 ; SSE3-LABEL: shuffle_v4i32_5670:
1396 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
1397 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
1398 ; SSE3-NEXT: movaps %xmm1, %xmm0
1401 ; SSSE3-LABEL: shuffle_v4i32_5670:
1403 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
1406 ; SSE41-LABEL: shuffle_v4i32_5670:
1408 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
1411 ; AVX-LABEL: shuffle_v4i32_5670:
1413 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
1415 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
1416 ret <4 x i32> %shuffle
; Rotation across two inputs: result is [a[1], a[2], a[3], b[0]].
; Targets with SSSE3 or later are expected to use a single palignr.
1419 define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) {
1420 ; SSE2-LABEL: shuffle_v4i32_1234:
1422 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
1423 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
1426 ; SSE3-LABEL: shuffle_v4i32_1234:
1428 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
1429 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
1432 ; SSSE3-LABEL: shuffle_v4i32_1234:
1434 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
1435 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1438 ; SSE41-LABEL: shuffle_v4i32_1234:
1440 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
1441 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1444 ; AVX-LABEL: shuffle_v4i32_1234:
1446 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
1448 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
1449 ret <4 x i32> %shuffle
; Rotation across two inputs: result is [a[2], a[3], b[0], b[1]].
; Targets with SSSE3 or later are expected to use a single palignr.
1452 define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) {
1453 ; SSE2-LABEL: shuffle_v4i32_2345:
1455 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1458 ; SSE3-LABEL: shuffle_v4i32_2345:
1460 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1463 ; SSSE3-LABEL: shuffle_v4i32_2345:
1465 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
1466 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1469 ; SSE41-LABEL: shuffle_v4i32_2345:
1471 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
1472 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1475 ; AVX-LABEL: shuffle_v4i32_2345:
1477 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
1479 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1480 ret <4 x i32> %shuffle
; Result is [b[0], a[0], undef, a[1]]. The undef lane lets this match a
; low-half dword interleave with %b as the first operand.
1483 define <4 x i32> @shuffle_v4i32_40u1(<4 x i32> %a, <4 x i32> %b) {
1484 ; SSE-LABEL: shuffle_v4i32_40u1:
1486 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1487 ; SSE-NEXT: movdqa %xmm1, %xmm0
1490 ; AVX-LABEL: shuffle_v4i32_40u1:
1492 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1494 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 1>
1495 ret <4 x i32> %shuffle
; Rotation across two inputs: result is [a[3], b[0], b[1], b[2]].
; Targets with SSSE3 or later are expected to use a single palignr.
1498 define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
1499 ; SSE2-LABEL: shuffle_v4i32_3456:
1501 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
1502 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
1505 ; SSE3-LABEL: shuffle_v4i32_3456:
1507 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
1508 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
1511 ; SSSE3-LABEL: shuffle_v4i32_3456:
1513 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
1514 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1517 ; SSE41-LABEL: shuffle_v4i32_3456:
1519 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
1520 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1523 ; AVX-LABEL: shuffle_v4i32_3456:
1525 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
1527 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1528 ret <4 x i32> %shuffle
; Result is [a[0], undef, a[1], undef]; the mask never selects from %b.
; On SSE4.1 and AVX the undef lanes allow a zero-extending pmovzxdq.
1531 define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
1532 ; SSE2-LABEL: shuffle_v4i32_0u1u:
1534 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1537 ; SSE3-LABEL: shuffle_v4i32_0u1u:
1539 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1542 ; SSSE3-LABEL: shuffle_v4i32_0u1u:
1544 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1547 ; SSE41-LABEL: shuffle_v4i32_0u1u:
1549 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1552 ; AVX-LABEL: shuffle_v4i32_0u1u:
1554 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1556 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
1557 ret <4 x i32> %shuffle
; Interleaves the low two elements of %a with zeros: [a[0], 0, a[1], 0].
; This is a dword-to-qword zero extension, expected as pmovzxdq on SSE4.1/AVX.
1560 define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
1561 ; SSE2-LABEL: shuffle_v4i32_0z1z:
1563 ; SSE2-NEXT: pxor %xmm1, %xmm1
1564 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1567 ; SSE3-LABEL: shuffle_v4i32_0z1z:
1569 ; SSE3-NEXT: pxor %xmm1, %xmm1
1570 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1573 ; SSSE3-LABEL: shuffle_v4i32_0z1z:
1575 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1576 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1579 ; SSE41-LABEL: shuffle_v4i32_0z1z:
1581 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1584 ; AVX-LABEL: shuffle_v4i32_0z1z:
1586 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1588 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
1589 ret <4 x i32> %shuffle
; Result is [a[0], a[1], 0, undef]: keep the low 64 bits and zero the rest,
; expected to lower to a single movq.
1592 define <4 x i32> @shuffle_v4i32_01zu(<4 x i32> %a) {
1593 ; SSE-LABEL: shuffle_v4i32_01zu:
1595 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1598 ; AVX-LABEL: shuffle_v4i32_01zu:
1600 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1602 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 7, i32 undef>
1603 ret <4 x i32> %shuffle
; Zeroes lane 1 only: result is [a[0], 0, a[2], a[3]].
; Before SSE4.1 the expected lowering is an AND with a constant-pool mask;
; later targets blend with a zeroed register.
1606 define <4 x i32> @shuffle_v4i32_0z23(<4 x i32> %a) {
1607 ; SSE2-LABEL: shuffle_v4i32_0z23:
1609 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1612 ; SSE3-LABEL: shuffle_v4i32_0z23:
1614 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1617 ; SSSE3-LABEL: shuffle_v4i32_0z23:
1619 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1622 ; SSE41-LABEL: shuffle_v4i32_0z23:
1624 ; SSE41-NEXT: pxor %xmm1, %xmm1
1625 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1628 ; AVX1-LABEL: shuffle_v4i32_0z23:
1630 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1631 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1634 ; AVX2-LABEL: shuffle_v4i32_0z23:
1636 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1637 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
1639 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
1640 ret <4 x i32> %shuffle
; Zeroes lane 2 only: result is [a[0], a[1], 0, a[3]].
; Before SSE4.1 the expected lowering is an AND with a constant-pool mask;
; later targets blend with a zeroed register.
1643 define <4 x i32> @shuffle_v4i32_01z3(<4 x i32> %a) {
1644 ; SSE2-LABEL: shuffle_v4i32_01z3:
1646 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1649 ; SSE3-LABEL: shuffle_v4i32_01z3:
1651 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1654 ; SSSE3-LABEL: shuffle_v4i32_01z3:
1656 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1659 ; SSE41-LABEL: shuffle_v4i32_01z3:
1661 ; SSE41-NEXT: pxor %xmm1, %xmm1
1662 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1665 ; AVX1-LABEL: shuffle_v4i32_01z3:
1667 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1668 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1671 ; AVX2-LABEL: shuffle_v4i32_01z3:
1673 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1674 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1676 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
1677 ret <4 x i32> %shuffle
; Zeroes lane 3 only: result is [a[0], a[1], a[2], 0].
; Before SSE4.1 the expected lowering is an AND with a constant-pool mask;
; later targets blend with a zeroed register.
1680 define <4 x i32> @shuffle_v4i32_012z(<4 x i32> %a) {
1681 ; SSE2-LABEL: shuffle_v4i32_012z:
1683 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1686 ; SSE3-LABEL: shuffle_v4i32_012z:
1688 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1691 ; SSSE3-LABEL: shuffle_v4i32_012z:
1693 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1696 ; SSE41-LABEL: shuffle_v4i32_012z:
1698 ; SSE41-NEXT: pxor %xmm1, %xmm1
1699 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
1702 ; AVX1-LABEL: shuffle_v4i32_012z:
1704 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1705 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
1708 ; AVX2-LABEL: shuffle_v4i32_012z:
1710 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1711 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
1713 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
1714 ret <4 x i32> %shuffle
; Zeroes the two middle lanes: result is [a[0], 0, 0, a[3]].
; Before SSE4.1 the expected lowering is an AND with a constant-pool mask;
; later targets blend with a zeroed register.
1717 define <4 x i32> @shuffle_v4i32_0zz3(<4 x i32> %a) {
1718 ; SSE2-LABEL: shuffle_v4i32_0zz3:
1720 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1723 ; SSE3-LABEL: shuffle_v4i32_0zz3:
1725 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1728 ; SSSE3-LABEL: shuffle_v4i32_0zz3:
1730 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1733 ; SSE41-LABEL: shuffle_v4i32_0zz3:
1735 ; SSE41-NEXT: pxor %xmm1, %xmm1
1736 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
1739 ; AVX1-LABEL: shuffle_v4i32_0zz3:
1741 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1742 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
1745 ; AVX2-LABEL: shuffle_v4i32_0zz3:
1747 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1748 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
1750 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
1751 ret <4 x i32> %shuffle
; Shuffle through a bitcast. The 32-bit shuffle gives [a[1], b[1], a[0], b[0]];
; swapping its two 64-bit halves as <2 x double> gives [a[0], b[0], a[1], b[1]].
; The pair is expected to fold into one low-half dword interleave.
1754 define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) {
1755 ; SSE-LABEL: shuffle_v4i32_bitcast_0415:
1757 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1760 ; AVX-LABEL: shuffle_v4i32_bitcast_0415:
1762 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1764 %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 4>
1765 %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double>
1766 %shuffle64 = shufflevector <2 x double> %bitcast64, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1767 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x i32>
1768 ret <4 x i32> %bitcast32
; Mixed-type shuffle through bitcasts. %1 is [b[0], b[0], b[1], b[1]]; viewed as
; <2 x double>, its low half is combined with the low half of %a, so %5 holds
; [b[0], b[0], a[0], a[1]] as <4 x float>.
1771 define <4 x float> @shuffle_v4f32_bitcast_4401(<4 x float> %a, <4 x i32> %b) {
1772 ; SSE-LABEL: shuffle_v4f32_bitcast_4401:
1774 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1775 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1776 ; SSE-NEXT: movapd %xmm1, %xmm0
1779 ; AVX-LABEL: shuffle_v4f32_bitcast_4401:
1781 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1782 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1784 %1 = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
1785 %2 = bitcast <4 x i32> %1 to <2 x double>
1786 %3 = bitcast <4 x float> %a to <2 x double>
1787 %4 = shufflevector <2 x double> %2, <2 x double> %3, <2 x i32> <i32 0, i32 2>
1788 %5 = bitcast <2 x double> %4 to <4 x float>
; Mixed-type shuffle through a bitcast. %1 is [a[0], a[0], a[1], a[1]] and %2 is
; %b reinterpreted as float, so %3 holds [a[0], a[0], b[0], b[1]]. The chain is
; expected to fold into a single shufps.
1792 define <4 x float> @shuffle_v4f32_bitcast_0045(<4 x float> %a, <4 x i32> %b) {
1793 ; SSE-LABEL: shuffle_v4f32_bitcast_0045:
1795 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
1798 ; AVX-LABEL: shuffle_v4f32_bitcast_0045:
1800 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
1802 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
1803 %2 = bitcast <4 x i32> %b to <4 x float>
1804 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 1, i32 0, i32 4, i32 5>
; Inserts scalar %a into lane 0 and zeroes lanes 1-3: result is [%a, 0, 0, 0].
; Expected to lower to a single movd from the integer argument register.
1808 define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
1809 ; SSE-LABEL: insert_reg_and_zero_v4i32:
1811 ; SSE-NEXT: movd %edi, %xmm0
1814 ; AVX-LABEL: insert_reg_and_zero_v4i32:
1816 ; AVX-NEXT: vmovd %edi, %xmm0
1818 %v = insertelement <4 x i32> undef, i32 %a, i32 0
1819 %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1820 ret <4 x i32> %shuffle
; Same pattern as a register insert-and-zero, but the i32 is loaded from %ptr:
; result is [*ptr, 0, 0, 0]. The load is expected to fold into the movd.
1823 define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) {
1824 ; SSE-LABEL: insert_mem_and_zero_v4i32:
1826 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1829 ; AVX-LABEL: insert_mem_and_zero_v4i32:
1831 ; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1833 %a = load i32, i32* %ptr
1834 %v = insertelement <4 x i32> undef, i32 %a, i32 0
1835 %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1836 ret <4 x i32> %shuffle
; Inserts scalar float %a into lane 0 and zeroes lanes 1-3: [%a, 0, 0, 0].
; The argument already arrives in xmm0, so the checks expect a zeroed register
; merged with it (movss or blendps).
1839 define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
1840 ; SSE2-LABEL: insert_reg_and_zero_v4f32:
1842 ; SSE2-NEXT: xorps %xmm1, %xmm1
1843 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1844 ; SSE2-NEXT: movaps %xmm1, %xmm0
1847 ; SSE3-LABEL: insert_reg_and_zero_v4f32:
1849 ; SSE3-NEXT: xorps %xmm1, %xmm1
1850 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1851 ; SSE3-NEXT: movaps %xmm1, %xmm0
1854 ; SSSE3-LABEL: insert_reg_and_zero_v4f32:
1856 ; SSSE3-NEXT: xorps %xmm1, %xmm1
1857 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1858 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1861 ; SSE41-LABEL: insert_reg_and_zero_v4f32:
1863 ; SSE41-NEXT: xorps %xmm1, %xmm1
1864 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1867 ; AVX-LABEL: insert_reg_and_zero_v4f32:
1869 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
1870 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1872 %v = insertelement <4 x float> undef, float %a, i32 0
1873 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1874 ret <4 x float> %shuffle
; Loads a float from %ptr into lane 0 and zeroes lanes 1-3: [*ptr, 0, 0, 0].
; Expected to lower to a single zero-extending movss load.
1877 define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
1878 ; SSE-LABEL: insert_mem_and_zero_v4f32:
1880 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1883 ; AVX-LABEL: insert_mem_and_zero_v4f32:
1885 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1887 %a = load float, float* %ptr
1888 %v = insertelement <4 x float> undef, float %a, i32 0
1889 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1890 ret <4 x float> %shuffle
; Replaces the low 64 bits of %b with the i64 argument viewed as <2 x i32>:
; result is [a.cast[0], a.cast[1], b[2], b[3]].
1893 define <4 x i32> @insert_reg_lo_v4i32(i64 %a, <4 x i32> %b) {
1894 ; SSE2-LABEL: insert_reg_lo_v4i32:
1896 ; SSE2-NEXT: movd %rdi, %xmm1
1897 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1900 ; SSE3-LABEL: insert_reg_lo_v4i32:
1902 ; SSE3-NEXT: movd %rdi, %xmm1
1903 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1906 ; SSSE3-LABEL: insert_reg_lo_v4i32:
1908 ; SSSE3-NEXT: movd %rdi, %xmm1
1909 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1912 ; SSE41-LABEL: insert_reg_lo_v4i32:
1914 ; SSE41-NEXT: movd %rdi, %xmm1
1915 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1918 ; AVX1-LABEL: insert_reg_lo_v4i32:
1920 ; AVX1-NEXT: vmovq %rdi, %xmm1
1921 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1924 ; AVX2-LABEL: insert_reg_lo_v4i32:
1926 ; AVX2-NEXT: vmovq %rdi, %xmm1
1927 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1929 %a.cast = bitcast i64 %a to <2 x i32>
1930 %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1931 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1932 ret <4 x i32> %shuffle
; Replaces the low 64 bits of %b with a <2 x i32> loaded from %ptr:
; result is [mem[0], mem[1], b[2], b[3]].
1935 define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
1936 ; SSE2-LABEL: insert_mem_lo_v4i32:
1938 ; SSE2-NEXT: movlpd (%rdi), %xmm0
1941 ; SSE3-LABEL: insert_mem_lo_v4i32:
1943 ; SSE3-NEXT: movlpd (%rdi), %xmm0
1946 ; SSSE3-LABEL: insert_mem_lo_v4i32:
1948 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
1951 ; SSE41-LABEL: insert_mem_lo_v4i32:
1953 ; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1954 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1957 ; AVX1-LABEL: insert_mem_lo_v4i32:
1959 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1960 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1963 ; AVX2-LABEL: insert_mem_lo_v4i32:
1965 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1966 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1968 %a = load <2 x i32>, <2 x i32>* %ptr
1969 %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1970 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1971 ret <4 x i32> %shuffle
; Places the i64 argument (viewed as <2 x i32>) in the high 64 bits and keeps
; the low half of %b: result is [b[0], b[1], a.cast[0], a.cast[1]].
1974 define <4 x i32> @insert_reg_hi_v4i32(i64 %a, <4 x i32> %b) {
1975 ; SSE-LABEL: insert_reg_hi_v4i32:
1977 ; SSE-NEXT: movd %rdi, %xmm1
1978 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1981 ; AVX-LABEL: insert_reg_hi_v4i32:
1983 ; AVX-NEXT: vmovq %rdi, %xmm1
1984 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1986 %a.cast = bitcast i64 %a to <2 x i32>
1987 %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1988 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
1989 ret <4 x i32> %shuffle
; Places a <2 x i32> loaded from %ptr in the high 64 bits and keeps the low
; half of %b: result is [b[0], b[1], mem[0], mem[1]].
1992 define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
1993 ; SSE-LABEL: insert_mem_hi_v4i32:
1995 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1996 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1999 ; AVX-LABEL: insert_mem_hi_v4i32:
2001 ; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
2002 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2004 %a = load <2 x i32>, <2 x i32>* %ptr
2005 %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2006 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
2007 ret <4 x i32> %shuffle
; Replaces the low 64 bits of %b with the double argument viewed as
; <2 x float>: result is [a.cast[0], a.cast[1], b[2], b[3]].
2010 define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
2011 ; SSE-LABEL: insert_reg_lo_v4f32:
2013 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2014 ; SSE-NEXT: movapd %xmm1, %xmm0
2017 ; AVX-LABEL: insert_reg_lo_v4f32:
2019 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
2021 %a.cast = bitcast double %a to <2 x float>
2022 %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2023 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
2024 ret <4 x float> %shuffle
; Replaces the low 64 bits of %b with a <2 x float> loaded from %ptr:
; result is [mem[0], mem[1], b[2], b[3]]. Expected as a single movlpd load.
2027 define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
2028 ; SSE-LABEL: insert_mem_lo_v4f32:
2030 ; SSE-NEXT: movlpd (%rdi), %xmm0
2033 ; AVX-LABEL: insert_mem_lo_v4f32:
2035 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
2037 %a = load <2 x float>, <2 x float>* %ptr
2038 %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2039 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
2040 ret <4 x float> %shuffle
; Places the double argument (viewed as <2 x float>) in the high 64 bits and
; keeps the low half of %b: result is [b[0], b[1], a.cast[0], a.cast[1]].
2043 define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) {
2044 ; SSE-LABEL: insert_reg_hi_v4f32:
2046 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2047 ; SSE-NEXT: movapd %xmm1, %xmm0
2050 ; AVX-LABEL: insert_reg_hi_v4f32:
2052 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2054 %a.cast = bitcast double %a to <2 x float>
2055 %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2056 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
2057 ret <4 x float> %shuffle
; Places a <2 x float> loaded from %ptr in the high 64 bits and keeps the low
; half of %b: result is [b[0], b[1], mem[0], mem[1]]. Expected as one movhpd.
2060 define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
2061 ; SSE-LABEL: insert_mem_hi_v4f32:
2063 ; SSE-NEXT: movhpd (%rdi), %xmm0
2066 ; AVX-LABEL: insert_mem_hi_v4f32:
2068 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
2070 %a = load <2 x float>, <2 x float>* %ptr
2071 %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2072 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
2073 ret <4 x float> %shuffle
; Reverses a <4 x float> loaded from %ptr: result is [m[3], m[2], m[1], m[0]].
; On AVX the load is expected to fold into vpermilps; SSE loads first and
; then shuffles in a register.
2076 define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
2077 ; SSE-LABEL: shuffle_mem_v4f32_3210:
2079 ; SSE-NEXT: movaps (%rdi), %xmm0
2080 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
2083 ; AVX-LABEL: shuffle_mem_v4f32_3210:
2085 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
2087 %a = load <4 x float>, <4 x float>* %ptr
2088 %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
2089 ret <4 x float> %shuffle
; Splat of a loaded scalar: the i32 from %ptr is inserted into lane 0 and an
; all-zero mask then broadcasts lane 0, so %tmp2 is [*ptr, *ptr, *ptr, *ptr].
; AVX is expected to use a single broadcast from memory.
2092 define <4 x i32> @insert_dup_mem_v4i32(i32* %ptr) {
2093 ; SSE-LABEL: insert_dup_mem_v4i32:
2095 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2096 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2099 ; AVX-LABEL: insert_dup_mem_v4i32:
2101 ; AVX-NEXT: vbroadcastss (%rdi), %xmm0
2103 %tmp = load i32, i32* %ptr, align 4
2104 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2105 %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
2110 ; Shuffle to logical bit shifts
; Result is [0, a[0], 0, undef]. With lane 3 undefined this equals shifting
; each 64-bit half left by 32 bits, so a single psllq is expected.
2113 define <4 x i32> @shuffle_v4i32_z0zX(<4 x i32> %a) {
2114 ; SSE-LABEL: shuffle_v4i32_z0zX:
2116 ; SSE-NEXT: psllq $32, %xmm0
2119 ; AVX-LABEL: shuffle_v4i32_z0zX:
2121 ; AVX-NEXT: vpsllq $32, %xmm0, %xmm0
2123 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 undef>
2124 ret <4 x i32> %shuffle
; Result is [a[1], 0, a[3], 0]. This equals shifting each 64-bit half right by
; 32 bits, so a single psrlq is expected.
2127 define <4 x i32> @shuffle_v4i32_1z3z(<4 x i32> %a) {
2128 ; SSE-LABEL: shuffle_v4i32_1z3z:
2130 ; SSE-NEXT: psrlq $32, %xmm0
2133 ; AVX-LABEL: shuffle_v4i32_1z3z:
2135 ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
2137 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
2138 ret <4 x i32> %shuffle