1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
7 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
8 target triple = "x86_64-unknown-unknown"
; Unary v2i64 shuffle, mask <0, 0>: both result lanes take element 0 of %a
; (%b is not referenced by the mask). The checks expect one pshufd/vpshufd of
; xmm0 using the 32-bit lane pattern [0,1,0,1].
10 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
11 ; SSE-LABEL: shuffle_v2i64_00:
13 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
16 ; AVX-LABEL: shuffle_v2i64_00:
18 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
20 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
21 ret <2 x i64> %shuffle
; Unary v2i64 shuffle, mask <1, 0>: swaps the two 64-bit elements of %a.
; The checks expect one pshufd/vpshufd of xmm0 with pattern [2,3,0,1].
23 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
24 ; SSE-LABEL: shuffle_v2i64_10:
26 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
29 ; AVX-LABEL: shuffle_v2i64_10:
31 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
33 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
34 ret <2 x i64> %shuffle
; Unary v2i64 shuffle, mask <1, 1>: both result lanes take element 1 of %a.
; The checks expect one pshufd/vpshufd of xmm0 with pattern [2,3,2,3].
36 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
37 ; SSE-LABEL: shuffle_v2i64_11:
39 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
42 ; AVX-LABEL: shuffle_v2i64_11:
44 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
46 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
47 ret <2 x i64> %shuffle
; v2i64 shuffle, mask <2, 2>: both result lanes take element 0 of the second
; operand %b (%a is not referenced by the mask). The checks expect a pshufd/
; vpshufd that reads xmm1 and writes xmm0 with pattern [0,1,0,1].
49 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
50 ; SSE-LABEL: shuffle_v2i64_22:
52 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
55 ; AVX-LABEL: shuffle_v2i64_22:
57 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
59 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
60 ret <2 x i64> %shuffle
; v2i64 shuffle, mask <3, 2>: swaps the two 64-bit elements of the second
; operand %b. The checks expect a pshufd/vpshufd reading xmm1 into xmm0 with
; pattern [2,3,0,1].
62 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
63 ; SSE-LABEL: shuffle_v2i64_32:
65 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
68 ; AVX-LABEL: shuffle_v2i64_32:
70 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
72 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
73 ret <2 x i64> %shuffle
; v2i64 shuffle, mask <3, 3>: both result lanes take element 1 of the second
; operand %b. The checks expect a pshufd/vpshufd reading xmm1 into xmm0 with
; pattern [2,3,2,3].
75 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
76 ; SSE-LABEL: shuffle_v2i64_33:
78 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
81 ; AVX-LABEL: shuffle_v2i64_33:
83 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
85 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
86 ret <2 x i64> %shuffle
; Unary v2f64 shuffle, mask <0, 0>: duplicates element 0 of %a into both lanes.
; Expected instruction differs per run: the SSE2 run expects movlhps, the
; SSE3, SSSE3 and SSE41 runs expect unpcklpd, and the AVX run expects
; vunpcklpd; each operates on xmm0 in place.
89 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
90 ; SSE2-LABEL: shuffle_v2f64_00:
92 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
95 ; SSE3-LABEL: shuffle_v2f64_00:
97 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
100 ; SSSE3-LABEL: shuffle_v2f64_00:
102 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
105 ; SSE41-LABEL: shuffle_v2f64_00:
107 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
110 ; AVX-LABEL: shuffle_v2f64_00:
112 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
114 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
115 ret <2 x double> %shuffle
; Unary v2f64 shuffle, mask <1, 0>: swaps the two elements of %a.
; The SSE runs expect shufpd on xmm0; the AVX run expects vpermilpd.
117 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
118 ; SSE-LABEL: shuffle_v2f64_10:
120 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
123 ; AVX-LABEL: shuffle_v2f64_10:
125 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
127 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
128 ret <2 x double> %shuffle
; Unary v2f64 shuffle, mask <1, 1>: duplicates element 1 of %a into both lanes.
; The checks expect movhlps/vmovhlps on xmm0.
130 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
131 ; SSE-LABEL: shuffle_v2f64_11:
133 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
136 ; AVX-LABEL: shuffle_v2f64_11:
138 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
140 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
141 ret <2 x double> %shuffle
; v2f64 shuffle, mask <2, 2>: duplicates element 0 of the second operand %b.
; The non-AVX runs expect the duplicate to be formed in xmm1 (movlhps for the
; SSE2 run, unpcklpd for the SSE3/SSSE3/SSE41 runs) followed by a register copy
; into xmm0. The AVX run expects a single vunpcklpd writing xmm0 directly.
143 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
144 ; SSE2-LABEL: shuffle_v2f64_22:
146 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
147 ; SSE2-NEXT: movaps %xmm1, %xmm0
150 ; SSE3-LABEL: shuffle_v2f64_22:
152 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
153 ; SSE3-NEXT: movapd %xmm1, %xmm0
156 ; SSSE3-LABEL: shuffle_v2f64_22:
158 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
159 ; SSSE3-NEXT: movapd %xmm1, %xmm0
162 ; SSE41-LABEL: shuffle_v2f64_22:
164 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
165 ; SSE41-NEXT: movapd %xmm1, %xmm0
168 ; AVX-LABEL: shuffle_v2f64_22:
170 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
172 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
173 ret <2 x double> %shuffle
; v2f64 shuffle, mask <3, 2>: swaps the two elements of the second operand %b.
; The SSE runs expect pshufd reading xmm1 into xmm0 with pattern [2,3,0,1];
; the AVX run expects vpermilpd reading xmm1 with pattern [1,0].
175 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
176 ; SSE-LABEL: shuffle_v2f64_32:
178 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
181 ; AVX-LABEL: shuffle_v2f64_32:
183 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
185 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
186 ret <2 x double> %shuffle
; v2f64 shuffle, mask <3, 3>: duplicates element 1 of the second operand %b.
; The SSE runs expect movhlps performed in xmm1 and then copied to xmm0;
; the AVX run expects a single vmovhlps from xmm1 into xmm0.
188 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
189 ; SSE-LABEL: shuffle_v2f64_33:
191 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
192 ; SSE-NEXT: movaps %xmm1, %xmm0
195 ; AVX-LABEL: shuffle_v2f64_33:
197 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
199 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
200 ret <2 x double> %shuffle
; Two-input v2f64 shuffle, mask <0, 3>: lane 0 from %a element 0, lane 1 from
; %b element 1. The SSE2, SSE3 and SSSE3 runs expect shufpd; the SSE41 run
; expects blendpd and the AVX run expects vblendpd.
202 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
203 ; SSE2-LABEL: shuffle_v2f64_03:
205 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
208 ; SSE3-LABEL: shuffle_v2f64_03:
210 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
213 ; SSSE3-LABEL: shuffle_v2f64_03:
215 ; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
218 ; SSE41-LABEL: shuffle_v2f64_03:
220 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
223 ; AVX-LABEL: shuffle_v2f64_03:
225 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
227 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
228 ret <2 x double> %shuffle
; Two-input v2f64 shuffle, mask <2, 1>: lane 0 from %b element 0, lane 1 from
; %a element 1. The non-AVX runs expect the result to be built in xmm1
; (shufpd, or blendpd for the SSE41 run) and then copied to xmm0 with movapd.
; The AVX run expects a single vblendpd writing xmm0.
230 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
231 ; SSE2-LABEL: shuffle_v2f64_21:
233 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
234 ; SSE2-NEXT: movapd %xmm1, %xmm0
237 ; SSE3-LABEL: shuffle_v2f64_21:
239 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
240 ; SSE3-NEXT: movapd %xmm1, %xmm0
243 ; SSSE3-LABEL: shuffle_v2f64_21:
245 ; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
246 ; SSSE3-NEXT: movapd %xmm1, %xmm0
249 ; SSE41-LABEL: shuffle_v2f64_21:
251 ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
252 ; SSE41-NEXT: movapd %xmm1, %xmm0
255 ; AVX-LABEL: shuffle_v2f64_21:
257 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
259 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
260 ret <2 x double> %shuffle
; Two-input v2i64 shuffle, mask <0, 2>: interleaves the low elements of %a and
; %b. The checks expect punpcklqdq/vpunpcklqdq with xmm0 as first source.
264 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
265 ; SSE-LABEL: shuffle_v2i64_02:
267 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
270 ; AVX-LABEL: shuffle_v2i64_02:
272 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
274 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
275 ret <2 x i64> %shuffle
; Same mask <0, 2> as shuffle_v2i64_02, but with an unused leading %nonce
; argument, so the checks see the shuffle inputs in xmm1/xmm2 rather than
; xmm0/xmm1. The SSE runs expect punpcklqdq in xmm1 plus a movdqa into xmm0;
; the AVX run expects a single vpunpcklqdq writing xmm0.
277 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
278 ; SSE-LABEL: shuffle_v2i64_02_copy:
280 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
281 ; SSE-NEXT: movdqa %xmm1, %xmm0
284 ; AVX-LABEL: shuffle_v2i64_02_copy:
286 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
288 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
289 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle, mask <0, 3>: lane 0 from %a element 0, lane 1 from
; %b element 1. The SSE2, SSE3 and SSSE3 runs expect shufpd; the SSE41 and AVX
; runs expect pblendw/vpblendw taking 16-bit words 0-3 from xmm0 and 4-7 from
; xmm1.
291 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
292 ; SSE2-LABEL: shuffle_v2i64_03:
294 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
297 ; SSE3-LABEL: shuffle_v2i64_03:
299 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
302 ; SSSE3-LABEL: shuffle_v2i64_03:
304 ; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
307 ; SSE41-LABEL: shuffle_v2i64_03:
309 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
312 ; AVX-LABEL: shuffle_v2i64_03:
314 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
316 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
317 ret <2 x i64> %shuffle
; Same mask <0, 3> as shuffle_v2i64_03, but with an unused leading %nonce
; argument, so the checks see the inputs in xmm1/xmm2. The non-AVX runs expect
; the blend (shufpd, or pblendw for the SSE41 run) in xmm1 followed by a copy
; into xmm0; the AVX run expects a single vpblendw writing xmm0.
319 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
320 ; SSE2-LABEL: shuffle_v2i64_03_copy:
322 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
323 ; SSE2-NEXT: movapd %xmm1, %xmm0
326 ; SSE3-LABEL: shuffle_v2i64_03_copy:
328 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
329 ; SSE3-NEXT: movapd %xmm1, %xmm0
332 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
334 ; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
335 ; SSSE3-NEXT: movapd %xmm1, %xmm0
338 ; SSE41-LABEL: shuffle_v2i64_03_copy:
340 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
341 ; SSE41-NEXT: movdqa %xmm1, %xmm0
344 ; AVX-LABEL: shuffle_v2i64_03_copy:
346 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
348 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
349 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle, mask <1, 2>: lane 0 from %a element 1, lane 1 from
; %b element 0. The SSE2 and SSE3 runs expect shufpd on xmm0. The SSSE3 and
; SSE41 runs expect palignr producing the result in xmm1 followed by a movdqa
; into xmm0. The AVX run expects a single vpalignr writing xmm0.
351 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
352 ; SSE2-LABEL: shuffle_v2i64_12:
354 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
357 ; SSE3-LABEL: shuffle_v2i64_12:
359 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
362 ; SSSE3-LABEL: shuffle_v2i64_12:
364 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
365 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
368 ; SSE41-LABEL: shuffle_v2i64_12:
370 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
371 ; SSE41-NEXT: movdqa %xmm1, %xmm0
374 ; AVX-LABEL: shuffle_v2i64_12:
376 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
378 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
379 ret <2 x i64> %shuffle
; Same mask <1, 2> as shuffle_v2i64_12, but with an unused leading %nonce
; argument, so the checks see the inputs in xmm1/xmm2. The SSE2 and SSE3 runs
; expect shufpd in xmm1 then movapd into xmm0. The SSSE3 and SSE41 runs expect
; palignr in xmm2 then movdqa into xmm0. The AVX run expects one vpalignr.
381 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
382 ; SSE2-LABEL: shuffle_v2i64_12_copy:
384 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
385 ; SSE2-NEXT: movapd %xmm1, %xmm0
388 ; SSE3-LABEL: shuffle_v2i64_12_copy:
390 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
391 ; SSE3-NEXT: movapd %xmm1, %xmm0
394 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
396 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
397 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
400 ; SSE41-LABEL: shuffle_v2i64_12_copy:
402 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
403 ; SSE41-NEXT: movdqa %xmm2, %xmm0
406 ; AVX-LABEL: shuffle_v2i64_12_copy:
408 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
410 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
411 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle, mask <1, 3>: interleaves the high elements of %a
; and %b. The checks expect punpckhqdq/vpunpckhqdq with xmm0 as first source.
413 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
414 ; SSE-LABEL: shuffle_v2i64_13:
416 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
419 ; AVX-LABEL: shuffle_v2i64_13:
421 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
423 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
424 ret <2 x i64> %shuffle
; Same mask <1, 3> as shuffle_v2i64_13, but with an unused leading %nonce
; argument, so the checks see the inputs in xmm1/xmm2. The SSE runs expect
; punpckhqdq in xmm1 plus a movdqa into xmm0; the AVX run expects a single
; vpunpckhqdq writing xmm0.
426 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
427 ; SSE-LABEL: shuffle_v2i64_13_copy:
429 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
430 ; SSE-NEXT: movdqa %xmm1, %xmm0
433 ; AVX-LABEL: shuffle_v2i64_13_copy:
435 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
437 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
438 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle, mask <2, 0>: lane 0 from %b element 0, lane 1 from
; %a element 0 (the commuted form of mask <0, 2>). The SSE runs expect
; punpcklqdq with xmm1 as destination plus a movdqa into xmm0; the AVX run
; expects a single vpunpcklqdq with the sources swapped.
440 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
441 ; SSE-LABEL: shuffle_v2i64_20:
443 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
444 ; SSE-NEXT: movdqa %xmm1, %xmm0
447 ; AVX-LABEL: shuffle_v2i64_20:
449 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
451 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
452 ret <2 x i64> %shuffle
; Same mask <2, 0> as shuffle_v2i64_20, but with an unused leading %nonce
; argument, so the checks see the inputs in xmm1/xmm2. The SSE runs expect
; punpcklqdq in xmm2 plus a movdqa into xmm0; the AVX run expects a single
; vpunpcklqdq writing xmm0.
454 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
455 ; SSE-LABEL: shuffle_v2i64_20_copy:
457 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
458 ; SSE-NEXT: movdqa %xmm2, %xmm0
461 ; AVX-LABEL: shuffle_v2i64_20_copy:
463 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
465 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
466 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle, mask <2, 1>: lane 0 from %b element 0, lane 1 from
; %a element 1. The non-AVX runs expect the blend to be formed in xmm1
; (shufpd, or pblendw for the SSE41 run) and then copied into xmm0; the AVX
; run expects a single vpblendw writing xmm0.
468 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
469 ; SSE2-LABEL: shuffle_v2i64_21:
471 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
472 ; SSE2-NEXT: movapd %xmm1, %xmm0
475 ; SSE3-LABEL: shuffle_v2i64_21:
477 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
478 ; SSE3-NEXT: movapd %xmm1, %xmm0
481 ; SSSE3-LABEL: shuffle_v2i64_21:
483 ; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
484 ; SSSE3-NEXT: movapd %xmm1, %xmm0
487 ; SSE41-LABEL: shuffle_v2i64_21:
489 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
490 ; SSE41-NEXT: movdqa %xmm1, %xmm0
493 ; AVX-LABEL: shuffle_v2i64_21:
495 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
497 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
498 ret <2 x i64> %shuffle
; Same mask <2, 1> as shuffle_v2i64_21, but with an unused leading %nonce
; argument, so the checks see the inputs in xmm1/xmm2. The non-AVX runs expect
; the blend in xmm2 (shufpd, or pblendw for the SSE41 run) followed by a copy
; into xmm0; the AVX run expects a single vpblendw writing xmm0.
500 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
501 ; SSE2-LABEL: shuffle_v2i64_21_copy:
503 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
504 ; SSE2-NEXT: movapd %xmm2, %xmm0
507 ; SSE3-LABEL: shuffle_v2i64_21_copy:
509 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
510 ; SSE3-NEXT: movapd %xmm2, %xmm0
513 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
515 ; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
516 ; SSSE3-NEXT: movapd %xmm2, %xmm0
519 ; SSE41-LABEL: shuffle_v2i64_21_copy:
521 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
522 ; SSE41-NEXT: movdqa %xmm2, %xmm0
525 ; AVX-LABEL: shuffle_v2i64_21_copy:
527 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
529 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
530 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle, mask <3, 0>: lane 0 from %b element 1, lane 1 from
; %a element 0. The SSE2 and SSE3 runs expect shufpd in xmm1 then movapd into
; xmm0. The SSSE3 and SSE41 runs expect a single palignr with xmm0 as the
; destination, and the AVX run expects a single vpalignr.
532 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
533 ; SSE2-LABEL: shuffle_v2i64_30:
535 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
536 ; SSE2-NEXT: movapd %xmm1, %xmm0
539 ; SSE3-LABEL: shuffle_v2i64_30:
541 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
542 ; SSE3-NEXT: movapd %xmm1, %xmm0
545 ; SSSE3-LABEL: shuffle_v2i64_30:
547 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
550 ; SSE41-LABEL: shuffle_v2i64_30:
552 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
555 ; AVX-LABEL: shuffle_v2i64_30:
557 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
559 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
560 ret <2 x i64> %shuffle
; Same mask <3, 0> as shuffle_v2i64_30, but with an unused leading %nonce
; argument, so the checks see the inputs in xmm1/xmm2. The SSE2 and SSE3 runs
; expect shufpd in xmm2 then movapd into xmm0. The SSSE3 and SSE41 runs expect
; palignr in xmm1 then movdqa into xmm0. The AVX run expects one vpalignr.
562 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
563 ; SSE2-LABEL: shuffle_v2i64_30_copy:
565 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
566 ; SSE2-NEXT: movapd %xmm2, %xmm0
569 ; SSE3-LABEL: shuffle_v2i64_30_copy:
571 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
572 ; SSE3-NEXT: movapd %xmm2, %xmm0
575 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
577 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
578 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
581 ; SSE41-LABEL: shuffle_v2i64_30_copy:
583 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
584 ; SSE41-NEXT: movdqa %xmm1, %xmm0
587 ; AVX-LABEL: shuffle_v2i64_30_copy:
589 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
591 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
592 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle, mask <3, 1>: lane 0 from %b element 1, lane 1 from
; %a element 1 (the commuted form of mask <1, 3>). The SSE runs expect
; punpckhqdq in xmm1 plus a movdqa into xmm0; the AVX run expects a single
; vpunpckhqdq with the sources swapped.
594 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
595 ; SSE-LABEL: shuffle_v2i64_31:
597 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
598 ; SSE-NEXT: movdqa %xmm1, %xmm0
601 ; AVX-LABEL: shuffle_v2i64_31:
603 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
605 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
606 ret <2 x i64> %shuffle
; Same mask <3, 1> as shuffle_v2i64_31, but with an unused leading %nonce
; argument, so the checks see the inputs in xmm1/xmm2. The SSE runs expect
; punpckhqdq in xmm2 plus a movdqa into xmm0; the AVX run expects a single
; vpunpckhqdq writing xmm0.
608 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
609 ; SSE-LABEL: shuffle_v2i64_31_copy:
611 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
612 ; SSE-NEXT: movdqa %xmm2, %xmm0
615 ; AVX-LABEL: shuffle_v2i64_31_copy:
617 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
619 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
620 ret <2 x i64> %shuffle
; Inserts the scalar i64 argument %a into lane 0 of an undef vector, then
; shuffles with zeroinitializer using mask <0, 3>, so lane 1 of the result is
; zero. The checks expect a single GPR-to-XMM move from %rdi (movd for the SSE
; runs, vmovq for the AVX run).
624 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
625 ; SSE-LABEL: insert_reg_and_zero_v2i64:
627 ; SSE-NEXT: movd %rdi, %xmm0
630 ; AVX-LABEL: insert_reg_and_zero_v2i64:
632 ; AVX-NEXT: vmovq %rdi, %xmm0
634 %v = insertelement <2 x i64> undef, i64 %a, i32 0
635 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
636 ret <2 x i64> %shuffle
; Loads an i64 from %ptr, inserts it into lane 0 of an undef vector, then
; shuffles with zeroinitializer using mask <0, 3>, so lane 1 of the result is
; zero. The checks expect the load to be folded into a single movq/vmovq from
; (%rdi).
639 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
640 ; SSE-LABEL: insert_mem_and_zero_v2i64:
642 ; SSE-NEXT: movq (%rdi), %xmm0
645 ; AVX-LABEL: insert_mem_and_zero_v2i64:
647 ; AVX-NEXT: vmovq (%rdi), %xmm0
; %a must be defined by loading through %ptr; without this load the insert
; below references an undefined value and the memory-operand form checked
; above cannot be produced.
649 %a = load i64* %ptr
650 %v = insertelement <2 x i64> undef, i64 %a, i32 0
651 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
652 ret <2 x i64> %shuffle
; Inserts the scalar double argument %a into lane 0 of an undef vector, then
; shuffles with zeroinitializer using mask <0, 3>, so lane 1 of the result is
; zero. The checks expect a single movq/vmovq from xmm0 to xmm0.
655 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
656 ; SSE-LABEL: insert_reg_and_zero_v2f64:
658 ; SSE-NEXT: movq %xmm0, %xmm0
661 ; AVX-LABEL: insert_reg_and_zero_v2f64:
663 ; AVX-NEXT: vmovq %xmm0, %xmm0
665 %v = insertelement <2 x double> undef, double %a, i32 0
666 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
667 ret <2 x double> %shuffle
; Loads a double from %ptr, inserts it into lane 0 of an undef vector, then
; shuffles with zeroinitializer using mask <0, 3>, so lane 1 of the result is
; zero. The checks expect the load to be folded into a single movsd/vmovsd
; from (%rdi).
670 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
671 ; SSE-LABEL: insert_mem_and_zero_v2f64:
673 ; SSE-NEXT: movsd (%rdi), %xmm0
676 ; AVX-LABEL: insert_mem_and_zero_v2f64:
678 ; AVX-NEXT: vmovsd (%rdi), %xmm0
680 %a = load double* %ptr
681 %v = insertelement <2 x double> undef, double %a, i32 0
682 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
683 ret <2 x double> %shuffle
; Inserts the scalar double argument %a into lane 0 of an undef vector and
; duplicates it into both lanes with mask <0, 0>. The SSE2 run currently
; expects movlhps, the SSE3, SSSE3 and SSE41 runs expect unpcklpd, and the AVX
; run expects vunpcklpd; the FIXME below records that movddup is the desired
; selection for SSE3 and later.
686 define <2 x double> @insert_dup_reg_v2f64(double %a) {
687 ; FIXME: We should match movddup for SSE3 and higher here.
689 ; SSE2-LABEL: insert_dup_reg_v2f64:
691 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
694 ; SSE3-LABEL: insert_dup_reg_v2f64:
696 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
699 ; SSSE3-LABEL: insert_dup_reg_v2f64:
701 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
704 ; SSE41-LABEL: insert_dup_reg_v2f64:
706 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
709 ; AVX-LABEL: insert_dup_reg_v2f64:
711 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
713 %v = insertelement <2 x double> undef, double %a, i32 0
714 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
715 ret <2 x double> %shuffle
; Loads a double from %ptr, inserts it into lane 0 of an undef vector and
; duplicates it into both lanes with mask <0, 0>. The SSE2 run expects a movsd
; load followed by movlhps. The SSE3, SSSE3 and SSE41 runs expect a single
; movddup from (%rdi), and the AVX run expects a single vmovddup.
717 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
718 ; SSE2-LABEL: insert_dup_mem_v2f64:
720 ; SSE2-NEXT: movsd (%rdi), %xmm0
721 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
724 ; SSE3-LABEL: insert_dup_mem_v2f64:
726 ; SSE3-NEXT: movddup (%rdi), %xmm0
729 ; SSSE3-LABEL: insert_dup_mem_v2f64:
731 ; SSSE3-NEXT: movddup (%rdi), %xmm0
734 ; SSE41-LABEL: insert_dup_mem_v2f64:
736 ; SSE41-NEXT: movddup (%rdi), %xmm0
739 ; AVX-LABEL: insert_dup_mem_v2f64:
741 ; AVX-NEXT: vmovddup (%rdi), %xmm0
743 %a = load double* %ptr
744 %v = insertelement <2 x double> undef, double %a, i32 0
745 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
746 ret <2 x double> %shuffle
; Loads a whole <2 x double> from %ptr and swaps its two elements with mask
; <1, 0>. The SSE runs expect a separate movapd load followed by shufpd; the
; AVX run expects a single vpermilpd that takes the memory operand directly
; (printed as mem[1,0]).
749 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
750 ; SSE-LABEL: shuffle_mem_v2f64_10:
752 ; SSE-NEXT: movapd (%rdi), %xmm0
753 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
756 ; AVX-LABEL: shuffle_mem_v2f64_10:
758 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
760 %a = load <2 x double>* %ptr
761 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
762 ret <2 x double> %shuffle