1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
8 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
9 target triple = "x86_64-unknown-unknown"
; Single-input v2i64 shuffle with mask <0,0>: both result lanes take element 0 of %a.
; The checks expect one pshufd (vpshufd under AVX) with dword pattern [0,1,0,1].
11 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
12 ; SSE-LABEL: shuffle_v2i64_00:
14 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
17 ; AVX-LABEL: shuffle_v2i64_00:
19 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
21 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
22 ret <2 x i64> %shuffle
; Single-input v2i64 shuffle with mask <1,0>: swaps the two elements of %a.
; The checks expect one pshufd (vpshufd under AVX) with dword pattern [2,3,0,1].
24 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
25 ; SSE-LABEL: shuffle_v2i64_10:
27 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
30 ; AVX-LABEL: shuffle_v2i64_10:
32 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
34 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
35 ret <2 x i64> %shuffle
; Single-input v2i64 shuffle with mask <1,1>: both result lanes take element 1 of %a.
; The checks expect one pshufd (vpshufd under AVX) with dword pattern [2,3,2,3].
37 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
38 ; SSE-LABEL: shuffle_v2i64_11:
40 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
43 ; AVX-LABEL: shuffle_v2i64_11:
45 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
47 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
48 ret <2 x i64> %shuffle
; v2i64 shuffle with mask <2,2>: mask indices 2 and 3 address %b, so both
; result lanes take element 0 of %b. The checks expect a single pshufd
; (vpshufd under AVX) that reads xmm1 and writes xmm0.
50 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
51 ; SSE-LABEL: shuffle_v2i64_22:
53 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
56 ; AVX-LABEL: shuffle_v2i64_22:
58 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
60 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
61 ret <2 x i64> %shuffle
; v2i64 shuffle with mask <3,2>: swaps the two elements of %b (indices 2 and 3
; address the second operand). The checks expect a single pshufd (vpshufd
; under AVX) that reads xmm1 and writes xmm0.
63 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
64 ; SSE-LABEL: shuffle_v2i64_32:
66 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
69 ; AVX-LABEL: shuffle_v2i64_32:
71 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
73 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
74 ret <2 x i64> %shuffle
; v2i64 shuffle with mask <3,3>: both result lanes take element 1 of %b.
; The checks expect a single pshufd (vpshufd under AVX) that reads xmm1 and
; writes xmm0.
76 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
77 ; SSE-LABEL: shuffle_v2i64_33:
79 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
82 ; AVX-LABEL: shuffle_v2i64_33:
84 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
86 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
87 ret <2 x i64> %shuffle
; v2f64 shuffle with mask <0,0>: both result lanes take element 0 of %a.
; Expected instruction differs per run: movlhps for the SSE2 run, unpcklpd
; for the SSE3, SSSE3 and SSE4.1 runs, and vunpcklpd for the AVX runs.
90 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
91 ; SSE2-LABEL: shuffle_v2f64_00:
93 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
96 ; SSE3-LABEL: shuffle_v2f64_00:
98 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
101 ; SSSE3-LABEL: shuffle_v2f64_00:
103 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
106 ; SSE41-LABEL: shuffle_v2f64_00:
108 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
111 ; AVX-LABEL: shuffle_v2f64_00:
113 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
115 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
116 ret <2 x double> %shuffle
; v2f64 shuffle with mask <1,0>: swaps the two elements of %a.
; The checks expect shufpd for the SSE runs and vpermilpd for the AVX runs.
118 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
119 ; SSE-LABEL: shuffle_v2f64_10:
121 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
124 ; AVX-LABEL: shuffle_v2f64_10:
126 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
128 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
129 ret <2 x double> %shuffle
; v2f64 shuffle with mask <1,1>: both result lanes take element 1 of %a.
; The checks expect movhlps for the SSE runs and vmovhlps for the AVX runs.
131 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
132 ; SSE-LABEL: shuffle_v2f64_11:
134 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
137 ; AVX-LABEL: shuffle_v2f64_11:
139 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
141 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
142 ret <2 x double> %shuffle
; v2f64 shuffle with mask <2,2>: both result lanes take element 0 of %b.
; The SSE-family runs expect the shuffle to be done in place on xmm1
; (movlhps for SSE2, unpcklpd for SSE3/SSSE3/SSE4.1) followed by a register
; copy into xmm0; the AVX runs expect a single vunpcklpd writing xmm0.
144 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
145 ; SSE2-LABEL: shuffle_v2f64_22:
147 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
148 ; SSE2-NEXT: movaps %xmm1, %xmm0
151 ; SSE3-LABEL: shuffle_v2f64_22:
153 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
154 ; SSE3-NEXT: movapd %xmm1, %xmm0
157 ; SSSE3-LABEL: shuffle_v2f64_22:
159 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
160 ; SSSE3-NEXT: movapd %xmm1, %xmm0
163 ; SSE41-LABEL: shuffle_v2f64_22:
165 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
166 ; SSE41-NEXT: movapd %xmm1, %xmm0
169 ; AVX-LABEL: shuffle_v2f64_22:
171 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
173 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
174 ret <2 x double> %shuffle
; v2f64 shuffle with mask <3,2>: swaps the two elements of %b.
; The SSE runs expect a pshufd that reads xmm1 and writes xmm0 directly
; (no separate register copy is checked); the AVX runs expect vpermilpd.
176 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
177 ; SSE-LABEL: shuffle_v2f64_32:
179 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
182 ; AVX-LABEL: shuffle_v2f64_32:
184 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
186 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
187 ret <2 x double> %shuffle
; v2f64 shuffle with mask <3,3>: both result lanes take element 1 of %b.
; The SSE runs expect movhlps in place on xmm1 followed by movaps into xmm0;
; the AVX runs expect a single vmovhlps writing xmm0.
189 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
190 ; SSE-LABEL: shuffle_v2f64_33:
192 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
193 ; SSE-NEXT: movaps %xmm1, %xmm0
196 ; AVX-LABEL: shuffle_v2f64_33:
198 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
200 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
201 ret <2 x double> %shuffle
; Two-input v2f64 shuffle with mask <0,3>: lane 0 is element 0 of %a and
; lane 1 is element 1 of %b. The SSE2, SSE3 and SSSE3 runs expect shufpd;
; the SSE4.1 run expects blendpd and the AVX runs expect vblendpd.
203 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
204 ; SSE2-LABEL: shuffle_v2f64_03:
206 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
209 ; SSE3-LABEL: shuffle_v2f64_03:
211 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
214 ; SSSE3-LABEL: shuffle_v2f64_03:
216 ; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
219 ; SSE41-LABEL: shuffle_v2f64_03:
221 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
224 ; AVX-LABEL: shuffle_v2f64_03:
226 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
228 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
229 ret <2 x double> %shuffle
; Two-input v2f64 shuffle with mask <2,1>: lane 0 is element 0 of %b and
; lane 1 is element 1 of %a. The SSE-family runs expect the result to be
; built in xmm1 (shufpd up to SSSE3, blendpd for SSE4.1) and then copied to
; xmm0 with movapd; the AVX runs expect a single vblendpd writing xmm0.
231 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
232 ; SSE2-LABEL: shuffle_v2f64_21:
234 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
235 ; SSE2-NEXT: movapd %xmm1, %xmm0
238 ; SSE3-LABEL: shuffle_v2f64_21:
240 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
241 ; SSE3-NEXT: movapd %xmm1, %xmm0
244 ; SSSE3-LABEL: shuffle_v2f64_21:
246 ; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
247 ; SSSE3-NEXT: movapd %xmm1, %xmm0
250 ; SSE41-LABEL: shuffle_v2f64_21:
252 ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
253 ; SSE41-NEXT: movapd %xmm1, %xmm0
256 ; AVX-LABEL: shuffle_v2f64_21:
258 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
260 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
261 ret <2 x double> %shuffle
; Two-input v2i64 shuffle with mask <0,2>: lane 0 is element 0 of %a and
; lane 1 is element 0 of %b. The checks expect punpcklqdq (vpunpcklqdq under
; AVX) operating on xmm0 and xmm1.
265 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
266 ; SSE-LABEL: shuffle_v2i64_02:
268 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
271 ; AVX-LABEL: shuffle_v2i64_02:
273 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
275 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
276 ret <2 x i64> %shuffle
; Same <0,2> mask as shuffle_v2i64_02, but with an unused leading %nonce
; argument. The checks show the shuffle inputs in xmm1/xmm2: the SSE runs
; expect punpcklqdq in place on xmm1 plus a movdqa into xmm0, while the AVX
; runs expect one vpunpcklqdq that writes xmm0 directly.
278 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
279 ; SSE-LABEL: shuffle_v2i64_02_copy:
281 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
282 ; SSE-NEXT: movdqa %xmm1, %xmm0
285 ; AVX-LABEL: shuffle_v2i64_02_copy:
287 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
289 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
290 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle with mask <0,3>: lane 0 is element 0 of %a and
; lane 1 is element 1 of %b. Expected instruction per run: shufpd for SSE2,
; SSE3 and SSSE3; pblendw for SSE4.1; vpblendw for AVX1; vpblendd for AVX2.
292 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
293 ; SSE2-LABEL: shuffle_v2i64_03:
295 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
298 ; SSE3-LABEL: shuffle_v2i64_03:
300 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
303 ; SSSE3-LABEL: shuffle_v2i64_03:
305 ; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
308 ; SSE41-LABEL: shuffle_v2i64_03:
310 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
313 ; AVX1-LABEL: shuffle_v2i64_03:
315 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
318 ; AVX2-LABEL: shuffle_v2i64_03:
320 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
322 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
323 ret <2 x i64> %shuffle
; Same <0,3> mask as shuffle_v2i64_03, but with an unused leading %nonce
; argument, so the checks show the inputs in xmm1/xmm2. The SSE-family runs
; expect the blend/shuffle in place on xmm1 followed by a copy into xmm0;
; the AVX1 and AVX2 runs expect a single blend that writes xmm0 directly.
325 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
326 ; SSE2-LABEL: shuffle_v2i64_03_copy:
328 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
329 ; SSE2-NEXT: movapd %xmm1, %xmm0
332 ; SSE3-LABEL: shuffle_v2i64_03_copy:
334 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
335 ; SSE3-NEXT: movapd %xmm1, %xmm0
338 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
340 ; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
341 ; SSSE3-NEXT: movapd %xmm1, %xmm0
344 ; SSE41-LABEL: shuffle_v2i64_03_copy:
346 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
347 ; SSE41-NEXT: movdqa %xmm1, %xmm0
350 ; AVX1-LABEL: shuffle_v2i64_03_copy:
352 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
355 ; AVX2-LABEL: shuffle_v2i64_03_copy:
357 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
359 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
360 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle with mask <1,2>: lane 0 is element 1 of %a and
; lane 1 is element 0 of %b. The SSE2 and SSE3 runs expect shufpd; the SSSE3
; and SSE4.1 runs expect palignr into xmm1 followed by movdqa into xmm0; the
; AVX runs expect a single vpalignr writing xmm0.
362 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
363 ; SSE2-LABEL: shuffle_v2i64_12:
365 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
368 ; SSE3-LABEL: shuffle_v2i64_12:
370 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
373 ; SSSE3-LABEL: shuffle_v2i64_12:
375 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
376 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
379 ; SSE41-LABEL: shuffle_v2i64_12:
381 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
382 ; SSE41-NEXT: movdqa %xmm1, %xmm0
385 ; AVX-LABEL: shuffle_v2i64_12:
387 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
389 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
390 ret <2 x i64> %shuffle
; Same <1,2> mask as shuffle_v2i64_12, but with an unused leading %nonce
; argument, so the checks show the inputs in xmm1/xmm2. Every SSE-family run
; expects the result to be formed in an input register (xmm1 for shufpd,
; xmm2 for palignr) and then copied to xmm0; the AVX runs expect a single
; vpalignr writing xmm0.
392 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
393 ; SSE2-LABEL: shuffle_v2i64_12_copy:
395 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
396 ; SSE2-NEXT: movapd %xmm1, %xmm0
399 ; SSE3-LABEL: shuffle_v2i64_12_copy:
401 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
402 ; SSE3-NEXT: movapd %xmm1, %xmm0
405 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
407 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
408 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
411 ; SSE41-LABEL: shuffle_v2i64_12_copy:
413 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
414 ; SSE41-NEXT: movdqa %xmm2, %xmm0
417 ; AVX-LABEL: shuffle_v2i64_12_copy:
419 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
421 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
422 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle with mask <1,3>: lane 0 is element 1 of %a and
; lane 1 is element 1 of %b. The checks expect punpckhqdq (vpunpckhqdq under
; AVX) operating on xmm0 and xmm1.
424 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
425 ; SSE-LABEL: shuffle_v2i64_13:
427 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
430 ; AVX-LABEL: shuffle_v2i64_13:
432 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
434 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
435 ret <2 x i64> %shuffle
; Same <1,3> mask as shuffle_v2i64_13, but with an unused leading %nonce
; argument, so the checks show the inputs in xmm1/xmm2. The SSE runs expect
; punpckhqdq in place on xmm1 plus a movdqa into xmm0; the AVX runs expect
; one vpunpckhqdq that writes xmm0 directly.
437 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
438 ; SSE-LABEL: shuffle_v2i64_13_copy:
440 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
441 ; SSE-NEXT: movdqa %xmm1, %xmm0
444 ; AVX-LABEL: shuffle_v2i64_13_copy:
446 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
448 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
449 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle with mask <2,0>: lane 0 is element 0 of %b and
; lane 1 is element 0 of %a. The SSE runs expect punpcklqdq with xmm1 as the
; destination followed by movdqa into xmm0; the AVX runs expect a single
; vpunpcklqdq with the operands in (xmm1, xmm0) order.
451 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
452 ; SSE-LABEL: shuffle_v2i64_20:
454 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
455 ; SSE-NEXT: movdqa %xmm1, %xmm0
458 ; AVX-LABEL: shuffle_v2i64_20:
460 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
462 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
463 ret <2 x i64> %shuffle
; Same <2,0> mask as shuffle_v2i64_20, but with an unused leading %nonce
; argument, so the checks show the inputs in xmm1/xmm2. The SSE runs expect
; punpcklqdq in place on xmm2 plus a movdqa into xmm0; the AVX runs expect
; one vpunpcklqdq that writes xmm0 directly.
465 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
466 ; SSE-LABEL: shuffle_v2i64_20_copy:
468 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
469 ; SSE-NEXT: movdqa %xmm2, %xmm0
472 ; AVX-LABEL: shuffle_v2i64_20_copy:
474 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
476 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
477 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle with mask <2,1>: lane 0 is element 0 of %b and
; lane 1 is element 1 of %a. The SSE-family runs expect the result to be
; built in xmm1 (shufpd up to SSSE3, pblendw for SSE4.1) and then copied to
; xmm0; AVX1 expects vpblendw and AVX2 expects vpblendd, each writing xmm0.
479 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
480 ; SSE2-LABEL: shuffle_v2i64_21:
482 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
483 ; SSE2-NEXT: movapd %xmm1, %xmm0
486 ; SSE3-LABEL: shuffle_v2i64_21:
488 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
489 ; SSE3-NEXT: movapd %xmm1, %xmm0
492 ; SSSE3-LABEL: shuffle_v2i64_21:
494 ; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
495 ; SSSE3-NEXT: movapd %xmm1, %xmm0
498 ; SSE41-LABEL: shuffle_v2i64_21:
500 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
501 ; SSE41-NEXT: movdqa %xmm1, %xmm0
504 ; AVX1-LABEL: shuffle_v2i64_21:
506 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
509 ; AVX2-LABEL: shuffle_v2i64_21:
511 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
513 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
514 ret <2 x i64> %shuffle
; Same <2,1> mask as shuffle_v2i64_21, but with an unused leading %nonce
; argument, so the checks show the inputs in xmm1/xmm2. The SSE-family runs
; expect the result to be built in xmm2 and then copied to xmm0; the AVX1
; and AVX2 runs expect a single blend that writes xmm0 directly.
516 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
517 ; SSE2-LABEL: shuffle_v2i64_21_copy:
519 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
520 ; SSE2-NEXT: movapd %xmm2, %xmm0
523 ; SSE3-LABEL: shuffle_v2i64_21_copy:
525 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
526 ; SSE3-NEXT: movapd %xmm2, %xmm0
529 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
531 ; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
532 ; SSSE3-NEXT: movapd %xmm2, %xmm0
535 ; SSE41-LABEL: shuffle_v2i64_21_copy:
537 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
538 ; SSE41-NEXT: movdqa %xmm2, %xmm0
541 ; AVX1-LABEL: shuffle_v2i64_21_copy:
543 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
546 ; AVX2-LABEL: shuffle_v2i64_21_copy:
548 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
550 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
551 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle with mask <3,0>: lane 0 is element 1 of %b and
; lane 1 is element 0 of %a. The SSE2 and SSE3 runs expect shufpd into xmm1
; followed by movapd into xmm0; the SSSE3 and SSE4.1 runs expect a single
; palignr whose destination is already xmm0; the AVX runs expect vpalignr.
553 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
554 ; SSE2-LABEL: shuffle_v2i64_30:
556 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
557 ; SSE2-NEXT: movapd %xmm1, %xmm0
560 ; SSE3-LABEL: shuffle_v2i64_30:
562 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
563 ; SSE3-NEXT: movapd %xmm1, %xmm0
566 ; SSSE3-LABEL: shuffle_v2i64_30:
568 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
571 ; SSE41-LABEL: shuffle_v2i64_30:
573 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
576 ; AVX-LABEL: shuffle_v2i64_30:
578 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
580 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
581 ret <2 x i64> %shuffle
; Same <3,0> mask as shuffle_v2i64_30, but with an unused leading %nonce
; argument, so the checks show the inputs in xmm1/xmm2. Every SSE-family run
; expects the result to be formed in an input register (xmm2 for shufpd,
; xmm1 for palignr) and then copied to xmm0; the AVX runs expect a single
; vpalignr writing xmm0.
583 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
584 ; SSE2-LABEL: shuffle_v2i64_30_copy:
586 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
587 ; SSE2-NEXT: movapd %xmm2, %xmm0
590 ; SSE3-LABEL: shuffle_v2i64_30_copy:
592 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
593 ; SSE3-NEXT: movapd %xmm2, %xmm0
596 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
598 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
599 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
602 ; SSE41-LABEL: shuffle_v2i64_30_copy:
604 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
605 ; SSE41-NEXT: movdqa %xmm1, %xmm0
608 ; AVX-LABEL: shuffle_v2i64_30_copy:
610 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
612 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
613 ret <2 x i64> %shuffle
; Two-input v2i64 shuffle with mask <3,1>: lane 0 is element 1 of %b and
; lane 1 is element 1 of %a. The SSE runs expect punpckhqdq with xmm1 as the
; destination followed by movdqa into xmm0; the AVX runs expect a single
; vpunpckhqdq with the operands in (xmm1, xmm0) order.
615 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
616 ; SSE-LABEL: shuffle_v2i64_31:
618 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
619 ; SSE-NEXT: movdqa %xmm1, %xmm0
622 ; AVX-LABEL: shuffle_v2i64_31:
624 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
626 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
627 ret <2 x i64> %shuffle
; Same <3,1> mask as shuffle_v2i64_31, but with an unused leading %nonce
; argument, so the checks show the inputs in xmm1/xmm2. The SSE runs expect
; punpckhqdq in place on xmm2 plus a movdqa into xmm0; the AVX runs expect
; one vpunpckhqdq that writes xmm0 directly.
629 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
630 ; SSE-LABEL: shuffle_v2i64_31_copy:
632 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
633 ; SSE-NEXT: movdqa %xmm2, %xmm0
636 ; AVX-LABEL: shuffle_v2i64_31_copy:
638 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
640 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
641 ret <2 x i64> %shuffle
; Inserts the scalar i64 argument into lane 0 of an undef vector, then
; shuffles with a zero vector using mask <0,3> so lane 1 comes from
; zeroinitializer. The checks expect this to collapse to a single GPR-to-XMM
; move: movd %rdi for the SSE runs and vmovq %rdi for the AVX runs.
645 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
646 ; SSE-LABEL: insert_reg_and_zero_v2i64:
648 ; SSE-NEXT: movd %rdi, %xmm0
651 ; AVX-LABEL: insert_reg_and_zero_v2i64:
653 ; AVX-NEXT: vmovq %rdi, %xmm0
655 %v = insertelement <2 x i64> undef, i64 %a, i32 0
656 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
657 ret <2 x i64> %shuffle
; Loads an i64 from %ptr, inserts it into lane 0 of an undef vector, then
; shuffles with a zero vector using mask <0,3> so lane 1 comes from
; zeroinitializer. The checks expect the load to be folded into a single
; move from memory: movq (%rdi) for the SSE runs and vmovq (%rdi) for the
; AVX runs.
660 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
661 ; SSE-LABEL: insert_mem_and_zero_v2i64:
663 ; SSE-NEXT: movq (%rdi), %xmm0
666 ; AVX-LABEL: insert_mem_and_zero_v2i64:
668 ; AVX-NEXT: vmovq (%rdi), %xmm0
670 %a = load i64* %ptr
671 %v = insertelement <2 x i64> undef, i64 %a, i32 0
672 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
673 ret <2 x i64> %shuffle
; Inserts the scalar double argument into lane 0 of an undef vector, then
; shuffles with a zero vector using mask <0,3> so lane 1 comes from
; zeroinitializer. The checks expect a single register-to-register movq
; (vmovq under AVX) on xmm0.
676 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
677 ; SSE-LABEL: insert_reg_and_zero_v2f64:
679 ; SSE-NEXT: movq %xmm0, %xmm0
682 ; AVX-LABEL: insert_reg_and_zero_v2f64:
684 ; AVX-NEXT: vmovq %xmm0, %xmm0
686 %v = insertelement <2 x double> undef, double %a, i32 0
687 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
688 ret <2 x double> %shuffle
; Loads a double from %ptr, inserts it into lane 0 of an undef vector, then
; shuffles with a zero vector using mask <0,3> so lane 1 comes from
; zeroinitializer. The checks expect the load to be folded into a single
; movsd (%rdi) for the SSE runs and vmovsd (%rdi) for the AVX runs.
691 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
692 ; SSE-LABEL: insert_mem_and_zero_v2f64:
694 ; SSE-NEXT: movsd (%rdi), %xmm0
697 ; AVX-LABEL: insert_mem_and_zero_v2f64:
699 ; AVX-NEXT: vmovsd (%rdi), %xmm0
701 %a = load double* %ptr
702 %v = insertelement <2 x double> undef, double %a, i32 0
703 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
704 ret <2 x double> %shuffle
; Inserts the scalar double argument into lane 0 of an undef vector and then
; duplicates lane 0 into both lanes with mask <0,0>. The checks currently
; expect movlhps for the SSE2 run, unpcklpd for the SSE3, SSSE3 and SSE4.1
; runs, and vunpcklpd for the AVX runs.
707 define <2 x double> @insert_dup_reg_v2f64(double %a) {
708 ; FIXME: We should match movddup for SSE3 and higher here.
710 ; SSE2-LABEL: insert_dup_reg_v2f64:
712 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
715 ; SSE3-LABEL: insert_dup_reg_v2f64:
717 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
720 ; SSSE3-LABEL: insert_dup_reg_v2f64:
722 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
725 ; SSE41-LABEL: insert_dup_reg_v2f64:
727 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
730 ; AVX-LABEL: insert_dup_reg_v2f64:
732 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
734 %v = insertelement <2 x double> undef, double %a, i32 0
735 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
736 ret <2 x double> %shuffle
; Loads a double from %ptr, inserts it into lane 0 of an undef vector and
; then duplicates lane 0 into both lanes with mask <0,0>. The SSE2 run
; expects a movsd load followed by movlhps; the SSE3, SSSE3 and SSE4.1 runs
; expect a single movddup from memory, and the AVX runs expect vmovddup.
738 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
739 ; SSE2-LABEL: insert_dup_mem_v2f64:
741 ; SSE2-NEXT: movsd (%rdi), %xmm0
742 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
745 ; SSE3-LABEL: insert_dup_mem_v2f64:
747 ; SSE3-NEXT: movddup (%rdi), %xmm0
750 ; SSSE3-LABEL: insert_dup_mem_v2f64:
752 ; SSSE3-NEXT: movddup (%rdi), %xmm0
755 ; SSE41-LABEL: insert_dup_mem_v2f64:
757 ; SSE41-NEXT: movddup (%rdi), %xmm0
760 ; AVX-LABEL: insert_dup_mem_v2f64:
762 ; AVX-NEXT: vmovddup (%rdi), %xmm0
764 %a = load double* %ptr
765 %v = insertelement <2 x double> undef, double %a, i32 0
766 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
767 ret <2 x double> %shuffle
; Loads a <2 x double> from %ptr and swaps its two elements with mask <1,0>.
; The SSE runs expect a separate movapd load followed by shufpd; the AVX
; runs expect the load to be folded into vpermilpd (shown as mem[1,0]).
770 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
771 ; SSE-LABEL: shuffle_mem_v2f64_10:
773 ; SSE-NEXT: movapd (%rdi), %xmm0
774 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
777 ; AVX-LABEL: shuffle_mem_v2f64_10:
779 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
781 %a = load <2 x double>* %ptr
782 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
783 ret <2 x double> %shuffle