1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
; Module-level target description: the data layout string and the target
; triple. The triple matches the -mtriple value used by the llc invocations at
; the top of the file.
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10 target triple = "x86_64-unknown-unknown"
; Mask <0,0>: both result lanes take element 0 of %a (a splat of the low
; element). The mask never references %b.
12 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
13 ; SSE-LABEL: shuffle_v2i64_00:
15 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
18 ; AVX1-LABEL: shuffle_v2i64_00:
20 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
23 ; AVX2-LABEL: shuffle_v2i64_00:
25 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
28 ; AVX512VL-LABEL: shuffle_v2i64_00:
30 ; AVX512VL-NEXT: vpbroadcastq %xmm0, %xmm0
32 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
33 ret <2 x i64> %shuffle
; Mask <1,0>: swaps the two elements of %a. The mask never references %b.
35 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
36 ; SSE-LABEL: shuffle_v2i64_10:
38 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
41 ; AVX-LABEL: shuffle_v2i64_10:
43 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
45 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
46 ret <2 x i64> %shuffle
; Mask <1,1>: both result lanes take element 1 of %a (a splat of the high
; element). The mask never references %b.
48 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
49 ; SSE-LABEL: shuffle_v2i64_11:
51 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
54 ; AVX-LABEL: shuffle_v2i64_11:
56 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
58 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
59 ret <2 x i64> %shuffle
; Mask <2,2>: both result lanes take element 0 of %b (mask indices 2 and 3
; address the second operand). The mask never references %a.
61 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
62 ; SSE-LABEL: shuffle_v2i64_22:
64 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
67 ; AVX1-LABEL: shuffle_v2i64_22:
69 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
72 ; AVX2-LABEL: shuffle_v2i64_22:
74 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
77 ; AVX512VL-LABEL: shuffle_v2i64_22:
79 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm0
81 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
82 ret <2 x i64> %shuffle
; Mask <3,2>: swaps the two elements of %b. The mask never references %a.
84 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
85 ; SSE-LABEL: shuffle_v2i64_32:
87 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
90 ; AVX-LABEL: shuffle_v2i64_32:
92 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
94 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
95 ret <2 x i64> %shuffle
; Mask <3,3>: both result lanes take element 1 of %b. The mask never
; references %a.
97 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
98 ; SSE-LABEL: shuffle_v2i64_33:
100 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
103 ; AVX-LABEL: shuffle_v2i64_33:
105 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
107 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
108 ret <2 x i64> %shuffle
; Floating-point variant of the <0,0> splat: both result lanes take element 0
; of %a. The mask never references %b.
111 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
112 ; SSE2-LABEL: shuffle_v2f64_00:
114 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
117 ; SSE3-LABEL: shuffle_v2f64_00:
119 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
122 ; SSSE3-LABEL: shuffle_v2f64_00:
124 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
127 ; SSE41-LABEL: shuffle_v2f64_00:
129 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
132 ; AVX-LABEL: shuffle_v2f64_00:
134 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
136 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
137 ret <2 x double> %shuffle
; Floating-point variant of mask <1,0>: swaps the two elements of %a. The mask
; never references %b.
139 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
140 ; SSE-LABEL: shuffle_v2f64_10:
142 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
145 ; AVX1-LABEL: shuffle_v2f64_10:
147 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
150 ; AVX2-LABEL: shuffle_v2f64_10:
152 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
155 ; AVX512VL-LABEL: shuffle_v2f64_10:
157 ; AVX512VL-NEXT: vpermilpd $1, %xmm0, %xmm0
158 ; AVX512VL-NEXT: retq
160 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
161 ret <2 x double> %shuffle
; Floating-point variant of mask <1,1>: both result lanes take element 1 of
; %a. The mask never references %b.
163 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
164 ; SSE-LABEL: shuffle_v2f64_11:
166 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
169 ; AVX1-LABEL: shuffle_v2f64_11:
171 ; AVX1-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
174 ; AVX2-LABEL: shuffle_v2f64_11:
176 ; AVX2-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
179 ; AVX512VL-LABEL: shuffle_v2f64_11:
181 ; AVX512VL-NEXT: vmovhlps %xmm0, %xmm0, %xmm0
182 ; AVX512VL-NEXT: retq
183 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
184 ret <2 x double> %shuffle
; Floating-point variant of mask <2,2>: both result lanes take element 0 of
; %b. The mask never references %a.
186 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
187 ; SSE2-LABEL: shuffle_v2f64_22:
189 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
190 ; SSE2-NEXT: movaps %xmm1, %xmm0
193 ; SSE3-LABEL: shuffle_v2f64_22:
195 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
198 ; SSSE3-LABEL: shuffle_v2f64_22:
200 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
203 ; SSE41-LABEL: shuffle_v2f64_22:
205 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
208 ; AVX-LABEL: shuffle_v2f64_22:
210 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
212 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
213 ret <2 x double> %shuffle
; Floating-point variant of mask <3,2>: swaps the two elements of %b. The mask
; never references %a.
215 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
216 ; SSE-LABEL: shuffle_v2f64_32:
218 ; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
219 ; SSE-NEXT: movapd %xmm1, %xmm0
222 ; AVX1-LABEL: shuffle_v2f64_32:
224 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
227 ; AVX2-LABEL: shuffle_v2f64_32:
229 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
232 ; AVX512VL-LABEL: shuffle_v2f64_32:
234 ; AVX512VL-NEXT: vpermilpd $1, %xmm1, %xmm0
235 ; AVX512VL-NEXT: retq
237 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
238 ret <2 x double> %shuffle
; Floating-point variant of mask <3,3>: both result lanes take element 1 of
; %b. The mask never references %a.
240 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
241 ; SSE-LABEL: shuffle_v2f64_33:
243 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
244 ; SSE-NEXT: movaps %xmm1, %xmm0
247 ; AVX1-LABEL: shuffle_v2f64_33:
249 ; AVX1-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
252 ; AVX2-LABEL: shuffle_v2f64_33:
254 ; AVX2-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
257 ; AVX512VL-LABEL: shuffle_v2f64_33:
259 ; AVX512VL-NEXT: vmovhlps %xmm1, %xmm1, %xmm0
260 ; AVX512VL-NEXT: retq
261 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
262 ret <2 x double> %shuffle
; Mask <0,3>: a lane-wise blend, with lane 0 from %a and lane 1 from %b.
264 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
265 ; SSE2-LABEL: shuffle_v2f64_03:
267 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
268 ; SSE2-NEXT: movapd %xmm1, %xmm0
271 ; SSE3-LABEL: shuffle_v2f64_03:
273 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
274 ; SSE3-NEXT: movapd %xmm1, %xmm0
277 ; SSSE3-LABEL: shuffle_v2f64_03:
279 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
280 ; SSSE3-NEXT: movapd %xmm1, %xmm0
283 ; SSE41-LABEL: shuffle_v2f64_03:
285 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
288 ; AVX-LABEL: shuffle_v2f64_03:
290 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
292 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
293 ret <2 x double> %shuffle
; Mask <2,1>: a lane-wise blend, with lane 0 from %b and lane 1 from %a.
295 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
296 ; SSE2-LABEL: shuffle_v2f64_21:
298 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
301 ; SSE3-LABEL: shuffle_v2f64_21:
303 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
306 ; SSSE3-LABEL: shuffle_v2f64_21:
308 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
311 ; SSE41-LABEL: shuffle_v2f64_21:
313 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
316 ; AVX-LABEL: shuffle_v2f64_21:
318 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
320 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
321 ret <2 x double> %shuffle
; Mask <0,2>: interleaves the low elements, giving [%a[0], %b[0]].
325 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
326 ; SSE-LABEL: shuffle_v2i64_02:
328 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
331 ; AVX1-LABEL: shuffle_v2i64_02:
333 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
336 ; AVX2-LABEL: shuffle_v2i64_02:
338 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
341 ; AVX512VL-LABEL: shuffle_v2i64_02:
343 ; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0
344 ; AVX512VL-NEXT: retq
345 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
346 ret <2 x i64> %shuffle
; Same <0,2> mask as shuffle_v2i64_02, but with an unused leading argument
; %nonce. In the expected assembly the inputs are read from xmm1/xmm2 and the
; result lands in xmm0, so the non-VEX sequence needs an extra register move.
; NOTE(review): presumably %nonce exists only to shift %a/%b out of xmm0.
348 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
349 ; SSE-LABEL: shuffle_v2i64_02_copy:
351 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
352 ; SSE-NEXT: movdqa %xmm1, %xmm0
355 ; AVX1-LABEL: shuffle_v2i64_02_copy:
357 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
360 ; AVX2-LABEL: shuffle_v2i64_02_copy:
362 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
365 ; AVX512VL-LABEL: shuffle_v2i64_02_copy:
367 ; AVX512VL-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm0
368 ; AVX512VL-NEXT: retq
369 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
370 ret <2 x i64> %shuffle
; Mask <0,3>: a lane-wise blend, with lane 0 from %a and lane 1 from %b.
372 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
373 ; SSE2-LABEL: shuffle_v2i64_03:
375 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
376 ; SSE2-NEXT: movapd %xmm1, %xmm0
379 ; SSE3-LABEL: shuffle_v2i64_03:
381 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
382 ; SSE3-NEXT: movapd %xmm1, %xmm0
385 ; SSSE3-LABEL: shuffle_v2i64_03:
387 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
388 ; SSSE3-NEXT: movapd %xmm1, %xmm0
391 ; SSE41-LABEL: shuffle_v2i64_03:
393 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
396 ; AVX1-LABEL: shuffle_v2i64_03:
398 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
401 ; AVX2-LABEL: shuffle_v2i64_03:
403 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
406 ; AVX512VL-LABEL: shuffle_v2i64_03:
408 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
409 ; AVX512VL-NEXT: retq
410 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
411 ret <2 x i64> %shuffle
; Same <0,3> blend as shuffle_v2i64_03, but with an unused leading argument
; %nonce. In the expected assembly the inputs are read from xmm1/xmm2 and the
; result lands in xmm0.
; NOTE(review): presumably %nonce exists only to shift %a/%b out of xmm0.
413 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
414 ; SSE2-LABEL: shuffle_v2i64_03_copy:
416 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
417 ; SSE2-NEXT: movapd %xmm2, %xmm0
420 ; SSE3-LABEL: shuffle_v2i64_03_copy:
422 ; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
423 ; SSE3-NEXT: movapd %xmm2, %xmm0
426 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
428 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
429 ; SSSE3-NEXT: movapd %xmm2, %xmm0
432 ; SSE41-LABEL: shuffle_v2i64_03_copy:
434 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
435 ; SSE41-NEXT: movdqa %xmm1, %xmm0
438 ; AVX1-LABEL: shuffle_v2i64_03_copy:
440 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
443 ; AVX2-LABEL: shuffle_v2i64_03_copy:
445 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
448 ; AVX512VL-LABEL: shuffle_v2i64_03_copy:
450 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
451 ; AVX512VL-NEXT: retq
452 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
453 ret <2 x i64> %shuffle
; Mask <1,2>: the result is [%a[1], %b[0]], i.e. the high element of %a
; followed by the low element of %b.
455 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
456 ; SSE2-LABEL: shuffle_v2i64_12:
458 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
461 ; SSE3-LABEL: shuffle_v2i64_12:
463 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
466 ; SSSE3-LABEL: shuffle_v2i64_12:
468 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
469 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
472 ; SSE41-LABEL: shuffle_v2i64_12:
474 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
475 ; SSE41-NEXT: movdqa %xmm1, %xmm0
478 ; AVX-LABEL: shuffle_v2i64_12:
480 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
482 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
483 ret <2 x i64> %shuffle
; Same <1,2> mask as shuffle_v2i64_12, but with an unused leading argument
; %nonce. In the expected assembly the inputs are read from xmm1/xmm2 and the
; result lands in xmm0.
; NOTE(review): presumably %nonce exists only to shift %a/%b out of xmm0.
485 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
486 ; SSE2-LABEL: shuffle_v2i64_12_copy:
488 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
489 ; SSE2-NEXT: movapd %xmm1, %xmm0
492 ; SSE3-LABEL: shuffle_v2i64_12_copy:
494 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
495 ; SSE3-NEXT: movapd %xmm1, %xmm0
498 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
500 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
501 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
504 ; SSE41-LABEL: shuffle_v2i64_12_copy:
506 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
507 ; SSE41-NEXT: movdqa %xmm2, %xmm0
510 ; AVX-LABEL: shuffle_v2i64_12_copy:
512 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
514 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
515 ret <2 x i64> %shuffle
; Mask <1,3>: interleaves the high elements, giving [%a[1], %b[1]].
517 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
518 ; SSE-LABEL: shuffle_v2i64_13:
520 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
523 ; AVX1-LABEL: shuffle_v2i64_13:
525 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
528 ; AVX2-LABEL: shuffle_v2i64_13:
530 ; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
533 ; AVX512VL-LABEL: shuffle_v2i64_13:
535 ; AVX512VL-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm0
536 ; AVX512VL-NEXT: retq
537 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
538 ret <2 x i64> %shuffle
; Same <1,3> mask as shuffle_v2i64_13, but with an unused leading argument
; %nonce. In the expected assembly the inputs are read from xmm1/xmm2 and the
; result lands in xmm0.
; NOTE(review): presumably %nonce exists only to shift %a/%b out of xmm0.
540 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
541 ; SSE-LABEL: shuffle_v2i64_13_copy:
543 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
544 ; SSE-NEXT: movdqa %xmm1, %xmm0
547 ; AVX1-LABEL: shuffle_v2i64_13_copy:
549 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
552 ; AVX2-LABEL: shuffle_v2i64_13_copy:
554 ; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
557 ; AVX512VL-LABEL: shuffle_v2i64_13_copy:
559 ; AVX512VL-NEXT: vpunpckhqdq %xmm2, %xmm1, %xmm0
560 ; AVX512VL-NEXT: retq
561 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
562 ret <2 x i64> %shuffle
; Mask <2,0>: the result is [%b[0], %a[0]], the low elements with %b first.
564 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
565 ; SSE-LABEL: shuffle_v2i64_20:
567 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
568 ; SSE-NEXT: movdqa %xmm1, %xmm0
571 ; AVX1-LABEL: shuffle_v2i64_20:
573 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
576 ; AVX2-LABEL: shuffle_v2i64_20:
578 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
581 ; AVX512VL-LABEL: shuffle_v2i64_20:
583 ; AVX512VL-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0
584 ; AVX512VL-NEXT: retq
585 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
586 ret <2 x i64> %shuffle
; Same <2,0> mask as shuffle_v2i64_20, but with an unused leading argument
; %nonce. In the expected assembly the inputs are read from xmm1/xmm2 and the
; result lands in xmm0.
; NOTE(review): presumably %nonce exists only to shift %a/%b out of xmm0.
588 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
589 ; SSE-LABEL: shuffle_v2i64_20_copy:
591 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
592 ; SSE-NEXT: movdqa %xmm2, %xmm0
595 ; AVX1-LABEL: shuffle_v2i64_20_copy:
597 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
600 ; AVX2-LABEL: shuffle_v2i64_20_copy:
602 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
605 ; AVX512VL-LABEL: shuffle_v2i64_20_copy:
607 ; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm2, %xmm0
608 ; AVX512VL-NEXT: retq
609 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
610 ret <2 x i64> %shuffle
; Mask <2,1>: a lane-wise blend, with lane 0 from %b and lane 1 from %a.
612 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
613 ; SSE2-LABEL: shuffle_v2i64_21:
615 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
618 ; SSE3-LABEL: shuffle_v2i64_21:
620 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
623 ; SSSE3-LABEL: shuffle_v2i64_21:
625 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
628 ; SSE41-LABEL: shuffle_v2i64_21:
630 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
633 ; AVX1-LABEL: shuffle_v2i64_21:
635 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
638 ; AVX2-LABEL: shuffle_v2i64_21:
640 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
643 ; AVX512VL-LABEL: shuffle_v2i64_21:
645 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
646 ; AVX512VL-NEXT: retq
647 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
648 ret <2 x i64> %shuffle
; Same <2,1> blend as shuffle_v2i64_21, but with an unused leading argument
; %nonce. In the expected assembly the inputs are read from xmm1/xmm2 and the
; result lands in xmm0.
; NOTE(review): presumably %nonce exists only to shift %a/%b out of xmm0.
650 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
651 ; SSE2-LABEL: shuffle_v2i64_21_copy:
653 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
654 ; SSE2-NEXT: movapd %xmm1, %xmm0
657 ; SSE3-LABEL: shuffle_v2i64_21_copy:
659 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
660 ; SSE3-NEXT: movapd %xmm1, %xmm0
663 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
665 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
666 ; SSSE3-NEXT: movapd %xmm1, %xmm0
669 ; SSE41-LABEL: shuffle_v2i64_21_copy:
671 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
672 ; SSE41-NEXT: movdqa %xmm1, %xmm0
675 ; AVX1-LABEL: shuffle_v2i64_21_copy:
677 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
680 ; AVX2-LABEL: shuffle_v2i64_21_copy:
682 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
685 ; AVX512VL-LABEL: shuffle_v2i64_21_copy:
687 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
688 ; AVX512VL-NEXT: retq
689 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
690 ret <2 x i64> %shuffle
; Mask <3,0>: the result is [%b[1], %a[0]], i.e. the high element of %b
; followed by the low element of %a.
692 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
693 ; SSE2-LABEL: shuffle_v2i64_30:
695 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
696 ; SSE2-NEXT: movapd %xmm1, %xmm0
699 ; SSE3-LABEL: shuffle_v2i64_30:
701 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
702 ; SSE3-NEXT: movapd %xmm1, %xmm0
705 ; SSSE3-LABEL: shuffle_v2i64_30:
707 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
710 ; SSE41-LABEL: shuffle_v2i64_30:
712 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
715 ; AVX-LABEL: shuffle_v2i64_30:
717 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
719 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
720 ret <2 x i64> %shuffle
; Same <3,0> mask as shuffle_v2i64_30, but with an unused leading argument
; %nonce. In the expected assembly the inputs are read from xmm1/xmm2 and the
; result lands in xmm0.
; NOTE(review): presumably %nonce exists only to shift %a/%b out of xmm0.
722 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
723 ; SSE2-LABEL: shuffle_v2i64_30_copy:
725 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
726 ; SSE2-NEXT: movapd %xmm2, %xmm0
729 ; SSE3-LABEL: shuffle_v2i64_30_copy:
731 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
732 ; SSE3-NEXT: movapd %xmm2, %xmm0
735 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
737 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
738 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
741 ; SSE41-LABEL: shuffle_v2i64_30_copy:
743 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
744 ; SSE41-NEXT: movdqa %xmm1, %xmm0
747 ; AVX-LABEL: shuffle_v2i64_30_copy:
749 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
751 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
752 ret <2 x i64> %shuffle
; Mask <3,1>: the result is [%b[1], %a[1]], the high elements with %b first.
754 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
755 ; SSE-LABEL: shuffle_v2i64_31:
757 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
758 ; SSE-NEXT: movdqa %xmm1, %xmm0
761 ; AVX1-LABEL: shuffle_v2i64_31:
763 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
766 ; AVX2-LABEL: shuffle_v2i64_31:
768 ; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
771 ; AVX512VL-LABEL: shuffle_v2i64_31:
773 ; AVX512VL-NEXT: vpunpckhqdq %xmm0, %xmm1, %xmm0
774 ; AVX512VL-NEXT: retq
775 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
776 ret <2 x i64> %shuffle
; Same <3,1> mask as shuffle_v2i64_31, but with an unused leading argument
; %nonce. In the expected assembly the inputs are read from xmm1/xmm2 and the
; result lands in xmm0.
; NOTE(review): presumably %nonce exists only to shift %a/%b out of xmm0.
778 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
779 ; SSE-LABEL: shuffle_v2i64_31_copy:
781 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
782 ; SSE-NEXT: movdqa %xmm2, %xmm0
785 ; AVX1-LABEL: shuffle_v2i64_31_copy:
787 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
790 ; AVX2-LABEL: shuffle_v2i64_31_copy:
792 ; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
795 ; AVX512VL-LABEL: shuffle_v2i64_31_copy:
797 ; AVX512VL-NEXT: vpunpckhqdq %xmm1, %xmm2, %xmm0
798 ; AVX512VL-NEXT: retq
799 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
800 ret <2 x i64> %shuffle
; Shuffle of %a with an all-zero vector, mask <0,3>: keeps element 0 of %a
; and zeroes the upper element.
803 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
804 ; SSE-LABEL: shuffle_v2i64_0z:
806 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
809 ; AVX1-LABEL: shuffle_v2i64_0z:
811 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
814 ; AVX2-LABEL: shuffle_v2i64_0z:
816 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
819 ; AVX512VL-LABEL: shuffle_v2i64_0z:
821 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
822 ; AVX512VL-NEXT: retq
823 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
824 ret <2 x i64> %shuffle
; Shuffle of %a with an all-zero vector, mask <1,3>: moves element 1 of %a
; into lane 0 and zeroes lane 1.
827 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
828 ; SSE-LABEL: shuffle_v2i64_1z:
830 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
833 ; AVX-LABEL: shuffle_v2i64_1z:
835 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
837 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
838 ret <2 x i64> %shuffle
; Shuffle of %a with an all-zero vector, mask <2,0>: zeroes lane 0 and moves
; element 0 of %a into lane 1.
841 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
842 ; SSE-LABEL: shuffle_v2i64_z0:
844 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
847 ; AVX-LABEL: shuffle_v2i64_z0:
849 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
851 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
852 ret <2 x i64> %shuffle
; Shuffle of %a with an all-zero vector, mask <2,1>: zeroes lane 0 and keeps
; element 1 of %a in lane 1.
855 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
856 ; SSE2-LABEL: shuffle_v2i64_z1:
858 ; SSE2-NEXT: xorpd %xmm1, %xmm1
859 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
862 ; SSE3-LABEL: shuffle_v2i64_z1:
864 ; SSE3-NEXT: xorpd %xmm1, %xmm1
865 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
868 ; SSSE3-LABEL: shuffle_v2i64_z1:
870 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
871 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
874 ; SSE41-LABEL: shuffle_v2i64_z1:
876 ; SSE41-NEXT: pxor %xmm1, %xmm1
877 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
880 ; AVX1-LABEL: shuffle_v2i64_z1:
882 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
883 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
886 ; AVX2-LABEL: shuffle_v2i64_z1:
888 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
889 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
892 ; AVX512VL-LABEL: shuffle_v2i64_z1:
894 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
895 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
896 ; AVX512VL-NEXT: retq
897 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
898 ret <2 x i64> %shuffle
; Floating-point shuffle of %a with an all-zero vector, mask <0,3>: keeps
; element 0 of %a and zeroes the upper element.
901 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
902 ; SSE-LABEL: shuffle_v2f64_0z:
904 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
907 ; AVX1-LABEL: shuffle_v2f64_0z:
909 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
912 ; AVX2-LABEL: shuffle_v2f64_0z:
914 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
917 ; AVX512VL-LABEL: shuffle_v2f64_0z:
919 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
920 ; AVX512VL-NEXT: retq
921 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
922 ret <2 x double> %shuffle
; Floating-point shuffle of %a with an all-zero vector, mask <1,3>: moves
; element 1 of %a into lane 0 and zeroes lane 1.
925 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
926 ; SSE-LABEL: shuffle_v2f64_1z:
928 ; SSE-NEXT: xorpd %xmm1, %xmm1
929 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
932 ; AVX1-LABEL: shuffle_v2f64_1z:
934 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
935 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
938 ; AVX2-LABEL: shuffle_v2f64_1z:
940 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
941 ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
944 ; AVX512VL-LABEL: shuffle_v2f64_1z:
946 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
947 ; AVX512VL-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0
948 ; AVX512VL-NEXT: retq
949 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
950 ret <2 x double> %shuffle
; Floating-point shuffle of %a with an all-zero vector, mask <2,0>: zeroes
; lane 0 and moves element 0 of %a into lane 1.
953 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
954 ; SSE-LABEL: shuffle_v2f64_z0:
956 ; SSE-NEXT: xorpd %xmm1, %xmm1
957 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
958 ; SSE-NEXT: movapd %xmm1, %xmm0
961 ; AVX1-LABEL: shuffle_v2f64_z0:
963 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
964 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
967 ; AVX2-LABEL: shuffle_v2f64_z0:
969 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
970 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
973 ; AVX512VL-LABEL: shuffle_v2f64_z0:
975 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
976 ; AVX512VL-NEXT: vunpcklpd %xmm0, %xmm1, %xmm0
977 ; AVX512VL-NEXT: retq
978 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
979 ret <2 x double> %shuffle
; Floating-point shuffle of %a with an all-zero vector, mask <2,1>: zeroes
; lane 0 and keeps element 1 of %a in lane 1.
982 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
983 ; SSE2-LABEL: shuffle_v2f64_z1:
985 ; SSE2-NEXT: xorpd %xmm1, %xmm1
986 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
989 ; SSE3-LABEL: shuffle_v2f64_z1:
991 ; SSE3-NEXT: xorpd %xmm1, %xmm1
992 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
995 ; SSSE3-LABEL: shuffle_v2f64_z1:
997 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
998 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1001 ; SSE41-LABEL: shuffle_v2f64_z1:
1003 ; SSE41-NEXT: xorpd %xmm1, %xmm1
1004 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1007 ; AVX-LABEL: shuffle_v2f64_z1:
1009 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1010 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1012 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
1013 ret <2 x double> %shuffle
; Two shuffles joined by bitcasts. The first (mask <2,1> against zero) yields
; [0, %a[1]] as <2 x double>. After a bitcast to <4 x float>, the second (mask
; <2,3,0,1>) swaps the two 64-bit halves, so the final value is [%a[1], 0].
; The expected assembly shows this being emitted as one shuffle against a
; zeroed register.
1016 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
1017 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
1019 ; SSE-NEXT: xorpd %xmm1, %xmm1
1020 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1023 ; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
1025 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1026 ; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1029 ; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
1031 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1032 ; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1035 ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
1037 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1038 ; AVX512VL-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0
1039 ; AVX512VL-NEXT: retq
1040 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
1041 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
1042 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
1043 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
1044 ret <2 x double> %bitcast64
; Bitcasts %x to <4 x float>, replaces 32-bit lane 0 with the constant 1.0
; (mask <4,1,2,3>), bitcasts back to <2 x i64>, then ANDs with
; <0xFFFFFFFF00000000, -1>. The AND clears the low 32 bits of element 0, so
; the inserted constant is masked back out to zero while lanes 1-3 of %x are
; kept.
; The function previously ended at the `and` with no terminator; it now
; returns %and, like every other function in this file returns its final
; value.
1047 define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
1048 ; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
1050 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1051 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1052 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1055 ; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
1057 ; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1058 ; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1059 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1062 ; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
1064 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1065 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1066 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1069 ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
1071 ; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1072 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1073 ; SSE41-NEXT: xorps %xmm1, %xmm1
1074 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
1077 ; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
1079 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1080 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1081 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1082 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
1085 ; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
1087 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1088 ; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1089 ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
1090 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1093 ; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
1095 ; AVX512VL-NEXT: vmovss {{.*}}(%rip), %xmm1
1096 ; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1097 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
1098 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1099 ; AVX512VL-NEXT: retq
1100 %bitcast32 = bitcast <2 x i64> %x to <4 x float>
1101 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1102 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
1103 %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
1104 ret <2 x i64> %and
; Inserts the scalar %a into lane 0 of an undef vector, then shuffles with an
; all-zero vector using mask <0,3>, so the result is [%a, 0].
1107 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
1108 ; SSE-LABEL: insert_reg_and_zero_v2i64:
1110 ; SSE-NEXT: movd %rdi, %xmm0
1113 ; AVX-LABEL: insert_reg_and_zero_v2i64:
1115 ; AVX-NEXT: vmovq %rdi, %xmm0
1117 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1118 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1119 ret <2 x i64> %shuffle
; Loads an i64 from %ptr, inserts it into lane 0 of an undef vector, then
; shuffles with an all-zero vector using mask <0,3>, so the result is
; [loaded value, 0].
1122 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
1123 ; SSE-LABEL: insert_mem_and_zero_v2i64:
1125 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1128 ; AVX1-LABEL: insert_mem_and_zero_v2i64:
1130 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1133 ; AVX2-LABEL: insert_mem_and_zero_v2i64:
1135 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1138 ; AVX512VL-LABEL: insert_mem_and_zero_v2i64:
1140 ; AVX512VL-NEXT: vmovq (%rdi), %xmm0
1141 ; AVX512VL-NEXT: retq
1142 %a = load i64, i64* %ptr
1143 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1144 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1145 ret <2 x i64> %shuffle
; Inserts the scalar double %a into lane 0 of an undef vector, then shuffles
; with an all-zero vector using mask <0,3>, so the result is [%a, 0.0].
1148 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
1149 ; SSE-LABEL: insert_reg_and_zero_v2f64:
1151 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1154 ; AVX1-LABEL: insert_reg_and_zero_v2f64:
1156 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1159 ; AVX2-LABEL: insert_reg_and_zero_v2f64:
1161 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1164 ; AVX512VL-LABEL: insert_reg_and_zero_v2f64:
1166 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
1167 ; AVX512VL-NEXT: retq
1168 %v = insertelement <2 x double> undef, double %a, i32 0
1169 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1170 ret <2 x double> %shuffle
; Loads a double from %ptr, inserts it into element 0 of an undef vector, then
; shuffles with a zero vector using mask <0, 3>, so the result is
; <loaded value, 0.0>.
1173 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
1174 ; SSE-LABEL: insert_mem_and_zero_v2f64:
1176 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1179 ; AVX1-LABEL: insert_mem_and_zero_v2f64:
1181 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1184 ; AVX2-LABEL: insert_mem_and_zero_v2f64:
1186 ; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1189 ; AVX512VL-LABEL: insert_mem_and_zero_v2f64:
1191 ; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
1192 ; AVX512VL-NEXT: retq
1193 %a = load double, double* %ptr
1194 %v = insertelement <2 x double> undef, double %a, i32 0
1195 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1196 ret <2 x double> %shuffle
; Replaces the low element of %b with the scalar i64 argument %a. The scalar
; is inserted into element 0 of an undef vector and shuffled with %b using
; mask <0, 3>, so the result is <%a, %b[1]>.
1199 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
1200 ; SSE2-LABEL: insert_reg_lo_v2i64:
1202 ; SSE2-NEXT: movd %rdi, %xmm1
1203 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1206 ; SSE3-LABEL: insert_reg_lo_v2i64:
1208 ; SSE3-NEXT: movd %rdi, %xmm1
1209 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1212 ; SSSE3-LABEL: insert_reg_lo_v2i64:
1214 ; SSSE3-NEXT: movd %rdi, %xmm1
1215 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1218 ; SSE41-LABEL: insert_reg_lo_v2i64:
1220 ; SSE41-NEXT: movd %rdi, %xmm1
1221 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1224 ; AVX1-LABEL: insert_reg_lo_v2i64:
1226 ; AVX1-NEXT: vmovq %rdi, %xmm1
1227 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1230 ; AVX2-LABEL: insert_reg_lo_v2i64:
1232 ; AVX2-NEXT: vmovq %rdi, %xmm1
1233 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1236 ; AVX512VL-LABEL: insert_reg_lo_v2i64:
1238 ; AVX512VL-NEXT: vmovq %rdi, %xmm1
1239 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1240 ; AVX512VL-NEXT: retq
1241 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1242 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1243 ret <2 x i64> %shuffle
; Replaces the low element of %b with an i64 loaded from %ptr. The loaded
; value is inserted into element 0 of an undef vector and shuffled with %b
; using mask <0, 3>, so the result is <loaded value, %b[1]>.
1246 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
1247 ; SSE2-LABEL: insert_mem_lo_v2i64:
1249 ; SSE2-NEXT: movlpd (%rdi), %xmm0
1252 ; SSE3-LABEL: insert_mem_lo_v2i64:
1254 ; SSE3-NEXT: movlpd (%rdi), %xmm0
1257 ; SSSE3-LABEL: insert_mem_lo_v2i64:
1259 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
1262 ; SSE41-LABEL: insert_mem_lo_v2i64:
1264 ; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1265 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1268 ; AVX1-LABEL: insert_mem_lo_v2i64:
1270 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1271 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1274 ; AVX2-LABEL: insert_mem_lo_v2i64:
1276 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1277 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1280 ; AVX512VL-LABEL: insert_mem_lo_v2i64:
1282 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1283 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1284 ; AVX512VL-NEXT: retq
1285 %a = load i64, i64* %ptr
1286 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1287 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1288 ret <2 x i64> %shuffle
; Replaces the high element of %b with the scalar i64 argument %a. The scalar
; is inserted into element 0 of an undef vector and shuffled with %b using
; mask <2, 0>, so the result is <%b[0], %a>.
1291 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1292 ; SSE-LABEL: insert_reg_hi_v2i64:
1294 ; SSE-NEXT: movd %rdi, %xmm1
1295 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1298 ; AVX1-LABEL: insert_reg_hi_v2i64:
1300 ; AVX1-NEXT: vmovq %rdi, %xmm1
1301 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1304 ; AVX2-LABEL: insert_reg_hi_v2i64:
1306 ; AVX2-NEXT: vmovq %rdi, %xmm1
1307 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1310 ; AVX512VL-LABEL: insert_reg_hi_v2i64:
1312 ; AVX512VL-NEXT: vmovq %rdi, %xmm1
1313 ; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0
1314 ; AVX512VL-NEXT: retq
1315 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1316 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1317 ret <2 x i64> %shuffle
; Replaces the high element of %b with an i64 loaded from %ptr. The loaded
; value is inserted into element 0 of an undef vector and shuffled with %b
; using mask <2, 0>, so the result is <%b[0], loaded value>.
1320 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1321 ; SSE-LABEL: insert_mem_hi_v2i64:
1323 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1324 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1327 ; AVX1-LABEL: insert_mem_hi_v2i64:
1329 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1330 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1333 ; AVX2-LABEL: insert_mem_hi_v2i64:
1335 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1336 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1339 ; AVX512VL-LABEL: insert_mem_hi_v2i64:
1341 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1342 ; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0
1343 ; AVX512VL-NEXT: retq
1344 %a = load i64, i64* %ptr
1345 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1346 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1347 ret <2 x i64> %shuffle
; Replaces the low element of %b with the scalar double argument %a. The
; scalar is inserted into element 0 of an undef vector and shuffled with %b
; using mask <0, 3>, so the result is <%a, %b[1]>.
1350 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1351 ; SSE-LABEL: insert_reg_lo_v2f64:
1353 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1354 ; SSE-NEXT: movapd %xmm1, %xmm0
1357 ; AVX1-LABEL: insert_reg_lo_v2f64:
1359 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1362 ; AVX2-LABEL: insert_reg_lo_v2f64:
1364 ; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1367 ; AVX512VL-LABEL: insert_reg_lo_v2f64:
1369 ; AVX512VL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
1370 ; AVX512VL-NEXT: retq
1371 %v = insertelement <2 x double> undef, double %a, i32 0
1372 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1373 ret <2 x double> %shuffle
; Replaces the low element of %b with a double loaded from %ptr. The loaded
; value is inserted into element 0 of an undef vector and shuffled with %b
; using mask <0, 3>, so the result is <loaded value, %b[1]>.
1376 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1377 ; SSE-LABEL: insert_mem_lo_v2f64:
1379 ; SSE-NEXT: movlpd (%rdi), %xmm0
1382 ; AVX-LABEL: insert_mem_lo_v2f64:
1384 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
1386 %a = load double, double* %ptr
1387 %v = insertelement <2 x double> undef, double %a, i32 0
1388 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1389 ret <2 x double> %shuffle
; Replaces the high element of %b with the scalar double argument %a. The
; scalar is inserted into element 0 of an undef vector and shuffled with %b
; using mask <2, 0>, so the result is <%b[0], %a>.
1392 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1393 ; SSE-LABEL: insert_reg_hi_v2f64:
1395 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1396 ; SSE-NEXT: movapd %xmm1, %xmm0
1399 ; AVX1-LABEL: insert_reg_hi_v2f64:
1401 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1404 ; AVX2-LABEL: insert_reg_hi_v2f64:
1406 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1409 ; AVX512VL-LABEL: insert_reg_hi_v2f64:
1411 ; AVX512VL-NEXT: vunpcklpd %xmm0, %xmm1, %xmm0
1412 ; AVX512VL-NEXT: retq
1413 %v = insertelement <2 x double> undef, double %a, i32 0
1414 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1415 ret <2 x double> %shuffle
; Replaces the high element of %b with a double loaded from %ptr. The loaded
; value is inserted into element 0 of an undef vector and shuffled with %b
; using mask <2, 0>, so the result is <%b[0], loaded value>.
1418 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1419 ; SSE-LABEL: insert_mem_hi_v2f64:
1421 ; SSE-NEXT: movhpd (%rdi), %xmm0
1424 ; AVX-LABEL: insert_mem_hi_v2f64:
1426 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1428 %a = load double, double* %ptr
1429 %v = insertelement <2 x double> undef, double %a, i32 0
1430 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1431 ret <2 x double> %shuffle
; Duplicates the scalar double argument %a into both elements. The scalar is
; inserted into element 0 of an undef vector and shuffled with mask <0, 0>,
; so the result is <%a, %a>.
1434 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1435 ; SSE2-LABEL: insert_dup_reg_v2f64:
1437 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1440 ; SSE3-LABEL: insert_dup_reg_v2f64:
1442 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1445 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1447 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1450 ; SSE41-LABEL: insert_dup_reg_v2f64:
1452 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1455 ; AVX-LABEL: insert_dup_reg_v2f64:
1457 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1459 %v = insertelement <2 x double> undef, double %a, i32 0
1460 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1461 ret <2 x double> %shuffle
; Duplicates a double loaded from %ptr into both elements. The loaded value
; is inserted into element 0 of an undef vector and shuffled with mask
; <0, 0>, so the result is <loaded value, loaded value>.
1464 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1465 ; SSE2-LABEL: insert_dup_mem_v2f64:
1467 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1468 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1471 ; SSE3-LABEL: insert_dup_mem_v2f64:
1473 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1476 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1478 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1481 ; SSE41-LABEL: insert_dup_mem_v2f64:
1483 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1486 ; AVX1-LABEL: insert_dup_mem_v2f64:
1488 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1491 ; AVX2-LABEL: insert_dup_mem_v2f64:
1493 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1496 ; AVX512VL-LABEL: insert_dup_mem_v2f64:
1498 ; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
1499 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1500 ; AVX512VL-NEXT: retq
1501 %a = load double, double* %ptr
1502 %v = insertelement <2 x double> undef, double %a, i32 0
1503 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1504 ret <2 x double> %shuffle
; Duplicates an i64 loaded from %ptr (the load is marked align 1) into both
; elements. The loaded value is inserted into element 0 of an undef vector
; and shuffled with an all-zero mask, giving <loaded value, loaded value>.
; NOTE(review) - no ret instruction for %tmp2 is visible after the
; shufflevector here; confirm against the complete file that the function
; returns %tmp2.
1507 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1508 ; SSE-LABEL: insert_dup_mem_v2i64:
1510 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1511 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1514 ; AVX1-LABEL: insert_dup_mem_v2i64:
1516 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1517 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1520 ; AVX2-LABEL: insert_dup_mem_v2i64:
1522 ; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
1525 ; AVX512VL-LABEL: insert_dup_mem_v2i64:
1527 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %xmm0
1528 ; AVX512VL-NEXT: retq
1529 %tmp = load i64, i64* %ptr, align 1
1530 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1531 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
; Loads a <2 x double> vector from %ptr and swaps its two elements with
; shuffle mask <1, 0>, so the result is <loaded[1], loaded[0]>.
1535 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1536 ; SSE-LABEL: shuffle_mem_v2f64_10:
1538 ; SSE-NEXT: movapd (%rdi), %xmm0
1539 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1542 ; AVX1-LABEL: shuffle_mem_v2f64_10:
1544 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1547 ; AVX2-LABEL: shuffle_mem_v2f64_10:
1549 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1552 ; AVX512VL-LABEL: shuffle_mem_v2f64_10:
1554 ; AVX512VL-NEXT: vpermilpd $1, (%rdi), %xmm0
1555 ; AVX512VL-NEXT: retq
1557 %a = load <2 x double>, <2 x double>* %ptr
1558 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1559 ret <2 x double> %shuffle