1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10 target triple = "x86_64-unknown-unknown"
; Splat of lane 0 of %a (mask <0, 0>); %b is never selected by the mask.
; The checks expect (v)pshufd [0,1,0,1] on the SSE and AVX1 runs and
; vpbroadcastq on the AVX2 and AVX512VL runs.
12 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
13 ; SSE-LABEL: shuffle_v2i64_00:
15 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
18 ; AVX1-LABEL: shuffle_v2i64_00:
20 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
23 ; AVX2-LABEL: shuffle_v2i64_00:
25 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
28 ; AVX512VL-LABEL: shuffle_v2i64_00:
30 ; AVX512VL-NEXT: vpbroadcastq %xmm0, %xmm0
32 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
33 ret <2 x i64> %shuffle
; Swap of the two lanes of %a (mask <1, 0>); %b is never selected.
; Every configuration expects one (v)pshufd with dword pattern [2,3,0,1].
35 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
36 ; SSE-LABEL: shuffle_v2i64_10:
38 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
41 ; AVX-LABEL: shuffle_v2i64_10:
43 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
46 ; AVX512VL-LABEL: shuffle_v2i64_10:
48 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
50 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
51 ret <2 x i64> %shuffle
; Splat of lane 1 of %a (mask <1, 1>); %b is never selected.
; Every configuration expects one (v)pshufd with dword pattern [2,3,2,3].
53 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
54 ; SSE-LABEL: shuffle_v2i64_11:
56 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
59 ; AVX-LABEL: shuffle_v2i64_11:
61 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
64 ; AVX512VL-LABEL: shuffle_v2i64_11:
66 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
68 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
69 ret <2 x i64> %shuffle
; Splat of lane 0 of %b (mask <2, 2>; index 2 is the first element of the
; second operand). The checks read xmm1 and write xmm0: (v)pshufd [0,1,0,1]
; on the SSE and AVX1 runs, vpbroadcastq on the AVX2 and AVX512VL runs.
71 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
72 ; SSE-LABEL: shuffle_v2i64_22:
74 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
77 ; AVX1-LABEL: shuffle_v2i64_22:
79 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
82 ; AVX2-LABEL: shuffle_v2i64_22:
84 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
87 ; AVX512VL-LABEL: shuffle_v2i64_22:
89 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm0
91 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
92 ret <2 x i64> %shuffle
; Swap of the two lanes of %b (mask <3, 2>); %a is never selected.
; Every configuration expects (v)pshufd [2,3,0,1] reading xmm1 into xmm0.
94 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
95 ; SSE-LABEL: shuffle_v2i64_32:
97 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
100 ; AVX-LABEL: shuffle_v2i64_32:
102 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
105 ; AVX512VL-LABEL: shuffle_v2i64_32:
107 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
108 ; AVX512VL-NEXT: retq
109 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
110 ret <2 x i64> %shuffle
; Splat of lane 1 of %b (mask <3, 3>); %a is never selected.
; Every configuration expects (v)pshufd [2,3,2,3] reading xmm1 into xmm0.
112 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
113 ; SSE-LABEL: shuffle_v2i64_33:
115 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
118 ; AVX-LABEL: shuffle_v2i64_33:
120 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
123 ; AVX512VL-LABEL: shuffle_v2i64_33:
125 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
126 ; AVX512VL-NEXT: retq
127 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
128 ret <2 x i64> %shuffle
; Floating-point splat of lane 0 of %a (mask <0, 0>); %b is never selected.
; The SSE2 run expects movlhps; the SSE3, SSSE3 and SSE4.1 runs expect
; movddup; the AVX and AVX512VL runs expect vmovddup.
131 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
132 ; SSE2-LABEL: shuffle_v2f64_00:
134 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
137 ; SSE3-LABEL: shuffle_v2f64_00:
139 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
142 ; SSSE3-LABEL: shuffle_v2f64_00:
144 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
147 ; SSE41-LABEL: shuffle_v2f64_00:
149 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
152 ; AVX-LABEL: shuffle_v2f64_00:
154 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
157 ; AVX512VL-LABEL: shuffle_v2f64_00:
159 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
160 ; AVX512VL-NEXT: retq
161 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
162 ret <2 x double> %shuffle
; Floating-point swap of the two lanes of %a (mask <1, 0>); %b is never
; selected. The SSE runs expect shufpd [1,0]; the AVX runs expect vpermilpd
; [1,0]; the AVX512VL check spells vpermilpd with an explicit $1 immediate.
164 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
165 ; SSE-LABEL: shuffle_v2f64_10:
167 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
170 ; AVX-LABEL: shuffle_v2f64_10:
172 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
175 ; AVX512VL-LABEL: shuffle_v2f64_10:
177 ; AVX512VL-NEXT: vpermilpd $1, %xmm0, %xmm0
178 ; AVX512VL-NEXT: retq
180 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
181 ret <2 x double> %shuffle
; Floating-point splat of lane 1 of %a (mask <1, 1>); %b is never selected.
; Every configuration expects (v)movhlps with xmm0 as both source and
; destination.
183 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
184 ; SSE-LABEL: shuffle_v2f64_11:
186 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
189 ; AVX-LABEL: shuffle_v2f64_11:
191 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
194 ; AVX512VL-LABEL: shuffle_v2f64_11:
196 ; AVX512VL-NEXT: vmovhlps %xmm0, %xmm0, %xmm0
197 ; AVX512VL-NEXT: retq
198 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
199 ret <2 x double> %shuffle
; Floating-point splat of lane 0 of %b (mask <2, 2>); %a is never selected.
; The SSE2 run expects movlhps performed in xmm1 followed by a movaps copy
; into xmm0; the other runs expect a single (v)movddup from xmm1 into xmm0.
201 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
202 ; SSE2-LABEL: shuffle_v2f64_22:
204 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
205 ; SSE2-NEXT: movaps %xmm1, %xmm0
208 ; SSE3-LABEL: shuffle_v2f64_22:
210 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
213 ; SSSE3-LABEL: shuffle_v2f64_22:
215 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
218 ; SSE41-LABEL: shuffle_v2f64_22:
220 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
223 ; AVX-LABEL: shuffle_v2f64_22:
225 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
228 ; AVX512VL-LABEL: shuffle_v2f64_22:
230 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
231 ; AVX512VL-NEXT: retq
232 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
233 ret <2 x double> %shuffle
; Floating-point swap of the two lanes of %b (mask <3, 2>); %a is never
; selected. The SSE runs expect shufpd done in place on xmm1 followed by a
; movapd copy into xmm0; the AVX and AVX512VL runs expect one vpermilpd
; from xmm1 into xmm0.
235 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
236 ; SSE-LABEL: shuffle_v2f64_32:
238 ; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
239 ; SSE-NEXT: movapd %xmm1, %xmm0
242 ; AVX-LABEL: shuffle_v2f64_32:
244 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
247 ; AVX512VL-LABEL: shuffle_v2f64_32:
249 ; AVX512VL-NEXT: vpermilpd $1, %xmm1, %xmm0
250 ; AVX512VL-NEXT: retq
252 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
253 ret <2 x double> %shuffle
; Floating-point splat of lane 1 of %b (mask <3, 3>); %a is never selected.
; The SSE runs expect movhlps done in xmm1 followed by a movaps copy into
; xmm0; the AVX and AVX512VL runs expect one vmovhlps from xmm1 into xmm0.
255 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
256 ; SSE-LABEL: shuffle_v2f64_33:
258 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
259 ; SSE-NEXT: movaps %xmm1, %xmm0
262 ; AVX-LABEL: shuffle_v2f64_33:
264 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
267 ; AVX512VL-LABEL: shuffle_v2f64_33:
269 ; AVX512VL-NEXT: vmovhlps %xmm1, %xmm1, %xmm0
270 ; AVX512VL-NEXT: retq
271 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
272 ret <2 x double> %shuffle
; Floating-point blend: lane 0 from %a, lane 1 from %b (mask <0, 3>).
; The SSE2, SSE3 and SSSE3 runs expect movsd merging into xmm1 plus a movapd
; copy into xmm0; the SSE4.1 run expects blendpd; the AVX and AVX512VL runs
; expect vblendpd.
274 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
275 ; SSE2-LABEL: shuffle_v2f64_03:
277 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
278 ; SSE2-NEXT: movapd %xmm1, %xmm0
281 ; SSE3-LABEL: shuffle_v2f64_03:
283 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
284 ; SSE3-NEXT: movapd %xmm1, %xmm0
287 ; SSSE3-LABEL: shuffle_v2f64_03:
289 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
290 ; SSSE3-NEXT: movapd %xmm1, %xmm0
293 ; SSE41-LABEL: shuffle_v2f64_03:
295 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
298 ; AVX-LABEL: shuffle_v2f64_03:
300 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
303 ; AVX512VL-LABEL: shuffle_v2f64_03:
305 ; AVX512VL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
306 ; AVX512VL-NEXT: retq
307 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
308 ret <2 x double> %shuffle
; Floating-point blend: lane 0 from %b, lane 1 from %a (mask <2, 1>).
; The SSE2, SSE3 and SSSE3 runs expect one movsd writing xmm0 directly; the
; SSE4.1 run expects blendpd; the AVX and AVX512VL runs expect vblendpd.
310 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
311 ; SSE2-LABEL: shuffle_v2f64_21:
313 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
316 ; SSE3-LABEL: shuffle_v2f64_21:
318 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
321 ; SSSE3-LABEL: shuffle_v2f64_21:
323 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
326 ; SSE41-LABEL: shuffle_v2f64_21:
328 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
331 ; AVX-LABEL: shuffle_v2f64_21:
333 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
336 ; AVX512VL-LABEL: shuffle_v2f64_21:
338 ; AVX512VL-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
339 ; AVX512VL-NEXT: retq
340 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
341 ret <2 x double> %shuffle
; Low lane of %a followed by low lane of %b (mask <0, 2>).
; Every configuration expects a single (v)punpcklqdq of xmm0 with xmm1.
345 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
346 ; SSE-LABEL: shuffle_v2i64_02:
348 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
351 ; AVX-LABEL: shuffle_v2i64_02:
353 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
356 ; AVX512VL-LABEL: shuffle_v2i64_02:
358 ; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0
359 ; AVX512VL-NEXT: retq
360 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
361 ret <2 x i64> %shuffle
; Mask <0, 2> with an extra leading argument %nonce that the body never
; references. In the checks the operands are read from xmm1/xmm2 and the
; result ends up in xmm0: the SSE runs shuffle in xmm1 and then copy with
; movdqa, while the AVX and AVX512VL runs write xmm0 directly.
363 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
364 ; SSE-LABEL: shuffle_v2i64_02_copy:
366 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
367 ; SSE-NEXT: movdqa %xmm1, %xmm0
370 ; AVX-LABEL: shuffle_v2i64_02_copy:
372 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
375 ; AVX512VL-LABEL: shuffle_v2i64_02_copy:
377 ; AVX512VL-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm0
378 ; AVX512VL-NEXT: retq
379 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
380 ret <2 x i64> %shuffle
; Integer blend: lane 0 from %a, lane 1 from %b (mask <0, 3>).
; The SSE2, SSE3 and SSSE3 runs expect movsd merging into xmm1 plus a movapd
; copy; the SSE4.1 and AVX1 runs expect (v)pblendw; the AVX2 and AVX512VL
; runs expect vpblendd.
382 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
383 ; SSE2-LABEL: shuffle_v2i64_03:
385 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
386 ; SSE2-NEXT: movapd %xmm1, %xmm0
389 ; SSE3-LABEL: shuffle_v2i64_03:
391 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
392 ; SSE3-NEXT: movapd %xmm1, %xmm0
395 ; SSSE3-LABEL: shuffle_v2i64_03:
397 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
398 ; SSSE3-NEXT: movapd %xmm1, %xmm0
401 ; SSE41-LABEL: shuffle_v2i64_03:
403 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
406 ; AVX1-LABEL: shuffle_v2i64_03:
408 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
411 ; AVX2-LABEL: shuffle_v2i64_03:
413 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
416 ; AVX512VL-LABEL: shuffle_v2i64_03:
418 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
419 ; AVX512VL-NEXT: retq
420 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
421 ret <2 x i64> %shuffle
; Mask <0, 3> (lane 0 from %a, lane 1 from %b) with an extra leading
; argument %nonce that the body never references. In the checks the operands
; are read from xmm1/xmm2 and the result ends up in xmm0; all four SSE-level
; runs need a trailing register copy, the AVX-level runs write xmm0 directly.
423 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
424 ; SSE2-LABEL: shuffle_v2i64_03_copy:
426 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
427 ; SSE2-NEXT: movapd %xmm2, %xmm0
430 ; SSE3-LABEL: shuffle_v2i64_03_copy:
432 ; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
433 ; SSE3-NEXT: movapd %xmm2, %xmm0
436 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
438 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
439 ; SSSE3-NEXT: movapd %xmm2, %xmm0
442 ; SSE41-LABEL: shuffle_v2i64_03_copy:
444 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
445 ; SSE41-NEXT: movdqa %xmm1, %xmm0
448 ; AVX1-LABEL: shuffle_v2i64_03_copy:
450 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
453 ; AVX2-LABEL: shuffle_v2i64_03_copy:
455 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
458 ; AVX512VL-LABEL: shuffle_v2i64_03_copy:
460 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
461 ; AVX512VL-NEXT: retq
462 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
463 ret <2 x i64> %shuffle
; High lane of %a followed by low lane of %b (mask <1, 2>).
; The SSE2 and SSE3 runs expect shufpd; the SSSE3 and SSE4.1 runs expect
; palignr into xmm1 plus a movdqa copy; the AVX and AVX512VL runs expect one
; vpalignr writing xmm0.
465 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
466 ; SSE2-LABEL: shuffle_v2i64_12:
468 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
471 ; SSE3-LABEL: shuffle_v2i64_12:
473 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
476 ; SSSE3-LABEL: shuffle_v2i64_12:
478 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
479 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
482 ; SSE41-LABEL: shuffle_v2i64_12:
484 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
485 ; SSE41-NEXT: movdqa %xmm1, %xmm0
488 ; AVX-LABEL: shuffle_v2i64_12:
490 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
493 ; AVX512VL-LABEL: shuffle_v2i64_12:
495 ; AVX512VL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
496 ; AVX512VL-NEXT: retq
497 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
498 ret <2 x i64> %shuffle
; Mask <1, 2> (high lane of %a, low lane of %b) with an extra leading
; argument %nonce that the body never references. In the checks the operands
; are read from xmm1/xmm2 and the result ends up in xmm0; every SSE-level
; run needs a trailing register copy, the AVX-level runs write xmm0 directly.
500 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
501 ; SSE2-LABEL: shuffle_v2i64_12_copy:
503 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
504 ; SSE2-NEXT: movapd %xmm1, %xmm0
507 ; SSE3-LABEL: shuffle_v2i64_12_copy:
509 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
510 ; SSE3-NEXT: movapd %xmm1, %xmm0
513 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
515 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
516 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
519 ; SSE41-LABEL: shuffle_v2i64_12_copy:
521 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
522 ; SSE41-NEXT: movdqa %xmm2, %xmm0
525 ; AVX-LABEL: shuffle_v2i64_12_copy:
527 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
530 ; AVX512VL-LABEL: shuffle_v2i64_12_copy:
532 ; AVX512VL-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
533 ; AVX512VL-NEXT: retq
534 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
535 ret <2 x i64> %shuffle
; High lane of %a followed by high lane of %b (mask <1, 3>).
; Every configuration expects a single (v)punpckhqdq of xmm0 with xmm1.
537 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
538 ; SSE-LABEL: shuffle_v2i64_13:
540 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
543 ; AVX-LABEL: shuffle_v2i64_13:
545 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
548 ; AVX512VL-LABEL: shuffle_v2i64_13:
550 ; AVX512VL-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm0
551 ; AVX512VL-NEXT: retq
552 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
553 ret <2 x i64> %shuffle
; Mask <1, 3> (high lanes of %a and %b) with an extra leading argument
; %nonce that the body never references. In the checks the operands are read
; from xmm1/xmm2 and the result ends up in xmm0; the SSE runs shuffle in
; xmm1 and copy with movdqa, the AVX-level runs write xmm0 directly.
555 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
556 ; SSE-LABEL: shuffle_v2i64_13_copy:
558 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
559 ; SSE-NEXT: movdqa %xmm1, %xmm0
562 ; AVX-LABEL: shuffle_v2i64_13_copy:
564 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
567 ; AVX512VL-LABEL: shuffle_v2i64_13_copy:
569 ; AVX512VL-NEXT: vpunpckhqdq %xmm2, %xmm1, %xmm0
570 ; AVX512VL-NEXT: retq
571 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
572 ret <2 x i64> %shuffle
; Low lane of %b followed by low lane of %a (mask <2, 0>), i.e. an unpack-low
; with the operands commuted. The SSE runs expect punpcklqdq done in xmm1
; followed by a movdqa copy; the AVX-level runs expect one vpunpcklqdq.
574 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
575 ; SSE-LABEL: shuffle_v2i64_20:
577 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
578 ; SSE-NEXT: movdqa %xmm1, %xmm0
581 ; AVX-LABEL: shuffle_v2i64_20:
583 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
586 ; AVX512VL-LABEL: shuffle_v2i64_20:
588 ; AVX512VL-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0
589 ; AVX512VL-NEXT: retq
590 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
591 ret <2 x i64> %shuffle
; Mask <2, 0> (low lane of %b, low lane of %a) with an extra leading
; argument %nonce that the body never references. In the checks the operands
; are read from xmm2/xmm1 and the result ends up in xmm0; the SSE runs
; shuffle in xmm2 and copy with movdqa.
593 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
594 ; SSE-LABEL: shuffle_v2i64_20_copy:
596 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
597 ; SSE-NEXT: movdqa %xmm2, %xmm0
600 ; AVX-LABEL: shuffle_v2i64_20_copy:
602 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
605 ; AVX512VL-LABEL: shuffle_v2i64_20_copy:
607 ; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm2, %xmm0
608 ; AVX512VL-NEXT: retq
609 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
610 ret <2 x i64> %shuffle
; Integer blend: lane 0 from %b, lane 1 from %a (mask <2, 1>).
; The SSE2, SSE3 and SSSE3 runs expect one movsd writing xmm0; the SSE4.1
; and AVX1 runs expect (v)pblendw; the AVX2 and AVX512VL runs expect
; vpblendd.
612 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
613 ; SSE2-LABEL: shuffle_v2i64_21:
615 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
618 ; SSE3-LABEL: shuffle_v2i64_21:
620 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
623 ; SSSE3-LABEL: shuffle_v2i64_21:
625 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
628 ; SSE41-LABEL: shuffle_v2i64_21:
630 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
633 ; AVX1-LABEL: shuffle_v2i64_21:
635 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
638 ; AVX2-LABEL: shuffle_v2i64_21:
640 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
643 ; AVX512VL-LABEL: shuffle_v2i64_21:
645 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
646 ; AVX512VL-NEXT: retq
647 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
648 ret <2 x i64> %shuffle
; Mask <2, 1> (lane 0 from %b, lane 1 from %a) with an extra leading
; argument %nonce that the body never references. In the checks the operands
; are read from xmm2/xmm1 and the result ends up in xmm0; every SSE-level
; run needs a trailing register copy, the AVX-level runs write xmm0 directly.
650 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
651 ; SSE2-LABEL: shuffle_v2i64_21_copy:
653 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
654 ; SSE2-NEXT: movapd %xmm1, %xmm0
657 ; SSE3-LABEL: shuffle_v2i64_21_copy:
659 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
660 ; SSE3-NEXT: movapd %xmm1, %xmm0
663 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
665 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
666 ; SSSE3-NEXT: movapd %xmm1, %xmm0
669 ; SSE41-LABEL: shuffle_v2i64_21_copy:
671 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
672 ; SSE41-NEXT: movdqa %xmm1, %xmm0
675 ; AVX1-LABEL: shuffle_v2i64_21_copy:
677 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
680 ; AVX2-LABEL: shuffle_v2i64_21_copy:
682 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
685 ; AVX512VL-LABEL: shuffle_v2i64_21_copy:
687 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
688 ; AVX512VL-NEXT: retq
689 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
690 ret <2 x i64> %shuffle
; High lane of %b followed by low lane of %a (mask <3, 0>).
; The SSE2 and SSE3 runs expect shufpd done in xmm1 plus a movapd copy; the
; SSSE3 and SSE4.1 runs expect palignr writing xmm0 directly; the AVX-level
; runs expect vpalignr.
692 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
693 ; SSE2-LABEL: shuffle_v2i64_30:
695 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
696 ; SSE2-NEXT: movapd %xmm1, %xmm0
699 ; SSE3-LABEL: shuffle_v2i64_30:
701 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
702 ; SSE3-NEXT: movapd %xmm1, %xmm0
705 ; SSSE3-LABEL: shuffle_v2i64_30:
707 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
710 ; SSE41-LABEL: shuffle_v2i64_30:
712 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
715 ; AVX-LABEL: shuffle_v2i64_30:
717 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
720 ; AVX512VL-LABEL: shuffle_v2i64_30:
722 ; AVX512VL-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
723 ; AVX512VL-NEXT: retq
724 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
725 ret <2 x i64> %shuffle
; Mask <3, 0> (high lane of %b, low lane of %a) with an extra leading
; argument %nonce that the body never references. In the checks the operands
; are read from xmm2/xmm1 and the result ends up in xmm0; every SSE-level
; run needs a trailing register copy, the AVX-level runs write xmm0 directly.
727 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
728 ; SSE2-LABEL: shuffle_v2i64_30_copy:
730 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
731 ; SSE2-NEXT: movapd %xmm2, %xmm0
734 ; SSE3-LABEL: shuffle_v2i64_30_copy:
736 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
737 ; SSE3-NEXT: movapd %xmm2, %xmm0
740 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
742 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
743 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
746 ; SSE41-LABEL: shuffle_v2i64_30_copy:
748 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
749 ; SSE41-NEXT: movdqa %xmm1, %xmm0
752 ; AVX-LABEL: shuffle_v2i64_30_copy:
754 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
757 ; AVX512VL-LABEL: shuffle_v2i64_30_copy:
759 ; AVX512VL-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
760 ; AVX512VL-NEXT: retq
761 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
762 ret <2 x i64> %shuffle
; High lane of %b followed by high lane of %a (mask <3, 1>), i.e. an
; unpack-high with the operands commuted. The SSE runs expect punpckhqdq
; done in xmm1 followed by a movdqa copy; the AVX-level runs expect one
; vpunpckhqdq.
764 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
765 ; SSE-LABEL: shuffle_v2i64_31:
767 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
768 ; SSE-NEXT: movdqa %xmm1, %xmm0
771 ; AVX-LABEL: shuffle_v2i64_31:
773 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
776 ; AVX512VL-LABEL: shuffle_v2i64_31:
778 ; AVX512VL-NEXT: vpunpckhqdq %xmm0, %xmm1, %xmm0
779 ; AVX512VL-NEXT: retq
780 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
781 ret <2 x i64> %shuffle
; Mask <3, 1> (high lane of %b, high lane of %a) with an extra leading
; argument %nonce that the body never references. In the checks the operands
; are read from xmm2/xmm1 and the result ends up in xmm0; the SSE runs
; shuffle in xmm2 and copy with movdqa.
783 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
784 ; SSE-LABEL: shuffle_v2i64_31_copy:
786 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
787 ; SSE-NEXT: movdqa %xmm2, %xmm0
790 ; AVX-LABEL: shuffle_v2i64_31_copy:
792 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
795 ; AVX512VL-LABEL: shuffle_v2i64_31_copy:
797 ; AVX512VL-NEXT: vpunpckhqdq %xmm1, %xmm2, %xmm0
798 ; AVX512VL-NEXT: retq
799 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
800 ret <2 x i64> %shuffle
; Keep lane 0 of %a and zero lane 1: the second shuffle operand is
; zeroinitializer and mask index 3 selects its lane 1.
; Every configuration expects a single register-to-register (v)movq.
803 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
804 ; SSE-LABEL: shuffle_v2i64_0z:
806 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
809 ; AVX-LABEL: shuffle_v2i64_0z:
811 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
814 ; AVX512VL-LABEL: shuffle_v2i64_0z:
816 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
817 ; AVX512VL-NEXT: retq
818 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
819 ret <2 x i64> %shuffle
; Move lane 1 of %a into lane 0 and zero lane 1 (mask <1, 3> against a
; zeroinitializer second operand). Every configuration expects a byte shift
; right, (v)psrldq, leaving bytes 8..15 in the low half and zeros above.
822 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
823 ; SSE-LABEL: shuffle_v2i64_1z:
825 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
828 ; AVX-LABEL: shuffle_v2i64_1z:
830 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
833 ; AVX512VL-LABEL: shuffle_v2i64_1z:
835 ; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
836 ; AVX512VL-NEXT: retq
837 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
838 ret <2 x i64> %shuffle
; Zero lane 0 and move lane 0 of %a into lane 1 (mask <2, 0> against a
; zeroinitializer second operand). Every configuration expects a byte shift
; left, (v)pslldq, leaving zeros in the low half and bytes 0..7 above.
841 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
842 ; SSE-LABEL: shuffle_v2i64_z0:
844 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
847 ; AVX-LABEL: shuffle_v2i64_z0:
849 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
852 ; AVX512VL-LABEL: shuffle_v2i64_z0:
854 ; AVX512VL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
855 ; AVX512VL-NEXT: retq
856 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
857 ret <2 x i64> %shuffle
; Zero lane 0 and keep lane 1 of %a (mask <2, 1> against a zeroinitializer
; second operand). Every configuration first materializes a zero register
; in xmm1 and then merges: movsd on the SSE2, SSE3 and SSSE3 runs,
; (v)pblendw on the SSE4.1 and AVX1 runs, vpblendd on the AVX2 and AVX512VL
; runs.
860 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
861 ; SSE2-LABEL: shuffle_v2i64_z1:
863 ; SSE2-NEXT: xorpd %xmm1, %xmm1
864 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
867 ; SSE3-LABEL: shuffle_v2i64_z1:
869 ; SSE3-NEXT: xorpd %xmm1, %xmm1
870 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
873 ; SSSE3-LABEL: shuffle_v2i64_z1:
875 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
876 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
879 ; SSE41-LABEL: shuffle_v2i64_z1:
881 ; SSE41-NEXT: pxor %xmm1, %xmm1
882 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
885 ; AVX1-LABEL: shuffle_v2i64_z1:
887 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
888 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
891 ; AVX2-LABEL: shuffle_v2i64_z1:
893 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
894 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
897 ; AVX512VL-LABEL: shuffle_v2i64_z1:
899 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
900 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
901 ; AVX512VL-NEXT: retq
902 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
903 ret <2 x i64> %shuffle
; Floating-point variant: keep lane 0 of %a and zero lane 1 (mask <0, 3>
; against a zeroinitializer second operand). Every configuration expects a
; single register-to-register (v)movq.
906 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
907 ; SSE-LABEL: shuffle_v2f64_0z:
909 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
912 ; AVX-LABEL: shuffle_v2f64_0z:
914 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
917 ; AVX512VL-LABEL: shuffle_v2f64_0z:
919 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
920 ; AVX512VL-NEXT: retq
921 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
922 ret <2 x double> %shuffle
; Floating-point variant: move lane 1 of %a into lane 0 and zero lane 1
; (mask <1, 3> against a zeroinitializer second operand). Every
; configuration zeroes xmm1 and then uses (v)unpckhpd; the AVX512VL check
; zeroes with vxorps where the SSE and AVX checks use (v)xorpd.
925 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
926 ; SSE-LABEL: shuffle_v2f64_1z:
928 ; SSE-NEXT: xorpd %xmm1, %xmm1
929 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
932 ; AVX-LABEL: shuffle_v2f64_1z:
934 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
935 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
938 ; AVX512VL-LABEL: shuffle_v2f64_1z:
940 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
941 ; AVX512VL-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0
942 ; AVX512VL-NEXT: retq
943 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
944 ret <2 x double> %shuffle
; Floating-point variant: zero lane 0 and move lane 0 of %a into lane 1
; (mask <2, 0> against a zeroinitializer second operand). Every
; configuration zeroes xmm1 and then uses (v)unpcklpd with the zero register
; first; the SSE runs additionally copy xmm1 into xmm0 with movapd.
947 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
948 ; SSE-LABEL: shuffle_v2f64_z0:
950 ; SSE-NEXT: xorpd %xmm1, %xmm1
951 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
952 ; SSE-NEXT: movapd %xmm1, %xmm0
955 ; AVX-LABEL: shuffle_v2f64_z0:
957 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
958 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
961 ; AVX512VL-LABEL: shuffle_v2f64_z0:
963 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
964 ; AVX512VL-NEXT: vunpcklpd %xmm0, %xmm1, %xmm0
965 ; AVX512VL-NEXT: retq
966 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
967 ret <2 x double> %shuffle
; Floating-point variant: zero lane 0 and keep lane 1 of %a (mask <2, 1>
; against a zeroinitializer second operand). Every configuration zeroes xmm1
; with (v)xorpd and then merges: movsd on the SSE2, SSE3 and SSSE3 runs,
; blendpd on the SSE4.1 run, vblendpd on the AVX and AVX512VL runs.
970 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
971 ; SSE2-LABEL: shuffle_v2f64_z1:
973 ; SSE2-NEXT: xorpd %xmm1, %xmm1
974 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
977 ; SSE3-LABEL: shuffle_v2f64_z1:
979 ; SSE3-NEXT: xorpd %xmm1, %xmm1
980 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
983 ; SSSE3-LABEL: shuffle_v2f64_z1:
985 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
986 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
989 ; SSE41-LABEL: shuffle_v2f64_z1:
991 ; SSE41-NEXT: xorpd %xmm1, %xmm1
992 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
995 ; AVX-LABEL: shuffle_v2f64_z1:
997 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
998 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1001 ; AVX512VL-LABEL: shuffle_v2f64_z1:
1003 ; AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1004 ; AVX512VL-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1005 ; AVX512VL-NEXT: retq
1006 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
1007 ret <2 x double> %shuffle
; Two shuffles separated by bitcasts. The 64-bit shuffle builds <zero, a[1]>
; (mask <2, 1> against zeroinitializer); the result is viewed as
; <4 x float> and its 64-bit halves are swapped (mask <2, 3, 0, 1>), giving
; <a[1], zero> once cast back to <2 x double>. The checks expect the pair to
; fold into one zeroing instruction plus a single (v)shufpd selecting
; xmm0[1] and xmm1[0].
1010 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
1011 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
1013 ; SSE-NEXT: xorpd %xmm1, %xmm1
1014 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1017 ; AVX-LABEL: shuffle_v2f64_bitcast_1z:
1019 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1020 ; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1023 ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
1025 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1026 ; AVX512VL-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0
1027 ; AVX512VL-NEXT: retq
1028 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
1029 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
1030 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
1031 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
1032 ret <2 x double> %bitcast64
; %x is viewed as <4 x float>, float element 0 is replaced by the constant
; 1.0 (mask index 4 selects element 0 of the second operand), the vector is
; cast back to <2 x i64>, and the result is ANDed with
; <i64 -4294967296, i64 -1>, which clears the low 32 bits of lane 0 (the
; bits that were just replaced).
; The SSE2, SSE3 and SSSE3 runs expect a movss load, a movss merge and an
; andps with a memory constant; the SSE4.1 and AVX-level runs expect a movss
; load, (v)blendps, a zeroing xorps and a second integer blend.
; NOTE(review): the instruction returning %and is not visible in this view
; of the file; presumably the function returns %and -- confirm.
1035 define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
1036 ; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
1038 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1039 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1040 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
1043 ; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
1045 ; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1046 ; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1047 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
1050 ; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
1052 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1053 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1054 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
1057 ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
1059 ; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1060 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1061 ; SSE41-NEXT: xorps %xmm1, %xmm1
1062 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
1065 ; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
1067 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1068 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1069 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1070 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
1073 ; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
1075 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1076 ; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1077 ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
1078 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1081 ; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
1083 ; AVX512VL-NEXT: vmovss {{.*}}(%rip), %xmm1
1084 ; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1085 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
1086 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1087 ; AVX512VL-NEXT: retq
1088 %bitcast32 = bitcast <2 x i64> %x to <4 x float>
1089 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1090 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
1091 %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
; Scalar i64 %a is inserted into lane 0 of an undef vector, then shuffled
; against zeroinitializer with mask <0, 3> so lane 1 becomes zero.
; The checks expect one GPR-to-XMM move from %rdi, printed as movd on the
; SSE runs and as vmovq on the AVX and AVX512VL runs.
1095 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
1096 ; SSE-LABEL: insert_reg_and_zero_v2i64:
1098 ; SSE-NEXT: movd %rdi, %xmm0
1101 ; AVX-LABEL: insert_reg_and_zero_v2i64:
1103 ; AVX-NEXT: vmovq %rdi, %xmm0
1106 ; AVX512VL-LABEL: insert_reg_and_zero_v2i64:
1108 ; AVX512VL-NEXT: vmovq %rdi, %xmm0
1109 ; AVX512VL-NEXT: retq
1110 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1111 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1112 ret <2 x i64> %shuffle
; An i64 loaded from %ptr is inserted into lane 0 of an undef vector, then
; shuffled against zeroinitializer with mask <0, 3> so lane 1 becomes zero.
; The checks expect the load, insert and zeroing to fold into a single
; (v)movq from memory.
1115 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
1116 ; SSE-LABEL: insert_mem_and_zero_v2i64:
1118 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1121 ; AVX-LABEL: insert_mem_and_zero_v2i64:
1123 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1126 ; AVX512VL-LABEL: insert_mem_and_zero_v2i64:
1128 ; AVX512VL-NEXT: vmovq (%rdi), %xmm0
1129 ; AVX512VL-NEXT: retq
1130 %a = load i64, i64* %ptr
1131 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1132 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1133 ret <2 x i64> %shuffle
; Scalar double %a is inserted into lane 0 of an undef vector, then shuffled
; against zeroinitializer with mask <0, 3> so lane 1 becomes zero.
; The checks expect a single register-to-register (v)movq on xmm0.
1136 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
1137 ; SSE-LABEL: insert_reg_and_zero_v2f64:
1139 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1142 ; AVX-LABEL: insert_reg_and_zero_v2f64:
1144 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1147 ; AVX512VL-LABEL: insert_reg_and_zero_v2f64:
1149 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
1150 ; AVX512VL-NEXT: retq
1151 %v = insertelement <2 x double> undef, double %a, i32 0
1152 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1153 ret <2 x double> %shuffle
; Memory variant of the [scalar, 0.0] pattern: the double is loaded from %ptr,
; inserted into lane 0, and the <0, 3> mask takes lane 1 from the
; zeroinitializer operand.
; The visible checks expect one movsd / vmovsd load into xmm0; the SSE and AVX
; check lines spell the result as mem[0],zero.
1156 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
1157 ; SSE-LABEL: insert_mem_and_zero_v2f64:
1159 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1162 ; AVX-LABEL: insert_mem_and_zero_v2f64:
1164 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1167 ; AVX512VL-LABEL: insert_mem_and_zero_v2f64:
1169 ; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
1170 ; AVX512VL-NEXT: retq
1171 %a = load double, double* %ptr
1172 %v = insertelement <2 x double> undef, double %a, i32 0
1173 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1174 ret <2 x double> %shuffle
; Replaces the low lane of %b with the scalar %a: the <0, 3> mask takes lane 0
; from %v (which holds %a) and lane 1 from lane 1 of %b.
; Every visible check sequence first moves %rdi into xmm1, then merges it:
;   SSE2 / SSE3 / SSSE3 : movsd
;   SSE4.1 / AVX1       : pblendw / vpblendw
;   AVX2 / AVX512VL     : vpblendd
1177 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
1178 ; SSE2-LABEL: insert_reg_lo_v2i64:
1180 ; SSE2-NEXT: movd %rdi, %xmm1
1181 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1184 ; SSE3-LABEL: insert_reg_lo_v2i64:
1186 ; SSE3-NEXT: movd %rdi, %xmm1
1187 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1190 ; SSSE3-LABEL: insert_reg_lo_v2i64:
1192 ; SSSE3-NEXT: movd %rdi, %xmm1
1193 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1196 ; SSE41-LABEL: insert_reg_lo_v2i64:
1198 ; SSE41-NEXT: movd %rdi, %xmm1
1199 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1202 ; AVX1-LABEL: insert_reg_lo_v2i64:
1204 ; AVX1-NEXT: vmovq %rdi, %xmm1
1205 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1208 ; AVX2-LABEL: insert_reg_lo_v2i64:
1210 ; AVX2-NEXT: vmovq %rdi, %xmm1
1211 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1214 ; AVX512VL-LABEL: insert_reg_lo_v2i64:
1216 ; AVX512VL-NEXT: vmovq %rdi, %xmm1
1217 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1218 ; AVX512VL-NEXT: retq
1219 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1220 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1221 ret <2 x i64> %shuffle
; Replaces the low lane of %b with an i64 loaded from %ptr: the <0, 3> mask
; takes lane 0 from %v (the loaded value) and lane 1 from lane 1 of %b.
; Visible check sequences:
;   SSE2 / SSE3 / SSSE3 : one movlpd from (%rdi) into xmm0
;   SSE4.1 / AVX1       : 64-bit load into xmm1, then pblendw / vpblendw
;   AVX2 / AVX512VL     : 64-bit load into xmm1, then vpblendd
1224 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
1225 ; SSE2-LABEL: insert_mem_lo_v2i64:
1227 ; SSE2-NEXT: movlpd (%rdi), %xmm0
1230 ; SSE3-LABEL: insert_mem_lo_v2i64:
1232 ; SSE3-NEXT: movlpd (%rdi), %xmm0
1235 ; SSSE3-LABEL: insert_mem_lo_v2i64:
1237 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
1240 ; SSE41-LABEL: insert_mem_lo_v2i64:
1242 ; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1243 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1246 ; AVX1-LABEL: insert_mem_lo_v2i64:
1248 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1249 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1252 ; AVX2-LABEL: insert_mem_lo_v2i64:
1254 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1255 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1258 ; AVX512VL-LABEL: insert_mem_lo_v2i64:
1260 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1261 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1262 ; AVX512VL-NEXT: retq
1263 %a = load i64, i64* %ptr
1264 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1265 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1266 ret <2 x i64> %shuffle
; Replaces the high lane of %b with the scalar %a: the <2, 0> mask takes lane 0
; from lane 0 of %b and lane 1 from lane 0 of %v (which holds %a).
; The visible checks expect %rdi to be moved into xmm1 and then combined with
; xmm0 by punpcklqdq / vpunpcklqdq.
1269 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1270 ; SSE-LABEL: insert_reg_hi_v2i64:
1272 ; SSE-NEXT: movd %rdi, %xmm1
1273 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1276 ; AVX-LABEL: insert_reg_hi_v2i64:
1278 ; AVX-NEXT: vmovq %rdi, %xmm1
1279 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1282 ; AVX512VL-LABEL: insert_reg_hi_v2i64:
1284 ; AVX512VL-NEXT: vmovq %rdi, %xmm1
1285 ; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0
1286 ; AVX512VL-NEXT: retq
1287 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1288 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1289 ret <2 x i64> %shuffle
; Replaces the high lane of %b with an i64 loaded from %ptr: the <2, 0> mask
; takes lane 0 from lane 0 of %b and lane 1 from lane 0 of %v (the loaded
; value).
; The visible checks expect a 64-bit load into xmm1 followed by
; punpcklqdq / vpunpcklqdq with xmm0.
1292 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1293 ; SSE-LABEL: insert_mem_hi_v2i64:
1295 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1296 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1299 ; AVX-LABEL: insert_mem_hi_v2i64:
1301 ; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1302 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1305 ; AVX512VL-LABEL: insert_mem_hi_v2i64:
1307 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1308 ; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0
1309 ; AVX512VL-NEXT: retq
1310 %a = load i64, i64* %ptr
1311 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1312 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1313 ret <2 x i64> %shuffle
; Replaces the low lane of %b with the double %a: the <0, 3> mask takes lane 0
; from %v (which holds %a) and lane 1 from lane 1 of %b.
; The visible checks expect a movsd / vmovsd merge. The SSE sequence writes the
; merge into xmm1 and then copies it to xmm0 with movapd; the AVX and AVX512VL
; sequences write xmm0 directly.
1316 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1317 ; SSE-LABEL: insert_reg_lo_v2f64:
1319 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1320 ; SSE-NEXT: movapd %xmm1, %xmm0
1323 ; AVX-LABEL: insert_reg_lo_v2f64:
1325 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1328 ; AVX512VL-LABEL: insert_reg_lo_v2f64:
1330 ; AVX512VL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
1331 ; AVX512VL-NEXT: retq
1332 %v = insertelement <2 x double> undef, double %a, i32 0
1333 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1334 ret <2 x double> %shuffle
; Replaces the low lane of %b with a double loaded from %ptr: the <0, 3> mask
; takes lane 0 from %v (the loaded value) and lane 1 from lane 1 of %b.
; The visible checks expect one movlpd / vmovlpd with a (%rdi) memory operand.
1337 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1338 ; SSE-LABEL: insert_mem_lo_v2f64:
1340 ; SSE-NEXT: movlpd (%rdi), %xmm0
1343 ; AVX-LABEL: insert_mem_lo_v2f64:
1345 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
1348 ; AVX512VL-LABEL: insert_mem_lo_v2f64:
1350 ; AVX512VL-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
1351 ; AVX512VL-NEXT: retq
1352 %a = load double, double* %ptr
1353 %v = insertelement <2 x double> undef, double %a, i32 0
1354 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1355 ret <2 x double> %shuffle
; Replaces the high lane of %b with the double %a: the <2, 0> mask takes lane 0
; from lane 0 of %b and lane 1 from lane 0 of %v (which holds %a).
; The visible checks expect an unpcklpd / vunpcklpd. The SSE sequence writes
; the result into xmm1 and then copies it to xmm0 with movapd; the AVX and
; AVX512VL sequences write xmm0 directly.
1358 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1359 ; SSE-LABEL: insert_reg_hi_v2f64:
1361 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1362 ; SSE-NEXT: movapd %xmm1, %xmm0
1365 ; AVX-LABEL: insert_reg_hi_v2f64:
1367 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1370 ; AVX512VL-LABEL: insert_reg_hi_v2f64:
1372 ; AVX512VL-NEXT: vunpcklpd %xmm0, %xmm1, %xmm0
1373 ; AVX512VL-NEXT: retq
1374 %v = insertelement <2 x double> undef, double %a, i32 0
1375 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1376 ret <2 x double> %shuffle
; Replaces the high lane of %b with a double loaded from %ptr: the <2, 0> mask
; takes lane 0 from lane 0 of %b and lane 1 from lane 0 of %v (the loaded
; value).
; The visible checks expect one movhpd / vmovhpd with a (%rdi) memory operand.
1379 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1380 ; SSE-LABEL: insert_mem_hi_v2f64:
1382 ; SSE-NEXT: movhpd (%rdi), %xmm0
1385 ; AVX-LABEL: insert_mem_hi_v2f64:
1387 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1390 ; AVX512VL-LABEL: insert_mem_hi_v2f64:
1392 ; AVX512VL-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1393 ; AVX512VL-NEXT: retq
1394 %a = load double, double* %ptr
1395 %v = insertelement <2 x double> undef, double %a, i32 0
1396 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1397 ret <2 x double> %shuffle
; Splats the double %a into both lanes: %a is inserted into lane 0 and the
; <0, 0> mask selects that lane twice (the second shuffle operand is undef).
; Visible checks:
;   SSE2                                 : movlhps xmm0 = xmm0[0,0]
;   SSE3 / SSSE3 / SSE4.1 / AVX / AVX512VL : movddup / vmovddup xmm0 = xmm0[0,0]
1400 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1401 ; SSE2-LABEL: insert_dup_reg_v2f64:
1403 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1406 ; SSE3-LABEL: insert_dup_reg_v2f64:
1408 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1411 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1413 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1416 ; SSE41-LABEL: insert_dup_reg_v2f64:
1418 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1421 ; AVX-LABEL: insert_dup_reg_v2f64:
1423 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1426 ; AVX512VL-LABEL: insert_dup_reg_v2f64:
1428 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1429 ; AVX512VL-NEXT: retq
1430 %v = insertelement <2 x double> undef, double %a, i32 0
1431 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1432 ret <2 x double> %shuffle
; Splats a double loaded from %ptr into both lanes: the loaded value is
; inserted into lane 0 and the <0, 0> mask selects that lane twice.
; Visible checks:
;   SSE2                        : movsd load, then movlhps xmm0 = xmm0[0,0]
;   SSE3 / SSSE3 / SSE4.1 / AVX : one movddup / vmovddup with a mem[0,0] source
;   AVX512VL                    : vmovsd load, then a register vmovddup
; NOTE(review): the AVX512VL checks show a separate load and vmovddup, while
; the AVX checks show a memory-operand vmovddup. Confirm whether that
; difference is intended before regenerating these checks.
1435 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1436 ; SSE2-LABEL: insert_dup_mem_v2f64:
1438 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1439 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1442 ; SSE3-LABEL: insert_dup_mem_v2f64:
1444 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1447 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1449 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1452 ; SSE41-LABEL: insert_dup_mem_v2f64:
1454 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1457 ; AVX-LABEL: insert_dup_mem_v2f64:
1459 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1462 ; AVX512VL-LABEL: insert_dup_mem_v2f64:
1464 ; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
1465 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1466 ; AVX512VL-NEXT: retq
1467 %a = load double, double* %ptr
1468 %v = insertelement <2 x double> undef, double %a, i32 0
1469 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1470 ret <2 x double> %shuffle
; Splats an i64 loaded from %ptr into both lanes: the value is loaded with
; "align 1", inserted into lane 0, and the zeroinitializer mask selects
; lane 0 for both result elements.
; Visible checks:
;   SSE / AVX1      : 64-bit load (movq / vmovq), then pshufd / vpshufd [0,1,0,1]
;   AVX2 / AVX512VL : one vpbroadcastq with a (%rdi) memory operand
1473 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1474 ; SSE-LABEL: insert_dup_mem_v2i64:
1476 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1477 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1480 ; AVX1-LABEL: insert_dup_mem_v2i64:
1482 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1483 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1486 ; AVX2-LABEL: insert_dup_mem_v2i64:
1488 ; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
1491 ; AVX512VL-LABEL: insert_dup_mem_v2i64:
1493 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %xmm0
1494 ; AVX512VL-NEXT: retq
1495 %tmp = load i64, i64* %ptr, align 1
1496 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1497 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
; Loads a whole <2 x double> from %ptr and swaps its two lanes with the
; <1, 0> mask.
; Visible checks:
;   SSE      : movapd load into xmm0, then shufpd xmm0 = xmm0[1,0]
;   AVX      : one vpermilpd with a mem[1,0] source
;   AVX512VL : one vpermilpd $1 with a (%rdi) memory operand
1501 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1502 ; SSE-LABEL: shuffle_mem_v2f64_10:
1504 ; SSE-NEXT: movapd (%rdi), %xmm0
1505 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1508 ; AVX-LABEL: shuffle_mem_v2f64_10:
1510 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1513 ; AVX512VL-LABEL: shuffle_mem_v2f64_10:
1515 ; AVX512VL-NEXT: vpermilpd $1, (%rdi), %xmm0
1516 ; AVX512VL-NEXT: retq
1518 %a = load <2 x double>, <2 x double>* %ptr
1519 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1520 ret <2 x double> %shuffle