1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
10 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
11 target triple = "x86_64-unknown-unknown"
; Mask <0,0> - both result lanes take element 0 of %a (a splat of the low
; element); %b is never selected.
13 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
14 ; SSE-LABEL: shuffle_v2i64_00:
16 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
19 ; AVX1-LABEL: shuffle_v2i64_00:
21 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
24 ; AVX2-LABEL: shuffle_v2i64_00:
26 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
29 ; AVX512VL-LABEL: shuffle_v2i64_00:
31 ; AVX512VL-NEXT: vpbroadcastq %xmm0, %xmm0
33 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
34 ret <2 x i64> %shuffle
; Mask <1,0> - swaps the two elements of %a; %b is never selected.
36 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
37 ; SSE-LABEL: shuffle_v2i64_10:
39 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
42 ; AVX-LABEL: shuffle_v2i64_10:
44 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
46 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
47 ret <2 x i64> %shuffle
; Mask <1,1> - both result lanes take element 1 of %a; %b is never selected.
49 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
50 ; SSE-LABEL: shuffle_v2i64_11:
52 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
55 ; AVX-LABEL: shuffle_v2i64_11:
57 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
59 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
60 ret <2 x i64> %shuffle
; Mask <2,2> - both result lanes take element 0 of %b (mask indices 2 and 3
; address the second operand); %a is never selected.
62 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
63 ; SSE-LABEL: shuffle_v2i64_22:
65 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
68 ; AVX1-LABEL: shuffle_v2i64_22:
70 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
73 ; AVX2-LABEL: shuffle_v2i64_22:
75 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
78 ; AVX512VL-LABEL: shuffle_v2i64_22:
80 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm0
82 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
83 ret <2 x i64> %shuffle
; Mask <3,2> - swaps the two elements of %b; %a is never selected.
85 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
86 ; SSE-LABEL: shuffle_v2i64_32:
88 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
91 ; AVX-LABEL: shuffle_v2i64_32:
93 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
95 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
96 ret <2 x i64> %shuffle
; Mask <3,3> - both result lanes take element 1 of %b; %a is never selected.
98 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
99 ; SSE-LABEL: shuffle_v2i64_33:
101 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
104 ; AVX-LABEL: shuffle_v2i64_33:
106 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
108 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
109 ret <2 x i64> %shuffle
; Floating-point variant of mask <0,0> - both result lanes take element 0 of
; %a. The expected instruction differs per feature level (movlhps on the
; baseline run, movddup once SSE3 is enabled, vmovddup with AVX).
112 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
113 ; SSE2-LABEL: shuffle_v2f64_00:
115 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
118 ; SSE3-LABEL: shuffle_v2f64_00:
120 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
123 ; SSSE3-LABEL: shuffle_v2f64_00:
125 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
128 ; SSE41-LABEL: shuffle_v2f64_00:
130 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
133 ; AVX-LABEL: shuffle_v2f64_00:
135 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
137 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
138 ret <2 x double> %shuffle
; Floating-point variant of mask <1,0> - swaps the two elements of %a; %b is
; never selected.
140 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
141 ; SSE-LABEL: shuffle_v2f64_10:
143 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
146 ; AVX-LABEL: shuffle_v2f64_10:
148 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
151 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
152 ret <2 x double> %shuffle
; Floating-point variant of mask <1,1> - both result lanes take element 1 of
; %a; %b is never selected.
154 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
155 ; SSE-LABEL: shuffle_v2f64_11:
157 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
160 ; AVX-LABEL: shuffle_v2f64_11:
162 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
164 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
165 ret <2 x double> %shuffle
; Floating-point variant of mask <2,2> - both result lanes take element 0 of
; %b. On the baseline run the expected code shuffles xmm1 in place and then
; copies it to xmm0; the other runs write xmm0 directly from xmm1.
167 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
168 ; SSE2-LABEL: shuffle_v2f64_22:
170 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
171 ; SSE2-NEXT: movaps %xmm1, %xmm0
174 ; SSE3-LABEL: shuffle_v2f64_22:
176 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
179 ; SSSE3-LABEL: shuffle_v2f64_22:
181 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
184 ; SSE41-LABEL: shuffle_v2f64_22:
186 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
189 ; AVX-LABEL: shuffle_v2f64_22:
191 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
193 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
194 ret <2 x double> %shuffle
; Floating-point variant of mask <3,2> - swaps the two elements of %b; %a is
; never selected.
196 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
197 ; SSE-LABEL: shuffle_v2f64_32:
199 ; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
200 ; SSE-NEXT: movapd %xmm1, %xmm0
203 ; AVX-LABEL: shuffle_v2f64_32:
205 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
208 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
209 ret <2 x double> %shuffle
; Floating-point variant of mask <3,3> - both result lanes take element 1 of
; %b; %a is never selected.
211 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
212 ; SSE-LABEL: shuffle_v2f64_33:
214 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
215 ; SSE-NEXT: movaps %xmm1, %xmm0
218 ; AVX-LABEL: shuffle_v2f64_33:
220 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
222 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
223 ret <2 x double> %shuffle
; Mask <0,3> - a per-lane blend: lane 0 comes from %a[0], lane 1 from %b[1].
; Runs without SSE4.1 are expected to use movsd plus a register copy; SSE4.1
; and AVX are expected to use a single blendpd/vblendpd.
225 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
226 ; SSE2-LABEL: shuffle_v2f64_03:
228 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
229 ; SSE2-NEXT: movapd %xmm1, %xmm0
232 ; SSE3-LABEL: shuffle_v2f64_03:
234 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
235 ; SSE3-NEXT: movapd %xmm1, %xmm0
238 ; SSSE3-LABEL: shuffle_v2f64_03:
240 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
241 ; SSSE3-NEXT: movapd %xmm1, %xmm0
244 ; SSE41-LABEL: shuffle_v2f64_03:
246 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
249 ; AVX-LABEL: shuffle_v2f64_03:
251 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
253 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
254 ret <2 x double> %shuffle
; Mask <2,1> - a per-lane blend: lane 0 comes from %b[0], lane 1 from %a[1].
; Because lane 1 of %a stays in place, every run is expected to need only a
; single instruction writing xmm0.
256 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
257 ; SSE2-LABEL: shuffle_v2f64_21:
259 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
262 ; SSE3-LABEL: shuffle_v2f64_21:
264 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
267 ; SSSE3-LABEL: shuffle_v2f64_21:
269 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
272 ; SSE41-LABEL: shuffle_v2f64_21:
274 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
277 ; AVX-LABEL: shuffle_v2f64_21:
279 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
281 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
282 ret <2 x double> %shuffle
; Mask <0,2> - result is [%a[0], %b[0]], i.e. the low elements of both inputs;
; expected to lower to an unpack-low (punpcklqdq).
286 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
287 ; SSE-LABEL: shuffle_v2i64_02:
289 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
292 ; AVX-LABEL: shuffle_v2i64_02:
294 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
296 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
297 ret <2 x i64> %shuffle
; Same mask <0,2> as shuffle_v2i64_02, but with a leading %nonce argument that
; the body never reads. The expected code therefore takes its inputs from
; xmm1/xmm2 and must still deliver the result in xmm0 (an extra register copy
; in the non-AVX runs).
299 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
300 ; SSE-LABEL: shuffle_v2i64_02_copy:
302 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
303 ; SSE-NEXT: movdqa %xmm1, %xmm0
306 ; AVX-LABEL: shuffle_v2i64_02_copy:
308 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
310 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
311 ret <2 x i64> %shuffle
; Mask <0,3> - a per-lane blend: lane 0 from %a[0], lane 1 from %b[1].
; Expected lowering varies by feature level: movsd plus a copy before SSE4.1,
; pblendw with SSE4.1/AVX1, and vpblendd with AVX2 and the AVX-512VL run.
313 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
314 ; SSE2-LABEL: shuffle_v2i64_03:
316 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
317 ; SSE2-NEXT: movapd %xmm1, %xmm0
320 ; SSE3-LABEL: shuffle_v2i64_03:
322 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
323 ; SSE3-NEXT: movapd %xmm1, %xmm0
326 ; SSSE3-LABEL: shuffle_v2i64_03:
328 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
329 ; SSSE3-NEXT: movapd %xmm1, %xmm0
332 ; SSE41-LABEL: shuffle_v2i64_03:
334 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
337 ; AVX1-LABEL: shuffle_v2i64_03:
339 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
342 ; AVX2-LABEL: shuffle_v2i64_03:
344 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
347 ; AVX512VL-LABEL: shuffle_v2i64_03:
349 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
350 ; AVX512VL-NEXT: retq
351 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
352 ret <2 x i64> %shuffle
; Same mask <0,3> as shuffle_v2i64_03, but with a leading %nonce argument that
; the body never reads, so the expected code reads xmm1/xmm2 and has to place
; the result in xmm0.
354 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
355 ; SSE2-LABEL: shuffle_v2i64_03_copy:
357 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
358 ; SSE2-NEXT: movapd %xmm2, %xmm0
361 ; SSE3-LABEL: shuffle_v2i64_03_copy:
363 ; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
364 ; SSE3-NEXT: movapd %xmm2, %xmm0
367 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
369 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
370 ; SSSE3-NEXT: movapd %xmm2, %xmm0
373 ; SSE41-LABEL: shuffle_v2i64_03_copy:
375 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
376 ; SSE41-NEXT: movdqa %xmm1, %xmm0
379 ; AVX1-LABEL: shuffle_v2i64_03_copy:
381 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
384 ; AVX2-LABEL: shuffle_v2i64_03_copy:
386 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
389 ; AVX512VL-LABEL: shuffle_v2i64_03_copy:
391 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
392 ; AVX512VL-NEXT: retq
393 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
394 ret <2 x i64> %shuffle
; Mask <1,2> - result is [%a[1], %b[0]], the middle 128 bits of the %a:%b
; concatenation. Expected as shufpd before SSSE3 and as a byte-align
; (palignr/vpalignr) from SSSE3 onwards.
396 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
397 ; SSE2-LABEL: shuffle_v2i64_12:
399 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
402 ; SSE3-LABEL: shuffle_v2i64_12:
404 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
407 ; SSSE3-LABEL: shuffle_v2i64_12:
409 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
410 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
413 ; SSE41-LABEL: shuffle_v2i64_12:
415 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
416 ; SSE41-NEXT: movdqa %xmm1, %xmm0
419 ; AVX-LABEL: shuffle_v2i64_12:
421 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
423 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
424 ret <2 x i64> %shuffle
; Same mask <1,2> as shuffle_v2i64_12, but with a leading %nonce argument that
; the body never reads, so the expected code reads xmm1/xmm2 and has to place
; the result in xmm0.
426 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
427 ; SSE2-LABEL: shuffle_v2i64_12_copy:
429 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
430 ; SSE2-NEXT: movapd %xmm1, %xmm0
433 ; SSE3-LABEL: shuffle_v2i64_12_copy:
435 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
436 ; SSE3-NEXT: movapd %xmm1, %xmm0
439 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
441 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
442 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
445 ; SSE41-LABEL: shuffle_v2i64_12_copy:
447 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
448 ; SSE41-NEXT: movdqa %xmm2, %xmm0
451 ; AVX-LABEL: shuffle_v2i64_12_copy:
453 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
455 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
456 ret <2 x i64> %shuffle
; Mask <1,3> - result is [%a[1], %b[1]], i.e. the high elements of both
; inputs; expected to lower to an unpack-high (punpckhqdq).
458 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
459 ; SSE-LABEL: shuffle_v2i64_13:
461 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
464 ; AVX-LABEL: shuffle_v2i64_13:
466 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
468 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
469 ret <2 x i64> %shuffle
; Same mask <1,3> as shuffle_v2i64_13, but with a leading %nonce argument that
; the body never reads, so the expected code reads xmm1/xmm2 and has to place
; the result in xmm0.
471 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
472 ; SSE-LABEL: shuffle_v2i64_13_copy:
474 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
475 ; SSE-NEXT: movdqa %xmm1, %xmm0
478 ; AVX-LABEL: shuffle_v2i64_13_copy:
480 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
482 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
483 ret <2 x i64> %shuffle
; Mask <2,0> - result is [%b[0], %a[0]]: the unpack-low of shuffle_v2i64_02
; with the operands commuted, so the non-AVX runs are expected to compute into
; xmm1 and copy back to xmm0.
485 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
486 ; SSE-LABEL: shuffle_v2i64_20:
488 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
489 ; SSE-NEXT: movdqa %xmm1, %xmm0
492 ; AVX-LABEL: shuffle_v2i64_20:
494 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
496 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
497 ret <2 x i64> %shuffle
; Same mask <2,0> as shuffle_v2i64_20, but with a leading %nonce argument that
; the body never reads, so the expected code reads xmm1/xmm2 and has to place
; the result in xmm0.
499 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
500 ; SSE-LABEL: shuffle_v2i64_20_copy:
502 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
503 ; SSE-NEXT: movdqa %xmm2, %xmm0
506 ; AVX-LABEL: shuffle_v2i64_20_copy:
508 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
510 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
511 ret <2 x i64> %shuffle
; Mask <2,1> - a per-lane blend: lane 0 from %b[0], lane 1 from %a[1].
; Lane 1 of %a stays in place, so every run is expected to need a single
; instruction writing xmm0 (movsd, pblendw or vpblendd by feature level).
513 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
514 ; SSE2-LABEL: shuffle_v2i64_21:
516 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
519 ; SSE3-LABEL: shuffle_v2i64_21:
521 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
524 ; SSSE3-LABEL: shuffle_v2i64_21:
526 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
529 ; SSE41-LABEL: shuffle_v2i64_21:
531 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
534 ; AVX1-LABEL: shuffle_v2i64_21:
536 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
539 ; AVX2-LABEL: shuffle_v2i64_21:
541 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
544 ; AVX512VL-LABEL: shuffle_v2i64_21:
546 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
547 ; AVX512VL-NEXT: retq
548 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
549 ret <2 x i64> %shuffle
; Same mask <2,1> as shuffle_v2i64_21, but with a leading %nonce argument that
; the body never reads, so the expected code reads xmm1/xmm2 and has to place
; the result in xmm0.
551 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
552 ; SSE2-LABEL: shuffle_v2i64_21_copy:
554 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
555 ; SSE2-NEXT: movapd %xmm1, %xmm0
558 ; SSE3-LABEL: shuffle_v2i64_21_copy:
560 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
561 ; SSE3-NEXT: movapd %xmm1, %xmm0
564 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
566 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
567 ; SSSE3-NEXT: movapd %xmm1, %xmm0
570 ; SSE41-LABEL: shuffle_v2i64_21_copy:
572 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
573 ; SSE41-NEXT: movdqa %xmm1, %xmm0
576 ; AVX1-LABEL: shuffle_v2i64_21_copy:
578 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
581 ; AVX2-LABEL: shuffle_v2i64_21_copy:
583 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
586 ; AVX512VL-LABEL: shuffle_v2i64_21_copy:
588 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
589 ; AVX512VL-NEXT: retq
590 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
591 ret <2 x i64> %shuffle
; Mask <3,0> - result is [%b[1], %a[0]], the commuted form of
; shuffle_v2i64_12. Expected as shufpd plus a copy before SSSE3 and as a
; single palignr/vpalignr from SSSE3 onwards.
593 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
594 ; SSE2-LABEL: shuffle_v2i64_30:
596 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
597 ; SSE2-NEXT: movapd %xmm1, %xmm0
600 ; SSE3-LABEL: shuffle_v2i64_30:
602 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
603 ; SSE3-NEXT: movapd %xmm1, %xmm0
606 ; SSSE3-LABEL: shuffle_v2i64_30:
608 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
611 ; SSE41-LABEL: shuffle_v2i64_30:
613 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
616 ; AVX-LABEL: shuffle_v2i64_30:
618 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
620 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
621 ret <2 x i64> %shuffle
; Same mask <3,0> as shuffle_v2i64_30, but with a leading %nonce argument that
; the body never reads, so the expected code reads xmm1/xmm2 and has to place
; the result in xmm0.
623 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
624 ; SSE2-LABEL: shuffle_v2i64_30_copy:
626 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
627 ; SSE2-NEXT: movapd %xmm2, %xmm0
630 ; SSE3-LABEL: shuffle_v2i64_30_copy:
632 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
633 ; SSE3-NEXT: movapd %xmm2, %xmm0
636 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
638 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
639 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
642 ; SSE41-LABEL: shuffle_v2i64_30_copy:
644 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
645 ; SSE41-NEXT: movdqa %xmm1, %xmm0
648 ; AVX-LABEL: shuffle_v2i64_30_copy:
650 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
652 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
653 ret <2 x i64> %shuffle
; Mask <3,1> - result is [%b[1], %a[1]]: the unpack-high of shuffle_v2i64_13
; with the operands commuted, so the non-AVX runs are expected to compute into
; xmm1 and copy back to xmm0.
655 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
656 ; SSE-LABEL: shuffle_v2i64_31:
658 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
659 ; SSE-NEXT: movdqa %xmm1, %xmm0
662 ; AVX-LABEL: shuffle_v2i64_31:
664 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
666 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
667 ret <2 x i64> %shuffle
; Same mask <3,1> as shuffle_v2i64_31, but with a leading %nonce argument that
; the body never reads, so the expected code reads xmm1/xmm2 and has to place
; the result in xmm0.
669 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
670 ; SSE-LABEL: shuffle_v2i64_31_copy:
672 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
673 ; SSE-NEXT: movdqa %xmm2, %xmm0
676 ; AVX-LABEL: shuffle_v2i64_31_copy:
678 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
680 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
681 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand with mask <0,3> - keeps %a[0] in
; lane 0 and zeroes lane 1. Expected to lower to a zero-extending 64-bit move.
684 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
685 ; SSE-LABEL: shuffle_v2i64_0z:
687 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
690 ; AVX1-LABEL: shuffle_v2i64_0z:
692 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
695 ; AVX2-LABEL: shuffle_v2i64_0z:
697 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
700 ; AVX512VL-LABEL: shuffle_v2i64_0z:
702 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
703 ; AVX512VL-NEXT: retq
704 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
705 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand with mask <1,3> - moves %a[1]
; down to lane 0 and zeroes lane 1. Expected to lower to a byte shift right
; (psrldq) that shifts zeros in.
708 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
709 ; SSE-LABEL: shuffle_v2i64_1z:
711 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
714 ; AVX-LABEL: shuffle_v2i64_1z:
716 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
718 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
719 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand with mask <2,0> - zeroes lane 0
; and moves %a[0] up to lane 1. Expected to lower to a byte shift left
; (pslldq) that shifts zeros in.
722 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
723 ; SSE-LABEL: shuffle_v2i64_z0:
725 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
728 ; AVX-LABEL: shuffle_v2i64_z0:
730 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
732 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
733 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand with mask <2,1> - zeroes lane 0
; and keeps %a[1] in lane 1. Expected to materialise a zero register and then
; blend it into the low half (movsd / pblendw / vpblendd by feature level).
736 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
737 ; SSE2-LABEL: shuffle_v2i64_z1:
739 ; SSE2-NEXT: xorpd %xmm1, %xmm1
740 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
743 ; SSE3-LABEL: shuffle_v2i64_z1:
745 ; SSE3-NEXT: xorpd %xmm1, %xmm1
746 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
749 ; SSSE3-LABEL: shuffle_v2i64_z1:
751 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
752 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
755 ; SSE41-LABEL: shuffle_v2i64_z1:
757 ; SSE41-NEXT: pxor %xmm1, %xmm1
758 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
761 ; AVX1-LABEL: shuffle_v2i64_z1:
763 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
764 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
767 ; AVX2-LABEL: shuffle_v2i64_z1:
769 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
770 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
773 ; AVX512VL-LABEL: shuffle_v2i64_z1:
775 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
776 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
777 ; AVX512VL-NEXT: retq
778 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
779 ret <2 x i64> %shuffle
; Floating-point variant of the <0,3>-against-zero shuffle - keeps %a[0] in
; lane 0 and zeroes lane 1. Expected to lower to a zero-extending 64-bit move.
782 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
783 ; SSE-LABEL: shuffle_v2f64_0z:
785 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
788 ; AVX1-LABEL: shuffle_v2f64_0z:
790 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
793 ; AVX2-LABEL: shuffle_v2f64_0z:
795 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
798 ; AVX512VL-LABEL: shuffle_v2f64_0z:
800 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
801 ; AVX512VL-NEXT: retq
802 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
803 ret <2 x double> %shuffle
; Floating-point variant of the <1,3>-against-zero shuffle - moves %a[1] down
; to lane 0 and zeroes lane 1. Expected to materialise a zero register and
; unpack-high against it.
806 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
807 ; SSE-LABEL: shuffle_v2f64_1z:
809 ; SSE-NEXT: xorpd %xmm1, %xmm1
810 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
813 ; AVX1-LABEL: shuffle_v2f64_1z:
815 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
816 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
819 ; AVX2-LABEL: shuffle_v2f64_1z:
821 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
822 ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
825 ; AVX512VL-LABEL: shuffle_v2f64_1z:
827 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
828 ; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
829 ; AVX512VL-NEXT: retq
830 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
831 ret <2 x double> %shuffle
; Floating-point variant of the <2,0>-against-zero shuffle - zeroes lane 0 and
; moves %a[0] up to lane 1. Expected to materialise a zero register and
; unpack-low with the zero as the first source.
834 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
835 ; SSE-LABEL: shuffle_v2f64_z0:
837 ; SSE-NEXT: xorpd %xmm1, %xmm1
838 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
839 ; SSE-NEXT: movapd %xmm1, %xmm0
842 ; AVX1-LABEL: shuffle_v2f64_z0:
844 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
845 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
848 ; AVX2-LABEL: shuffle_v2f64_z0:
850 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
851 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
854 ; AVX512VL-LABEL: shuffle_v2f64_z0:
856 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
857 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
858 ; AVX512VL-NEXT: retq
859 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
860 ret <2 x double> %shuffle
; Floating-point variant of the <2,1>-against-zero shuffle - zeroes lane 0 and
; keeps %a[1] in lane 1. Expected to materialise a zero register and blend it
; into the low lane (movsd before SSE4.1, blendpd/vblendpd afterwards).
863 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
864 ; SSE2-LABEL: shuffle_v2f64_z1:
866 ; SSE2-NEXT: xorpd %xmm1, %xmm1
867 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
870 ; SSE3-LABEL: shuffle_v2f64_z1:
872 ; SSE3-NEXT: xorpd %xmm1, %xmm1
873 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
876 ; SSSE3-LABEL: shuffle_v2f64_z1:
878 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
879 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
882 ; SSE41-LABEL: shuffle_v2f64_z1:
884 ; SSE41-NEXT: xorpd %xmm1, %xmm1
885 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
888 ; AVX-LABEL: shuffle_v2f64_z1:
890 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
891 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
893 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
894 ret <2 x double> %shuffle
; Two shuffles separated by bitcasts. The 64-bit shuffle (mask <2,1>) yields
; [0, %a[1]]; the result is then viewed as <4 x float> and shuffled with
; <2,3,0,1>, which swaps the two 64-bit halves, giving [%a[1], 0] overall.
; The checks expect the pair to be combined into one shufpd against a zero
; register.
897 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
898 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
900 ; SSE-NEXT: xorpd %xmm1, %xmm1
901 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
904 ; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
906 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
907 ; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
910 ; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
912 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
913 ; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
916 ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
918 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
919 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
920 ; AVX512VL-NEXT: retq
921 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
922 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
923 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
924 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
925 ret <2 x double> %bitcast64
; Views %x as <4 x float>, replaces float lane 0 with the constant 1.0 via
; mask <4,1,2,3>, casts back to <2 x i64> and ANDs with <-4294967296, -1>.
; -4294967296 is 0xFFFFFFFF00000000, so the AND clears the low 32 bits of
; element 0 (the lane that just received 1.0) and keeps everything else.
; The function must end by returning %and; a body whose last instruction is
; the 'and' has no terminator and is rejected by the IR verifier.
928 define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
929 ; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
931 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
932 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
933 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
936 ; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
938 ; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
939 ; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
940 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
943 ; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
945 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
946 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
947 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
950 ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
952 ; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
953 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
954 ; SSE41-NEXT: xorps %xmm1, %xmm1
955 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
958 ; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
960 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
961 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
962 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
963 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
966 ; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
968 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
969 ; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
970 ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
971 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
974 ; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
976 ; AVX512VL-NEXT: vmovss {{.*}}(%rip), %xmm1
977 ; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
978 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
979 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
980 ; AVX512VL-NEXT: retq
981 %bitcast32 = bitcast <2 x i64> %x to <4 x float>
982 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
983 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
984 %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
985 ret <2 x i64> %and
; Builds [%a, 0] from a scalar argument: %a is inserted into lane 0 of an
; undef vector, then mask <0,3> takes that lane plus lane 1 of an all-zero
; vector. Expected to lower to a single GPR-to-XMM move (the checks contain no
; separate zeroing instruction).
988 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
989 ; SSE-LABEL: insert_reg_and_zero_v2i64:
991 ; SSE-NEXT: movd %rdi, %xmm0
994 ; AVX-LABEL: insert_reg_and_zero_v2i64:
996 ; AVX-NEXT: vmovq %rdi, %xmm0
998 %v = insertelement <2 x i64> undef, i64 %a, i32 0
999 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1000 ret <2 x i64> %shuffle
; Builds [*%ptr, 0]: the i64 is loaded, inserted into lane 0 of an undef
; vector, and mask <0,3> takes lane 1 from an all-zero vector. Expected to
; fold into a single 64-bit load into the XMM register.
1003 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
1004 ; SSE-LABEL: insert_mem_and_zero_v2i64:
1006 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1009 ; AVX1-LABEL: insert_mem_and_zero_v2i64:
1011 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1014 ; AVX2-LABEL: insert_mem_and_zero_v2i64:
1016 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1019 ; AVX512VL-LABEL: insert_mem_and_zero_v2i64:
1021 ; AVX512VL-NEXT: vmovq (%rdi), %xmm0
1022 ; AVX512VL-NEXT: retq
1023 %a = load i64, i64* %ptr
1024 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1025 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1026 ret <2 x i64> %shuffle
; Builds [%a, 0.0] from a scalar double argument: %a is inserted into lane 0
; of an undef vector, then mask <0,3> takes lane 1 from an all-zero vector.
; The checks show the scalar already in xmm0, so only the upper lane has to be
; cleared (a zero-extending 64-bit move).
1029 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
1030 ; SSE-LABEL: insert_reg_and_zero_v2f64:
1032 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1035 ; AVX1-LABEL: insert_reg_and_zero_v2f64:
1037 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1040 ; AVX2-LABEL: insert_reg_and_zero_v2f64:
1042 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1045 ; AVX512VL-LABEL: insert_reg_and_zero_v2f64:
1047 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
1048 ; AVX512VL-NEXT: retq
1049 %v = insertelement <2 x double> undef, double %a, i32 0
1050 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1051 ret <2 x double> %shuffle
; Builds [*%ptr, 0.0]: the double is loaded, inserted into lane 0 of an undef
; vector, and mask <0,3> takes lane 1 from an all-zero vector. Expected to
; fold into a single scalar-double load (movsd/vmovsd from memory).
1054 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
1055 ; SSE-LABEL: insert_mem_and_zero_v2f64:
1057 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1060 ; AVX1-LABEL: insert_mem_and_zero_v2f64:
1062 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1065 ; AVX2-LABEL: insert_mem_and_zero_v2f64:
1067 ; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1070 ; AVX512VL-LABEL: insert_mem_and_zero_v2f64:
1072 ; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
1073 ; AVX512VL-NEXT: retq
1074 %a = load double, double* %ptr
1075 %v = insertelement <2 x double> undef, double %a, i32 0
1076 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1077 ret <2 x double> %shuffle
; Replaces the low element of %b with scalar %a: %a is inserted into lane 0 of
; an undef vector and mask <0,3> combines it with %b[1], giving [%a, %b[1]].
; Expected as a GPR-to-XMM move followed by a low-lane blend (movsd, pblendw
; or vpblendd by feature level).
1080 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
1081 ; SSE2-LABEL: insert_reg_lo_v2i64:
1083 ; SSE2-NEXT: movd %rdi, %xmm1
1084 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1087 ; SSE3-LABEL: insert_reg_lo_v2i64:
1089 ; SSE3-NEXT: movd %rdi, %xmm1
1090 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1093 ; SSSE3-LABEL: insert_reg_lo_v2i64:
1095 ; SSSE3-NEXT: movd %rdi, %xmm1
1096 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1099 ; SSE41-LABEL: insert_reg_lo_v2i64:
1101 ; SSE41-NEXT: movd %rdi, %xmm1
1102 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1105 ; AVX1-LABEL: insert_reg_lo_v2i64:
1107 ; AVX1-NEXT: vmovq %rdi, %xmm1
1108 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1111 ; AVX2-LABEL: insert_reg_lo_v2i64:
1113 ; AVX2-NEXT: vmovq %rdi, %xmm1
1114 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1117 ; AVX512VL-LABEL: insert_reg_lo_v2i64:
1119 ; AVX512VL-NEXT: vmovq %rdi, %xmm1
1120 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1121 ; AVX512VL-NEXT: retq
1122 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1123 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1124 ret <2 x i64> %shuffle
; Replaces the low element of %b with an i64 loaded from %ptr, giving
; [*%ptr, %b[1]] via mask <0,3>. Runs before SSE4.1 are expected to fold the
; load into a single movlpd; later runs load with movq/vmovq and then blend.
1127 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
1128 ; SSE2-LABEL: insert_mem_lo_v2i64:
1130 ; SSE2-NEXT: movlpd (%rdi), %xmm0
1133 ; SSE3-LABEL: insert_mem_lo_v2i64:
1135 ; SSE3-NEXT: movlpd (%rdi), %xmm0
1138 ; SSSE3-LABEL: insert_mem_lo_v2i64:
1140 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
1143 ; SSE41-LABEL: insert_mem_lo_v2i64:
1145 ; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1146 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1149 ; AVX1-LABEL: insert_mem_lo_v2i64:
1151 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1152 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1155 ; AVX2-LABEL: insert_mem_lo_v2i64:
1157 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1158 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1161 ; AVX512VL-LABEL: insert_mem_lo_v2i64:
1163 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1164 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1165 ; AVX512VL-NEXT: retq
1166 %a = load i64, i64* %ptr
1167 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1168 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1169 ret <2 x i64> %shuffle
; Insert a scalar i64 passed in a register into the high lane of a v2i64.
; The shuffle mask <2, 0> takes lane 0 of %b as the result's low lane and the
; scalar (lane 0 of the insertelement result) as the result's high lane.
; The checks expect the scalar to be moved from rdi into xmm1 and combined
; with xmm0 by a low-quadword unpack (punpcklqdq or vpunpcklqdq).
1172 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1173 ; SSE-LABEL: insert_reg_hi_v2i64:
1175 ; SSE-NEXT: movd %rdi, %xmm1
1176 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1179 ; AVX-LABEL: insert_reg_hi_v2i64:
1181 ; AVX-NEXT: vmovq %rdi, %xmm1
1182 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1184 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1185 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1186 ret <2 x i64> %shuffle
; Insert a scalar i64 loaded from %ptr into the high lane of a v2i64.
; Same shuffle as the register variant (mask <2, 0> gives lane 0 of %b
; followed by the loaded scalar), but the scalar comes from memory.
; Every configuration is expected to load the scalar into xmm1 with a
; separate movq or vmovq and then unpack it with xmm0, so the load is not
; folded into the unpack.
1189 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1190 ; SSE-LABEL: insert_mem_hi_v2i64:
1192 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1193 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1196 ; AVX1-LABEL: insert_mem_hi_v2i64:
1198 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1199 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1202 ; AVX2-LABEL: insert_mem_hi_v2i64:
1204 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1205 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1208 ; AVX512VL-LABEL: insert_mem_hi_v2i64:
1210 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1211 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1212 ; AVX512VL-NEXT: retq
1213 %a = load i64, i64* %ptr
1214 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1215 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1216 ret <2 x i64> %shuffle
; Insert a scalar double passed in a register into the low lane of a v2f64.
; The shuffle mask <0, 3> keeps the scalar in lane 0 and takes the high lane
; from %b.
; The SSE checks expect movsd to merge into xmm1 followed by a movapd copy
; back to xmm0, while the AVX configurations expect a single three-operand
; vmovsd that writes xmm0 directly.
1219 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1220 ; SSE-LABEL: insert_reg_lo_v2f64:
1222 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1223 ; SSE-NEXT: movapd %xmm1, %xmm0
1226 ; AVX1-LABEL: insert_reg_lo_v2f64:
1228 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1231 ; AVX2-LABEL: insert_reg_lo_v2f64:
1233 ; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1236 ; AVX512VL-LABEL: insert_reg_lo_v2f64:
1238 ; AVX512VL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
1239 ; AVX512VL-NEXT: retq
1240 %v = insertelement <2 x double> undef, double %a, i32 0
1241 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1242 ret <2 x double> %shuffle
; Insert a scalar double loaded from %ptr into the low lane of a v2f64.
; The shuffle mask <0, 3> keeps the loaded scalar in lane 0 and takes the
; high lane from %b.
; The checks expect the load to be folded into a single movlpd or vmovlpd
; with a memory operand.
1245 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1246 ; SSE-LABEL: insert_mem_lo_v2f64:
1248 ; SSE-NEXT: movlpd (%rdi), %xmm0
1251 ; AVX-LABEL: insert_mem_lo_v2f64:
1253 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
1255 %a = load double, double* %ptr
1256 %v = insertelement <2 x double> undef, double %a, i32 0
1257 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1258 ret <2 x double> %shuffle
; Insert a scalar double passed in a register into the high lane of a v2f64.
; The shuffle mask <2, 0> takes lane 0 of %b as the result's low lane and the
; scalar as the result's high lane.
; The SSE checks expect unpcklpd into xmm1 followed by a movapd copy back to
; xmm0, while the AVX checks expect a single vunpcklpd writing xmm0.
1261 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1262 ; SSE-LABEL: insert_reg_hi_v2f64:
1264 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1265 ; SSE-NEXT: movapd %xmm1, %xmm0
1268 ; AVX-LABEL: insert_reg_hi_v2f64:
1270 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1272 %v = insertelement <2 x double> undef, double %a, i32 0
1273 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1274 ret <2 x double> %shuffle
; Insert a scalar double loaded from %ptr into the high lane of a v2f64.
; The shuffle mask <2, 0> takes lane 0 of %b as the result's low lane and the
; loaded scalar as the result's high lane.
; The checks expect the load to be folded into a single movhpd or vmovhpd
; with a memory operand.
1277 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1278 ; SSE-LABEL: insert_mem_hi_v2f64:
1280 ; SSE-NEXT: movhpd (%rdi), %xmm0
1283 ; AVX-LABEL: insert_mem_hi_v2f64:
1285 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1287 %a = load double, double* %ptr
1288 %v = insertelement <2 x double> undef, double %a, i32 0
1289 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1290 ret <2 x double> %shuffle
; Duplicate a scalar double passed in a register into both lanes of a v2f64.
; The scalar is placed in lane 0 of an undef vector and the shuffle mask
; <0, 0> selects that lane for both result lanes.
; The checks expect movlhps on plain SSE2 and movddup (or vmovddup) on every
; configuration from SSE3 upwards.
1293 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1294 ; SSE2-LABEL: insert_dup_reg_v2f64:
1296 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1299 ; SSE3-LABEL: insert_dup_reg_v2f64:
1301 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1304 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1306 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1309 ; SSE41-LABEL: insert_dup_reg_v2f64:
1311 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1314 ; AVX-LABEL: insert_dup_reg_v2f64:
1316 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1318 %v = insertelement <2 x double> undef, double %a, i32 0
1319 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1320 ret <2 x double> %shuffle
; Duplicate a scalar double loaded from %ptr into both lanes of a v2f64.
; The loaded scalar is placed in lane 0 of an undef vector and the shuffle
; mask <0, 0> selects that lane for both result lanes.
; The checks expect a movsd load followed by movlhps on plain SSE2, and a
; single movddup (or vmovddup) with a memory source from SSE3 upwards.
1323 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1324 ; SSE2-LABEL: insert_dup_mem_v2f64:
1326 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1327 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1330 ; SSE3-LABEL: insert_dup_mem_v2f64:
1332 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1335 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1337 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1340 ; SSE41-LABEL: insert_dup_mem_v2f64:
1342 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1345 ; AVX-LABEL: insert_dup_mem_v2f64:
1347 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1349 %a = load double, double* %ptr
1350 %v = insertelement <2 x double> undef, double %a, i32 0
1351 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1352 ret <2 x double> %shuffle
; Duplicate lane 0 of a full v2f64 loaded from %ptr into both result lanes.
; Unlike the scalar-load variant, the IR loads the whole 128-bit vector and
; then applies the shuffle mask <0, 0>.
; The checks expect a movaps load followed by movlhps on plain SSE2, and a
; single movddup (or vmovddup) with a memory source from SSE3 upwards.
1355 define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
1356 ; SSE2-LABEL: insert_dup_mem128_v2f64:
1358 ; SSE2-NEXT: movaps (%rdi), %xmm0
1359 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1362 ; SSE3-LABEL: insert_dup_mem128_v2f64:
1364 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1367 ; SSSE3-LABEL: insert_dup_mem128_v2f64:
1369 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1372 ; SSE41-LABEL: insert_dup_mem128_v2f64:
1374 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1377 ; AVX-LABEL: insert_dup_mem128_v2f64:
1379 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1381 %v = load <2 x double>, <2 x double>* %ptr
1382 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1383 ret <2 x double> %shuffle
; Duplicate a scalar i64 loaded from %ptr (with align 1) into both lanes of a
; v2i64. The loaded scalar is placed in lane 0 of an undef vector and the
; zeroinitializer shuffle mask selects that lane for both result lanes.
; The checks expect a movq load followed by pshufd on SSE and AVX1, and a
; single vpbroadcastq with a memory source on AVX2 and AVX512VL.
1387 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1388 ; SSE-LABEL: insert_dup_mem_v2i64:
1390 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1391 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1394 ; AVX1-LABEL: insert_dup_mem_v2i64:
1396 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1397 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1400 ; AVX2-LABEL: insert_dup_mem_v2i64:
1402 ; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
1405 ; AVX512VL-LABEL: insert_dup_mem_v2i64:
1407 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %xmm0
1408 ; AVX512VL-NEXT: retq
1409 %tmp = load i64, i64* %ptr, align 1
1410 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1411 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
; Swap the two lanes of a v2f64 loaded from %ptr (shuffle mask <1, 0>).
; The SSE checks expect a movapd load followed by an in-register shufpd,
; while the AVX checks expect the load to be folded into a single vpermilpd
; with a memory source.
1415 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1416 ; SSE-LABEL: shuffle_mem_v2f64_10:
1418 ; SSE-NEXT: movapd (%rdi), %xmm0
1419 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1422 ; AVX-LABEL: shuffle_mem_v2f64_10:
1424 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1427 %a = load <2 x double>, <2 x double>* %ptr
1428 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1429 ret <2 x double> %shuffle