1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
10 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
11 target triple = "x86_64-unknown-unknown"
; Mask <0,0> splats the low i64 of %a; AVX2 and later use a broadcast.
13 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
14 ; SSE-LABEL: shuffle_v2i64_00:
16 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
19 ; AVX1-LABEL: shuffle_v2i64_00:
21 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
24 ; AVX2-LABEL: shuffle_v2i64_00:
26 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
29 ; AVX512VL-LABEL: shuffle_v2i64_00:
31 ; AVX512VL-NEXT: vpbroadcastq %xmm0, %xmm0
33 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
34 ret <2 x i64> %shuffle
; Mask <1,0> swaps the two i64 lanes of %a.
36 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
37 ; SSE-LABEL: shuffle_v2i64_10:
39 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
42 ; AVX-LABEL: shuffle_v2i64_10:
44 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
46 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
47 ret <2 x i64> %shuffle
; Mask <1,1> splats the high i64 of %a.
49 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
50 ; SSE-LABEL: shuffle_v2i64_11:
52 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
55 ; AVX-LABEL: shuffle_v2i64_11:
57 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
59 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
60 ret <2 x i64> %shuffle
; Mask <2,2> splats the low i64 of %b into the result register.
62 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
63 ; SSE-LABEL: shuffle_v2i64_22:
65 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
68 ; AVX1-LABEL: shuffle_v2i64_22:
70 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
73 ; AVX2-LABEL: shuffle_v2i64_22:
75 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
78 ; AVX512VL-LABEL: shuffle_v2i64_22:
80 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm0
82 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
83 ret <2 x i64> %shuffle
; Mask <3,2> swaps the two i64 lanes of %b.
85 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
86 ; SSE-LABEL: shuffle_v2i64_32:
88 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
91 ; AVX-LABEL: shuffle_v2i64_32:
93 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
95 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
96 ret <2 x i64> %shuffle
; Mask <3,3> splats the high i64 of %b.
98 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
99 ; SSE-LABEL: shuffle_v2i64_33:
101 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
104 ; AVX-LABEL: shuffle_v2i64_33:
106 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
108 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
109 ret <2 x i64> %shuffle
; Splat the low double of %a; movddup needs SSE3, so plain SSE2 falls back to movlhps.
112 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
113 ; SSE2-LABEL: shuffle_v2f64_00:
115 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
118 ; SSE3-LABEL: shuffle_v2f64_00:
120 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
123 ; SSSE3-LABEL: shuffle_v2f64_00:
125 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
128 ; SSE41-LABEL: shuffle_v2f64_00:
130 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
133 ; AVX-LABEL: shuffle_v2f64_00:
135 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
137 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
138 ret <2 x double> %shuffle
; Swap the two doubles of %a.
140 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
141 ; SSE-LABEL: shuffle_v2f64_10:
143 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
146 ; AVX-LABEL: shuffle_v2f64_10:
148 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
151 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
152 ret <2 x double> %shuffle
; Splat the high double of %a.
154 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
155 ; SSE-LABEL: shuffle_v2f64_11:
157 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
160 ; AVX-LABEL: shuffle_v2f64_11:
162 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
164 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
165 ret <2 x double> %shuffle
; Splat the low double of %b.
167 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
168 ; SSE2-LABEL: shuffle_v2f64_22:
170 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
171 ; SSE2-NEXT: movaps %xmm1, %xmm0
174 ; SSE3-LABEL: shuffle_v2f64_22:
176 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
179 ; SSSE3-LABEL: shuffle_v2f64_22:
181 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
184 ; SSE41-LABEL: shuffle_v2f64_22:
186 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
189 ; AVX-LABEL: shuffle_v2f64_22:
191 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
193 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
194 ret <2 x double> %shuffle
; Swap the two doubles of %b into the result register.
196 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
197 ; SSE-LABEL: shuffle_v2f64_32:
199 ; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
200 ; SSE-NEXT: movapd %xmm1, %xmm0
203 ; AVX-LABEL: shuffle_v2f64_32:
205 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
208 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
209 ret <2 x double> %shuffle
; Splat the high double of %b.
211 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
212 ; SSE-LABEL: shuffle_v2f64_33:
214 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
215 ; SSE-NEXT: movaps %xmm1, %xmm0
218 ; AVX-LABEL: shuffle_v2f64_33:
220 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
222 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
223 ret <2 x double> %shuffle
; Take low(%a) and high(%b); SSE4.1 gains blendpd, older targets use movsd.
225 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
226 ; SSE2-LABEL: shuffle_v2f64_03:
228 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
229 ; SSE2-NEXT: movapd %xmm1, %xmm0
232 ; SSE3-LABEL: shuffle_v2f64_03:
234 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
235 ; SSE3-NEXT: movapd %xmm1, %xmm0
238 ; SSSE3-LABEL: shuffle_v2f64_03:
240 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
241 ; SSSE3-NEXT: movapd %xmm1, %xmm0
244 ; SSE41-LABEL: shuffle_v2f64_03:
246 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
249 ; AVX-LABEL: shuffle_v2f64_03:
251 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
253 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
254 ret <2 x double> %shuffle
; Take low(%b) and high(%a).
256 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
257 ; SSE2-LABEL: shuffle_v2f64_21:
259 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
262 ; SSE3-LABEL: shuffle_v2f64_21:
264 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
267 ; SSSE3-LABEL: shuffle_v2f64_21:
269 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
272 ; SSE41-LABEL: shuffle_v2f64_21:
274 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
277 ; AVX-LABEL: shuffle_v2f64_21:
279 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
281 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
282 ret <2 x double> %shuffle
; Interleave the low i64 elements of %a and %b (punpcklqdq).
286 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
287 ; SSE-LABEL: shuffle_v2i64_02:
289 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
292 ; AVX-LABEL: shuffle_v2i64_02:
294 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
296 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
297 ret <2 x i64> %shuffle
; Same shuffle, but the inputs arrive in xmm1/xmm2, so SSE needs a final register copy.
299 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
300 ; SSE-LABEL: shuffle_v2i64_02_copy:
302 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
303 ; SSE-NEXT: movdqa %xmm1, %xmm0
306 ; AVX-LABEL: shuffle_v2i64_02_copy:
308 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
310 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
311 ret <2 x i64> %shuffle
; Take low(%a) and high(%b); word blend on SSE4.1/AVX1, dword blend on AVX2 and later.
313 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
314 ; SSE2-LABEL: shuffle_v2i64_03:
316 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
317 ; SSE2-NEXT: movapd %xmm1, %xmm0
320 ; SSE3-LABEL: shuffle_v2i64_03:
322 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
323 ; SSE3-NEXT: movapd %xmm1, %xmm0
326 ; SSSE3-LABEL: shuffle_v2i64_03:
328 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
329 ; SSSE3-NEXT: movapd %xmm1, %xmm0
332 ; SSE41-LABEL: shuffle_v2i64_03:
334 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
337 ; AVX1-LABEL: shuffle_v2i64_03:
339 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
342 ; AVX2-LABEL: shuffle_v2i64_03:
344 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
347 ; AVX512VL-LABEL: shuffle_v2i64_03:
349 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
350 ; AVX512VL-NEXT: retq
351 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
352 ret <2 x i64> %shuffle
; As above with displaced input registers.
354 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
355 ; SSE2-LABEL: shuffle_v2i64_03_copy:
357 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
358 ; SSE2-NEXT: movapd %xmm2, %xmm0
361 ; SSE3-LABEL: shuffle_v2i64_03_copy:
363 ; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
364 ; SSE3-NEXT: movapd %xmm2, %xmm0
367 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
369 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
370 ; SSSE3-NEXT: movapd %xmm2, %xmm0
373 ; SSE41-LABEL: shuffle_v2i64_03_copy:
375 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
376 ; SSE41-NEXT: movdqa %xmm1, %xmm0
379 ; AVX1-LABEL: shuffle_v2i64_03_copy:
381 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
384 ; AVX2-LABEL: shuffle_v2i64_03_copy:
386 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
389 ; AVX512VL-LABEL: shuffle_v2i64_03_copy:
391 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
392 ; AVX512VL-NEXT: retq
393 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
394 ret <2 x i64> %shuffle
; Concatenate high(%a) with low(%b); SSSE3 and later use palignr.
396 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
397 ; SSE2-LABEL: shuffle_v2i64_12:
399 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
402 ; SSE3-LABEL: shuffle_v2i64_12:
404 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
407 ; SSSE3-LABEL: shuffle_v2i64_12:
409 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
410 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
413 ; SSE41-LABEL: shuffle_v2i64_12:
415 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
416 ; SSE41-NEXT: movdqa %xmm1, %xmm0
419 ; AVX-LABEL: shuffle_v2i64_12:
421 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
423 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
424 ret <2 x i64> %shuffle
; As above with displaced input registers.
426 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
427 ; SSE2-LABEL: shuffle_v2i64_12_copy:
429 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
430 ; SSE2-NEXT: movapd %xmm1, %xmm0
433 ; SSE3-LABEL: shuffle_v2i64_12_copy:
435 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
436 ; SSE3-NEXT: movapd %xmm1, %xmm0
439 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
441 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
442 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
445 ; SSE41-LABEL: shuffle_v2i64_12_copy:
447 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
448 ; SSE41-NEXT: movdqa %xmm2, %xmm0
451 ; AVX-LABEL: shuffle_v2i64_12_copy:
453 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
455 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
456 ret <2 x i64> %shuffle
; Interleave the high i64 elements of %a and %b (punpckhqdq).
458 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
459 ; SSE-LABEL: shuffle_v2i64_13:
461 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
464 ; AVX-LABEL: shuffle_v2i64_13:
466 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
468 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
469 ret <2 x i64> %shuffle
; As above with displaced input registers.
471 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
472 ; SSE-LABEL: shuffle_v2i64_13_copy:
474 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
475 ; SSE-NEXT: movdqa %xmm1, %xmm0
478 ; AVX-LABEL: shuffle_v2i64_13_copy:
480 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
482 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
483 ret <2 x i64> %shuffle
; Interleave the low i64 elements of %b and %a.
485 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
486 ; SSE-LABEL: shuffle_v2i64_20:
488 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
489 ; SSE-NEXT: movdqa %xmm1, %xmm0
492 ; AVX-LABEL: shuffle_v2i64_20:
494 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
496 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
497 ret <2 x i64> %shuffle
; As above with displaced input registers.
499 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
500 ; SSE-LABEL: shuffle_v2i64_20_copy:
502 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
503 ; SSE-NEXT: movdqa %xmm2, %xmm0
506 ; AVX-LABEL: shuffle_v2i64_20_copy:
508 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
510 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
511 ret <2 x i64> %shuffle
; Take low(%b) and high(%a).
513 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
514 ; SSE2-LABEL: shuffle_v2i64_21:
516 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
519 ; SSE3-LABEL: shuffle_v2i64_21:
521 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
524 ; SSSE3-LABEL: shuffle_v2i64_21:
526 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
529 ; SSE41-LABEL: shuffle_v2i64_21:
531 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
534 ; AVX1-LABEL: shuffle_v2i64_21:
536 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
539 ; AVX2-LABEL: shuffle_v2i64_21:
541 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
544 ; AVX512VL-LABEL: shuffle_v2i64_21:
546 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
547 ; AVX512VL-NEXT: retq
548 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
549 ret <2 x i64> %shuffle
; As above with displaced input registers.
551 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
552 ; SSE2-LABEL: shuffle_v2i64_21_copy:
554 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
555 ; SSE2-NEXT: movapd %xmm1, %xmm0
558 ; SSE3-LABEL: shuffle_v2i64_21_copy:
560 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
561 ; SSE3-NEXT: movapd %xmm1, %xmm0
564 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
566 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
567 ; SSSE3-NEXT: movapd %xmm1, %xmm0
570 ; SSE41-LABEL: shuffle_v2i64_21_copy:
572 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
573 ; SSE41-NEXT: movdqa %xmm1, %xmm0
576 ; AVX1-LABEL: shuffle_v2i64_21_copy:
578 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
581 ; AVX2-LABEL: shuffle_v2i64_21_copy:
583 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
586 ; AVX512VL-LABEL: shuffle_v2i64_21_copy:
588 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
589 ; AVX512VL-NEXT: retq
590 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
591 ret <2 x i64> %shuffle
; Concatenate high(%b) with low(%a).
593 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
594 ; SSE2-LABEL: shuffle_v2i64_30:
596 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
597 ; SSE2-NEXT: movapd %xmm1, %xmm0
600 ; SSE3-LABEL: shuffle_v2i64_30:
602 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
603 ; SSE3-NEXT: movapd %xmm1, %xmm0
606 ; SSSE3-LABEL: shuffle_v2i64_30:
608 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
611 ; SSE41-LABEL: shuffle_v2i64_30:
613 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
616 ; AVX-LABEL: shuffle_v2i64_30:
618 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
620 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
621 ret <2 x i64> %shuffle
; As above with displaced input registers.
623 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
624 ; SSE2-LABEL: shuffle_v2i64_30_copy:
626 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
627 ; SSE2-NEXT: movapd %xmm2, %xmm0
630 ; SSE3-LABEL: shuffle_v2i64_30_copy:
632 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
633 ; SSE3-NEXT: movapd %xmm2, %xmm0
636 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
638 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
639 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
642 ; SSE41-LABEL: shuffle_v2i64_30_copy:
644 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
645 ; SSE41-NEXT: movdqa %xmm1, %xmm0
648 ; AVX-LABEL: shuffle_v2i64_30_copy:
650 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
652 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
653 ret <2 x i64> %shuffle
; Interleave the high i64 elements of %b and %a.
655 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
656 ; SSE-LABEL: shuffle_v2i64_31:
658 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
659 ; SSE-NEXT: movdqa %xmm1, %xmm0
662 ; AVX-LABEL: shuffle_v2i64_31:
664 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
666 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
667 ret <2 x i64> %shuffle
; As above with displaced input registers.
669 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
670 ; SSE-LABEL: shuffle_v2i64_31_copy:
672 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
673 ; SSE-NEXT: movdqa %xmm2, %xmm0
676 ; AVX-LABEL: shuffle_v2i64_31_copy:
678 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
680 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
681 ret <2 x i64> %shuffle
; Keep the low i64 and zero the high half; movq does both at once.
684 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
685 ; SSE-LABEL: shuffle_v2i64_0z:
687 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
690 ; AVX-LABEL: shuffle_v2i64_0z:
692 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
694 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
695 ret <2 x i64> %shuffle
; Move the high i64 down and zero the rest via a byte-wise right shift.
698 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
699 ; SSE-LABEL: shuffle_v2i64_1z:
701 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
704 ; AVX-LABEL: shuffle_v2i64_1z:
706 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
708 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
709 ret <2 x i64> %shuffle
; Move the low i64 up and zero the rest via a byte-wise left shift.
712 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
713 ; SSE-LABEL: shuffle_v2i64_z0:
715 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
718 ; AVX-LABEL: shuffle_v2i64_z0:
720 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
722 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
723 ret <2 x i64> %shuffle
; Zero only the low i64; needs an explicit zero register plus a blend.
726 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
727 ; SSE2-LABEL: shuffle_v2i64_z1:
729 ; SSE2-NEXT: xorpd %xmm1, %xmm1
730 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
733 ; SSE3-LABEL: shuffle_v2i64_z1:
735 ; SSE3-NEXT: xorpd %xmm1, %xmm1
736 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
739 ; SSSE3-LABEL: shuffle_v2i64_z1:
741 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
742 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
745 ; SSE41-LABEL: shuffle_v2i64_z1:
747 ; SSE41-NEXT: pxor %xmm1, %xmm1
748 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
751 ; AVX1-LABEL: shuffle_v2i64_z1:
753 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
754 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
757 ; AVX2-LABEL: shuffle_v2i64_z1:
759 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
760 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
763 ; AVX512VL-LABEL: shuffle_v2i64_z1:
765 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
766 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
767 ; AVX512VL-NEXT: retq
768 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
769 ret <2 x i64> %shuffle
; Keep the low double and zero the high half.
772 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
773 ; SSE-LABEL: shuffle_v2f64_0z:
775 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
778 ; AVX-LABEL: shuffle_v2f64_0z:
780 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
782 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
783 ret <2 x double> %shuffle
; Move the high double down and zero the high half (unpck against a zero register).
786 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
787 ; SSE-LABEL: shuffle_v2f64_1z:
789 ; SSE-NEXT: xorpd %xmm1, %xmm1
790 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
793 ; AVX1-LABEL: shuffle_v2f64_1z:
795 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
796 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
799 ; AVX2-LABEL: shuffle_v2f64_1z:
801 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
802 ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
805 ; AVX512VL-LABEL: shuffle_v2f64_1z:
807 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
808 ; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
809 ; AVX512VL-NEXT: retq
810 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
811 ret <2 x double> %shuffle
; Zero the low double and move the low element up.
814 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
815 ; SSE-LABEL: shuffle_v2f64_z0:
817 ; SSE-NEXT: xorpd %xmm1, %xmm1
818 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
819 ; SSE-NEXT: movapd %xmm1, %xmm0
822 ; AVX1-LABEL: shuffle_v2f64_z0:
824 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
825 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
828 ; AVX2-LABEL: shuffle_v2f64_z0:
830 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
831 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
834 ; AVX512VL-LABEL: shuffle_v2f64_z0:
836 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
837 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
838 ; AVX512VL-NEXT: retq
839 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
840 ret <2 x double> %shuffle
; Zero only the low double, keeping the high.
843 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
844 ; SSE2-LABEL: shuffle_v2f64_z1:
846 ; SSE2-NEXT: xorpd %xmm1, %xmm1
847 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
850 ; SSE3-LABEL: shuffle_v2f64_z1:
852 ; SSE3-NEXT: xorpd %xmm1, %xmm1
853 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
856 ; SSSE3-LABEL: shuffle_v2f64_z1:
858 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
859 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
862 ; SSE41-LABEL: shuffle_v2f64_z1:
864 ; SSE41-NEXT: xorpd %xmm1, %xmm1
865 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
868 ; AVX-LABEL: shuffle_v2f64_z1:
870 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
871 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
873 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
874 ret <2 x double> %shuffle
; The f32-level lane swap should fold back into a single f64 shuffle against zero.
877 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
878 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
880 ; SSE-NEXT: xorpd %xmm1, %xmm1
881 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
884 ; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
886 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
887 ; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
890 ; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
892 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
893 ; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
896 ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
898 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
899 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
900 ; AVX512VL-NEXT: retq
901 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
902 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
903 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
904 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
905 ret <2 x double> %bitcast64
; Insert 1.0f into lane 0 then mask off the low i64; note the insert+blend pair
; currently emitted on SSE4.1/AVX targets.
908 define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
909 ; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
911 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
912 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
913 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
916 ; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
918 ; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
919 ; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
920 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
923 ; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
925 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
926 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
927 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
930 ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
932 ; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
933 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
934 ; SSE41-NEXT: xorps %xmm1, %xmm1
935 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
938 ; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
940 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
941 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
942 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
943 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
946 ; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
948 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
949 ; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
950 ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
951 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
954 ; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
956 ; AVX512VL-NEXT: vmovss {{.*}}(%rip), %xmm1
957 ; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
958 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
959 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
960 ; AVX512VL-NEXT: retq
961 %bitcast32 = bitcast <2 x i64> %x to <4 x float>
962 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
963 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
964 %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
; Scalar GPR into lane 0 with the upper lane zeroed (implicit in movd/movq).
968 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
969 ; SSE-LABEL: insert_reg_and_zero_v2i64:
971 ; SSE-NEXT: movd %rdi, %xmm0
974 ; AVX-LABEL: insert_reg_and_zero_v2i64:
976 ; AVX-NEXT: vmovq %rdi, %xmm0
978 %v = insertelement <2 x i64> undef, i64 %a, i32 0
979 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
980 ret <2 x i64> %shuffle
; 64-bit load into lane 0 with a zeroed upper lane.
983 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
984 ; SSE-LABEL: insert_mem_and_zero_v2i64:
986 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
989 ; AVX1-LABEL: insert_mem_and_zero_v2i64:
991 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
994 ; AVX2-LABEL: insert_mem_and_zero_v2i64:
996 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
999 ; AVX512VL-LABEL: insert_mem_and_zero_v2i64:
1001 ; AVX512VL-NEXT: vmovq (%rdi), %xmm0
1002 ; AVX512VL-NEXT: retq
1003 %a = load i64, i64* %ptr
1004 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1005 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1006 ret <2 x i64> %shuffle
; Scalar double into lane 0 with the upper lane zeroed.
1009 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
1010 ; SSE-LABEL: insert_reg_and_zero_v2f64:
1012 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1015 ; AVX-LABEL: insert_reg_and_zero_v2f64:
1017 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1019 %v = insertelement <2 x double> undef, double %a, i32 0
1020 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1021 ret <2 x double> %shuffle
; Scalar double load into lane 0 with a zeroed upper lane.
1024 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
1025 ; SSE-LABEL: insert_mem_and_zero_v2f64:
1027 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1030 ; AVX1-LABEL: insert_mem_and_zero_v2f64:
1032 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1035 ; AVX2-LABEL: insert_mem_and_zero_v2f64:
1037 ; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1040 ; AVX512VL-LABEL: insert_mem_and_zero_v2f64:
1042 ; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
1043 ; AVX512VL-NEXT: retq
1044 %a = load double, double* %ptr
1045 %v = insertelement <2 x double> undef, double %a, i32 0
1046 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1047 ret <2 x double> %shuffle
1050 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
1051 ; SSE2-LABEL: insert_reg_lo_v2i64:
1053 ; SSE2-NEXT: movd %rdi, %xmm1
1054 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1057 ; SSE3-LABEL: insert_reg_lo_v2i64:
1059 ; SSE3-NEXT: movd %rdi, %xmm1
1060 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1063 ; SSSE3-LABEL: insert_reg_lo_v2i64:
1065 ; SSSE3-NEXT: movd %rdi, %xmm1
1066 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1069 ; SSE41-LABEL: insert_reg_lo_v2i64:
1071 ; SSE41-NEXT: movd %rdi, %xmm1
1072 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1075 ; AVX1-LABEL: insert_reg_lo_v2i64:
1077 ; AVX1-NEXT: vmovq %rdi, %xmm1
1078 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1081 ; AVX2-LABEL: insert_reg_lo_v2i64:
1083 ; AVX2-NEXT: vmovq %rdi, %xmm1
1084 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1087 ; AVX512VL-LABEL: insert_reg_lo_v2i64:
1089 ; AVX512VL-NEXT: vmovq %rdi, %xmm1
1090 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1091 ; AVX512VL-NEXT: retq
1092 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1093 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1094 ret <2 x i64> %shuffle
1097 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
1098 ; SSE2-LABEL: insert_mem_lo_v2i64:
1100 ; SSE2-NEXT: movlpd (%rdi), %xmm0
1103 ; SSE3-LABEL: insert_mem_lo_v2i64:
1105 ; SSE3-NEXT: movlpd (%rdi), %xmm0
1108 ; SSSE3-LABEL: insert_mem_lo_v2i64:
1110 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
1113 ; SSE41-LABEL: insert_mem_lo_v2i64:
1115 ; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1116 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1119 ; AVX1-LABEL: insert_mem_lo_v2i64:
1121 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1122 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1125 ; AVX2-LABEL: insert_mem_lo_v2i64:
1127 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1128 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1131 ; AVX512VL-LABEL: insert_mem_lo_v2i64:
1133 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1134 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1135 ; AVX512VL-NEXT: retq
1136 %a = load i64, i64* %ptr
1137 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1138 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1139 ret <2 x i64> %shuffle
; Insert i64 %a into lane 1, taking lane 0 from %b (shuffle mask <2,0>).
; All targets move the GPR into an XMM register and interleave it above %b
; with (v)punpcklqdq.
1142 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1143 ; SSE-LABEL: insert_reg_hi_v2i64:
1145 ; SSE-NEXT: movd %rdi, %xmm1
1146 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1149 ; AVX-LABEL: insert_reg_hi_v2i64:
1151 ; AVX-NEXT: vmovq %rdi, %xmm1
1152 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1154 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1155 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1156 ret <2 x i64> %shuffle
; Insert an i64 loaded from %ptr into lane 1, taking lane 0 from %b
; (shuffle mask <2,0>): (v)movq the scalar, then (v)punpcklqdq it above %b.
1159 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1160 ; SSE-LABEL: insert_mem_hi_v2i64:
1162 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1163 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1166 ; AVX1-LABEL: insert_mem_hi_v2i64:
1168 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1169 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1172 ; AVX2-LABEL: insert_mem_hi_v2i64:
1174 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1175 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1178 ; AVX512VL-LABEL: insert_mem_hi_v2i64:
1180 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1181 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1182 ; AVX512VL-NEXT: retq
1183 %a = load i64, i64* %ptr
1184 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1185 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1186 ret <2 x i64> %shuffle
; Insert double %a into lane 0, keeping lane 1 of %b (shuffle mask <0,3>).
; Both operands are already in XMM registers, so a single (v)movsd merge
; suffices; SSE needs an extra movapd because the destination differs.
1189 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1190 ; SSE-LABEL: insert_reg_lo_v2f64:
1192 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1193 ; SSE-NEXT: movapd %xmm1, %xmm0
1196 ; AVX1-LABEL: insert_reg_lo_v2f64:
1198 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1201 ; AVX2-LABEL: insert_reg_lo_v2f64:
1203 ; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1206 ; AVX512VL-LABEL: insert_reg_lo_v2f64:
1208 ; AVX512VL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
1209 ; AVX512VL-NEXT: retq
1210 %v = insertelement <2 x double> undef, double %a, i32 0
1211 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1212 ret <2 x double> %shuffle
; Insert a double loaded from %ptr into lane 0, keeping lane 1 of %b
; (shuffle mask <0,3>). The load folds into a single (v)movlpd on all targets.
1215 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1216 ; SSE-LABEL: insert_mem_lo_v2f64:
1218 ; SSE-NEXT: movlpd (%rdi), %xmm0
1221 ; AVX-LABEL: insert_mem_lo_v2f64:
1223 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
1225 %a = load double, double* %ptr
1226 %v = insertelement <2 x double> undef, double %a, i32 0
1227 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1228 ret <2 x double> %shuffle
; Insert double %a into lane 1, taking lane 0 from %b (shuffle mask <2,0>):
; a single (v)unpcklpd interleaves %a above %b; SSE needs an extra movapd
; because the result lands in xmm1.
1231 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1232 ; SSE-LABEL: insert_reg_hi_v2f64:
1234 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1235 ; SSE-NEXT: movapd %xmm1, %xmm0
1238 ; AVX-LABEL: insert_reg_hi_v2f64:
1240 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1242 %v = insertelement <2 x double> undef, double %a, i32 0
1243 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1244 ret <2 x double> %shuffle
; Insert a double loaded from %ptr into lane 1, taking lane 0 from %b
; (shuffle mask <2,0>). The load folds into a single (v)movhpd on all targets.
1247 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1248 ; SSE-LABEL: insert_mem_hi_v2f64:
1250 ; SSE-NEXT: movhpd (%rdi), %xmm0
1253 ; AVX-LABEL: insert_mem_hi_v2f64:
1255 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1257 %a = load double, double* %ptr
1258 %v = insertelement <2 x double> undef, double %a, i32 0
1259 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1260 ret <2 x double> %shuffle
; Splat double %a into both lanes (insert into lane 0, shuffle mask <0,0>).
; SSE2 lacks movddup and uses movlhps; SSE3 and later use (v)movddup.
1263 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1264 ; SSE2-LABEL: insert_dup_reg_v2f64:
1266 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1269 ; SSE3-LABEL: insert_dup_reg_v2f64:
1271 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1274 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1276 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1279 ; SSE41-LABEL: insert_dup_reg_v2f64:
1281 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1284 ; AVX-LABEL: insert_dup_reg_v2f64:
1286 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1288 %v = insertelement <2 x double> undef, double %a, i32 0
1289 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1290 ret <2 x double> %shuffle
; Splat a double loaded from %ptr into both lanes. SSE2 needs a scalar
; movsd load plus movlhps; SSE3 and later fold the load into one
; (v)movddup from memory.
1293 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1294 ; SSE2-LABEL: insert_dup_mem_v2f64:
1296 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1297 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1300 ; SSE3-LABEL: insert_dup_mem_v2f64:
1302 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1305 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1307 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1310 ; SSE41-LABEL: insert_dup_mem_v2f64:
1312 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1315 ; AVX-LABEL: insert_dup_mem_v2f64:
1317 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1319 %a = load double, double* %ptr
1320 %v = insertelement <2 x double> undef, double %a, i32 0
1321 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1322 ret <2 x double> %shuffle
; Splat lane 0 of a full <2 x double> vector load (shuffle mask <0,0>).
; SSE2 does a movaps vector load then movlhps; SSE3 and later narrow it to
; a single (v)movddup scalar-load broadcast.
1325 define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
1326 ; SSE2-LABEL: insert_dup_mem128_v2f64:
1328 ; SSE2-NEXT: movaps (%rdi), %xmm0
1329 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1332 ; SSE3-LABEL: insert_dup_mem128_v2f64:
1334 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1337 ; SSSE3-LABEL: insert_dup_mem128_v2f64:
1339 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1342 ; SSE41-LABEL: insert_dup_mem128_v2f64:
1344 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1347 ; AVX-LABEL: insert_dup_mem128_v2f64:
1349 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1351 %v = load <2 x double>, <2 x double>* %ptr
1352 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1353 ret <2 x double> %shuffle
; Splat an i64 loaded from %ptr into both lanes. Note the load is align 1
; (underaligned on purpose). SSE/AVX1 use movq plus pshufd; AVX2 and
; AVX512VL fold everything into one vpbroadcastq from memory.
1357 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1358 ; SSE-LABEL: insert_dup_mem_v2i64:
1360 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1361 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1364 ; AVX1-LABEL: insert_dup_mem_v2i64:
1366 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1367 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1370 ; AVX2-LABEL: insert_dup_mem_v2i64:
1372 ; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
1375 ; AVX512VL-LABEL: insert_dup_mem_v2i64:
1377 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %xmm0
1378 ; AVX512VL-NEXT: retq
1379 %tmp = load i64, i64* %ptr, align 1
1380 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1381 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
; Swap the two lanes of a <2 x double> loaded from memory (shuffle mask
; <1,0>). SSE loads with movapd then shufpd; AVX folds the load into a
; single vpermilpd with a memory operand.
1385 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1386 ; SSE-LABEL: shuffle_mem_v2f64_10:
1388 ; SSE-NEXT: movapd (%rdi), %xmm0
1389 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1392 ; AVX-LABEL: shuffle_mem_v2f64_10:
1394 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1397 %a = load <2 x double>, <2 x double>* %ptr
1398 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1399 ret <2 x double> %shuffle