1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10 target triple = "x86_64-unknown-unknown"
; Mask <0,0> splats element 0 of %a into both result lanes; %b is unused.
12 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
13 ; SSE-LABEL: shuffle_v2i64_00:
15 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
18 ; AVX1-LABEL: shuffle_v2i64_00:
20 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
23 ; AVX2-LABEL: shuffle_v2i64_00:
25 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
28 ; AVX512VL-LABEL: shuffle_v2i64_00:
30 ; AVX512VL-NEXT: vpbroadcastq %xmm0, %xmm0
32 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
33 ret <2 x i64> %shuffle
; Mask <1,0> swaps the two elements of %a; %b is unused.
35 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
36 ; SSE-LABEL: shuffle_v2i64_10:
38 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
41 ; AVX-LABEL: shuffle_v2i64_10:
43 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
45 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
46 ret <2 x i64> %shuffle
; Mask <1,1> splats element 1 of %a into both result lanes; %b is unused.
48 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
49 ; SSE-LABEL: shuffle_v2i64_11:
51 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
54 ; AVX-LABEL: shuffle_v2i64_11:
56 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
58 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
59 ret <2 x i64> %shuffle
; Mask <2,2> splats element 0 of %b (mask indices 2 and 3 select from the
; second operand); %a is unused.
61 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
62 ; SSE-LABEL: shuffle_v2i64_22:
64 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
67 ; AVX1-LABEL: shuffle_v2i64_22:
69 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
72 ; AVX2-LABEL: shuffle_v2i64_22:
74 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
77 ; AVX512VL-LABEL: shuffle_v2i64_22:
79 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm0
81 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
82 ret <2 x i64> %shuffle
; Mask <3,2> swaps the two elements of %b; %a is unused.
84 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
85 ; SSE-LABEL: shuffle_v2i64_32:
87 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
90 ; AVX-LABEL: shuffle_v2i64_32:
92 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
94 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
95 ret <2 x i64> %shuffle
; Mask <3,3> splats element 1 of %b into both result lanes; %a is unused.
97 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
98 ; SSE-LABEL: shuffle_v2i64_33:
100 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
103 ; AVX-LABEL: shuffle_v2i64_33:
105 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
107 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
108 ret <2 x i64> %shuffle
; Floating-point splat of element 0 of %a (mask <0,0>); %b is unused.
; The expected instruction is movlhps for the plain SSE2 run and
; movddup/vmovddup for the SSE3-and-later runs.
111 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
112 ; SSE2-LABEL: shuffle_v2f64_00:
114 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
117 ; SSE3-LABEL: shuffle_v2f64_00:
119 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
122 ; SSSE3-LABEL: shuffle_v2f64_00:
124 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
127 ; SSE41-LABEL: shuffle_v2f64_00:
129 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
132 ; AVX-LABEL: shuffle_v2f64_00:
134 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
136 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
137 ret <2 x double> %shuffle
; Mask <1,0> swaps the two doubles of %a; %b is unused.
139 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
140 ; SSE-LABEL: shuffle_v2f64_10:
142 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
145 ; AVX-LABEL: shuffle_v2f64_10:
147 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
150 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
151 ret <2 x double> %shuffle
; Mask <1,1> splats element 1 of %a into both result lanes; %b is unused.
153 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
154 ; SSE-LABEL: shuffle_v2f64_11:
156 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
159 ; AVX-LABEL: shuffle_v2f64_11:
161 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
163 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
164 ret <2 x double> %shuffle
; Mask <2,2> splats element 0 of %b; %a is unused. The plain SSE2 run expects
; an in-place movlhps on xmm1 followed by a copy into xmm0, while the later
; runs expect a single movddup/vmovddup from xmm1 into xmm0.
166 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
167 ; SSE2-LABEL: shuffle_v2f64_22:
169 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
170 ; SSE2-NEXT: movaps %xmm1, %xmm0
173 ; SSE3-LABEL: shuffle_v2f64_22:
175 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
178 ; SSSE3-LABEL: shuffle_v2f64_22:
180 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
183 ; SSE41-LABEL: shuffle_v2f64_22:
185 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
188 ; AVX-LABEL: shuffle_v2f64_22:
190 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
192 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
193 ret <2 x double> %shuffle
; Mask <3,2> swaps the two doubles of %b; %a is unused. The SSE runs expect
; the swap in xmm1 followed by a copy into xmm0.
195 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
196 ; SSE-LABEL: shuffle_v2f64_32:
198 ; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
199 ; SSE-NEXT: movapd %xmm1, %xmm0
202 ; AVX-LABEL: shuffle_v2f64_32:
204 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
207 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
208 ret <2 x double> %shuffle
; Mask <3,3> splats element 1 of %b; %a is unused. The SSE runs expect the
; splat in xmm1 followed by a copy into xmm0.
210 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
211 ; SSE-LABEL: shuffle_v2f64_33:
213 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
214 ; SSE-NEXT: movaps %xmm1, %xmm0
217 ; AVX-LABEL: shuffle_v2f64_33:
219 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
221 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
222 ret <2 x double> %shuffle
; Mask <0,3> blends element 0 of %a with element 1 of %b. The runs before
; SSE4.1 expect movsd into xmm1 plus a copy; the SSE4.1 and AVX runs expect a
; single blendpd/vblendpd.
224 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
225 ; SSE2-LABEL: shuffle_v2f64_03:
227 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
228 ; SSE2-NEXT: movapd %xmm1, %xmm0
231 ; SSE3-LABEL: shuffle_v2f64_03:
233 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
234 ; SSE3-NEXT: movapd %xmm1, %xmm0
237 ; SSSE3-LABEL: shuffle_v2f64_03:
239 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
240 ; SSSE3-NEXT: movapd %xmm1, %xmm0
243 ; SSE41-LABEL: shuffle_v2f64_03:
245 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
248 ; AVX-LABEL: shuffle_v2f64_03:
250 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
252 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
253 ret <2 x double> %shuffle
; Mask <2,1> blends element 0 of %b with element 1 of %a; the result can be
; formed in place in xmm0, so no extra copy is expected in any run.
255 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
256 ; SSE2-LABEL: shuffle_v2f64_21:
258 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
261 ; SSE3-LABEL: shuffle_v2f64_21:
263 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
266 ; SSSE3-LABEL: shuffle_v2f64_21:
268 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
271 ; SSE41-LABEL: shuffle_v2f64_21:
273 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
276 ; AVX-LABEL: shuffle_v2f64_21:
278 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
280 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
281 ret <2 x double> %shuffle
; Mask <0,2> pairs element 0 of %a with element 0 of %b (a low-quadword
; unpack of the two inputs).
285 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
286 ; SSE-LABEL: shuffle_v2i64_02:
288 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
291 ; AVX-LABEL: shuffle_v2i64_02:
293 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
295 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
296 ret <2 x i64> %shuffle
; Mask <0,2> pairs element 0 of %a with element 0 of %b. The leading %nonce
; argument is unused; the expected code reads xmm1 and xmm2 and must leave the
; result in xmm0 (an explicit movdqa in the SSE runs, a three-operand form in
; the AVX runs).
298 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
299 ; SSE-LABEL: shuffle_v2i64_02_copy:
301 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
302 ; SSE-NEXT: movdqa %xmm1, %xmm0
305 ; AVX-LABEL: shuffle_v2i64_02_copy:
307 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
309 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
310 ret <2 x i64> %shuffle
; Mask <0,3> blends element 0 of %a with element 1 of %b. Expected lowering
; per run: movsd plus a copy before SSE4.1, pblendw/vpblendw for SSE4.1 and
; AVX1, and vpblendd for AVX2 and AVX512VL.
312 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
313 ; SSE2-LABEL: shuffle_v2i64_03:
315 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
316 ; SSE2-NEXT: movapd %xmm1, %xmm0
319 ; SSE3-LABEL: shuffle_v2i64_03:
321 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
322 ; SSE3-NEXT: movapd %xmm1, %xmm0
325 ; SSSE3-LABEL: shuffle_v2i64_03:
327 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
328 ; SSSE3-NEXT: movapd %xmm1, %xmm0
331 ; SSE41-LABEL: shuffle_v2i64_03:
333 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
336 ; AVX1-LABEL: shuffle_v2i64_03:
338 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
341 ; AVX2-LABEL: shuffle_v2i64_03:
343 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
346 ; AVX512VL-LABEL: shuffle_v2i64_03:
348 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
349 ; AVX512VL-NEXT: retq
350 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
351 ret <2 x i64> %shuffle
; Mask <0,3> blends element 0 of %a with element 1 of %b. The leading %nonce
; argument is unused; the expected code reads xmm1 and xmm2 and leaves the
; result in xmm0, so every SSE run needs an explicit register copy.
353 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
354 ; SSE2-LABEL: shuffle_v2i64_03_copy:
356 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
357 ; SSE2-NEXT: movapd %xmm2, %xmm0
360 ; SSE3-LABEL: shuffle_v2i64_03_copy:
362 ; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
363 ; SSE3-NEXT: movapd %xmm2, %xmm0
366 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
368 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
369 ; SSSE3-NEXT: movapd %xmm2, %xmm0
372 ; SSE41-LABEL: shuffle_v2i64_03_copy:
374 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
375 ; SSE41-NEXT: movdqa %xmm1, %xmm0
378 ; AVX1-LABEL: shuffle_v2i64_03_copy:
380 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
383 ; AVX2-LABEL: shuffle_v2i64_03_copy:
385 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
388 ; AVX512VL-LABEL: shuffle_v2i64_03_copy:
390 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
391 ; AVX512VL-NEXT: retq
392 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
393 ret <2 x i64> %shuffle
; Mask <1,2> takes element 1 of %a followed by element 0 of %b. The SSE2 and
; SSE3 runs expect shufpd; the SSSE3-and-later runs expect palignr/vpalignr.
395 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
396 ; SSE2-LABEL: shuffle_v2i64_12:
398 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
401 ; SSE3-LABEL: shuffle_v2i64_12:
403 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
406 ; SSSE3-LABEL: shuffle_v2i64_12:
408 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
409 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
412 ; SSE41-LABEL: shuffle_v2i64_12:
414 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
415 ; SSE41-NEXT: movdqa %xmm1, %xmm0
418 ; AVX-LABEL: shuffle_v2i64_12:
420 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
422 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
423 ret <2 x i64> %shuffle
; Mask <1,2> takes element 1 of %a followed by element 0 of %b. The leading
; %nonce argument is unused; the expected code reads xmm1 and xmm2 and leaves
; the result in xmm0.
425 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
426 ; SSE2-LABEL: shuffle_v2i64_12_copy:
428 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
429 ; SSE2-NEXT: movapd %xmm1, %xmm0
432 ; SSE3-LABEL: shuffle_v2i64_12_copy:
434 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
435 ; SSE3-NEXT: movapd %xmm1, %xmm0
438 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
440 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
441 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
444 ; SSE41-LABEL: shuffle_v2i64_12_copy:
446 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
447 ; SSE41-NEXT: movdqa %xmm2, %xmm0
450 ; AVX-LABEL: shuffle_v2i64_12_copy:
452 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
454 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
455 ret <2 x i64> %shuffle
; Mask <1,3> pairs element 1 of %a with element 1 of %b (a high-quadword
; unpack of the two inputs).
457 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
458 ; SSE-LABEL: shuffle_v2i64_13:
460 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
463 ; AVX-LABEL: shuffle_v2i64_13:
465 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
467 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
468 ret <2 x i64> %shuffle
; Mask <1,3> pairs element 1 of %a with element 1 of %b. The leading %nonce
; argument is unused; the expected code reads xmm1 and xmm2 and leaves the
; result in xmm0.
470 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
471 ; SSE-LABEL: shuffle_v2i64_13_copy:
473 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
474 ; SSE-NEXT: movdqa %xmm1, %xmm0
477 ; AVX-LABEL: shuffle_v2i64_13_copy:
479 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
481 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
482 ret <2 x i64> %shuffle
; Mask <2,0> pairs element 0 of %b with element 0 of %a (the commuted form of
; the <0,2> unpack); the SSE runs expect the unpack in xmm1 plus a copy.
484 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
485 ; SSE-LABEL: shuffle_v2i64_20:
487 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
488 ; SSE-NEXT: movdqa %xmm1, %xmm0
491 ; AVX-LABEL: shuffle_v2i64_20:
493 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
495 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
496 ret <2 x i64> %shuffle
; Mask <2,0> pairs element 0 of %b with element 0 of %a. The leading %nonce
; argument is unused; the expected code reads xmm1 and xmm2 and leaves the
; result in xmm0.
498 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
499 ; SSE-LABEL: shuffle_v2i64_20_copy:
501 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
502 ; SSE-NEXT: movdqa %xmm2, %xmm0
505 ; AVX-LABEL: shuffle_v2i64_20_copy:
507 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
509 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
510 ret <2 x i64> %shuffle
; Mask <2,1> blends element 0 of %b with element 1 of %a. The result is formed
; in place in xmm0 in every run: movsd before SSE4.1, pblendw/vpblendw for
; SSE4.1 and AVX1, vpblendd for AVX2 and AVX512VL.
512 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
513 ; SSE2-LABEL: shuffle_v2i64_21:
515 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
518 ; SSE3-LABEL: shuffle_v2i64_21:
520 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
523 ; SSSE3-LABEL: shuffle_v2i64_21:
525 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
528 ; SSE41-LABEL: shuffle_v2i64_21:
530 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
533 ; AVX1-LABEL: shuffle_v2i64_21:
535 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
538 ; AVX2-LABEL: shuffle_v2i64_21:
540 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
543 ; AVX512VL-LABEL: shuffle_v2i64_21:
545 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
546 ; AVX512VL-NEXT: retq
547 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
548 ret <2 x i64> %shuffle
; Mask <2,1> blends element 0 of %b with element 1 of %a. The leading %nonce
; argument is unused; the expected code reads xmm1 and xmm2 and leaves the
; result in xmm0, so every SSE run needs an explicit register copy.
550 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
551 ; SSE2-LABEL: shuffle_v2i64_21_copy:
553 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
554 ; SSE2-NEXT: movapd %xmm1, %xmm0
557 ; SSE3-LABEL: shuffle_v2i64_21_copy:
559 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
560 ; SSE3-NEXT: movapd %xmm1, %xmm0
563 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
565 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
566 ; SSSE3-NEXT: movapd %xmm1, %xmm0
569 ; SSE41-LABEL: shuffle_v2i64_21_copy:
571 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
572 ; SSE41-NEXT: movdqa %xmm1, %xmm0
575 ; AVX1-LABEL: shuffle_v2i64_21_copy:
577 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
580 ; AVX2-LABEL: shuffle_v2i64_21_copy:
582 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
585 ; AVX512VL-LABEL: shuffle_v2i64_21_copy:
587 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
588 ; AVX512VL-NEXT: retq
589 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
590 ret <2 x i64> %shuffle
; Mask <3,0> takes element 1 of %b followed by element 0 of %a. The SSE2 and
; SSE3 runs expect shufpd in xmm1 plus a copy; the SSSE3-and-later runs expect
; a single palignr/vpalignr into xmm0.
592 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
593 ; SSE2-LABEL: shuffle_v2i64_30:
595 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
596 ; SSE2-NEXT: movapd %xmm1, %xmm0
599 ; SSE3-LABEL: shuffle_v2i64_30:
601 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
602 ; SSE3-NEXT: movapd %xmm1, %xmm0
605 ; SSSE3-LABEL: shuffle_v2i64_30:
607 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
610 ; SSE41-LABEL: shuffle_v2i64_30:
612 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
615 ; AVX-LABEL: shuffle_v2i64_30:
617 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
619 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
620 ret <2 x i64> %shuffle
; Mask <3,0> takes element 1 of %b followed by element 0 of %a. The leading
; %nonce argument is unused; the expected code reads xmm1 and xmm2 and leaves
; the result in xmm0.
622 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
623 ; SSE2-LABEL: shuffle_v2i64_30_copy:
625 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
626 ; SSE2-NEXT: movapd %xmm2, %xmm0
629 ; SSE3-LABEL: shuffle_v2i64_30_copy:
631 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
632 ; SSE3-NEXT: movapd %xmm2, %xmm0
635 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
637 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
638 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
641 ; SSE41-LABEL: shuffle_v2i64_30_copy:
643 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
644 ; SSE41-NEXT: movdqa %xmm1, %xmm0
647 ; AVX-LABEL: shuffle_v2i64_30_copy:
649 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
651 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
652 ret <2 x i64> %shuffle
; Mask <3,1> pairs element 1 of %b with element 1 of %a (the commuted form of
; the <1,3> unpack); the SSE runs expect the unpack in xmm1 plus a copy.
654 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
655 ; SSE-LABEL: shuffle_v2i64_31:
657 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
658 ; SSE-NEXT: movdqa %xmm1, %xmm0
661 ; AVX-LABEL: shuffle_v2i64_31:
663 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
665 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
666 ret <2 x i64> %shuffle
; Mask <3,1> pairs element 1 of %b with element 1 of %a. The leading %nonce
; argument is unused; the expected code reads xmm1 and xmm2 and leaves the
; result in xmm0.
668 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
669 ; SSE-LABEL: shuffle_v2i64_31_copy:
671 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
672 ; SSE-NEXT: movdqa %xmm2, %xmm0
675 ; AVX-LABEL: shuffle_v2i64_31_copy:
677 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
679 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
680 ret <2 x i64> %shuffle
; Shuffle against a zero vector with mask <0,3>, which keeps element 0 of %a
; and puts zero in lane 1. Every run expects a single movq/vmovq.
683 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
684 ; SSE-LABEL: shuffle_v2i64_0z:
686 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
689 ; AVX1-LABEL: shuffle_v2i64_0z:
691 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
694 ; AVX2-LABEL: shuffle_v2i64_0z:
696 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
699 ; AVX512VL-LABEL: shuffle_v2i64_0z:
701 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
702 ; AVX512VL-NEXT: retq
703 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
704 ret <2 x i64> %shuffle
; Shuffle against a zero vector with mask <1,3>, which moves element 1 of %a
; into lane 0 and puts zero in lane 1. Expected as a byte shift right by 8.
707 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
708 ; SSE-LABEL: shuffle_v2i64_1z:
710 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
713 ; AVX-LABEL: shuffle_v2i64_1z:
715 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
717 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
718 ret <2 x i64> %shuffle
; Shuffle against a zero vector with mask <2,0>, which puts zero in lane 0 and
; element 0 of %a in lane 1. Expected as a byte shift left by 8.
721 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
722 ; SSE-LABEL: shuffle_v2i64_z0:
724 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
727 ; AVX-LABEL: shuffle_v2i64_z0:
729 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
731 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
732 ret <2 x i64> %shuffle
; Shuffle against a zero vector with mask <2,1>, which puts zero in lane 0 and
; keeps element 1 of %a in lane 1. Every run expects a zeroed xmm1 followed by
; a blend-style instruction (movsd, pblendw/vpblendw or vpblendd).
735 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
736 ; SSE2-LABEL: shuffle_v2i64_z1:
738 ; SSE2-NEXT: xorpd %xmm1, %xmm1
739 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
742 ; SSE3-LABEL: shuffle_v2i64_z1:
744 ; SSE3-NEXT: xorpd %xmm1, %xmm1
745 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
748 ; SSSE3-LABEL: shuffle_v2i64_z1:
750 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
751 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
754 ; SSE41-LABEL: shuffle_v2i64_z1:
756 ; SSE41-NEXT: pxor %xmm1, %xmm1
757 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
760 ; AVX1-LABEL: shuffle_v2i64_z1:
762 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
763 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
766 ; AVX2-LABEL: shuffle_v2i64_z1:
768 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
769 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
772 ; AVX512VL-LABEL: shuffle_v2i64_z1:
774 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
775 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
776 ; AVX512VL-NEXT: retq
777 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
778 ret <2 x i64> %shuffle
; Floating-point shuffle against a zero vector with mask <0,3>, which keeps
; element 0 of %a and puts zero in lane 1. Every run expects a single
; movq/vmovq.
781 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
782 ; SSE-LABEL: shuffle_v2f64_0z:
784 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
787 ; AVX1-LABEL: shuffle_v2f64_0z:
789 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
792 ; AVX2-LABEL: shuffle_v2f64_0z:
794 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
797 ; AVX512VL-LABEL: shuffle_v2f64_0z:
799 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
800 ; AVX512VL-NEXT: retq
801 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
802 ret <2 x double> %shuffle
; Floating-point shuffle against a zero vector with mask <1,3>, which moves
; element 1 of %a into lane 0 and puts zero in lane 1. Expected as a zeroed
; xmm1 followed by a high unpack.
805 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
806 ; SSE-LABEL: shuffle_v2f64_1z:
808 ; SSE-NEXT: xorpd %xmm1, %xmm1
809 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
812 ; AVX1-LABEL: shuffle_v2f64_1z:
814 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
815 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
818 ; AVX2-LABEL: shuffle_v2f64_1z:
820 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
821 ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
824 ; AVX512VL-LABEL: shuffle_v2f64_1z:
826 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
827 ; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
828 ; AVX512VL-NEXT: retq
829 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
830 ret <2 x double> %shuffle
; Floating-point shuffle against a zero vector with mask <2,0>, which puts
; zero in lane 0 and element 0 of %a in lane 1. Expected as a zeroed xmm1
; followed by a low unpack (plus a copy into xmm0 in the SSE runs).
833 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
834 ; SSE-LABEL: shuffle_v2f64_z0:
836 ; SSE-NEXT: xorpd %xmm1, %xmm1
837 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
838 ; SSE-NEXT: movapd %xmm1, %xmm0
841 ; AVX1-LABEL: shuffle_v2f64_z0:
843 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
844 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
847 ; AVX2-LABEL: shuffle_v2f64_z0:
849 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
850 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
853 ; AVX512VL-LABEL: shuffle_v2f64_z0:
855 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
856 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
857 ; AVX512VL-NEXT: retq
858 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
859 ret <2 x double> %shuffle
; Floating-point shuffle against a zero vector with mask <2,1>, which puts
; zero in lane 0 and keeps element 1 of %a in lane 1. Expected as a zeroed
; xmm1 followed by movsd (before SSE4.1) or blendpd/vblendpd.
862 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
863 ; SSE2-LABEL: shuffle_v2f64_z1:
865 ; SSE2-NEXT: xorpd %xmm1, %xmm1
866 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
869 ; SSE3-LABEL: shuffle_v2f64_z1:
871 ; SSE3-NEXT: xorpd %xmm1, %xmm1
872 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
875 ; SSSE3-LABEL: shuffle_v2f64_z1:
877 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
878 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
881 ; SSE41-LABEL: shuffle_v2f64_z1:
883 ; SSE41-NEXT: xorpd %xmm1, %xmm1
884 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
887 ; AVX-LABEL: shuffle_v2f64_z1:
889 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
890 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
892 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
893 ret <2 x double> %shuffle
; Two shuffles joined by bitcasts: the first (mask <2,1> against zero) builds
; <zero, a[1]>, and the second swaps the 64-bit halves via a <4 x float>
; shuffle with mask <2,3,0,1>, giving <a[1], zero>. The expected output is a
; zeroed xmm1 and one shufpd/vshufpd, i.e. the pair is combined across the
; bitcasts.
896 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
897 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
899 ; SSE-NEXT: xorpd %xmm1, %xmm1
900 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
903 ; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
905 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
906 ; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
909 ; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
911 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
912 ; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
915 ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
917 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
918 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
919 ; AVX512VL-NEXT: retq
920 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
921 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
922 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
923 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
924 ret <2 x double> %bitcast64
; Views %x as <4 x float>, replaces float lane 0 with the constant 1.0 (mask
; <4,1,2,3>), views the result as <2 x i64> again and then clears the low 32
; bits of i64 lane 0 with an 'and' against <0xFFFFFFFF00000000, -1>. The net
; effect is %x with its lowest 32-bit lane zeroed; the expected code below
; still materialises the 1.0 constant before masking it away.
927 define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
928 ; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
930 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
931 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
932 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
935 ; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
937 ; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
938 ; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
939 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
942 ; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
944 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
945 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
946 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
949 ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
951 ; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
952 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
953 ; SSE41-NEXT: xorps %xmm1, %xmm1
954 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
957 ; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
959 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
960 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
961 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
962 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
965 ; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
967 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
968 ; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
969 ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
970 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
973 ; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
975 ; AVX512VL-NEXT: vmovss {{.*}}(%rip), %xmm1
976 ; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
977 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
978 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
979 ; AVX512VL-NEXT: retq
980 %bitcast32 = bitcast <2 x i64> %x to <4 x float>
981 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
982 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
983 %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
; Return the masked value: without a terminator the block is ill-formed and
; %and would be dead, so none of the checked instructions would be required.
ret <2 x i64> %and
; Inserts the scalar %a into lane 0 and takes lane 1 from a zero vector (mask
; <0,3>). Expected as a single GPR-to-XMM move.
987 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
988 ; SSE-LABEL: insert_reg_and_zero_v2i64:
990 ; SSE-NEXT: movd %rdi, %xmm0
993 ; AVX-LABEL: insert_reg_and_zero_v2i64:
995 ; AVX-NEXT: vmovq %rdi, %xmm0
997 %v = insertelement <2 x i64> undef, i64 %a, i32 0
998 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
999 ret <2 x i64> %shuffle
; Loads an i64 from %ptr, inserts it into lane 0 and takes lane 1 from a zero
; vector (mask <0,3>). Expected as a single movq/vmovq load.
1002 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
1003 ; SSE-LABEL: insert_mem_and_zero_v2i64:
1005 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1008 ; AVX1-LABEL: insert_mem_and_zero_v2i64:
1010 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1013 ; AVX2-LABEL: insert_mem_and_zero_v2i64:
1015 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1018 ; AVX512VL-LABEL: insert_mem_and_zero_v2i64:
1020 ; AVX512VL-NEXT: vmovq (%rdi), %xmm0
1021 ; AVX512VL-NEXT: retq
1022 %a = load i64, i64* %ptr
1023 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1024 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1025 ret <2 x i64> %shuffle
; Inserts the scalar double %a into lane 0 and takes lane 1 from a zero vector
; (mask <0,3>). Expected as a single movq/vmovq on xmm0, which clears the
; upper lane.
1028 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
1029 ; SSE-LABEL: insert_reg_and_zero_v2f64:
1031 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1034 ; AVX1-LABEL: insert_reg_and_zero_v2f64:
1036 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1039 ; AVX2-LABEL: insert_reg_and_zero_v2f64:
1041 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1044 ; AVX512VL-LABEL: insert_reg_and_zero_v2f64:
1046 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
1047 ; AVX512VL-NEXT: retq
1048 %v = insertelement <2 x double> undef, double %a, i32 0
1049 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1050 ret <2 x double> %shuffle
; Loads a double from %ptr, inserts it into lane 0 and takes lane 1 from a
; zero vector (mask <0,3>). Expected as a single movsd/vmovsd load.
1053 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
1054 ; SSE-LABEL: insert_mem_and_zero_v2f64:
1056 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1059 ; AVX1-LABEL: insert_mem_and_zero_v2f64:
1061 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1064 ; AVX2-LABEL: insert_mem_and_zero_v2f64:
1066 ; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1069 ; AVX512VL-LABEL: insert_mem_and_zero_v2f64:
1071 ; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
1072 ; AVX512VL-NEXT: retq
1073 %a = load double, double* %ptr
1074 %v = insertelement <2 x double> undef, double %a, i32 0
1075 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1076 ret <2 x double> %shuffle
; Inserts the scalar %a into lane 0 and keeps element 1 of %b in lane 1 (mask
; <0,3>). Expected as a GPR-to-XMM move into xmm1 followed by a blend-style
; instruction into xmm0.
1079 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
1080 ; SSE2-LABEL: insert_reg_lo_v2i64:
1082 ; SSE2-NEXT: movd %rdi, %xmm1
1083 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1086 ; SSE3-LABEL: insert_reg_lo_v2i64:
1088 ; SSE3-NEXT: movd %rdi, %xmm1
1089 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1092 ; SSSE3-LABEL: insert_reg_lo_v2i64:
1094 ; SSSE3-NEXT: movd %rdi, %xmm1
1095 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1098 ; SSE41-LABEL: insert_reg_lo_v2i64:
1100 ; SSE41-NEXT: movd %rdi, %xmm1
1101 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1104 ; AVX1-LABEL: insert_reg_lo_v2i64:
1106 ; AVX1-NEXT: vmovq %rdi, %xmm1
1107 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1110 ; AVX2-LABEL: insert_reg_lo_v2i64:
1112 ; AVX2-NEXT: vmovq %rdi, %xmm1
1113 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1116 ; AVX512VL-LABEL: insert_reg_lo_v2i64:
1118 ; AVX512VL-NEXT: vmovq %rdi, %xmm1
1119 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1120 ; AVX512VL-NEXT: retq
1121 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1122 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1123 ret <2 x i64> %shuffle
; Loads an i64 from %ptr into lane 0 and keeps element 1 of %b in lane 1 (mask
; <0,3>). The runs before SSE4.1 expect a single movlpd from memory; the later
; runs expect a movq/vmovq load followed by a blend.
1126 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
1127 ; SSE2-LABEL: insert_mem_lo_v2i64:
1129 ; SSE2-NEXT: movlpd (%rdi), %xmm0
1132 ; SSE3-LABEL: insert_mem_lo_v2i64:
1134 ; SSE3-NEXT: movlpd (%rdi), %xmm0
1137 ; SSSE3-LABEL: insert_mem_lo_v2i64:
1139 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
1142 ; SSE41-LABEL: insert_mem_lo_v2i64:
1144 ; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1145 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1148 ; AVX1-LABEL: insert_mem_lo_v2i64:
1150 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1151 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1154 ; AVX2-LABEL: insert_mem_lo_v2i64:
1156 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1157 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1160 ; AVX512VL-LABEL: insert_mem_lo_v2i64:
1162 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1163 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1164 ; AVX512VL-NEXT: retq
1165 %a = load i64, i64* %ptr
1166 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1167 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1168 ret <2 x i64> %shuffle
; Insert a scalar i64 that arrives in a register into the high lane of %b.
; The mask <2, 0> picks lane 0 of %b followed by lane 0 of %v (the inserted
; %a), so the result is <%b[0], %a>.
; Expected lowering per the checks below: movd + punpcklqdq for every SSE run,
; and vmovq + vpunpcklqdq for every AVX run.
1171 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1172 ; SSE-LABEL: insert_reg_hi_v2i64:
1174 ; SSE-NEXT: movd %rdi, %xmm1
1175 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1178 ; AVX-LABEL: insert_reg_hi_v2i64:
1180 ; AVX-NEXT: vmovq %rdi, %xmm1
1181 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1183 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1184 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1185 ret <2 x i64> %shuffle
; Memory-operand variant of the high-lane i64 insert. The scalar is loaded
; from %ptr and combined with %b via mask <2, 0>, giving <%b[0], load(%ptr)>.
; Expected lowering per the checks below: a movq load + punpcklqdq for the SSE
; runs, and a vmovq load + vpunpcklqdq for AVX1, AVX2 and AVX512VL (the
; AVX512VL check spells the load as a plain "vmovq (%rdi), %xmm1").
1188 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1189 ; SSE-LABEL: insert_mem_hi_v2i64:
1191 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1192 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1195 ; AVX1-LABEL: insert_mem_hi_v2i64:
1197 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1198 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1201 ; AVX2-LABEL: insert_mem_hi_v2i64:
1203 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1204 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1207 ; AVX512VL-LABEL: insert_mem_hi_v2i64:
1209 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1210 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1211 ; AVX512VL-NEXT: retq
1212 %a = load i64, i64* %ptr
1213 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1214 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1215 ret <2 x i64> %shuffle
; Insert a scalar double that arrives in a register into the low lane of %b.
; The mask <0, 3> yields <%a, %b[1]>.
; Expected lowering per the checks below: the SSE runs merge with movsd into
; xmm1 and then copy back to xmm0 with movapd; AVX1 and AVX2 need only a
; single three-operand vmovsd; AVX512VL checks the unannotated form
; "vmovsd %xmm0, %xmm1, %xmm0".
1218 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1219 ; SSE-LABEL: insert_reg_lo_v2f64:
1221 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1222 ; SSE-NEXT: movapd %xmm1, %xmm0
1225 ; AVX1-LABEL: insert_reg_lo_v2f64:
1227 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1230 ; AVX2-LABEL: insert_reg_lo_v2f64:
1232 ; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1235 ; AVX512VL-LABEL: insert_reg_lo_v2f64:
1237 ; AVX512VL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
1238 ; AVX512VL-NEXT: retq
1239 %v = insertelement <2 x double> undef, double %a, i32 0
1240 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1241 ret <2 x double> %shuffle
; Memory-operand variant of the low-lane double insert. The scalar is loaded
; from %ptr and merged with %b via mask <0, 3>, giving <load(%ptr), %b[1]>.
; Expected lowering per the checks below: the load and insert fold into a
; single movlpd for the SSE runs and a single vmovlpd for the AVX runs.
1244 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1245 ; SSE-LABEL: insert_mem_lo_v2f64:
1247 ; SSE-NEXT: movlpd (%rdi), %xmm0
1250 ; AVX-LABEL: insert_mem_lo_v2f64:
1252 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
1254 %a = load double, double* %ptr
1255 %v = insertelement <2 x double> undef, double %a, i32 0
1256 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1257 ret <2 x double> %shuffle
; Insert a scalar double that arrives in a register into the high lane of %b.
; The mask <2, 0> yields <%b[0], %a>.
; Expected lowering per the checks below: the SSE runs use unpcklpd into xmm1
; and then movapd to move the result into xmm0; the AVX runs need only a
; single three-operand vunpcklpd.
1260 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1261 ; SSE-LABEL: insert_reg_hi_v2f64:
1263 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1264 ; SSE-NEXT: movapd %xmm1, %xmm0
1267 ; AVX-LABEL: insert_reg_hi_v2f64:
1269 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1271 %v = insertelement <2 x double> undef, double %a, i32 0
1272 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1273 ret <2 x double> %shuffle
; Memory-operand variant of the high-lane double insert. The scalar is loaded
; from %ptr and merged with %b via mask <2, 0>, giving <%b[0], load(%ptr)>.
; Expected lowering per the checks below: the load and insert fold into a
; single movhpd for the SSE runs and a single vmovhpd for the AVX runs.
1276 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1277 ; SSE-LABEL: insert_mem_hi_v2f64:
1279 ; SSE-NEXT: movhpd (%rdi), %xmm0
1282 ; AVX-LABEL: insert_mem_hi_v2f64:
1284 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1286 %a = load double, double* %ptr
1287 %v = insertelement <2 x double> undef, double %a, i32 0
1288 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 2, i32 0>
1289 ret <2 x double> %shuffle
; Splat a scalar double held in a register across both lanes. The scalar is
; inserted into lane 0 and the mask <0, 0> (second operand undef) duplicates
; that lane, giving <%a, %a>.
; Expected lowering per the checks below: movlhps on the plain SSE2 run,
; movddup once SSE3 is available (SSE3, SSSE3, SSE4.1 runs), and vmovddup on
; the AVX runs.
1292 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1293 ; SSE2-LABEL: insert_dup_reg_v2f64:
1295 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1298 ; SSE3-LABEL: insert_dup_reg_v2f64:
1300 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1303 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1305 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1308 ; SSE41-LABEL: insert_dup_reg_v2f64:
1310 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1313 ; AVX-LABEL: insert_dup_reg_v2f64:
1315 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1317 %v = insertelement <2 x double> undef, double %a, i32 0
1318 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1319 ret <2 x double> %shuffle
; Splat a scalar double loaded from memory across both lanes. The loaded value
; is inserted into lane 0 and duplicated by mask <0, 0>.
; Expected lowering per the checks below: the SSE2 run needs a movsd load
; followed by movlhps; the SSE3, SSSE3 and SSE4.1 runs fold the load into a
; single movddup with a memory operand; the AVX runs do the same with
; vmovddup.
1322 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1323 ; SSE2-LABEL: insert_dup_mem_v2f64:
1325 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1326 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1329 ; SSE3-LABEL: insert_dup_mem_v2f64:
1331 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1334 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1336 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1339 ; SSE41-LABEL: insert_dup_mem_v2f64:
1341 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1344 ; AVX-LABEL: insert_dup_mem_v2f64:
1346 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1348 %a = load double, double* %ptr
1349 %v = insertelement <2 x double> undef, double %a, i32 0
1350 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1351 ret <2 x double> %shuffle
; Splat lane 0 of a full <2 x double> vector loaded from memory. Unlike the
; scalar-load variant, the IR loads all 128 bits and only then applies the
; mask <0, 0>; the function is also marked nounwind.
; Expected lowering per the checks below: the SSE2 run loads the whole vector
; with movaps and then uses movlhps; the SSE3, SSSE3 and SSE4.1 runs use a
; single movddup with a memory operand; the AVX runs use vmovddup with a
; memory operand.
1354 define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
1355 ; SSE2-LABEL: insert_dup_mem128_v2f64:
1357 ; SSE2-NEXT: movaps (%rdi), %xmm0
1358 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1361 ; SSE3-LABEL: insert_dup_mem128_v2f64:
1363 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1366 ; SSSE3-LABEL: insert_dup_mem128_v2f64:
1368 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1371 ; SSE41-LABEL: insert_dup_mem128_v2f64:
1373 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1376 ; AVX-LABEL: insert_dup_mem128_v2f64:
1378 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1380 %v = load <2 x double>, <2 x double>* %ptr
1381 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1382 ret <2 x double> %shuffle
; Splat a scalar i64 loaded from memory across both lanes. The load is
; explicitly "align 1"; the value is inserted into lane 0 and the
; zeroinitializer mask (<0, 0>) duplicates that lane.
; Expected lowering per the checks below: a movq load + pshufd for the SSE
; runs, a vmovq load + vpshufd with AVX1, and a single vpbroadcastq with a
; memory operand with AVX2 and AVX512VL.
1386 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1387 ; SSE-LABEL: insert_dup_mem_v2i64:
1389 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1390 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1393 ; AVX1-LABEL: insert_dup_mem_v2i64:
1395 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1396 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1399 ; AVX2-LABEL: insert_dup_mem_v2i64:
1401 ; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
1404 ; AVX512VL-LABEL: insert_dup_mem_v2i64:
1406 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %xmm0
1407 ; AVX512VL-NEXT: retq
1408 %tmp = load i64, i64* %ptr, align 1
1409 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1410 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
; Return the splat; the block needs a terminator and the checks expect retq.
ret <2 x i64> %tmp2
; Swap the two lanes of a <2 x double> vector loaded from memory. The mask
; <1, 0> with an undef second operand yields <%a[1], %a[0]>.
; Expected lowering per the checks below: the SSE runs load with movapd and
; then swap with shufpd; the AVX runs fold the load into a single vpermilpd
; with a memory operand.
1414 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1415 ; SSE-LABEL: shuffle_mem_v2f64_10:
1417 ; SSE-NEXT: movapd (%rdi), %xmm0
1418 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1421 ; AVX-LABEL: shuffle_mem_v2f64_10:
1423 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1426 %a = load <2 x double>, <2 x double>* %ptr
1427 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1428 ret <2 x double> %shuffle