1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
; Module-level target description. The triple below is the same one the llc
; command lines at the top of the file pass via -mtriple.
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10 target triple = "x86_64-unknown-unknown"
; Splat of element 0 of %a (mask <0, 0>); %b is not selected by the mask.
; The checks expect pshufd/vpshufd with dword pattern [0,1,0,1] for the SSE and
; AVX1 prefixes, and vpbroadcastq for the AVX2 and AVX512VL prefixes.
12 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
13 ; SSE-LABEL: shuffle_v2i64_00:
15 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
18 ; AVX1-LABEL: shuffle_v2i64_00:
20 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
23 ; AVX2-LABEL: shuffle_v2i64_00:
25 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
28 ; AVX512VL-LABEL: shuffle_v2i64_00:
30 ; AVX512VL-NEXT: vpbroadcastq %xmm0, %xmm0
32 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
33 ret <2 x i64> %shuffle
; Swaps the two i64 elements of %a (mask <1, 0>); %b is not selected.
; Expected to lower to a single pshufd/vpshufd with dword pattern [2,3,0,1].
35 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
36 ; SSE-LABEL: shuffle_v2i64_10:
38 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
41 ; AVX-LABEL: shuffle_v2i64_10:
43 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
45 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
46 ret <2 x i64> %shuffle
; Splat of element 1 of %a (mask <1, 1>); %b is not selected.
; Expected to lower to pshufd/vpshufd with dword pattern [2,3,2,3].
48 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
49 ; SSE-LABEL: shuffle_v2i64_11:
51 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
54 ; AVX-LABEL: shuffle_v2i64_11:
56 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
58 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
59 ret <2 x i64> %shuffle
; Splat of element 0 of %b (mask <2, 2>; indices 2 and 3 address the second
; shufflevector operand). The checks read xmm1 and write xmm0, using
; pshufd/vpshufd for the SSE and AVX1 prefixes and vpbroadcastq for AVX2 and
; AVX512VL.
61 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
62 ; SSE-LABEL: shuffle_v2i64_22:
64 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
67 ; AVX1-LABEL: shuffle_v2i64_22:
69 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
72 ; AVX2-LABEL: shuffle_v2i64_22:
74 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
77 ; AVX512VL-LABEL: shuffle_v2i64_22:
79 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm0
81 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
82 ret <2 x i64> %shuffle
; Swaps the two i64 elements of %b (mask <3, 2>); %a is not selected.
; Expected to lower to pshufd/vpshufd reading xmm1 with pattern [2,3,0,1].
84 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
85 ; SSE-LABEL: shuffle_v2i64_32:
87 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
90 ; AVX-LABEL: shuffle_v2i64_32:
92 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
94 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
95 ret <2 x i64> %shuffle
; Splat of element 1 of %b (mask <3, 3>); %a is not selected.
; Expected to lower to pshufd/vpshufd reading xmm1 with pattern [2,3,2,3].
97 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
98 ; SSE-LABEL: shuffle_v2i64_33:
100 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
103 ; AVX-LABEL: shuffle_v2i64_33:
105 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
107 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
108 ret <2 x i64> %shuffle
; Double-precision splat of element 0 of %a (mask <0, 0>).
; The checks expect movlhps under the SSE2 prefix, movddup under the SSE3,
; SSSE3 and SSE41 prefixes, and vmovddup under the AVX prefix.
111 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
112 ; SSE2-LABEL: shuffle_v2f64_00:
114 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
117 ; SSE3-LABEL: shuffle_v2f64_00:
119 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
122 ; SSSE3-LABEL: shuffle_v2f64_00:
124 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
127 ; SSE41-LABEL: shuffle_v2f64_00:
129 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
132 ; AVX-LABEL: shuffle_v2f64_00:
134 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
136 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
137 ret <2 x double> %shuffle
; Swaps the two double elements of %a (mask <1, 0>).
; The checks expect shufpd for the SSE prefix and vpermilpd for the AVX prefix.
139 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
140 ; SSE-LABEL: shuffle_v2f64_10:
142 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
145 ; AVX-LABEL: shuffle_v2f64_10:
147 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
150 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
151 ret <2 x double> %shuffle
; Double-precision splat of element 1 of %a (mask <1, 1>).
; All prefixes expect movhlps/vmovhlps; the AVX512VL checks match the raw
; three-operand form instead of the decoded shuffle comment.
153 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
154 ; SSE-LABEL: shuffle_v2f64_11:
156 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
159 ; AVX1-LABEL: shuffle_v2f64_11:
161 ; AVX1-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
164 ; AVX2-LABEL: shuffle_v2f64_11:
166 ; AVX2-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
169 ; AVX512VL-LABEL: shuffle_v2f64_11:
171 ; AVX512VL-NEXT: vmovhlps %xmm0, %xmm0, %xmm0
172 ; AVX512VL-NEXT: retq
173 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
174 ret <2 x double> %shuffle
; Double-precision splat of element 0 of %b (mask <2, 2>).
; Under the SSE2 prefix the checks expect an in-place movlhps on xmm1 followed
; by a movaps copy into xmm0; the SSE3, SSSE3, SSE41 and AVX prefixes expect a
; single movddup/vmovddup from xmm1 into xmm0.
176 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
177 ; SSE2-LABEL: shuffle_v2f64_22:
179 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
180 ; SSE2-NEXT: movaps %xmm1, %xmm0
183 ; SSE3-LABEL: shuffle_v2f64_22:
185 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
188 ; SSSE3-LABEL: shuffle_v2f64_22:
190 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
193 ; SSE41-LABEL: shuffle_v2f64_22:
195 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
198 ; AVX-LABEL: shuffle_v2f64_22:
200 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
202 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
203 ret <2 x double> %shuffle
; Swaps the two double elements of %b (mask <3, 2>).
; The SSE checks expect shufpd in place on xmm1 plus a movapd copy into xmm0;
; the AVX checks expect one vpermilpd from xmm1 into xmm0.
205 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
206 ; SSE-LABEL: shuffle_v2f64_32:
208 ; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
209 ; SSE-NEXT: movapd %xmm1, %xmm0
212 ; AVX-LABEL: shuffle_v2f64_32:
214 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
217 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
218 ret <2 x double> %shuffle
; Double-precision splat of element 1 of %b (mask <3, 3>).
; The SSE checks expect movhlps in place on xmm1 plus a movaps copy into xmm0;
; the AVX-family checks expect a single vmovhlps from xmm1 into xmm0.
220 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
221 ; SSE-LABEL: shuffle_v2f64_33:
223 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
224 ; SSE-NEXT: movaps %xmm1, %xmm0
227 ; AVX1-LABEL: shuffle_v2f64_33:
229 ; AVX1-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
232 ; AVX2-LABEL: shuffle_v2f64_33:
234 ; AVX2-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
237 ; AVX512VL-LABEL: shuffle_v2f64_33:
239 ; AVX512VL-NEXT: vmovhlps %xmm1, %xmm1, %xmm0
240 ; AVX512VL-NEXT: retq
241 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
242 ret <2 x double> %shuffle
; Two-input blend: element 0 from %a, element 1 from %b (mask <0, 3>).
; The SSE2, SSE3 and SSSE3 checks expect movsd into xmm1 followed by a movapd
; copy to xmm0; the SSE41 and AVX checks expect blendpd/vblendpd directly.
244 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
245 ; SSE2-LABEL: shuffle_v2f64_03:
247 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
248 ; SSE2-NEXT: movapd %xmm1, %xmm0
251 ; SSE3-LABEL: shuffle_v2f64_03:
253 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
254 ; SSE3-NEXT: movapd %xmm1, %xmm0
257 ; SSSE3-LABEL: shuffle_v2f64_03:
259 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
260 ; SSSE3-NEXT: movapd %xmm1, %xmm0
263 ; SSE41-LABEL: shuffle_v2f64_03:
265 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
268 ; AVX-LABEL: shuffle_v2f64_03:
270 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
272 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
273 ret <2 x double> %shuffle
; Two-input blend: element 0 from %b, element 1 from %a (mask <2, 1>).
; The SSE2, SSE3 and SSSE3 checks expect a single movsd into xmm0; the SSE41
; and AVX checks expect blendpd/vblendpd.
275 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
276 ; SSE2-LABEL: shuffle_v2f64_21:
278 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
281 ; SSE3-LABEL: shuffle_v2f64_21:
283 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
286 ; SSSE3-LABEL: shuffle_v2f64_21:
288 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
291 ; SSE41-LABEL: shuffle_v2f64_21:
293 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
296 ; AVX-LABEL: shuffle_v2f64_21:
298 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
300 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
301 ret <2 x double> %shuffle
; Combines the low elements of both inputs: element 0 of %a, then element 0 of
; %b (mask <0, 2>). Expected to lower to punpcklqdq/vpunpcklqdq.
305 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
306 ; SSE-LABEL: shuffle_v2i64_02:
308 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
311 ; AVX-LABEL: shuffle_v2i64_02:
313 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
315 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
316 ret <2 x i64> %shuffle
; Mask <0, 2> again, but with an unused leading %nonce argument, so the checks
; show the shuffle inputs in xmm1/xmm2 rather than xmm0/xmm1. The SSE checks
; expect punpcklqdq in place on xmm1 plus a movdqa copy into xmm0; the AVX
; checks expect vpunpcklqdq writing xmm0 directly.
318 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
319 ; SSE-LABEL: shuffle_v2i64_02_copy:
321 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
322 ; SSE-NEXT: movdqa %xmm1, %xmm0
325 ; AVX-LABEL: shuffle_v2i64_02_copy:
327 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
329 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
330 ret <2 x i64> %shuffle
; Integer blend: element 0 from %a, element 1 from %b (mask <0, 3>).
; Expected lowering per prefix: movsd into xmm1 plus movapd copy for SSE2, SSE3
; and SSSE3; pblendw/vpblendw for SSE41 and AVX1; vpblendd for AVX2 and
; AVX512VL.
332 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
333 ; SSE2-LABEL: shuffle_v2i64_03:
335 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
336 ; SSE2-NEXT: movapd %xmm1, %xmm0
339 ; SSE3-LABEL: shuffle_v2i64_03:
341 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
342 ; SSE3-NEXT: movapd %xmm1, %xmm0
345 ; SSSE3-LABEL: shuffle_v2i64_03:
347 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
348 ; SSSE3-NEXT: movapd %xmm1, %xmm0
351 ; SSE41-LABEL: shuffle_v2i64_03:
353 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
356 ; AVX1-LABEL: shuffle_v2i64_03:
358 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
361 ; AVX2-LABEL: shuffle_v2i64_03:
363 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
366 ; AVX512VL-LABEL: shuffle_v2i64_03:
368 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
369 ; AVX512VL-NEXT: retq
370 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
371 ret <2 x i64> %shuffle
; Mask <0, 3> blend with an unused leading %nonce argument, so the checks show
; the inputs in xmm1/xmm2. Every SSE-family prefix expects an in-place
; blend (movsd or pblendw) followed by a register copy into xmm0; the AVX-family
; prefixes expect vpblendw/vpblendd writing xmm0 directly.
373 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
374 ; SSE2-LABEL: shuffle_v2i64_03_copy:
376 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
377 ; SSE2-NEXT: movapd %xmm2, %xmm0
380 ; SSE3-LABEL: shuffle_v2i64_03_copy:
382 ; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
383 ; SSE3-NEXT: movapd %xmm2, %xmm0
386 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
388 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
389 ; SSSE3-NEXT: movapd %xmm2, %xmm0
392 ; SSE41-LABEL: shuffle_v2i64_03_copy:
394 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
395 ; SSE41-NEXT: movdqa %xmm1, %xmm0
398 ; AVX1-LABEL: shuffle_v2i64_03_copy:
400 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
403 ; AVX2-LABEL: shuffle_v2i64_03_copy:
405 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
408 ; AVX512VL-LABEL: shuffle_v2i64_03_copy:
410 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
411 ; AVX512VL-NEXT: retq
412 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
413 ret <2 x i64> %shuffle
; Element 1 of %a followed by element 0 of %b (mask <1, 2>).
; Expected lowering per prefix: shufpd for SSE2 and SSE3; palignr into xmm1 plus
; a movdqa copy for SSSE3 and SSE41; a single vpalignr for AVX.
415 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
416 ; SSE2-LABEL: shuffle_v2i64_12:
418 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
421 ; SSE3-LABEL: shuffle_v2i64_12:
423 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
426 ; SSSE3-LABEL: shuffle_v2i64_12:
428 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
429 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
432 ; SSE41-LABEL: shuffle_v2i64_12:
434 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
435 ; SSE41-NEXT: movdqa %xmm1, %xmm0
438 ; AVX-LABEL: shuffle_v2i64_12:
440 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
442 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
443 ret <2 x i64> %shuffle
; Mask <1, 2> with an unused leading %nonce argument, so the checks show the
; inputs in xmm1/xmm2. Every SSE-family prefix expects the shuffle (shufpd or
; palignr) done in place followed by a copy into xmm0; the AVX prefix expects a
; single vpalignr writing xmm0.
445 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
446 ; SSE2-LABEL: shuffle_v2i64_12_copy:
448 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
449 ; SSE2-NEXT: movapd %xmm1, %xmm0
452 ; SSE3-LABEL: shuffle_v2i64_12_copy:
454 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
455 ; SSE3-NEXT: movapd %xmm1, %xmm0
458 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
460 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
461 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
464 ; SSE41-LABEL: shuffle_v2i64_12_copy:
466 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
467 ; SSE41-NEXT: movdqa %xmm2, %xmm0
470 ; AVX-LABEL: shuffle_v2i64_12_copy:
472 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
474 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
475 ret <2 x i64> %shuffle
; Combines the high elements of both inputs: element 1 of %a, then element 1 of
; %b (mask <1, 3>). Expected to lower to punpckhqdq/vpunpckhqdq.
477 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
478 ; SSE-LABEL: shuffle_v2i64_13:
480 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
483 ; AVX-LABEL: shuffle_v2i64_13:
485 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
487 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
488 ret <2 x i64> %shuffle
; Mask <1, 3> with an unused leading %nonce argument, so the checks show the
; inputs in xmm1/xmm2. The SSE checks expect punpckhqdq in place on xmm1 plus a
; movdqa copy into xmm0; the AVX checks expect vpunpckhqdq writing xmm0.
490 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
491 ; SSE-LABEL: shuffle_v2i64_13_copy:
493 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
494 ; SSE-NEXT: movdqa %xmm1, %xmm0
497 ; AVX-LABEL: shuffle_v2i64_13_copy:
499 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
501 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
502 ret <2 x i64> %shuffle
; Element 0 of %b followed by element 0 of %a (mask <2, 0>), i.e. the low
; elements with the operand order reversed. The SSE checks expect punpcklqdq in
; place on xmm1 plus a movdqa copy into xmm0; the AVX checks expect one
; vpunpcklqdq with xmm1 as the first source.
504 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
505 ; SSE-LABEL: shuffle_v2i64_20:
507 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
508 ; SSE-NEXT: movdqa %xmm1, %xmm0
511 ; AVX-LABEL: shuffle_v2i64_20:
513 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
515 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
516 ret <2 x i64> %shuffle
; Mask <2, 0> with an unused leading %nonce argument, so the checks show the
; inputs in xmm1/xmm2. The SSE checks expect punpcklqdq in place on xmm2 plus a
; movdqa copy into xmm0; the AVX checks expect vpunpcklqdq writing xmm0.
518 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
519 ; SSE-LABEL: shuffle_v2i64_20_copy:
521 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
522 ; SSE-NEXT: movdqa %xmm2, %xmm0
525 ; AVX-LABEL: shuffle_v2i64_20_copy:
527 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
529 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
530 ret <2 x i64> %shuffle
; Integer blend: element 0 from %b, element 1 from %a (mask <2, 1>).
; Expected lowering per prefix: a single movsd into xmm0 for SSE2, SSE3 and
; SSSE3; pblendw/vpblendw for SSE41 and AVX1; vpblendd for AVX2 and AVX512VL.
532 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
533 ; SSE2-LABEL: shuffle_v2i64_21:
535 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
538 ; SSE3-LABEL: shuffle_v2i64_21:
540 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
543 ; SSSE3-LABEL: shuffle_v2i64_21:
545 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
548 ; SSE41-LABEL: shuffle_v2i64_21:
550 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
553 ; AVX1-LABEL: shuffle_v2i64_21:
555 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
558 ; AVX2-LABEL: shuffle_v2i64_21:
560 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
563 ; AVX512VL-LABEL: shuffle_v2i64_21:
565 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
566 ; AVX512VL-NEXT: retq
567 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
568 ret <2 x i64> %shuffle
; Mask <2, 1> blend with an unused leading %nonce argument, so the checks show
; the inputs in xmm1/xmm2. Every SSE-family prefix expects an in-place blend on
; xmm1 (movsd or pblendw) followed by a copy into xmm0; the AVX-family prefixes
; expect vpblendw/vpblendd writing xmm0 directly.
570 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
571 ; SSE2-LABEL: shuffle_v2i64_21_copy:
573 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
574 ; SSE2-NEXT: movapd %xmm1, %xmm0
577 ; SSE3-LABEL: shuffle_v2i64_21_copy:
579 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
580 ; SSE3-NEXT: movapd %xmm1, %xmm0
583 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
585 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
586 ; SSSE3-NEXT: movapd %xmm1, %xmm0
589 ; SSE41-LABEL: shuffle_v2i64_21_copy:
591 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
592 ; SSE41-NEXT: movdqa %xmm1, %xmm0
595 ; AVX1-LABEL: shuffle_v2i64_21_copy:
597 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
600 ; AVX2-LABEL: shuffle_v2i64_21_copy:
602 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
605 ; AVX512VL-LABEL: shuffle_v2i64_21_copy:
607 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
608 ; AVX512VL-NEXT: retq
609 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
610 ret <2 x i64> %shuffle
; Element 1 of %b followed by element 0 of %a (mask <3, 0>).
; Expected lowering per prefix: shufpd in place on xmm1 plus a movapd copy for
; SSE2 and SSE3; a single palignr into xmm0 for SSSE3 and SSE41; vpalignr for
; AVX.
612 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
613 ; SSE2-LABEL: shuffle_v2i64_30:
615 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
616 ; SSE2-NEXT: movapd %xmm1, %xmm0
619 ; SSE3-LABEL: shuffle_v2i64_30:
621 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
622 ; SSE3-NEXT: movapd %xmm1, %xmm0
625 ; SSSE3-LABEL: shuffle_v2i64_30:
627 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
630 ; SSE41-LABEL: shuffle_v2i64_30:
632 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
635 ; AVX-LABEL: shuffle_v2i64_30:
637 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
639 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
640 ret <2 x i64> %shuffle
; Mask <3, 0> with an unused leading %nonce argument, so the checks show the
; inputs in xmm1/xmm2. Every SSE-family prefix expects the shuffle (shufpd or
; palignr) done in place followed by a copy into xmm0; the AVX prefix expects a
; single vpalignr writing xmm0.
642 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
643 ; SSE2-LABEL: shuffle_v2i64_30_copy:
645 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
646 ; SSE2-NEXT: movapd %xmm2, %xmm0
649 ; SSE3-LABEL: shuffle_v2i64_30_copy:
651 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
652 ; SSE3-NEXT: movapd %xmm2, %xmm0
655 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
657 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
658 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
661 ; SSE41-LABEL: shuffle_v2i64_30_copy:
663 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
664 ; SSE41-NEXT: movdqa %xmm1, %xmm0
667 ; AVX-LABEL: shuffle_v2i64_30_copy:
669 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
671 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
672 ret <2 x i64> %shuffle
; Element 1 of %b followed by element 1 of %a (mask <3, 1>), i.e. the high
; elements with the operand order reversed. The SSE checks expect punpckhqdq in
; place on xmm1 plus a movdqa copy into xmm0; the AVX checks expect one
; vpunpckhqdq with xmm1 as the first source.
674 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
675 ; SSE-LABEL: shuffle_v2i64_31:
677 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
678 ; SSE-NEXT: movdqa %xmm1, %xmm0
681 ; AVX-LABEL: shuffle_v2i64_31:
683 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
685 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
686 ret <2 x i64> %shuffle
; Mask <3, 1> with an unused leading %nonce argument, so the checks show the
; inputs in xmm1/xmm2. The SSE checks expect punpckhqdq in place on xmm2 plus a
; movdqa copy into xmm0; the AVX checks expect vpunpckhqdq writing xmm0.
688 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
689 ; SSE-LABEL: shuffle_v2i64_31_copy:
691 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
692 ; SSE-NEXT: movdqa %xmm2, %xmm0
695 ; AVX-LABEL: shuffle_v2i64_31_copy:
697 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
699 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
700 ret <2 x i64> %shuffle
; Shuffle against a zero vector: keeps element 0 of %a and takes element 1 from
; zeroinitializer (mask <0, 3>). Every prefix expects a register-to-register
; movq/vmovq; the AVX512VL checks match the raw operand form.
703 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
704 ; SSE-LABEL: shuffle_v2i64_0z:
706 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
709 ; AVX1-LABEL: shuffle_v2i64_0z:
711 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
714 ; AVX2-LABEL: shuffle_v2i64_0z:
716 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
719 ; AVX512VL-LABEL: shuffle_v2i64_0z:
721 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
722 ; AVX512VL-NEXT: retq
723 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
724 ret <2 x i64> %shuffle
; Shuffle against a zero vector: element 1 of %a moves to position 0 and the
; upper element is zero (mask <1, 3>). Expected to lower to a byte shift right,
; psrldq/vpsrldq.
727 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
728 ; SSE-LABEL: shuffle_v2i64_1z:
730 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
733 ; AVX-LABEL: shuffle_v2i64_1z:
735 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
737 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
738 ret <2 x i64> %shuffle
; Shuffle against a zero vector: zero in position 0 and element 0 of %a in
; position 1 (mask <2, 0>). Expected to lower to a byte shift left,
; pslldq/vpslldq.
741 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
742 ; SSE-LABEL: shuffle_v2i64_z0:
744 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
747 ; AVX-LABEL: shuffle_v2i64_z0:
749 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
751 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
752 ret <2 x i64> %shuffle
; Shuffle against a zero vector: zero in position 0, element 1 of %a kept in
; position 1 (mask <2, 1>). Every prefix expects xmm1 to be zeroed first
; (xorpd, pxor or vpxor) and then blended into the low half of xmm0 with movsd,
; pblendw/vpblendw or vpblendd.
755 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
756 ; SSE2-LABEL: shuffle_v2i64_z1:
758 ; SSE2-NEXT: xorpd %xmm1, %xmm1
759 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
762 ; SSE3-LABEL: shuffle_v2i64_z1:
764 ; SSE3-NEXT: xorpd %xmm1, %xmm1
765 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
768 ; SSSE3-LABEL: shuffle_v2i64_z1:
770 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
771 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
774 ; SSE41-LABEL: shuffle_v2i64_z1:
776 ; SSE41-NEXT: pxor %xmm1, %xmm1
777 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
780 ; AVX1-LABEL: shuffle_v2i64_z1:
782 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
783 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
786 ; AVX2-LABEL: shuffle_v2i64_z1:
788 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
789 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
792 ; AVX512VL-LABEL: shuffle_v2i64_z1:
794 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
795 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
796 ; AVX512VL-NEXT: retq
797 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
798 ret <2 x i64> %shuffle
; Double-precision shuffle against a zero vector: keeps element 0 of %a and
; zeroes the upper element (mask <0, 3>). Every prefix expects a
; register-to-register movq/vmovq; the AVX512VL checks match the raw operand
; form.
801 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
802 ; SSE-LABEL: shuffle_v2f64_0z:
804 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
807 ; AVX1-LABEL: shuffle_v2f64_0z:
809 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
812 ; AVX2-LABEL: shuffle_v2f64_0z:
814 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
817 ; AVX512VL-LABEL: shuffle_v2f64_0z:
819 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
820 ; AVX512VL-NEXT: retq
821 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
822 ret <2 x double> %shuffle
; Double-precision shuffle against a zero vector: element 1 of %a moves to
; position 0 and the upper element is zero (mask <1, 3>). Every prefix expects
; xmm1 to be zeroed and then unpckhpd/vunpckhpd; the AVX512VL checks expect the
; zeroing to be done with vxorps rather than vxorpd.
825 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
826 ; SSE-LABEL: shuffle_v2f64_1z:
828 ; SSE-NEXT: xorpd %xmm1, %xmm1
829 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
832 ; AVX1-LABEL: shuffle_v2f64_1z:
834 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
835 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
838 ; AVX2-LABEL: shuffle_v2f64_1z:
840 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
841 ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
844 ; AVX512VL-LABEL: shuffle_v2f64_1z:
846 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
847 ; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
848 ; AVX512VL-NEXT: retq
849 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
850 ret <2 x double> %shuffle
; Double-precision shuffle against a zero vector: zero in position 0 and
; element 0 of %a in position 1 (mask <2, 0>). The SSE checks expect xorpd,
; unpcklpd in place on xmm1 and a movapd copy into xmm0; the AVX-family checks
; expect a zeroing xor followed by vunpcklpd writing xmm0.
853 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
854 ; SSE-LABEL: shuffle_v2f64_z0:
856 ; SSE-NEXT: xorpd %xmm1, %xmm1
857 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
858 ; SSE-NEXT: movapd %xmm1, %xmm0
861 ; AVX1-LABEL: shuffle_v2f64_z0:
863 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
864 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
867 ; AVX2-LABEL: shuffle_v2f64_z0:
869 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
870 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
873 ; AVX512VL-LABEL: shuffle_v2f64_z0:
875 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
876 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
877 ; AVX512VL-NEXT: retq
878 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
879 ret <2 x double> %shuffle
; Double-precision shuffle against a zero vector: zero in position 0, element 1
; of %a kept in position 1 (mask <2, 1>). Every prefix expects xmm1 to be zeroed
; with xorpd/vxorpd and then merged into the low half of xmm0: movsd for SSE2,
; SSE3 and SSSE3, blendpd for SSE41, vblendpd for AVX.
882 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
883 ; SSE2-LABEL: shuffle_v2f64_z1:
885 ; SSE2-NEXT: xorpd %xmm1, %xmm1
886 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
889 ; SSE3-LABEL: shuffle_v2f64_z1:
891 ; SSE3-NEXT: xorpd %xmm1, %xmm1
892 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
895 ; SSSE3-LABEL: shuffle_v2f64_z1:
897 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
898 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
901 ; SSE41-LABEL: shuffle_v2f64_z1:
903 ; SSE41-NEXT: xorpd %xmm1, %xmm1
904 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
907 ; AVX-LABEL: shuffle_v2f64_z1:
909 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
910 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
912 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
913 ret <2 x double> %shuffle
; Two shuffles separated by bitcasts. The first (mask <2, 1> against zero)
; produces [zero, a[1]]; the second, done on the <4 x float> view with mask
; <2, 3, 0, 1>, swaps the two 64-bit halves, so the combined result is
; [a[1], zero]. Every prefix expects this to fold into a zeroing xor of xmm1
; plus one shufpd/vshufpd selecting xmm0[1],xmm1[0].
916 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
917 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
919 ; SSE-NEXT: xorpd %xmm1, %xmm1
920 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
923 ; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
925 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
926 ; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
929 ; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
931 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
932 ; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
935 ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
937 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
938 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
939 ; AVX512VL-NEXT: retq
940 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
941 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
942 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
943 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
944 ret <2 x double> %bitcast64
; Views %x as <4 x float>, replaces float lane 0 with the constant 1.0 (mask
; <4, 1, 2, 3> against <1.0, undef, undef, undef>), views the result as
; <2 x i64> again and ANDs it with <-4294967296, -1>, which clears the low 32
; bits of element 0 and keeps everything else.
; The SSE2, SSE3 and SSSE3 checks expect a movss constant load, a movss merge
; and an andps with a constant-pool mask; the SSE41 and AVX-family checks expect
; the merge as blendps/vblendps and the masking as a zeroing xor plus
; pblendw/vpblendw/vpblendd.
947 define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
948 ; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
950 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
951 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
952 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
955 ; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
957 ; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
958 ; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
959 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
962 ; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
964 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
965 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
966 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
969 ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
971 ; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
972 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
973 ; SSE41-NEXT: xorps %xmm1, %xmm1
974 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
977 ; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
979 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
980 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
981 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
982 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
985 ; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
987 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
988 ; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
989 ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
990 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
993 ; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
995 ; AVX512VL-NEXT: vmovss {{.*}}(%rip), %xmm1
996 ; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
997 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
998 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
999 ; AVX512VL-NEXT: retq
1000 %bitcast32 = bitcast <2 x i64> %x to <4 x float>
1001 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1002 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
1003 %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
; Return the masked value; a basic block must end in a terminator instruction.
1004 ret <2 x i64> %and
; Inserts the scalar i64 argument into lane 0 of an undef vector, then shuffles
; against zero (mask <0, 3>) so lane 1 is zero. Expected to lower to a single
; GPR-to-XMM move from %rdi: movd for the SSE prefix, vmovq for the AVX prefix.
1007 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
1008 ; SSE-LABEL: insert_reg_and_zero_v2i64:
1010 ; SSE-NEXT: movd %rdi, %xmm0
1013 ; AVX-LABEL: insert_reg_and_zero_v2i64:
1015 ; AVX-NEXT: vmovq %rdi, %xmm0
1017 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1018 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1019 ret <2 x i64> %shuffle
; Loads an i64 from %ptr, inserts it into lane 0 of an undef vector and shuffles
; against zero (mask <0, 3>) so lane 1 is zero. Every prefix expects the load,
; insert and zeroing to fold into one movq/vmovq from memory.
1022 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
1023 ; SSE-LABEL: insert_mem_and_zero_v2i64:
1025 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1028 ; AVX1-LABEL: insert_mem_and_zero_v2i64:
1030 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1033 ; AVX2-LABEL: insert_mem_and_zero_v2i64:
1035 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1038 ; AVX512VL-LABEL: insert_mem_and_zero_v2i64:
1040 ; AVX512VL-NEXT: vmovq (%rdi), %xmm0
1041 ; AVX512VL-NEXT: retq
1042 %a = load i64, i64* %ptr
1043 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1044 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1045 ret <2 x i64> %shuffle
; Inserts the scalar double argument into lane 0 of an undef vector, then
; shuffles against zero (mask <0, 3>) so lane 1 is zero. Every prefix expects a
; register-to-register movq/vmovq on xmm0.
1048 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
1049 ; SSE-LABEL: insert_reg_and_zero_v2f64:
1051 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1054 ; AVX1-LABEL: insert_reg_and_zero_v2f64:
1056 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1059 ; AVX2-LABEL: insert_reg_and_zero_v2f64:
1061 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1064 ; AVX512VL-LABEL: insert_reg_and_zero_v2f64:
1066 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
1067 ; AVX512VL-NEXT: retq
1068 %v = insertelement <2 x double> undef, double %a, i32 0
1069 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1070 ret <2 x double> %shuffle
; Loads a double from %ptr, inserts it into lane 0 of an undef vector and
; shuffles against zero (mask <0, 3>) so lane 1 is zero. Every prefix expects
; the whole sequence to fold into one movsd/vmovsd from memory.
1073 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
1074 ; SSE-LABEL: insert_mem_and_zero_v2f64:
1076 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1079 ; AVX1-LABEL: insert_mem_and_zero_v2f64:
1081 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1084 ; AVX2-LABEL: insert_mem_and_zero_v2f64:
1086 ; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1089 ; AVX512VL-LABEL: insert_mem_and_zero_v2f64:
1091 ; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
1092 ; AVX512VL-NEXT: retq
1093 %a = load double, double* %ptr
1094 %v = insertelement <2 x double> undef, double %a, i32 0
1095 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1096 ret <2 x double> %shuffle
; Replaces lane 0 of %b with the scalar i64 argument: the scalar is inserted
; into an undef vector and blended with %b using mask <0, 3>. Every prefix
; expects a GPR-to-XMM move of %rdi into xmm1 followed by a blend into xmm0:
; movsd for SSE2, SSE3 and SSSE3, pblendw/vpblendw for SSE41 and AVX1, vpblendd
; for AVX2 and AVX512VL.
1099 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
1100 ; SSE2-LABEL: insert_reg_lo_v2i64:
1102 ; SSE2-NEXT: movd %rdi, %xmm1
1103 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1106 ; SSE3-LABEL: insert_reg_lo_v2i64:
1108 ; SSE3-NEXT: movd %rdi, %xmm1
1109 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1112 ; SSSE3-LABEL: insert_reg_lo_v2i64:
1114 ; SSSE3-NEXT: movd %rdi, %xmm1
1115 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1118 ; SSE41-LABEL: insert_reg_lo_v2i64:
1120 ; SSE41-NEXT: movd %rdi, %xmm1
1121 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1124 ; AVX1-LABEL: insert_reg_lo_v2i64:
1126 ; AVX1-NEXT: vmovq %rdi, %xmm1
1127 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1130 ; AVX2-LABEL: insert_reg_lo_v2i64:
1132 ; AVX2-NEXT: vmovq %rdi, %xmm1
1133 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1136 ; AVX512VL-LABEL: insert_reg_lo_v2i64:
1138 ; AVX512VL-NEXT: vmovq %rdi, %xmm1
1139 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1140 ; AVX512VL-NEXT: retq
1141 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1142 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1143 ret <2 x i64> %shuffle
; Replaces lane 0 of %b with an i64 loaded from %ptr (insert into undef, then
; blend with %b using mask <0, 3>). The SSE2, SSE3 and SSSE3 checks expect a
; single movlpd from memory into xmm0; the SSE41 and AVX-family checks expect a
; movq/vmovq load into xmm1 followed by pblendw/vpblendw/vpblendd.
1146 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
1147 ; SSE2-LABEL: insert_mem_lo_v2i64:
1149 ; SSE2-NEXT: movlpd (%rdi), %xmm0
1152 ; SSE3-LABEL: insert_mem_lo_v2i64:
1154 ; SSE3-NEXT: movlpd (%rdi), %xmm0
1157 ; SSSE3-LABEL: insert_mem_lo_v2i64:
1159 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
1162 ; SSE41-LABEL: insert_mem_lo_v2i64:
1164 ; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1165 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1168 ; AVX1-LABEL: insert_mem_lo_v2i64:
1170 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1171 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1174 ; AVX2-LABEL: insert_mem_lo_v2i64:
1176 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1177 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1180 ; AVX512VL-LABEL: insert_mem_lo_v2i64:
1182 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1183 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1184 ; AVX512VL-NEXT: retq
1185 %a = load i64, i64* %ptr
1186 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1187 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1188 ret <2 x i64> %shuffle
; insert_reg_hi_v2i64 - keep the low i64 lane of %b and place the scalar %a
; (arriving in a general-purpose register) in the high lane.
; Expected lowering per the checks below: the scalar is moved into an xmm
; register and combined with (v)punpcklqdq on every checked configuration.
1191 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1192 ; SSE-LABEL: insert_reg_hi_v2i64:
1194 ; SSE-NEXT: movd %rdi, %xmm1
1195 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1198 ; AVX-LABEL: insert_reg_hi_v2i64:
1200 ; AVX-NEXT: vmovq %rdi, %xmm1
1201 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1203 %v = insertelement <2 x i64> undef, i64 %a, i32 0
; Mask <2, 0> - result lane 0 is %b lane 0, lane 1 is %v lane 0 (the inserted %a).
1204 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1205 ret <2 x i64> %shuffle
; insert_mem_hi_v2i64 - keep the low i64 lane of %b and place an i64 loaded from
; %ptr in the high lane.
; Expected lowering per the checks below: a (v)movq load into a scratch xmm
; register followed by (v)punpcklqdq on every checked configuration.
1208 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1209 ; SSE-LABEL: insert_mem_hi_v2i64:
1211 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1212 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1215 ; AVX1-LABEL: insert_mem_hi_v2i64:
1217 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1218 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1221 ; AVX2-LABEL: insert_mem_hi_v2i64:
1223 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1224 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1227 ; AVX512VL-LABEL: insert_mem_hi_v2i64:
1229 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1230 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1231 ; AVX512VL-NEXT: retq
1232 %a = load i64, i64* %ptr
1233 %v = insertelement <2 x i64> undef, i64 %a, i32 0
; Mask <2, 0> - result lane 0 is %b lane 0, lane 1 is the loaded scalar.
1234 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1235 ret <2 x i64> %shuffle
; insert_reg_lo_v2f64 - replace the low double lane of %b with the scalar %a;
; the high lane of %b is kept.
; Expected lowering per the checks below: the SSE configurations merge into
; xmm1 with movsd and then copy to xmm0 with movapd; the AVX configurations
; need only a single vmovsd.
1238 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1239 ; SSE-LABEL: insert_reg_lo_v2f64:
1241 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1242 ; SSE-NEXT: movapd %xmm1, %xmm0
1245 ; AVX1-LABEL: insert_reg_lo_v2f64:
1247 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1250 ; AVX2-LABEL: insert_reg_lo_v2f64:
1252 ; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1255 ; AVX512VL-LABEL: insert_reg_lo_v2f64:
1257 ; AVX512VL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
1258 ; AVX512VL-NEXT: retq
1259 %v = insertelement <2 x double> undef, double %a, i32 0
; Mask <0, 3> - result lane 0 is %v lane 0 (the inserted %a), lane 1 is %b lane 1.
1260 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1261 ret <2 x double> %shuffle
; insert_mem_lo_v2f64 - replace the low double lane of %b with a double loaded
; from %ptr; the high lane of %b is kept.
; Expected lowering per the checks below: the load is folded into a single
; (v)movlpd on every checked configuration.
1264 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1265 ; SSE-LABEL: insert_mem_lo_v2f64:
1267 ; SSE-NEXT: movlpd (%rdi), %xmm0
1270 ; AVX-LABEL: insert_mem_lo_v2f64:
1272 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
1274 %a = load double, double* %ptr
1275 %v = insertelement <2 x double> undef, double %a, i32 0
; Mask <0, 3> - result lane 0 is the loaded scalar, lane 1 is %b lane 1.
1276 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1277 ret <2 x double> %shuffle
; insert_reg_hi_v2f64 - keep the low double lane of %b and place the scalar %a
; in the high lane.
; Expected lowering per the checks below: the SSE configurations use unpcklpd
; into xmm1 followed by a movapd copy to xmm0; the AVX configurations use a
; single vunpcklpd.
1280 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1281 ; SSE-LABEL: insert_reg_hi_v2f64:
1283 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1284 ; SSE-NEXT: movapd %xmm1, %xmm0
1287 ; AVX-LABEL: insert_reg_hi_v2f64:
1289 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1291 %v = insertelement <2 x double> undef, double %a, i32 0
; Mask <2, 0> - result lane 0 is %b lane 0, lane 1 is %v lane 0 (the inserted %a).
1292 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1293 ret <2 x double> %shuffle
; insert_mem_hi_v2f64 - keep the low double lane of %b and place a double loaded
; from %ptr in the high lane.
; Expected lowering per the checks below: the load is folded into a single
; (v)movhpd on every checked configuration.
1296 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1297 ; SSE-LABEL: insert_mem_hi_v2f64:
1299 ; SSE-NEXT: movhpd (%rdi), %xmm0
1302 ; AVX-LABEL: insert_mem_hi_v2f64:
1304 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1306 %a = load double, double* %ptr
1307 %v = insertelement <2 x double> undef, double %a, i32 0
; Mask <2, 0> - result lane 0 is %b lane 0, lane 1 is the loaded scalar.
1308 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1309 ret <2 x double> %shuffle
; insert_dup_reg_v2f64 - splat the scalar double %a into both lanes of the
; result vector.
; Expected lowering per the checks below: SSE2 uses movlhps; SSE3, SSSE3 and
; SSE4.1 use movddup; the AVX configurations use vmovddup.
1312 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1313 ; SSE2-LABEL: insert_dup_reg_v2f64:
1315 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1318 ; SSE3-LABEL: insert_dup_reg_v2f64:
1320 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1323 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1325 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1328 ; SSE41-LABEL: insert_dup_reg_v2f64:
1330 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1333 ; AVX-LABEL: insert_dup_reg_v2f64:
1335 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1337 %v = insertelement <2 x double> undef, double %a, i32 0
; Mask <0, 0> - both result lanes take %v lane 0 (the inserted %a).
1338 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1339 ret <2 x double> %shuffle
; insert_dup_mem_v2f64 - load a scalar double from %ptr and splat it into both
; lanes of the result vector.
; Expected lowering per the checks below: SSE2 needs a movsd load followed by
; movlhps; SSE3, SSSE3 and SSE4.1 fold the load into movddup; the AVX
; configurations fold it into vmovddup.
1342 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1343 ; SSE2-LABEL: insert_dup_mem_v2f64:
1345 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1346 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1349 ; SSE3-LABEL: insert_dup_mem_v2f64:
1351 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1354 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1356 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1359 ; SSE41-LABEL: insert_dup_mem_v2f64:
1361 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1364 ; AVX-LABEL: insert_dup_mem_v2f64:
1366 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1368 %a = load double, double* %ptr
1369 %v = insertelement <2 x double> undef, double %a, i32 0
; Mask <0, 0> - both result lanes take the loaded scalar.
1370 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1371 ret <2 x double> %shuffle
; insert_dup_mem128_v2f64 - load a whole <2 x double> vector from %ptr and splat
; its lane 0 into both result lanes. Unlike insert_dup_mem_v2f64, the IR load
; here is of the full 128-bit vector, not of a scalar.
; Expected lowering per the checks below: SSE2 does a movaps load followed by
; movlhps; SSE3, SSSE3 and SSE4.1 fold the load into movddup; the AVX
; configurations fold it into vmovddup.
1374 define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
1375 ; SSE2-LABEL: insert_dup_mem128_v2f64:
1377 ; SSE2-NEXT: movaps (%rdi), %xmm0
1378 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1381 ; SSE3-LABEL: insert_dup_mem128_v2f64:
1383 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1386 ; SSSE3-LABEL: insert_dup_mem128_v2f64:
1388 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1391 ; SSE41-LABEL: insert_dup_mem128_v2f64:
1393 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1396 ; AVX-LABEL: insert_dup_mem128_v2f64:
1398 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1400 %v = load <2 x double>, <2 x double>* %ptr
; Mask <0, 0> - both result lanes take lane 0 of the loaded vector.
1401 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1402 ret <2 x double> %shuffle
; insert_dup_mem_v2i64 - load a scalar i64 from %ptr (the load is marked
; align 1) and splat it into both lanes of the result vector.
; Expected lowering per the checks below: the SSE configurations and AVX1 load
; with (v)movq and duplicate with (v)pshufd; AVX2 and AVX512VL fold the load
; into a single vpbroadcastq.
1406 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1407 ; SSE-LABEL: insert_dup_mem_v2i64:
1409 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1410 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1413 ; AVX1-LABEL: insert_dup_mem_v2i64:
1415 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1416 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1419 ; AVX2-LABEL: insert_dup_mem_v2i64:
1421 ; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
1424 ; AVX512VL-LABEL: insert_dup_mem_v2i64:
1426 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %xmm0
1427 ; AVX512VL-NEXT: retq
1428 %tmp = load i64, i64* %ptr, align 1
1429 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
; A zeroinitializer mask selects element 0 for every result lane, i.e. a splat.
1430 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
; shuffle_mem_v2f64_10 - load a <2 x double> vector from %ptr and swap its two
; lanes.
; Expected lowering per the checks below: the SSE configurations do a movapd
; load followed by shufpd; the AVX configurations fold the load into a single
; vpermilpd with a memory operand.
1434 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1435 ; SSE-LABEL: shuffle_mem_v2f64_10:
1437 ; SSE-NEXT: movapd (%rdi), %xmm0
1438 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1441 ; AVX-LABEL: shuffle_mem_v2f64_10:
1443 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1446 %a = load <2 x double>, <2 x double>* %ptr
; Mask <1, 0> - result lane 0 is %a lane 1, result lane 1 is %a lane 0.
1447 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1448 ret <2 x double> %shuffle