1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
8 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
9 target triple = "x86_64-unknown-unknown"
; Splat of element 0 of %a into both lanes (mask <0,0>); %b is not selected by the mask.
11 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
12 ; SSE-LABEL: shuffle_v2i64_00:
14 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
17 ; AVX1-LABEL: shuffle_v2i64_00:
19 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
22 ; AVX2-LABEL: shuffle_v2i64_00:
24 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
26 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
27 ret <2 x i64> %shuffle
; Swap of the two elements of %a (mask <1,0>); %b is not selected by the mask.
29 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
30 ; SSE-LABEL: shuffle_v2i64_10:
32 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
35 ; AVX-LABEL: shuffle_v2i64_10:
37 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
39 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
40 ret <2 x i64> %shuffle
; Splat of element 1 of %a into both lanes (mask <1,1>); %b is not selected by the mask.
42 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
43 ; SSE-LABEL: shuffle_v2i64_11:
45 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
48 ; AVX-LABEL: shuffle_v2i64_11:
50 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
52 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
53 ret <2 x i64> %shuffle
; Splat of element 0 of %b into both lanes (mask <2,2>); %a is not selected by the mask.
55 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
56 ; SSE-LABEL: shuffle_v2i64_22:
58 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
61 ; AVX1-LABEL: shuffle_v2i64_22:
63 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
66 ; AVX2-LABEL: shuffle_v2i64_22:
68 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
70 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
71 ret <2 x i64> %shuffle
; Swap of the two elements of %b (mask <3,2>); %a is not selected by the mask.
73 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
74 ; SSE-LABEL: shuffle_v2i64_32:
76 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
79 ; AVX-LABEL: shuffle_v2i64_32:
81 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
83 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
84 ret <2 x i64> %shuffle
; Splat of element 1 of %b into both lanes (mask <3,3>); %a is not selected by the mask.
86 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
87 ; SSE-LABEL: shuffle_v2i64_33:
89 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
92 ; AVX-LABEL: shuffle_v2i64_33:
94 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
96 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
97 ret <2 x i64> %shuffle
; Floating-point splat of element 0 of %a (mask <0,0>); %b is not selected by the mask.
; The expected instruction differs between the plain SSE2 run and the SSE3-and-later runs.
100 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
101 ; SSE2-LABEL: shuffle_v2f64_00:
103 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
106 ; SSE3-LABEL: shuffle_v2f64_00:
108 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
111 ; SSSE3-LABEL: shuffle_v2f64_00:
113 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
116 ; SSE41-LABEL: shuffle_v2f64_00:
118 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
121 ; AVX-LABEL: shuffle_v2f64_00:
123 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
125 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
126 ret <2 x double> %shuffle
; Floating-point swap of the two elements of %a (mask <1,0>); %b is not selected by the mask.
128 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
129 ; SSE-LABEL: shuffle_v2f64_10:
131 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
134 ; AVX-LABEL: shuffle_v2f64_10:
136 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
138 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
139 ret <2 x double> %shuffle
; Floating-point splat of element 1 of %a (mask <1,1>); %b is not selected by the mask.
141 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
142 ; SSE-LABEL: shuffle_v2f64_11:
144 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
147 ; AVX-LABEL: shuffle_v2f64_11:
149 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
151 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
152 ret <2 x double> %shuffle
; Floating-point splat of element 0 of %b (mask <2,2>); %a is not selected by the mask.
; The non-AVX expectations shuffle xmm1 in place and then copy it into xmm0.
154 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
155 ; SSE2-LABEL: shuffle_v2f64_22:
157 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
158 ; SSE2-NEXT: movaps %xmm1, %xmm0
161 ; SSE3-LABEL: shuffle_v2f64_22:
163 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
164 ; SSE3-NEXT: movapd %xmm1, %xmm0
167 ; SSSE3-LABEL: shuffle_v2f64_22:
169 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
170 ; SSSE3-NEXT: movapd %xmm1, %xmm0
173 ; SSE41-LABEL: shuffle_v2f64_22:
175 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
176 ; SSE41-NEXT: movapd %xmm1, %xmm0
179 ; AVX-LABEL: shuffle_v2f64_22:
181 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
183 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
184 ret <2 x double> %shuffle
; Floating-point swap of the two elements of %b (mask <3,2>); %a is not selected by the mask.
186 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
187 ; SSE-LABEL: shuffle_v2f64_32:
189 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
192 ; AVX-LABEL: shuffle_v2f64_32:
194 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
196 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
197 ret <2 x double> %shuffle
; Floating-point splat of element 1 of %b (mask <3,3>); %a is not selected by the mask.
199 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
200 ; SSE-LABEL: shuffle_v2f64_33:
202 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
203 ; SSE-NEXT: movaps %xmm1, %xmm0
206 ; AVX-LABEL: shuffle_v2f64_33:
208 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
210 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
211 ret <2 x double> %shuffle
; Two-input blend: element 0 of %a followed by element 1 of %b (mask <0,3>).
; The SSE4.1 and AVX runs expect a blend instruction; the older SSE levels expect shufpd.
213 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
214 ; SSE2-LABEL: shuffle_v2f64_03:
216 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
219 ; SSE3-LABEL: shuffle_v2f64_03:
221 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
224 ; SSSE3-LABEL: shuffle_v2f64_03:
226 ; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
229 ; SSE41-LABEL: shuffle_v2f64_03:
231 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
234 ; AVX-LABEL: shuffle_v2f64_03:
236 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
238 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
239 ret <2 x double> %shuffle
; Two-input blend: element 0 of %b followed by element 1 of %a (mask <2,1>).
; The non-AVX expectations build the result in xmm1 and then copy it into xmm0.
241 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
242 ; SSE2-LABEL: shuffle_v2f64_21:
244 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
245 ; SSE2-NEXT: movapd %xmm1, %xmm0
248 ; SSE3-LABEL: shuffle_v2f64_21:
250 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
251 ; SSE3-NEXT: movapd %xmm1, %xmm0
254 ; SSSE3-LABEL: shuffle_v2f64_21:
256 ; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
257 ; SSSE3-NEXT: movapd %xmm1, %xmm0
260 ; SSE41-LABEL: shuffle_v2f64_21:
262 ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
263 ; SSE41-NEXT: movapd %xmm1, %xmm0
266 ; AVX-LABEL: shuffle_v2f64_21:
268 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
270 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
271 ret <2 x double> %shuffle
; Interleave of the low elements: element 0 of %a followed by element 0 of %b (mask <0,2>).
275 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
276 ; SSE-LABEL: shuffle_v2i64_02:
278 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
281 ; AVX-LABEL: shuffle_v2i64_02:
283 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
285 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
286 ret <2 x i64> %shuffle
; Mask <0,2> again, but with an unused leading %nonce argument. The expected code
; takes its inputs from xmm1/xmm2 and leaves the result in xmm0 (an extra copy on non-AVX).
288 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
289 ; SSE-LABEL: shuffle_v2i64_02_copy:
291 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
292 ; SSE-NEXT: movdqa %xmm1, %xmm0
295 ; AVX-LABEL: shuffle_v2i64_02_copy:
297 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
299 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
300 ret <2 x i64> %shuffle
; Integer blend: element 0 of %a followed by element 1 of %b (mask <0,3>).
; Expectations vary by feature level: shufpd, pblendw/vpblendw, or vpblendd with AVX2.
302 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
303 ; SSE2-LABEL: shuffle_v2i64_03:
305 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
308 ; SSE3-LABEL: shuffle_v2i64_03:
310 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
313 ; SSSE3-LABEL: shuffle_v2i64_03:
315 ; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
318 ; SSE41-LABEL: shuffle_v2i64_03:
320 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
323 ; AVX1-LABEL: shuffle_v2i64_03:
325 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
328 ; AVX2-LABEL: shuffle_v2i64_03:
330 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
332 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
333 ret <2 x i64> %shuffle
; Mask <0,3> again, but with an unused leading %nonce argument. The expected code
; takes its inputs from xmm1/xmm2 and leaves the result in xmm0 (an extra copy on non-AVX).
335 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
336 ; SSE2-LABEL: shuffle_v2i64_03_copy:
338 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
339 ; SSE2-NEXT: movapd %xmm1, %xmm0
342 ; SSE3-LABEL: shuffle_v2i64_03_copy:
344 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
345 ; SSE3-NEXT: movapd %xmm1, %xmm0
348 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
350 ; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
351 ; SSSE3-NEXT: movapd %xmm1, %xmm0
354 ; SSE41-LABEL: shuffle_v2i64_03_copy:
356 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
357 ; SSE41-NEXT: movdqa %xmm1, %xmm0
360 ; AVX1-LABEL: shuffle_v2i64_03_copy:
362 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
365 ; AVX2-LABEL: shuffle_v2i64_03_copy:
367 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
369 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
370 ret <2 x i64> %shuffle
; Element 1 of %a followed by element 0 of %b (mask <1,2>).
; From SSSE3 upward the expectation is a byte-align (palignr/vpalignr) instead of shufpd.
372 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
373 ; SSE2-LABEL: shuffle_v2i64_12:
375 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
378 ; SSE3-LABEL: shuffle_v2i64_12:
380 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
383 ; SSSE3-LABEL: shuffle_v2i64_12:
385 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
386 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
389 ; SSE41-LABEL: shuffle_v2i64_12:
391 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
392 ; SSE41-NEXT: movdqa %xmm1, %xmm0
395 ; AVX-LABEL: shuffle_v2i64_12:
397 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
399 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
400 ret <2 x i64> %shuffle
; Mask <1,2> again, but with an unused leading %nonce argument. The expected code
; takes its inputs from xmm1/xmm2 and leaves the result in xmm0 (an extra copy on non-AVX).
402 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
403 ; SSE2-LABEL: shuffle_v2i64_12_copy:
405 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
406 ; SSE2-NEXT: movapd %xmm1, %xmm0
409 ; SSE3-LABEL: shuffle_v2i64_12_copy:
411 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
412 ; SSE3-NEXT: movapd %xmm1, %xmm0
415 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
417 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
418 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
421 ; SSE41-LABEL: shuffle_v2i64_12_copy:
423 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
424 ; SSE41-NEXT: movdqa %xmm2, %xmm0
427 ; AVX-LABEL: shuffle_v2i64_12_copy:
429 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
431 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
432 ret <2 x i64> %shuffle
; Interleave of the high elements: element 1 of %a followed by element 1 of %b (mask <1,3>).
434 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
435 ; SSE-LABEL: shuffle_v2i64_13:
437 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
440 ; AVX-LABEL: shuffle_v2i64_13:
442 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
444 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
445 ret <2 x i64> %shuffle
; Mask <1,3> again, but with an unused leading %nonce argument. The expected code
; takes its inputs from xmm1/xmm2 and leaves the result in xmm0 (an extra copy on non-AVX).
447 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
448 ; SSE-LABEL: shuffle_v2i64_13_copy:
450 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
451 ; SSE-NEXT: movdqa %xmm1, %xmm0
454 ; AVX-LABEL: shuffle_v2i64_13_copy:
456 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
458 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
459 ret <2 x i64> %shuffle
; Low elements with operands commuted: element 0 of %b followed by element 0 of %a (mask <2,0>).
461 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
462 ; SSE-LABEL: shuffle_v2i64_20:
464 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
465 ; SSE-NEXT: movdqa %xmm1, %xmm0
468 ; AVX-LABEL: shuffle_v2i64_20:
470 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
472 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
473 ret <2 x i64> %shuffle
; Mask <2,0> again, but with an unused leading %nonce argument. The expected code
; takes its inputs from xmm1/xmm2 and leaves the result in xmm0 (an extra copy on non-AVX).
475 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
476 ; SSE-LABEL: shuffle_v2i64_20_copy:
478 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
479 ; SSE-NEXT: movdqa %xmm2, %xmm0
482 ; AVX-LABEL: shuffle_v2i64_20_copy:
484 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
486 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
487 ret <2 x i64> %shuffle
; Integer blend with operands commuted: element 0 of %b followed by element 1 of %a (mask <2,1>).
489 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
490 ; SSE2-LABEL: shuffle_v2i64_21:
492 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
493 ; SSE2-NEXT: movapd %xmm1, %xmm0
496 ; SSE3-LABEL: shuffle_v2i64_21:
498 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
499 ; SSE3-NEXT: movapd %xmm1, %xmm0
502 ; SSSE3-LABEL: shuffle_v2i64_21:
504 ; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
505 ; SSSE3-NEXT: movapd %xmm1, %xmm0
508 ; SSE41-LABEL: shuffle_v2i64_21:
510 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
511 ; SSE41-NEXT: movdqa %xmm1, %xmm0
514 ; AVX1-LABEL: shuffle_v2i64_21:
516 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
519 ; AVX2-LABEL: shuffle_v2i64_21:
521 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
523 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
524 ret <2 x i64> %shuffle
; Mask <2,1> again, but with an unused leading %nonce argument. The expected code
; takes its inputs from xmm1/xmm2 and leaves the result in xmm0 (an extra copy on non-AVX).
526 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
527 ; SSE2-LABEL: shuffle_v2i64_21_copy:
529 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
530 ; SSE2-NEXT: movapd %xmm2, %xmm0
533 ; SSE3-LABEL: shuffle_v2i64_21_copy:
535 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
536 ; SSE3-NEXT: movapd %xmm2, %xmm0
539 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
541 ; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
542 ; SSSE3-NEXT: movapd %xmm2, %xmm0
545 ; SSE41-LABEL: shuffle_v2i64_21_copy:
547 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
548 ; SSE41-NEXT: movdqa %xmm2, %xmm0
551 ; AVX1-LABEL: shuffle_v2i64_21_copy:
553 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
556 ; AVX2-LABEL: shuffle_v2i64_21_copy:
558 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
560 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
561 ret <2 x i64> %shuffle
; Element 1 of %b followed by element 0 of %a (mask <3,0>).
; From SSSE3 upward the expectation is a byte-align (palignr/vpalignr) instead of shufpd.
563 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
564 ; SSE2-LABEL: shuffle_v2i64_30:
566 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
567 ; SSE2-NEXT: movapd %xmm1, %xmm0
570 ; SSE3-LABEL: shuffle_v2i64_30:
572 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
573 ; SSE3-NEXT: movapd %xmm1, %xmm0
576 ; SSSE3-LABEL: shuffle_v2i64_30:
578 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
581 ; SSE41-LABEL: shuffle_v2i64_30:
583 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
586 ; AVX-LABEL: shuffle_v2i64_30:
588 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
590 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
591 ret <2 x i64> %shuffle
; Mask <3,0> again, but with an unused leading %nonce argument. The expected code
; takes its inputs from xmm1/xmm2 and leaves the result in xmm0 (an extra copy on non-AVX).
593 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
594 ; SSE2-LABEL: shuffle_v2i64_30_copy:
596 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
597 ; SSE2-NEXT: movapd %xmm2, %xmm0
600 ; SSE3-LABEL: shuffle_v2i64_30_copy:
602 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
603 ; SSE3-NEXT: movapd %xmm2, %xmm0
606 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
608 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
609 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
612 ; SSE41-LABEL: shuffle_v2i64_30_copy:
614 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
615 ; SSE41-NEXT: movdqa %xmm1, %xmm0
618 ; AVX-LABEL: shuffle_v2i64_30_copy:
620 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
622 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
623 ret <2 x i64> %shuffle
; High elements with operands commuted: element 1 of %b followed by element 1 of %a (mask <3,1>).
625 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
626 ; SSE-LABEL: shuffle_v2i64_31:
628 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
629 ; SSE-NEXT: movdqa %xmm1, %xmm0
632 ; AVX-LABEL: shuffle_v2i64_31:
634 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
636 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
637 ret <2 x i64> %shuffle
; Mask <3,1> again, but with an unused leading %nonce argument. The expected code
; takes its inputs from xmm1/xmm2 and leaves the result in xmm0 (an extra copy on non-AVX).
639 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
640 ; SSE-LABEL: shuffle_v2i64_31_copy:
642 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
643 ; SSE-NEXT: movdqa %xmm2, %xmm0
646 ; AVX-LABEL: shuffle_v2i64_31_copy:
648 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
650 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
651 ret <2 x i64> %shuffle
; Element 0 of %a followed by a zero element (mask <0,3> against zeroinitializer).
; Every expectation first materializes a zero register and then shuffles or blends with it.
654 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
655 ; SSE2-LABEL: shuffle_v2i64_0z:
657 ; SSE2-NEXT: xorpd %xmm1, %xmm1
658 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
661 ; SSE3-LABEL: shuffle_v2i64_0z:
663 ; SSE3-NEXT: xorpd %xmm1, %xmm1
664 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
667 ; SSSE3-LABEL: shuffle_v2i64_0z:
669 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
670 ; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
673 ; SSE41-LABEL: shuffle_v2i64_0z:
675 ; SSE41-NEXT: pxor %xmm1, %xmm1
676 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
679 ; AVX1-LABEL: shuffle_v2i64_0z:
681 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
682 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
685 ; AVX2-LABEL: shuffle_v2i64_0z:
687 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
688 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
690 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
691 ret <2 x i64> %shuffle
; Element 1 of %a followed by a zero element (mask <1,3> against zeroinitializer).
694 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
695 ; SSE-LABEL: shuffle_v2i64_1z:
697 ; SSE-NEXT: pxor %xmm1, %xmm1
698 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
701 ; AVX-LABEL: shuffle_v2i64_1z:
703 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
704 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
706 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
707 ret <2 x i64> %shuffle
; A zero element followed by element 0 of %a (mask <2,0> against zeroinitializer).
710 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
711 ; SSE-LABEL: shuffle_v2i64_z0:
713 ; SSE-NEXT: pxor %xmm1, %xmm1
714 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
715 ; SSE-NEXT: movdqa %xmm1, %xmm0
718 ; AVX-LABEL: shuffle_v2i64_z0:
720 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
721 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
723 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
724 ret <2 x i64> %shuffle
; A zero element followed by element 1 of %a (mask <2,1> against zeroinitializer).
727 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
728 ; SSE2-LABEL: shuffle_v2i64_z1:
730 ; SSE2-NEXT: xorpd %xmm1, %xmm1
731 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
732 ; SSE2-NEXT: movapd %xmm1, %xmm0
735 ; SSE3-LABEL: shuffle_v2i64_z1:
737 ; SSE3-NEXT: xorpd %xmm1, %xmm1
738 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
739 ; SSE3-NEXT: movapd %xmm1, %xmm0
742 ; SSSE3-LABEL: shuffle_v2i64_z1:
744 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
745 ; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
746 ; SSSE3-NEXT: movapd %xmm1, %xmm0
749 ; SSE41-LABEL: shuffle_v2i64_z1:
751 ; SSE41-NEXT: pxor %xmm1, %xmm1
752 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
753 ; SSE41-NEXT: movdqa %xmm1, %xmm0
756 ; AVX1-LABEL: shuffle_v2i64_z1:
758 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
759 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
762 ; AVX2-LABEL: shuffle_v2i64_z1:
764 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
765 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
767 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
768 ret <2 x i64> %shuffle
; Floating-point: element 0 of %a followed by a zero element (mask <0,3> against zeroinitializer).
771 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
772 ; SSE2-LABEL: shuffle_v2f64_0z:
774 ; SSE2-NEXT: xorpd %xmm1, %xmm1
775 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
778 ; SSE3-LABEL: shuffle_v2f64_0z:
780 ; SSE3-NEXT: xorpd %xmm1, %xmm1
781 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
784 ; SSSE3-LABEL: shuffle_v2f64_0z:
786 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
787 ; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
790 ; SSE41-LABEL: shuffle_v2f64_0z:
792 ; SSE41-NEXT: xorpd %xmm1, %xmm1
793 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
796 ; AVX-LABEL: shuffle_v2f64_0z:
798 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
799 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
801 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
802 ret <2 x double> %shuffle
; Floating-point: element 1 of %a followed by a zero element (mask <1,3> against zeroinitializer).
805 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
806 ; SSE-LABEL: shuffle_v2f64_1z:
808 ; SSE-NEXT: xorpd %xmm1, %xmm1
809 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
812 ; AVX-LABEL: shuffle_v2f64_1z:
814 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
815 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
817 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
818 ret <2 x double> %shuffle
; Floating-point: a zero element followed by element 0 of %a (mask <2,0> against zeroinitializer).
821 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
822 ; SSE-LABEL: shuffle_v2f64_z0:
824 ; SSE-NEXT: xorpd %xmm1, %xmm1
825 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
826 ; SSE-NEXT: movapd %xmm1, %xmm0
829 ; AVX-LABEL: shuffle_v2f64_z0:
831 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
832 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
834 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
835 ret <2 x double> %shuffle
; Floating-point: a zero element followed by element 1 of %a (mask <2,1> against zeroinitializer).
838 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
839 ; SSE2-LABEL: shuffle_v2f64_z1:
841 ; SSE2-NEXT: xorpd %xmm1, %xmm1
842 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
843 ; SSE2-NEXT: movapd %xmm1, %xmm0
846 ; SSE3-LABEL: shuffle_v2f64_z1:
848 ; SSE3-NEXT: xorpd %xmm1, %xmm1
849 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
850 ; SSE3-NEXT: movapd %xmm1, %xmm0
853 ; SSSE3-LABEL: shuffle_v2f64_z1:
855 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
856 ; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
857 ; SSSE3-NEXT: movapd %xmm1, %xmm0
860 ; SSE41-LABEL: shuffle_v2f64_z1:
862 ; SSE41-NEXT: xorpd %xmm1, %xmm1
863 ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
864 ; SSE41-NEXT: movapd %xmm1, %xmm0
867 ; AVX-LABEL: shuffle_v2f64_z1:
869 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
870 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
872 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
873 ret <2 x double> %shuffle
; Scalar i64 argument %a placed in lane 0 with lane 1 zeroed
; (insertelement into undef, then mask <0,3> against zeroinitializer).
876 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
877 ; SSE-LABEL: insert_reg_and_zero_v2i64:
879 ; SSE-NEXT: movd %rdi, %xmm0
882 ; AVX-LABEL: insert_reg_and_zero_v2i64:
884 ; AVX-NEXT: vmovq %rdi, %xmm0
886 %v = insertelement <2 x i64> undef, i64 %a, i32 0
887 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
888 ret <2 x i64> %shuffle
; i64 loaded from %ptr placed in lane 0 with lane 1 zeroed
; (insertelement into undef, then mask <0,3> against zeroinitializer).
; The expected code is a single move from (%rdi), so the scalar must come from a load of %ptr.
891 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
892 ; SSE-LABEL: insert_mem_and_zero_v2i64:
894 ; SSE-NEXT: movq (%rdi), %xmm0
897 ; AVX-LABEL: insert_mem_and_zero_v2i64:
899 ; AVX-NEXT: vmovq (%rdi), %xmm0
901 %a = load i64* %ptr
902 %v = insertelement <2 x i64> undef, i64 %a, i32 0
903 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
904 ret <2 x i64> %shuffle
; Scalar double argument %a placed in lane 0 with lane 1 zeroed
; (insertelement into undef, then mask <0,3> against zeroinitializer).
907 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
908 ; SSE-LABEL: insert_reg_and_zero_v2f64:
910 ; SSE-NEXT: movq %xmm0, %xmm0
913 ; AVX-LABEL: insert_reg_and_zero_v2f64:
915 ; AVX-NEXT: vmovq %xmm0, %xmm0
917 %v = insertelement <2 x double> undef, double %a, i32 0
918 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
919 ret <2 x double> %shuffle
; Double loaded from %ptr placed in lane 0 with lane 1 zeroed
; (insertelement into undef, then mask <0,3> against zeroinitializer).
; The expected code is a single scalar load from (%rdi).
922 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
923 ; SSE-LABEL: insert_mem_and_zero_v2f64:
925 ; SSE-NEXT: movsd (%rdi), %xmm0
928 ; AVX-LABEL: insert_mem_and_zero_v2f64:
930 ; AVX-NEXT: vmovsd (%rdi), %xmm0
932 %a = load double* %ptr
933 %v = insertelement <2 x double> undef, double %a, i32 0
934 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
935 ret <2 x double> %shuffle
; Scalar i64 argument %a becomes lane 0; lane 1 is element 1 of %b (mask <0,3>).
938 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
939 ; SSE2-LABEL: insert_reg_lo_v2i64:
941 ; SSE2-NEXT: movd %rdi, %xmm1
942 ; SSE2-NEXT: movsd %xmm1, %xmm0
945 ; SSE3-LABEL: insert_reg_lo_v2i64:
947 ; SSE3-NEXT: movd %rdi, %xmm1
948 ; SSE3-NEXT: movsd %xmm1, %xmm0
951 ; SSSE3-LABEL: insert_reg_lo_v2i64:
953 ; SSSE3-NEXT: movd %rdi, %xmm1
954 ; SSSE3-NEXT: movsd %xmm1, %xmm0
957 ; SSE41-LABEL: insert_reg_lo_v2i64:
959 ; SSE41-NEXT: movd %rdi, %xmm1
960 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
961 ; SSE41-NEXT: movdqa %xmm1, %xmm0
964 ; AVX1-LABEL: insert_reg_lo_v2i64:
966 ; AVX1-NEXT: vmovq %rdi, %xmm1
967 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
970 ; AVX2-LABEL: insert_reg_lo_v2i64:
972 ; AVX2-NEXT: vmovq %rdi, %xmm1
973 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
975 %v = insertelement <2 x i64> undef, i64 %a, i32 0
976 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
977 ret <2 x i64> %shuffle
; i64 loaded from %ptr becomes lane 0; lane 1 is element 1 of %b (mask <0,3>).
; Every expectation reads the scalar from (%rdi), so it must come from a load of %ptr.
980 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
981 ; SSE2-LABEL: insert_mem_lo_v2i64:
983 ; SSE2-NEXT: movlpd (%rdi), %xmm0
986 ; SSE3-LABEL: insert_mem_lo_v2i64:
988 ; SSE3-NEXT: movlpd (%rdi), %xmm0
991 ; SSSE3-LABEL: insert_mem_lo_v2i64:
993 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
996 ; SSE41-LABEL: insert_mem_lo_v2i64:
998 ; SSE41-NEXT: movq (%rdi), %xmm1
999 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1000 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1003 ; AVX1-LABEL: insert_mem_lo_v2i64:
1005 ; AVX1-NEXT: vmovq (%rdi), %xmm1
1006 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1009 ; AVX2-LABEL: insert_mem_lo_v2i64:
1011 ; AVX2-NEXT: vmovq (%rdi), %xmm1
1012 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1014 %a = load i64* %ptr
1015 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1016 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1017 ret <2 x i64> %shuffle
; Lane 0 is element 0 of %b; lane 1 is the scalar i64 argument %a (mask <2,0>, with %v as first operand).
1020 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1021 ; SSE-LABEL: insert_reg_hi_v2i64:
1023 ; SSE-NEXT: movd %rdi, %xmm1
1024 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1027 ; AVX-LABEL: insert_reg_hi_v2i64:
1029 ; AVX-NEXT: vmovq %rdi, %xmm1
1030 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1032 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1033 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1034 ret <2 x i64> %shuffle
; Lane 0 is element 0 of %b; lane 1 is the i64 loaded from %ptr (mask <2,0>, with %v as first operand).
; Every expectation reads the scalar from (%rdi), so it must come from a load of %ptr.
1037 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1038 ; SSE-LABEL: insert_mem_hi_v2i64:
1040 ; SSE-NEXT: movq (%rdi), %xmm1
1041 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1044 ; AVX-LABEL: insert_mem_hi_v2i64:
1046 ; AVX-NEXT: vmovq (%rdi), %xmm1
1047 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1049 %a = load i64* %ptr
1050 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1051 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1052 ret <2 x i64> %shuffle
; Scalar double argument %a becomes lane 0; lane 1 is element 1 of %b (mask <0,3>).
1055 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1056 ; SSE-LABEL: insert_reg_lo_v2f64:
1058 ; SSE-NEXT: movsd %xmm0, %xmm1
1059 ; SSE-NEXT: movaps %xmm1, %xmm0
1062 ; AVX-LABEL: insert_reg_lo_v2f64:
1064 ; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
1066 %v = insertelement <2 x double> undef, double %a, i32 0
1067 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1068 ret <2 x double> %shuffle
; Double loaded from %ptr becomes lane 0; lane 1 is element 1 of %b (mask <0,3>).
; The expected code folds the load into a single movlpd/vmovlpd from (%rdi).
1071 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1072 ; SSE-LABEL: insert_mem_lo_v2f64:
1074 ; SSE-NEXT: movlpd (%rdi), %xmm0
1077 ; AVX-LABEL: insert_mem_lo_v2f64:
1079 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
1081 %a = load double* %ptr
1082 %v = insertelement <2 x double> undef, double %a, i32 0
1083 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1084 ret <2 x double> %shuffle
; Lane 0 is element 0 of %b; lane 1 is the scalar double argument %a (mask <2,0>, with %v as first operand).
1087 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1088 ; SSE-LABEL: insert_reg_hi_v2f64:
1090 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1091 ; SSE-NEXT: movapd %xmm1, %xmm0
1094 ; AVX-LABEL: insert_reg_hi_v2f64:
1096 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1098 %v = insertelement <2 x double> undef, double %a, i32 0
1099 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1100 ret <2 x double> %shuffle
; Lane 0 is element 0 of %b; lane 1 is the double loaded from %ptr (mask <2,0>, with %v as first operand).
; The expected code folds the load into a single movhpd/vmovhpd from (%rdi).
1103 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1104 ; SSE-LABEL: insert_mem_hi_v2f64:
1106 ; SSE-NEXT: movhpd (%rdi), %xmm0
1109 ; AVX-LABEL: insert_mem_hi_v2f64:
1111 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1113 %a = load double* %ptr
1114 %v = insertelement <2 x double> undef, double %a, i32 0
1115 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1116 ret <2 x double> %shuffle
; Scalar double argument %a duplicated into both lanes
; (insertelement into undef, then mask <0,0>).
1119 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1120 ; FIXME: We should match movddup for SSE3 and higher here.
1122 ; SSE2-LABEL: insert_dup_reg_v2f64:
1124 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1127 ; SSE3-LABEL: insert_dup_reg_v2f64:
1129 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1132 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1134 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1137 ; SSE41-LABEL: insert_dup_reg_v2f64:
1139 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1142 ; AVX-LABEL: insert_dup_reg_v2f64:
1144 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1146 %v = insertelement <2 x double> undef, double %a, i32 0
1147 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1148 ret <2 x double> %shuffle
; Double loaded from %ptr duplicated into both lanes (insertelement into undef, then mask <0,0>).
; With SSE3 or later the expectation is a single movddup from memory; plain SSE2 expects a load plus movlhps.
1150 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1151 ; SSE2-LABEL: insert_dup_mem_v2f64:
1153 ; SSE2-NEXT: movsd (%rdi), %xmm0
1154 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1157 ; SSE3-LABEL: insert_dup_mem_v2f64:
1159 ; SSE3-NEXT: movddup (%rdi), %xmm0
1162 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1164 ; SSSE3-NEXT: movddup (%rdi), %xmm0
1167 ; SSE41-LABEL: insert_dup_mem_v2f64:
1169 ; SSE41-NEXT: movddup (%rdi), %xmm0
1172 ; AVX-LABEL: insert_dup_mem_v2f64:
1174 ; AVX-NEXT: vmovddup (%rdi), %xmm0
1176 %a = load double* %ptr
1177 %v = insertelement <2 x double> undef, double %a, i32 0
1178 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1179 ret <2 x double> %shuffle
; Whole vector loaded from %ptr with its two elements swapped (mask <1,0>).
; The AVX expectation folds the load into vpermilpd (operand shown as mem); non-AVX loads first, then shufpd.
1182 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1183 ; SSE-LABEL: shuffle_mem_v2f64_10:
1185 ; SSE-NEXT: movapd (%rdi), %xmm0
1186 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1189 ; AVX-LABEL: shuffle_mem_v2f64_10:
1191 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1193 %a = load <2 x double>* %ptr
1194 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1195 ret <2 x double> %shuffle