1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
8 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
9 target triple = "x86_64-unknown-unknown"
11 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
; Mask <0,0> copies element 0 of %a into both result lanes; %b is never selected.
; The AVX2 run expects vpbroadcastq, the remaining runs a (v)pshufd of xmm0.
12 ; SSE-LABEL: shuffle_v2i64_00:
14 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
17 ; AVX1-LABEL: shuffle_v2i64_00:
19 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
22 ; AVX2-LABEL: shuffle_v2i64_00:
24 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
26 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
27 ret <2 x i64> %shuffle
29 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
; Mask <1,0> swaps the two elements of %a; %b is never selected.
; Every run expects a single (v)pshufd with dword pattern [2,3,0,1].
30 ; SSE-LABEL: shuffle_v2i64_10:
32 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
35 ; AVX-LABEL: shuffle_v2i64_10:
37 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
39 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
40 ret <2 x i64> %shuffle
42 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
; Mask <1,1> copies element 1 of %a into both result lanes; %b is never selected.
; Every run expects a single (v)pshufd with dword pattern [2,3,2,3].
43 ; SSE-LABEL: shuffle_v2i64_11:
45 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
48 ; AVX-LABEL: shuffle_v2i64_11:
50 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
52 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
53 ret <2 x i64> %shuffle
55 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
; Mask <2,2> copies element 0 of %b (index 2 of the concatenated inputs) into
; both result lanes; %a is never selected. The expected code reads xmm1 and
; writes xmm0 directly, with vpbroadcastq expected for the AVX2 run.
56 ; SSE-LABEL: shuffle_v2i64_22:
58 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
61 ; AVX1-LABEL: shuffle_v2i64_22:
63 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
66 ; AVX2-LABEL: shuffle_v2i64_22:
68 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
70 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
71 ret <2 x i64> %shuffle
73 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
; Mask <3,2> swaps the two elements of %b; %a is never selected.
; The expected (v)pshufd reads xmm1 and writes xmm0 in one instruction.
74 ; SSE-LABEL: shuffle_v2i64_32:
76 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
79 ; AVX-LABEL: shuffle_v2i64_32:
81 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
83 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
84 ret <2 x i64> %shuffle
86 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
; Mask <3,3> copies element 1 of %b into both result lanes; %a is never selected.
; The expected (v)pshufd reads xmm1 and writes xmm0 in one instruction.
87 ; SSE-LABEL: shuffle_v2i64_33:
89 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
92 ; AVX-LABEL: shuffle_v2i64_33:
94 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
96 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
97 ret <2 x i64> %shuffle
100 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
; Floating-point variant of the <0,0> splat of %a. The plain SSE2 run expects
; movlhps, while the SSE3 through SSE4.1 runs expect unpcklpd and the AVX runs
; expect vunpcklpd.
101 ; SSE2-LABEL: shuffle_v2f64_00:
103 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
106 ; SSE3-LABEL: shuffle_v2f64_00:
108 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
111 ; SSSE3-LABEL: shuffle_v2f64_00:
113 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
116 ; SSE41-LABEL: shuffle_v2f64_00:
118 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
121 ; AVX-LABEL: shuffle_v2f64_00:
123 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
125 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
126 ret <2 x double> %shuffle
128 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
; Mask <1,0> swaps the two doubles of %a; %b is never selected.
; The SSE runs expect shufpd, the AVX runs expect vpermilpd.
129 ; SSE-LABEL: shuffle_v2f64_10:
131 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
134 ; AVX-LABEL: shuffle_v2f64_10:
136 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
138 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
139 ret <2 x double> %shuffle
141 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
; Mask <1,1> copies the high double of %a into both result lanes.
; Every run expects a single (v)movhlps on xmm0.
142 ; SSE-LABEL: shuffle_v2f64_11:
144 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
147 ; AVX-LABEL: shuffle_v2f64_11:
149 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
151 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
152 ret <2 x double> %shuffle
154 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
; Mask <2,2> copies the low double of %b into both result lanes. The SSE runs
; expect the shuffle to be done in place on xmm1 followed by a register copy
; into xmm0; the AVX runs expect one three-operand vunpcklpd writing xmm0.
155 ; SSE2-LABEL: shuffle_v2f64_22:
157 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
158 ; SSE2-NEXT: movaps %xmm1, %xmm0
161 ; SSE3-LABEL: shuffle_v2f64_22:
163 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
164 ; SSE3-NEXT: movapd %xmm1, %xmm0
167 ; SSSE3-LABEL: shuffle_v2f64_22:
169 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
170 ; SSSE3-NEXT: movapd %xmm1, %xmm0
173 ; SSE41-LABEL: shuffle_v2f64_22:
175 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
176 ; SSE41-NEXT: movapd %xmm1, %xmm0
179 ; AVX-LABEL: shuffle_v2f64_22:
181 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
183 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
184 ret <2 x double> %shuffle
186 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
; Mask <3,2> swaps the two doubles of %b; %a is never selected. The SSE runs
; expect an in-place shufpd on xmm1 plus a copy to xmm0; the AVX runs expect a
; single vpermilpd from xmm1 into xmm0.
187 ; SSE-LABEL: shuffle_v2f64_32:
189 ; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
190 ; SSE-NEXT: movapd %xmm1, %xmm0
193 ; AVX-LABEL: shuffle_v2f64_32:
195 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
197 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
198 ret <2 x double> %shuffle
200 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
; Mask <3,3> copies the high double of %b into both result lanes. The SSE runs
; expect movhlps on xmm1 plus a copy to xmm0; the AVX runs expect one vmovhlps
; that writes xmm0 directly.
201 ; SSE-LABEL: shuffle_v2f64_33:
203 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
204 ; SSE-NEXT: movaps %xmm1, %xmm0
207 ; AVX-LABEL: shuffle_v2f64_33:
209 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
211 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
212 ret <2 x double> %shuffle
214 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; Mask <0,3> takes the low double from %a and the high double from %b (a
; lane-wise blend). Runs without SSE4.1 expect movsd into xmm1 plus a copy to
; xmm0; the SSE4.1 and AVX runs expect a single (v)blendpd.
215 ; SSE2-LABEL: shuffle_v2f64_03:
217 ; SSE2-NEXT: movsd %xmm0, %xmm1
218 ; SSE2-NEXT: movaps %xmm1, %xmm0
221 ; SSE3-LABEL: shuffle_v2f64_03:
223 ; SSE3-NEXT: movsd %xmm0, %xmm1
224 ; SSE3-NEXT: movaps %xmm1, %xmm0
227 ; SSSE3-LABEL: shuffle_v2f64_03:
229 ; SSSE3-NEXT: movsd %xmm0, %xmm1
230 ; SSSE3-NEXT: movaps %xmm1, %xmm0
233 ; SSE41-LABEL: shuffle_v2f64_03:
235 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
238 ; AVX-LABEL: shuffle_v2f64_03:
240 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
242 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
243 ret <2 x double> %shuffle
245 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
; Mask <2,1> takes the low double from %b and the high double from %a. Runs
; without SSE4.1 expect a single movsd into xmm0; the SSE4.1 and AVX runs
; expect a single (v)blendpd.
246 ; SSE2-LABEL: shuffle_v2f64_21:
248 ; SSE2-NEXT: movsd %xmm1, %xmm0
251 ; SSE3-LABEL: shuffle_v2f64_21:
253 ; SSE3-NEXT: movsd %xmm1, %xmm0
256 ; SSSE3-LABEL: shuffle_v2f64_21:
258 ; SSSE3-NEXT: movsd %xmm1, %xmm0
261 ; SSE41-LABEL: shuffle_v2f64_21:
263 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
266 ; AVX-LABEL: shuffle_v2f64_21:
268 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
270 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
271 ret <2 x double> %shuffle
275 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
; Mask <0,2> pairs the low element of %a with the low element of %b.
; Every run expects a single (v)punpcklqdq.
276 ; SSE-LABEL: shuffle_v2i64_02:
278 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
281 ; AVX-LABEL: shuffle_v2i64_02:
283 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
285 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
286 ret <2 x i64> %shuffle
288 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; Same <0,2> shuffle as shuffle_v2i64_02, but with an extra leading argument
; (%nonce) that the body never references. The expected code therefore reads
; xmm1/xmm2; the SSE runs additionally expect a movdqa into xmm0.
289 ; SSE-LABEL: shuffle_v2i64_02_copy:
291 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
292 ; SSE-NEXT: movdqa %xmm1, %xmm0
295 ; AVX-LABEL: shuffle_v2i64_02_copy:
297 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
299 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
300 ret <2 x i64> %shuffle
302 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; Mask <0,3> takes the low element from %a and the high element from %b.
; Runs without SSE4.1 expect movsd plus a register copy; the SSE4.1 and AVX1
; runs expect (v)pblendw and the AVX2 run expects vpblendd.
303 ; SSE2-LABEL: shuffle_v2i64_03:
305 ; SSE2-NEXT: movsd %xmm0, %xmm1
306 ; SSE2-NEXT: movaps %xmm1, %xmm0
309 ; SSE3-LABEL: shuffle_v2i64_03:
311 ; SSE3-NEXT: movsd %xmm0, %xmm1
312 ; SSE3-NEXT: movaps %xmm1, %xmm0
315 ; SSSE3-LABEL: shuffle_v2i64_03:
317 ; SSSE3-NEXT: movsd %xmm0, %xmm1
318 ; SSSE3-NEXT: movaps %xmm1, %xmm0
321 ; SSE41-LABEL: shuffle_v2i64_03:
323 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
326 ; AVX1-LABEL: shuffle_v2i64_03:
328 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
331 ; AVX2-LABEL: shuffle_v2i64_03:
333 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
335 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
336 ret <2 x i64> %shuffle
338 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; Same <0,3> blend as shuffle_v2i64_03, with an unreferenced leading %nonce
; argument so the expected code operates on xmm1/xmm2. Every SSE run expects a
; trailing register copy into xmm0; the AVX runs write xmm0 directly.
339 ; SSE2-LABEL: shuffle_v2i64_03_copy:
341 ; SSE2-NEXT: movsd %xmm1, %xmm2
342 ; SSE2-NEXT: movaps %xmm2, %xmm0
345 ; SSE3-LABEL: shuffle_v2i64_03_copy:
347 ; SSE3-NEXT: movsd %xmm1, %xmm2
348 ; SSE3-NEXT: movaps %xmm2, %xmm0
351 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
353 ; SSSE3-NEXT: movsd %xmm1, %xmm2
354 ; SSSE3-NEXT: movaps %xmm2, %xmm0
357 ; SSE41-LABEL: shuffle_v2i64_03_copy:
359 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
360 ; SSE41-NEXT: movdqa %xmm1, %xmm0
363 ; AVX1-LABEL: shuffle_v2i64_03_copy:
365 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
368 ; AVX2-LABEL: shuffle_v2i64_03_copy:
370 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
372 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
373 ret <2 x i64> %shuffle
375 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
; Mask <1,2> takes the high element of %a followed by the low element of %b.
; The SSE2 and SSE3 runs expect shufpd; the SSSE3 and SSE4.1 runs expect
; palignr into xmm1 plus a copy to xmm0; the AVX runs expect one vpalignr.
376 ; SSE2-LABEL: shuffle_v2i64_12:
378 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
381 ; SSE3-LABEL: shuffle_v2i64_12:
383 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
386 ; SSSE3-LABEL: shuffle_v2i64_12:
388 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
389 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
392 ; SSE41-LABEL: shuffle_v2i64_12:
394 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
395 ; SSE41-NEXT: movdqa %xmm1, %xmm0
398 ; AVX-LABEL: shuffle_v2i64_12:
400 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
402 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
403 ret <2 x i64> %shuffle
405 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; Same <1,2> shuffle as shuffle_v2i64_12, with an unreferenced leading %nonce
; argument so the expected code operates on xmm1/xmm2. Every SSE run expects a
; trailing copy into xmm0; the AVX runs write xmm0 directly.
406 ; SSE2-LABEL: shuffle_v2i64_12_copy:
408 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
409 ; SSE2-NEXT: movapd %xmm1, %xmm0
412 ; SSE3-LABEL: shuffle_v2i64_12_copy:
414 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
415 ; SSE3-NEXT: movapd %xmm1, %xmm0
418 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
420 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
421 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
424 ; SSE41-LABEL: shuffle_v2i64_12_copy:
426 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
427 ; SSE41-NEXT: movdqa %xmm2, %xmm0
430 ; AVX-LABEL: shuffle_v2i64_12_copy:
432 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
434 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
435 ret <2 x i64> %shuffle
437 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
; Mask <1,3> pairs the high element of %a with the high element of %b.
; Every run expects a single (v)punpckhqdq.
438 ; SSE-LABEL: shuffle_v2i64_13:
440 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
443 ; AVX-LABEL: shuffle_v2i64_13:
445 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
447 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
448 ret <2 x i64> %shuffle
450 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; Same <1,3> shuffle as shuffle_v2i64_13, with an unreferenced leading %nonce
; argument so the expected code operates on xmm1/xmm2. The SSE runs expect a
; trailing movdqa into xmm0; the AVX runs write xmm0 directly.
451 ; SSE-LABEL: shuffle_v2i64_13_copy:
453 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
454 ; SSE-NEXT: movdqa %xmm1, %xmm0
457 ; AVX-LABEL: shuffle_v2i64_13_copy:
459 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
461 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
462 ret <2 x i64> %shuffle
464 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
; Mask <2,0> pairs the low element of %b with the low element of %a (operands
; commuted relative to <0,2>). The SSE runs expect punpcklqdq into xmm1 plus a
; copy to xmm0; the AVX runs expect one vpunpcklqdq with swapped sources.
465 ; SSE-LABEL: shuffle_v2i64_20:
467 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
468 ; SSE-NEXT: movdqa %xmm1, %xmm0
471 ; AVX-LABEL: shuffle_v2i64_20:
473 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
475 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
476 ret <2 x i64> %shuffle
478 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; Same <2,0> shuffle as shuffle_v2i64_20, with an unreferenced leading %nonce
; argument so the expected code operates on xmm2/xmm1 and the result has to
; end up in xmm0.
479 ; SSE-LABEL: shuffle_v2i64_20_copy:
481 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
482 ; SSE-NEXT: movdqa %xmm2, %xmm0
485 ; AVX-LABEL: shuffle_v2i64_20_copy:
487 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
489 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
490 ret <2 x i64> %shuffle
492 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
; Mask <2,1> takes the low element from %b and the high element from %a.
; Runs without SSE4.1 expect a single movsd into xmm0; the SSE4.1 and AVX1
; runs expect (v)pblendw and the AVX2 run expects vpblendd.
493 ; SSE2-LABEL: shuffle_v2i64_21:
495 ; SSE2-NEXT: movsd %xmm1, %xmm0
498 ; SSE3-LABEL: shuffle_v2i64_21:
500 ; SSE3-NEXT: movsd %xmm1, %xmm0
503 ; SSSE3-LABEL: shuffle_v2i64_21:
505 ; SSSE3-NEXT: movsd %xmm1, %xmm0
508 ; SSE41-LABEL: shuffle_v2i64_21:
510 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
513 ; AVX1-LABEL: shuffle_v2i64_21:
515 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
518 ; AVX2-LABEL: shuffle_v2i64_21:
520 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
522 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
523 ret <2 x i64> %shuffle
525 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; Same <2,1> blend as shuffle_v2i64_21, with an unreferenced leading %nonce
; argument so the expected code operates on xmm1/xmm2. Every SSE run expects a
; trailing register copy into xmm0; the AVX runs write xmm0 directly.
526 ; SSE2-LABEL: shuffle_v2i64_21_copy:
528 ; SSE2-NEXT: movsd %xmm2, %xmm1
529 ; SSE2-NEXT: movaps %xmm1, %xmm0
532 ; SSE3-LABEL: shuffle_v2i64_21_copy:
534 ; SSE3-NEXT: movsd %xmm2, %xmm1
535 ; SSE3-NEXT: movaps %xmm1, %xmm0
538 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
540 ; SSSE3-NEXT: movsd %xmm2, %xmm1
541 ; SSSE3-NEXT: movaps %xmm1, %xmm0
544 ; SSE41-LABEL: shuffle_v2i64_21_copy:
546 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
547 ; SSE41-NEXT: movdqa %xmm1, %xmm0
550 ; AVX1-LABEL: shuffle_v2i64_21_copy:
552 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
555 ; AVX2-LABEL: shuffle_v2i64_21_copy:
557 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
559 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
560 ret <2 x i64> %shuffle
562 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
; Mask <3,0> takes the high element of %b followed by the low element of %a.
; The SSE2 and SSE3 runs expect shufpd into xmm1 plus a copy to xmm0; the
; SSSE3, SSE4.1 and AVX runs expect a single (v)palignr writing xmm0.
563 ; SSE2-LABEL: shuffle_v2i64_30:
565 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
566 ; SSE2-NEXT: movapd %xmm1, %xmm0
569 ; SSE3-LABEL: shuffle_v2i64_30:
571 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
572 ; SSE3-NEXT: movapd %xmm1, %xmm0
575 ; SSSE3-LABEL: shuffle_v2i64_30:
577 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
580 ; SSE41-LABEL: shuffle_v2i64_30:
582 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
585 ; AVX-LABEL: shuffle_v2i64_30:
587 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
589 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
590 ret <2 x i64> %shuffle
592 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; Same <3,0> shuffle as shuffle_v2i64_30, with an unreferenced leading %nonce
; argument so the expected code operates on xmm1/xmm2. Every SSE run expects a
; trailing copy into xmm0; the AVX runs write xmm0 directly.
593 ; SSE2-LABEL: shuffle_v2i64_30_copy:
595 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
596 ; SSE2-NEXT: movapd %xmm2, %xmm0
599 ; SSE3-LABEL: shuffle_v2i64_30_copy:
601 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
602 ; SSE3-NEXT: movapd %xmm2, %xmm0
605 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
607 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
608 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
611 ; SSE41-LABEL: shuffle_v2i64_30_copy:
613 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
614 ; SSE41-NEXT: movdqa %xmm1, %xmm0
617 ; AVX-LABEL: shuffle_v2i64_30_copy:
619 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
621 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
622 ret <2 x i64> %shuffle
624 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
; Mask <3,1> pairs the high element of %b with the high element of %a. The SSE
; runs expect punpckhqdq into xmm1 plus a copy to xmm0; the AVX runs expect
; one vpunpckhqdq with swapped sources.
625 ; SSE-LABEL: shuffle_v2i64_31:
627 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
628 ; SSE-NEXT: movdqa %xmm1, %xmm0
631 ; AVX-LABEL: shuffle_v2i64_31:
633 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
635 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
636 ret <2 x i64> %shuffle
638 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; Same <3,1> shuffle as shuffle_v2i64_31, with an unreferenced leading %nonce
; argument so the expected code operates on xmm2/xmm1 and the result has to
; end up in xmm0.
639 ; SSE-LABEL: shuffle_v2i64_31_copy:
641 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
642 ; SSE-NEXT: movdqa %xmm2, %xmm0
645 ; AVX-LABEL: shuffle_v2i64_31_copy:
647 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
649 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
650 ret <2 x i64> %shuffle
653 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
; The second shuffle operand is zeroinitializer, so mask <0,3> keeps the low
; element of %a and zeroes the high lane. Every run expects a single
; register-to-register (v)movq.
654 ; SSE-LABEL: shuffle_v2i64_0z:
656 ; SSE-NEXT: movq %xmm0, %xmm0
659 ; AVX-LABEL: shuffle_v2i64_0z:
661 ; AVX-NEXT: vmovq %xmm0, %xmm0
663 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
664 ret <2 x i64> %shuffle
667 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
; Mask <1,3> against a zero vector moves the high element of %a into the low
; lane and zeroes the high lane. The expected code zeroes xmm1 with (v)pxor
; and then uses (v)punpckhqdq.
668 ; SSE-LABEL: shuffle_v2i64_1z:
670 ; SSE-NEXT: pxor %xmm1, %xmm1
671 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
674 ; AVX-LABEL: shuffle_v2i64_1z:
676 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
677 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
679 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
680 ret <2 x i64> %shuffle
683 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
; Mask <2,0> against a zero vector puts zero in the low lane and the low
; element of %a in the high lane. The expected code is a (v)movq followed by
; a (v)pshufd that swaps the two 64-bit halves.
684 ; SSE-LABEL: shuffle_v2i64_z0:
686 ; SSE-NEXT: movq %xmm0, %xmm0
687 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
690 ; AVX-LABEL: shuffle_v2i64_z0:
692 ; AVX-NEXT: vmovq %xmm0, %xmm0
693 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
695 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
696 ret <2 x i64> %shuffle
699 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; Mask <2,1> against a zero vector zeroes the low lane and keeps the high
; element of %a in place. Runs without SSE4.1 expect xorps + movsd; the SSE4.1
; and AVX1 runs expect a zeroed register blended with (v)pblendw, and the AVX2
; run expects vpblendd.
700 ; SSE2-LABEL: shuffle_v2i64_z1:
702 ; SSE2-NEXT: xorps %xmm1, %xmm1
703 ; SSE2-NEXT: movsd %xmm1, %xmm0
706 ; SSE3-LABEL: shuffle_v2i64_z1:
708 ; SSE3-NEXT: xorps %xmm1, %xmm1
709 ; SSE3-NEXT: movsd %xmm1, %xmm0
712 ; SSSE3-LABEL: shuffle_v2i64_z1:
714 ; SSSE3-NEXT: xorps %xmm1, %xmm1
715 ; SSSE3-NEXT: movsd %xmm1, %xmm0
718 ; SSE41-LABEL: shuffle_v2i64_z1:
720 ; SSE41-NEXT: pxor %xmm1, %xmm1
721 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
722 ; SSE41-NEXT: movdqa %xmm1, %xmm0
725 ; AVX1-LABEL: shuffle_v2i64_z1:
727 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
728 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
731 ; AVX2-LABEL: shuffle_v2i64_z1:
733 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
734 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
736 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
737 ret <2 x i64> %shuffle
740 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
; Floating-point variant of the <0,3> shuffle against a zero vector: keep the
; low double of %a and zero the high lane. Every run expects a single
; register-to-register (v)movq.
741 ; SSE-LABEL: shuffle_v2f64_0z:
743 ; SSE-NEXT: movq %xmm0, %xmm0
746 ; AVX-LABEL: shuffle_v2f64_0z:
748 ; AVX-NEXT: vmovq %xmm0, %xmm0
750 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
751 ret <2 x double> %shuffle
754 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
; Mask <1,3> against a zero vector moves the high double of %a into the low
; lane and zeroes the high lane. The expected code zeroes xmm1 with (v)xorpd
; and then uses (v)unpckhpd.
755 ; SSE-LABEL: shuffle_v2f64_1z:
757 ; SSE-NEXT: xorpd %xmm1, %xmm1
758 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
761 ; AVX-LABEL: shuffle_v2f64_1z:
763 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
764 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
766 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
767 ret <2 x double> %shuffle
770 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
; Mask <2,0> against a zero vector puts zero in the low lane and the low
; double of %a in the high lane. The expected code zeroes xmm1 and uses
; (v)unpcklpd; the SSE runs additionally expect a copy from xmm1 to xmm0.
771 ; SSE-LABEL: shuffle_v2f64_z0:
773 ; SSE-NEXT: xorpd %xmm1, %xmm1
774 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
775 ; SSE-NEXT: movapd %xmm1, %xmm0
778 ; AVX-LABEL: shuffle_v2f64_z0:
780 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
781 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
783 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
784 ret <2 x double> %shuffle
787 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
; Mask <2,1> against a zero vector zeroes the low lane and keeps the high
; double of %a in place. Runs without SSE4.1 expect xorps + movsd; the SSE4.1
; and AVX runs expect a zeroed register combined with (v)blendpd.
788 ; SSE2-LABEL: shuffle_v2f64_z1:
790 ; SSE2-NEXT: xorps %xmm1, %xmm1
791 ; SSE2-NEXT: movsd %xmm1, %xmm0
794 ; SSE3-LABEL: shuffle_v2f64_z1:
796 ; SSE3-NEXT: xorps %xmm1, %xmm1
797 ; SSE3-NEXT: movsd %xmm1, %xmm0
800 ; SSSE3-LABEL: shuffle_v2f64_z1:
802 ; SSSE3-NEXT: xorps %xmm1, %xmm1
803 ; SSSE3-NEXT: movsd %xmm1, %xmm0
806 ; SSE41-LABEL: shuffle_v2f64_z1:
808 ; SSE41-NEXT: xorpd %xmm1, %xmm1
809 ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
810 ; SSE41-NEXT: movapd %xmm1, %xmm0
813 ; AVX-LABEL: shuffle_v2f64_z1:
815 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
816 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
818 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
819 ret <2 x double> %shuffle
822 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
; Inserts the scalar argument %a into lane 0 of an undef vector, then shuffles
; with a zero vector (mask <0,3>) so lane 1 is zero. The expected code is a
; single move from %rdi into xmm0 (movd for SSE, vmovq for AVX).
823 ; SSE-LABEL: insert_reg_and_zero_v2i64:
825 ; SSE-NEXT: movd %rdi, %xmm0
828 ; AVX-LABEL: insert_reg_and_zero_v2i64:
830 ; AVX-NEXT: vmovq %rdi, %xmm0
832 %v = insertelement <2 x i64> undef, i64 %a, i32 0
833 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
834 ret <2 x i64> %shuffle
837 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
; Loads an i64 from %ptr, inserts it into lane 0 of an undef vector, then
; shuffles with a zero vector (mask <0,3>) so lane 1 is zero. The expected
; code is a single (v)movq with a (%rdi) memory operand.
838 ; SSE-LABEL: insert_mem_and_zero_v2i64:
840 ; SSE-NEXT: movq (%rdi), %xmm0
843 ; AVX-LABEL: insert_mem_and_zero_v2i64:
845 ; AVX-NEXT: vmovq (%rdi), %xmm0
; %a must be defined by a load from %ptr before it is inserted; this mirrors
; the load in insert_mem_and_zero_v2f64.
  %a = load i64* %ptr
848 %v = insertelement <2 x i64> undef, i64 %a, i32 0
849 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
850 ret <2 x i64> %shuffle
853 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
; Inserts the scalar double argument into lane 0 of an undef vector, then
; shuffles with a zero vector (mask <0,3>) so lane 1 is zero. The expected
; code is a single register-to-register (v)movq on xmm0.
854 ; SSE-LABEL: insert_reg_and_zero_v2f64:
856 ; SSE-NEXT: movq %xmm0, %xmm0
859 ; AVX-LABEL: insert_reg_and_zero_v2f64:
861 ; AVX-NEXT: vmovq %xmm0, %xmm0
863 %v = insertelement <2 x double> undef, double %a, i32 0
864 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
865 ret <2 x double> %shuffle
868 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
; Loads a double from %ptr, inserts it into lane 0 of an undef vector, then
; shuffles with a zero vector (mask <0,3>) so lane 1 is zero. The expected
; code is a single (v)movsd with a (%rdi) memory operand.
869 ; SSE-LABEL: insert_mem_and_zero_v2f64:
871 ; SSE-NEXT: movsd (%rdi), %xmm0
874 ; AVX-LABEL: insert_mem_and_zero_v2f64:
876 ; AVX-NEXT: vmovsd (%rdi), %xmm0
878 %a = load double* %ptr
879 %v = insertelement <2 x double> undef, double %a, i32 0
880 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
881 ret <2 x double> %shuffle
884 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
; Replaces the low lane of %b with the scalar %a: %a is inserted into lane 0
; of an undef vector and mask <0,3> keeps that lane plus the high lane of %b.
; The expected code moves %rdi into xmm1 and then merges it with xmm0 (movsd
; without SSE4.1, (v)pblendw for SSE4.1 and AVX1, vpblendd for AVX2).
885 ; SSE2-LABEL: insert_reg_lo_v2i64:
887 ; SSE2-NEXT: movd %rdi, %xmm1
888 ; SSE2-NEXT: movsd %xmm1, %xmm0
891 ; SSE3-LABEL: insert_reg_lo_v2i64:
893 ; SSE3-NEXT: movd %rdi, %xmm1
894 ; SSE3-NEXT: movsd %xmm1, %xmm0
897 ; SSSE3-LABEL: insert_reg_lo_v2i64:
899 ; SSSE3-NEXT: movd %rdi, %xmm1
900 ; SSSE3-NEXT: movsd %xmm1, %xmm0
903 ; SSE41-LABEL: insert_reg_lo_v2i64:
905 ; SSE41-NEXT: movd %rdi, %xmm1
906 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
907 ; SSE41-NEXT: movdqa %xmm1, %xmm0
910 ; AVX1-LABEL: insert_reg_lo_v2i64:
912 ; AVX1-NEXT: vmovq %rdi, %xmm1
913 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
916 ; AVX2-LABEL: insert_reg_lo_v2i64:
918 ; AVX2-NEXT: vmovq %rdi, %xmm1
919 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
921 %v = insertelement <2 x i64> undef, i64 %a, i32 0
922 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
923 ret <2 x i64> %shuffle
926 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
; Replaces the low lane of %b with an i64 loaded from %ptr: the loaded value
; is inserted into lane 0 of an undef vector and mask <0,3> keeps that lane
; plus the high lane of %b. Runs without SSE4.1 expect a single movlpd from
; (%rdi); the SSE4.1 and AVX runs expect a (v)movq load followed by a blend.
927 ; SSE2-LABEL: insert_mem_lo_v2i64:
929 ; SSE2-NEXT: movlpd (%rdi), %xmm0
932 ; SSE3-LABEL: insert_mem_lo_v2i64:
934 ; SSE3-NEXT: movlpd (%rdi), %xmm0
937 ; SSSE3-LABEL: insert_mem_lo_v2i64:
939 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
942 ; SSE41-LABEL: insert_mem_lo_v2i64:
944 ; SSE41-NEXT: movq (%rdi), %xmm1
945 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
946 ; SSE41-NEXT: movdqa %xmm1, %xmm0
949 ; AVX1-LABEL: insert_mem_lo_v2i64:
951 ; AVX1-NEXT: vmovq (%rdi), %xmm1
952 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
955 ; AVX2-LABEL: insert_mem_lo_v2i64:
957 ; AVX2-NEXT: vmovq (%rdi), %xmm1
958 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; %a must be defined by a load from %ptr before it is inserted; this mirrors
; the load in insert_mem_lo_v2f64.
  %a = load i64* %ptr
961 %v = insertelement <2 x i64> undef, i64 %a, i32 0
962 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
963 ret <2 x i64> %shuffle
966 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
; Replaces the high lane of %b with the scalar %a: mask <2,0> selects the low
; lane of %b followed by lane 0 of the vector holding %a. The expected code
; moves %rdi into xmm1 and combines with (v)punpcklqdq.
967 ; SSE-LABEL: insert_reg_hi_v2i64:
969 ; SSE-NEXT: movd %rdi, %xmm1
970 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
973 ; AVX-LABEL: insert_reg_hi_v2i64:
975 ; AVX-NEXT: vmovq %rdi, %xmm1
976 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
978 %v = insertelement <2 x i64> undef, i64 %a, i32 0
979 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
980 ret <2 x i64> %shuffle
983 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
; Replaces the high lane of %b with an i64 loaded from %ptr: mask <2,0>
; selects the low lane of %b followed by lane 0 of the vector holding the
; loaded value. The expected code loads (%rdi) into xmm1 with (v)movq and
; combines with (v)punpcklqdq.
984 ; SSE-LABEL: insert_mem_hi_v2i64:
986 ; SSE-NEXT: movq (%rdi), %xmm1
987 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
990 ; AVX-LABEL: insert_mem_hi_v2i64:
992 ; AVX-NEXT: vmovq (%rdi), %xmm1
993 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; %a must be defined by a load from %ptr before it is inserted; this mirrors
; the load in insert_mem_hi_v2f64.
  %a = load i64* %ptr
996 %v = insertelement <2 x i64> undef, i64 %a, i32 0
997 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
998 ret <2 x i64> %shuffle
1001 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; Replaces the low lane of %b with the scalar double %a (mask <0,3>). The SSE
; runs expect movsd into xmm1 plus a copy to xmm0; the AVX runs expect a
; single three-operand vmovsd.
1002 ; SSE-LABEL: insert_reg_lo_v2f64:
1004 ; SSE-NEXT: movsd %xmm0, %xmm1
1005 ; SSE-NEXT: movaps %xmm1, %xmm0
1008 ; AVX-LABEL: insert_reg_lo_v2f64:
1010 ; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
1012 %v = insertelement <2 x double> undef, double %a, i32 0
1013 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1014 ret <2 x double> %shuffle
1017 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
; Replaces the low lane of %b with a double loaded from %ptr (mask <0,3>).
; Every run expects the load to be folded into a single (v)movlpd from (%rdi).
1018 ; SSE-LABEL: insert_mem_lo_v2f64:
1020 ; SSE-NEXT: movlpd (%rdi), %xmm0
1023 ; AVX-LABEL: insert_mem_lo_v2f64:
1025 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
1027 %a = load double* %ptr
1028 %v = insertelement <2 x double> undef, double %a, i32 0
1029 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1030 ret <2 x double> %shuffle
1033 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
; Replaces the high lane of %b with the scalar double %a: mask <2,0> selects
; the low lane of %b followed by lane 0 of the vector holding %a. The SSE runs
; expect unpcklpd into xmm1 plus a copy to xmm0; the AVX runs expect one
; vunpcklpd.
1034 ; SSE-LABEL: insert_reg_hi_v2f64:
1036 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1037 ; SSE-NEXT: movapd %xmm1, %xmm0
1040 ; AVX-LABEL: insert_reg_hi_v2f64:
1042 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1044 %v = insertelement <2 x double> undef, double %a, i32 0
1045 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1046 ret <2 x double> %shuffle
1049 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
; Replaces the high lane of %b with a double loaded from %ptr (mask <2,0>).
; Every run expects the load to be folded into a single (v)movhpd from (%rdi).
1050 ; SSE-LABEL: insert_mem_hi_v2f64:
1052 ; SSE-NEXT: movhpd (%rdi), %xmm0
1055 ; AVX-LABEL: insert_mem_hi_v2f64:
1057 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1059 %a = load double* %ptr
1060 %v = insertelement <2 x double> undef, double %a, i32 0
1061 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1062 ret <2 x double> %shuffle
1065 define <2 x double> @insert_dup_reg_v2f64(double %a) {
; Inserts the scalar double %a into lane 0 of an undef vector and splats it
; into both lanes with mask <0,0>. The checks below record the lowering that
; is currently expected (movlhps for plain SSE2, (v)unpcklpd otherwise); see
; the FIXME for the instruction that would be preferred.
1066 ; FIXME: We should match movddup for SSE3 and higher here.
1068 ; SSE2-LABEL: insert_dup_reg_v2f64:
1070 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1073 ; SSE3-LABEL: insert_dup_reg_v2f64:
1075 ; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1078 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1080 ; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1083 ; SSE41-LABEL: insert_dup_reg_v2f64:
1085 ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1088 ; AVX-LABEL: insert_dup_reg_v2f64:
1090 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
1092 %v = insertelement <2 x double> undef, double %a, i32 0
1093 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1094 ret <2 x double> %shuffle
1096 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
; Loads a double from %ptr and splats it into both lanes (mask <0,0>). The
; plain SSE2 run expects a movsd load followed by movlhps; the SSE3 and later
; runs expect the load folded into a single (v)movddup from (%rdi).
1097 ; SSE2-LABEL: insert_dup_mem_v2f64:
1099 ; SSE2-NEXT: movsd (%rdi), %xmm0
1100 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1103 ; SSE3-LABEL: insert_dup_mem_v2f64:
1105 ; SSE3-NEXT: movddup (%rdi), %xmm0
1108 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1110 ; SSSE3-NEXT: movddup (%rdi), %xmm0
1113 ; SSE41-LABEL: insert_dup_mem_v2f64:
1115 ; SSE41-NEXT: movddup (%rdi), %xmm0
1118 ; AVX-LABEL: insert_dup_mem_v2f64:
1120 ; AVX-NEXT: vmovddup (%rdi), %xmm0
1122 %a = load double* %ptr
1123 %v = insertelement <2 x double> undef, double %a, i32 0
1124 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1125 ret <2 x double> %shuffle
1128 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
; Loads a whole <2 x double> from %ptr and swaps its two lanes (mask <1,0>).
; The SSE runs expect a separate movapd load followed by shufpd; the AVX runs
; expect the load folded into vpermilpd (printed as a mem[1,0] source).
1129 ; SSE-LABEL: shuffle_mem_v2f64_10:
1131 ; SSE-NEXT: movapd (%rdi), %xmm0
1132 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1135 ; AVX-LABEL: shuffle_mem_v2f64_10:
1137 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1139 %a = load <2 x double>* %ptr
1140 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1141 ret <2 x double> %shuffle