1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10 target triple = "x86_64-unknown-unknown"
; Splat element 0 of %a into both lanes (mask <0,0>); %b is unused.
; SSE and AVX1 are expected to use a pshufd-style dword shuffle, AVX2 a vpbroadcastq.
12 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
13 ; SSE-LABEL: shuffle_v2i64_00:
15 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
18 ; AVX1-LABEL: shuffle_v2i64_00:
20 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
23 ; AVX2-LABEL: shuffle_v2i64_00:
25 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
27 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
28 ret <2 x i64> %shuffle
; Swap the two elements of %a (mask <1,0>); expected to be a single (v)pshufd.
30 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
31 ; SSE-LABEL: shuffle_v2i64_10:
33 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
36 ; AVX-LABEL: shuffle_v2i64_10:
38 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
40 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
41 ret <2 x i64> %shuffle
; Splat element 1 of %a into both lanes (mask <1,1>); expected to be a single (v)pshufd.
43 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
44 ; SSE-LABEL: shuffle_v2i64_11:
46 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
49 ; AVX-LABEL: shuffle_v2i64_11:
51 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
53 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
54 ret <2 x i64> %shuffle
; Splat element 0 of %b (mask <2,2> selects from the second operand).
; The checks expect the shuffle to read xmm1 and write xmm0 without an extra copy.
56 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
57 ; SSE-LABEL: shuffle_v2i64_22:
59 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
62 ; AVX1-LABEL: shuffle_v2i64_22:
64 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
67 ; AVX2-LABEL: shuffle_v2i64_22:
69 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
71 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
72 ret <2 x i64> %shuffle
; Swap the two elements of %b (mask <3,2>); expected to be a single (v)pshufd reading xmm1.
74 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
75 ; SSE-LABEL: shuffle_v2i64_32:
77 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
80 ; AVX-LABEL: shuffle_v2i64_32:
82 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
84 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
85 ret <2 x i64> %shuffle
; Splat element 1 of %b into both lanes (mask <3,3>); expected to be a single (v)pshufd reading xmm1.
87 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
88 ; SSE-LABEL: shuffle_v2i64_33:
90 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
93 ; AVX-LABEL: shuffle_v2i64_33:
95 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
97 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
98 ret <2 x i64> %shuffle
; Floating-point splat of element 0 of %a (mask <0,0>).
; Plain SSE2 is expected to use movlhps; SSE3 and later use (v)movddup.
101 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
102 ; SSE2-LABEL: shuffle_v2f64_00:
104 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
107 ; SSE3-LABEL: shuffle_v2f64_00:
109 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
112 ; SSSE3-LABEL: shuffle_v2f64_00:
114 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
117 ; SSE41-LABEL: shuffle_v2f64_00:
119 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
122 ; AVX-LABEL: shuffle_v2f64_00:
124 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
126 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
127 ret <2 x double> %shuffle
; Swap the two elements of %a (mask <1,0>) in the floating-point domain.
; SSE expects shufpd, AVX expects vpermilpd; the AVX512VL run checks the
; immediate form of vpermilpd followed by the return.
129 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
130 ; SSE-LABEL: shuffle_v2f64_10:
132 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
135 ; AVX-LABEL: shuffle_v2f64_10:
137 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
140 ; AVX512VL-LABEL: shuffle_v2f64_10:
142 ; AVX512VL-NEXT: vpermilpd $1, %xmm0, %xmm0
143 ; AVX512VL-NEXT: retq
144 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
145 ret <2 x double> %shuffle
; Floating-point splat of element 1 of %a (mask <1,1>); expected to be a single (v)movhlps.
147 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
148 ; SSE-LABEL: shuffle_v2f64_11:
150 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
153 ; AVX-LABEL: shuffle_v2f64_11:
155 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
157 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
158 ret <2 x double> %shuffle
; Floating-point splat of element 0 of %b (mask <2,2>).
; Plain SSE2 shuffles xmm1 in place with movlhps and then copies it to xmm0;
; SSE3 and later are expected to use (v)movddup straight from xmm1 into xmm0.
160 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
161 ; SSE2-LABEL: shuffle_v2f64_22:
163 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
164 ; SSE2-NEXT: movaps %xmm1, %xmm0
167 ; SSE3-LABEL: shuffle_v2f64_22:
169 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
172 ; SSSE3-LABEL: shuffle_v2f64_22:
174 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
177 ; SSE41-LABEL: shuffle_v2f64_22:
179 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
182 ; AVX-LABEL: shuffle_v2f64_22:
184 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
186 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
187 ret <2 x double> %shuffle
; Swap the two elements of %b (mask <3,2>) in the floating-point domain.
; SSE shuffles xmm1 in place with shufpd and copies the result to xmm0;
; AVX and the AVX512VL run expect a single vpermilpd from xmm1 into xmm0.
189 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
190 ; SSE-LABEL: shuffle_v2f64_32:
192 ; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
193 ; SSE-NEXT: movapd %xmm1, %xmm0
196 ; AVX-LABEL: shuffle_v2f64_32:
198 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
201 ; AVX512VL-LABEL: shuffle_v2f64_32:
203 ; AVX512VL-NEXT: vpermilpd $1, %xmm1, %xmm0
204 ; AVX512VL-NEXT: retq
205 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
206 ret <2 x double> %shuffle
; Floating-point splat of element 1 of %b (mask <3,3>).
; SSE uses movhlps on xmm1 plus a copy to xmm0; AVX expects one vmovhlps into xmm0.
208 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
209 ; SSE-LABEL: shuffle_v2f64_33:
211 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
212 ; SSE-NEXT: movaps %xmm1, %xmm0
215 ; AVX-LABEL: shuffle_v2f64_33:
217 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
219 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
220 ret <2 x double> %shuffle
; Blend of element 0 from %a with element 1 from %b (mask <0,3>).
; Targets before SSE4.1 are expected to use movsd into xmm1 plus a copy;
; SSE4.1 and AVX are expected to use a single (v)blendpd.
222 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
223 ; SSE2-LABEL: shuffle_v2f64_03:
225 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
226 ; SSE2-NEXT: movapd %xmm1, %xmm0
229 ; SSE3-LABEL: shuffle_v2f64_03:
231 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
232 ; SSE3-NEXT: movapd %xmm1, %xmm0
235 ; SSSE3-LABEL: shuffle_v2f64_03:
237 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
238 ; SSSE3-NEXT: movapd %xmm1, %xmm0
241 ; SSE41-LABEL: shuffle_v2f64_03:
243 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
246 ; AVX-LABEL: shuffle_v2f64_03:
248 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
250 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
251 ret <2 x double> %shuffle
; Blend of element 0 from %b with element 1 from %a (mask <2,1>).
; Targets before SSE4.1 are expected to use movsd directly into xmm0;
; SSE4.1 and AVX are expected to use a single (v)blendpd.
253 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
254 ; SSE2-LABEL: shuffle_v2f64_21:
256 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
259 ; SSE3-LABEL: shuffle_v2f64_21:
261 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
264 ; SSSE3-LABEL: shuffle_v2f64_21:
266 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
269 ; SSE41-LABEL: shuffle_v2f64_21:
271 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
274 ; AVX-LABEL: shuffle_v2f64_21:
276 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
278 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
279 ret <2 x double> %shuffle
; Combine the low elements of %a and %b (mask <0,2>); expected to be a single (v)punpcklqdq.
283 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
284 ; SSE-LABEL: shuffle_v2i64_02:
286 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
289 ; AVX-LABEL: shuffle_v2i64_02:
291 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
293 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
294 ret <2 x i64> %shuffle
; Mask <0,2> again, but with an unused leading %nonce argument so that, per the
; checks, the inputs are in xmm1/xmm2 and the result must end up in xmm0.
; SSE is expected to need an extra movdqa copy; AVX writes xmm0 directly.
296 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
297 ; SSE-LABEL: shuffle_v2i64_02_copy:
299 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
300 ; SSE-NEXT: movdqa %xmm1, %xmm0
303 ; AVX-LABEL: shuffle_v2i64_02_copy:
305 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
307 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
308 ret <2 x i64> %shuffle
; Integer blend of element 0 from %a with element 1 from %b (mask <0,3>).
; Targets before SSE4.1 are expected to use movsd plus a movapd copy;
; SSE4.1 and AVX1 use (v)pblendw, and AVX2 uses vpblendd.
310 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
311 ; SSE2-LABEL: shuffle_v2i64_03:
313 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
314 ; SSE2-NEXT: movapd %xmm1, %xmm0
317 ; SSE3-LABEL: shuffle_v2i64_03:
319 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
320 ; SSE3-NEXT: movapd %xmm1, %xmm0
323 ; SSSE3-LABEL: shuffle_v2i64_03:
325 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
326 ; SSSE3-NEXT: movapd %xmm1, %xmm0
329 ; SSE41-LABEL: shuffle_v2i64_03:
331 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
334 ; AVX1-LABEL: shuffle_v2i64_03:
336 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
339 ; AVX2-LABEL: shuffle_v2i64_03:
341 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
343 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
344 ret <2 x i64> %shuffle
; Mask <0,3> again, with an unused leading %nonce argument so that, per the
; checks, the inputs are in xmm1/xmm2. All SSE levels are expected to need a
; final register copy into xmm0; AVX1 and AVX2 blend straight into xmm0.
346 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
347 ; SSE2-LABEL: shuffle_v2i64_03_copy:
349 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
350 ; SSE2-NEXT: movapd %xmm2, %xmm0
353 ; SSE3-LABEL: shuffle_v2i64_03_copy:
355 ; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
356 ; SSE3-NEXT: movapd %xmm2, %xmm0
359 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
361 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
362 ; SSSE3-NEXT: movapd %xmm2, %xmm0
365 ; SSE41-LABEL: shuffle_v2i64_03_copy:
367 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
368 ; SSE41-NEXT: movdqa %xmm1, %xmm0
371 ; AVX1-LABEL: shuffle_v2i64_03_copy:
373 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
376 ; AVX2-LABEL: shuffle_v2i64_03_copy:
378 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
380 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
381 ret <2 x i64> %shuffle
; High element of %a followed by the low element of %b (mask <1,2>).
; SSE2 and SSE3 are expected to use shufpd; SSSE3 and SSE4.1 use palignr into
; xmm1 plus a copy; AVX uses a single vpalignr.
383 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
384 ; SSE2-LABEL: shuffle_v2i64_12:
386 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
389 ; SSE3-LABEL: shuffle_v2i64_12:
391 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
394 ; SSSE3-LABEL: shuffle_v2i64_12:
396 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
397 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
400 ; SSE41-LABEL: shuffle_v2i64_12:
402 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
403 ; SSE41-NEXT: movdqa %xmm1, %xmm0
406 ; AVX-LABEL: shuffle_v2i64_12:
408 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
410 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
411 ret <2 x i64> %shuffle
; Mask <1,2> again, with an unused leading %nonce argument so that, per the
; checks, the inputs are in xmm1/xmm2. Every SSE level is expected to need a
; final copy into xmm0; AVX writes xmm0 directly with vpalignr.
413 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
414 ; SSE2-LABEL: shuffle_v2i64_12_copy:
416 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
417 ; SSE2-NEXT: movapd %xmm1, %xmm0
420 ; SSE3-LABEL: shuffle_v2i64_12_copy:
422 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
423 ; SSE3-NEXT: movapd %xmm1, %xmm0
426 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
428 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
429 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
432 ; SSE41-LABEL: shuffle_v2i64_12_copy:
434 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
435 ; SSE41-NEXT: movdqa %xmm2, %xmm0
438 ; AVX-LABEL: shuffle_v2i64_12_copy:
440 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
442 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
443 ret <2 x i64> %shuffle
; Combine the high elements of %a and %b (mask <1,3>); expected to be a single (v)punpckhqdq.
445 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
446 ; SSE-LABEL: shuffle_v2i64_13:
448 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
451 ; AVX-LABEL: shuffle_v2i64_13:
453 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
455 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
456 ret <2 x i64> %shuffle
; Mask <1,3> again, with an unused leading %nonce argument so that, per the
; checks, the inputs are in xmm1/xmm2. SSE is expected to need a movdqa copy
; into xmm0; AVX writes xmm0 directly.
458 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
459 ; SSE-LABEL: shuffle_v2i64_13_copy:
461 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
462 ; SSE-NEXT: movdqa %xmm1, %xmm0
465 ; AVX-LABEL: shuffle_v2i64_13_copy:
467 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
469 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
470 ret <2 x i64> %shuffle
; Low element of %b followed by the low element of %a (mask <2,0>).
; SSE is expected to unpack into xmm1 and copy to xmm0; AVX uses one
; vpunpcklqdq with the operands swapped.
472 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
473 ; SSE-LABEL: shuffle_v2i64_20:
475 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
476 ; SSE-NEXT: movdqa %xmm1, %xmm0
479 ; AVX-LABEL: shuffle_v2i64_20:
481 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
483 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
484 ret <2 x i64> %shuffle
; Mask <2,0> again, with an unused leading %nonce argument so that, per the
; checks, the inputs are in xmm1/xmm2. SSE is expected to unpack into xmm2 and
; copy to xmm0; AVX writes xmm0 directly.
486 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
487 ; SSE-LABEL: shuffle_v2i64_20_copy:
489 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
490 ; SSE-NEXT: movdqa %xmm2, %xmm0
493 ; AVX-LABEL: shuffle_v2i64_20_copy:
495 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
497 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
498 ret <2 x i64> %shuffle
; Integer blend of element 0 from %b with element 1 from %a (mask <2,1>).
; Targets before SSE4.1 are expected to use movsd into xmm0; SSE4.1 and AVX1
; use (v)pblendw, and AVX2 uses vpblendd.
500 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
501 ; SSE2-LABEL: shuffle_v2i64_21:
503 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
506 ; SSE3-LABEL: shuffle_v2i64_21:
508 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
511 ; SSSE3-LABEL: shuffle_v2i64_21:
513 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
516 ; SSE41-LABEL: shuffle_v2i64_21:
518 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
521 ; AVX1-LABEL: shuffle_v2i64_21:
523 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
526 ; AVX2-LABEL: shuffle_v2i64_21:
528 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
530 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
531 ret <2 x i64> %shuffle
; Mask <2,1> again, with an unused leading %nonce argument so that, per the
; checks, the inputs are in xmm1/xmm2. All SSE levels are expected to blend
; into xmm1 and then copy to xmm0; AVX1 and AVX2 blend straight into xmm0.
533 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
534 ; SSE2-LABEL: shuffle_v2i64_21_copy:
536 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
537 ; SSE2-NEXT: movapd %xmm1, %xmm0
540 ; SSE3-LABEL: shuffle_v2i64_21_copy:
542 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
543 ; SSE3-NEXT: movapd %xmm1, %xmm0
546 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
548 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
549 ; SSSE3-NEXT: movapd %xmm1, %xmm0
552 ; SSE41-LABEL: shuffle_v2i64_21_copy:
554 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
555 ; SSE41-NEXT: movdqa %xmm1, %xmm0
558 ; AVX1-LABEL: shuffle_v2i64_21_copy:
560 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
563 ; AVX2-LABEL: shuffle_v2i64_21_copy:
565 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
567 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
568 ret <2 x i64> %shuffle
; High element of %b followed by the low element of %a (mask <3,0>).
; SSE2 and SSE3 are expected to use shufpd on xmm1 plus a copy; SSSE3 and
; later use a single (v)palignr that writes xmm0.
570 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
571 ; SSE2-LABEL: shuffle_v2i64_30:
573 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
574 ; SSE2-NEXT: movapd %xmm1, %xmm0
577 ; SSE3-LABEL: shuffle_v2i64_30:
579 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
580 ; SSE3-NEXT: movapd %xmm1, %xmm0
583 ; SSSE3-LABEL: shuffle_v2i64_30:
585 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
588 ; SSE41-LABEL: shuffle_v2i64_30:
590 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
593 ; AVX-LABEL: shuffle_v2i64_30:
595 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
597 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
598 ret <2 x i64> %shuffle
; Mask <3,0> again, with an unused leading %nonce argument so that, per the
; checks, the inputs are in xmm1/xmm2. Every SSE level is expected to need a
; final copy into xmm0; AVX writes xmm0 directly with vpalignr.
600 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
601 ; SSE2-LABEL: shuffle_v2i64_30_copy:
603 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
604 ; SSE2-NEXT: movapd %xmm2, %xmm0
607 ; SSE3-LABEL: shuffle_v2i64_30_copy:
609 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
610 ; SSE3-NEXT: movapd %xmm2, %xmm0
613 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
615 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
616 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
619 ; SSE41-LABEL: shuffle_v2i64_30_copy:
621 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
622 ; SSE41-NEXT: movdqa %xmm1, %xmm0
625 ; AVX-LABEL: shuffle_v2i64_30_copy:
627 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
629 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
630 ret <2 x i64> %shuffle
; High element of %b followed by the high element of %a (mask <3,1>).
; SSE is expected to unpack into xmm1 and copy to xmm0; AVX uses one
; vpunpckhqdq with the operands swapped.
632 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
633 ; SSE-LABEL: shuffle_v2i64_31:
635 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
636 ; SSE-NEXT: movdqa %xmm1, %xmm0
639 ; AVX-LABEL: shuffle_v2i64_31:
641 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
643 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
644 ret <2 x i64> %shuffle
; Mask <3,1> again, with an unused leading %nonce argument so that, per the
; checks, the inputs are in xmm1/xmm2. SSE is expected to unpack into xmm2 and
; copy to xmm0; AVX writes xmm0 directly.
646 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
647 ; SSE-LABEL: shuffle_v2i64_31_copy:
649 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
650 ; SSE-NEXT: movdqa %xmm2, %xmm0
653 ; AVX-LABEL: shuffle_v2i64_31_copy:
655 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
657 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
658 ret <2 x i64> %shuffle
; Keep element 0 of %a and zero element 1 (mask <0,3> against zeroinitializer).
; Expected to be a single (v)movq, which clears the upper half.
661 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
662 ; SSE-LABEL: shuffle_v2i64_0z:
664 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
667 ; AVX-LABEL: shuffle_v2i64_0z:
669 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
671 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
672 ret <2 x i64> %shuffle
; Move element 1 of %a into lane 0 and zero lane 1 (mask <1,3> against zeroinitializer).
; Expected to be a single byte shift right, (v)psrldq.
675 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
676 ; SSE-LABEL: shuffle_v2i64_1z:
678 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
681 ; AVX-LABEL: shuffle_v2i64_1z:
683 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
685 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
686 ret <2 x i64> %shuffle
; Zero lane 0 and move element 0 of %a into lane 1 (mask <2,0> against zeroinitializer).
; Expected to be a single byte shift left, (v)pslldq.
689 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
690 ; SSE-LABEL: shuffle_v2i64_z0:
692 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
695 ; AVX-LABEL: shuffle_v2i64_z0:
697 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
699 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
700 ret <2 x i64> %shuffle
; Zero lane 0 and keep element 1 of %a (mask <2,1> against zeroinitializer).
; Every target is expected to materialize a zero register first and then blend
; it in, using movsd before SSE4.1, (v)pblendw on SSE4.1 and AVX1, vpblendd on AVX2.
703 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
704 ; SSE2-LABEL: shuffle_v2i64_z1:
706 ; SSE2-NEXT: xorpd %xmm1, %xmm1
707 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
710 ; SSE3-LABEL: shuffle_v2i64_z1:
712 ; SSE3-NEXT: xorpd %xmm1, %xmm1
713 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
716 ; SSSE3-LABEL: shuffle_v2i64_z1:
718 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
719 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
722 ; SSE41-LABEL: shuffle_v2i64_z1:
724 ; SSE41-NEXT: pxor %xmm1, %xmm1
725 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
728 ; AVX1-LABEL: shuffle_v2i64_z1:
730 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
731 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
734 ; AVX2-LABEL: shuffle_v2i64_z1:
736 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
737 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
739 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
740 ret <2 x i64> %shuffle
; Keep element 0 of %a and zero element 1 (mask <0,3> against zeroinitializer).
; Expected to be a single (v)movq even though the element type is double.
743 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
744 ; SSE-LABEL: shuffle_v2f64_0z:
746 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
749 ; AVX-LABEL: shuffle_v2f64_0z:
751 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
753 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
754 ret <2 x double> %shuffle
; Move element 1 of %a into lane 0 and zero lane 1 (mask <1,3> against zeroinitializer).
; Expected to zero xmm1 and then combine the high halves with (v)unpckhpd.
757 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
758 ; SSE-LABEL: shuffle_v2f64_1z:
760 ; SSE-NEXT: xorpd %xmm1, %xmm1
761 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
764 ; AVX-LABEL: shuffle_v2f64_1z:
766 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
767 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
769 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
770 ret <2 x double> %shuffle
; Zero lane 0 and move element 0 of %a into lane 1 (mask <2,0> against zeroinitializer).
; Expected to zero xmm1 and combine the low halves with (v)unpcklpd; SSE also
; needs a movapd copy because the unpack result lands in xmm1.
773 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
774 ; SSE-LABEL: shuffle_v2f64_z0:
776 ; SSE-NEXT: xorpd %xmm1, %xmm1
777 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
778 ; SSE-NEXT: movapd %xmm1, %xmm0
781 ; AVX-LABEL: shuffle_v2f64_z0:
783 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
784 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
786 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
787 ret <2 x double> %shuffle
; Zero lane 0 and keep element 1 of %a (mask <2,1> against zeroinitializer).
; Every target is expected to zero xmm1 and then blend it in, using movsd
; before SSE4.1 and (v)blendpd on SSE4.1 and AVX.
790 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
791 ; SSE2-LABEL: shuffle_v2f64_z1:
793 ; SSE2-NEXT: xorpd %xmm1, %xmm1
794 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
797 ; SSE3-LABEL: shuffle_v2f64_z1:
799 ; SSE3-NEXT: xorpd %xmm1, %xmm1
800 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
803 ; SSSE3-LABEL: shuffle_v2f64_z1:
805 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
806 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
809 ; SSE41-LABEL: shuffle_v2f64_z1:
811 ; SSE41-NEXT: xorpd %xmm1, %xmm1
812 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
815 ; AVX-LABEL: shuffle_v2f64_z1:
817 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
818 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
820 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
821 ret <2 x double> %shuffle
; A zero blend (mask <2,1>) followed by a swap of the two 64-bit halves that is
; written as a <4 x float> shuffle <2,3,0,1> between two bitcasts.
; The checks expect the whole chain to become a zeroing xor plus one (v)shufpd.
824 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
825 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
827 ; SSE-NEXT: xorpd %xmm1, %xmm1
828 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
831 ; AVX-LABEL: shuffle_v2f64_bitcast_1z:
833 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
834 ; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
836 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
837 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
838 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
839 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
840 ret <2 x double> %bitcast64
; Insert a scalar i64 argument into lane 0 and zero lane 1.
; Expected to be a single move from %rdi into xmm0 (movd on SSE, vmovq on AVX).
843 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
844 ; SSE-LABEL: insert_reg_and_zero_v2i64:
846 ; SSE-NEXT: movd %rdi, %xmm0
849 ; AVX-LABEL: insert_reg_and_zero_v2i64:
851 ; AVX-NEXT: vmovq %rdi, %xmm0
853 %v = insertelement <2 x i64> undef, i64 %a, i32 0
854 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
855 ret <2 x i64> %shuffle
; Load an i64 from %ptr into lane 0 and zero lane 1.
; Expected to fold into a single (v)movq load.
858 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
859 ; SSE-LABEL: insert_mem_and_zero_v2i64:
861 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
864 ; AVX-LABEL: insert_mem_and_zero_v2i64:
866 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
868 %a = load i64, i64* %ptr
869 %v = insertelement <2 x i64> undef, i64 %a, i32 0
870 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
871 ret <2 x i64> %shuffle
; Insert a scalar double argument into lane 0 and zero lane 1.
; Expected to be a single (v)movq on xmm0 that clears the upper half.
874 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
875 ; SSE-LABEL: insert_reg_and_zero_v2f64:
877 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
880 ; AVX-LABEL: insert_reg_and_zero_v2f64:
882 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
884 %v = insertelement <2 x double> undef, double %a, i32 0
885 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
886 ret <2 x double> %shuffle
; Load a double from %ptr into lane 0 and zero lane 1.
; Expected to fold into a single (v)movsd load.
889 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
890 ; SSE-LABEL: insert_mem_and_zero_v2f64:
892 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
895 ; AVX-LABEL: insert_mem_and_zero_v2f64:
897 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
899 %a = load double, double* %ptr
900 %v = insertelement <2 x double> undef, double %a, i32 0
901 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
902 ret <2 x double> %shuffle
; Replace lane 0 of %b with a scalar i64 argument, keeping lane 1 (mask <0,3>).
; Every target is expected to move %rdi into xmm1 and then blend, using movsd
; before SSE4.1, (v)pblendw on SSE4.1 and AVX1, and vpblendd on AVX2.
905 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
906 ; SSE2-LABEL: insert_reg_lo_v2i64:
908 ; SSE2-NEXT: movd %rdi, %xmm1
909 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
912 ; SSE3-LABEL: insert_reg_lo_v2i64:
914 ; SSE3-NEXT: movd %rdi, %xmm1
915 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
918 ; SSSE3-LABEL: insert_reg_lo_v2i64:
920 ; SSSE3-NEXT: movd %rdi, %xmm1
921 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
924 ; SSE41-LABEL: insert_reg_lo_v2i64:
926 ; SSE41-NEXT: movd %rdi, %xmm1
927 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
930 ; AVX1-LABEL: insert_reg_lo_v2i64:
932 ; AVX1-NEXT: vmovq %rdi, %xmm1
933 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
936 ; AVX2-LABEL: insert_reg_lo_v2i64:
938 ; AVX2-NEXT: vmovq %rdi, %xmm1
939 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
941 %v = insertelement <2 x i64> undef, i64 %a, i32 0
942 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
943 ret <2 x i64> %shuffle
; Replace lane 0 of %b with an i64 loaded from %ptr, keeping lane 1 (mask <0,3>).
; Targets before SSE4.1 are expected to fold everything into one movlpd from
; memory; SSE4.1 and AVX load with (v)movq into xmm1 and then blend.
946 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
947 ; SSE2-LABEL: insert_mem_lo_v2i64:
949 ; SSE2-NEXT: movlpd (%rdi), %xmm0
952 ; SSE3-LABEL: insert_mem_lo_v2i64:
954 ; SSE3-NEXT: movlpd (%rdi), %xmm0
957 ; SSSE3-LABEL: insert_mem_lo_v2i64:
959 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
962 ; SSE41-LABEL: insert_mem_lo_v2i64:
964 ; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
965 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
968 ; AVX1-LABEL: insert_mem_lo_v2i64:
970 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
971 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
974 ; AVX2-LABEL: insert_mem_lo_v2i64:
976 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
977 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
979 %a = load i64, i64* %ptr
980 %v = insertelement <2 x i64> undef, i64 %a, i32 0
981 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
982 ret <2 x i64> %shuffle
; Keep lane 0 of %b and place a scalar i64 argument in lane 1 (mask <2,0>).
; Expected to move %rdi into xmm1 and combine with (v)punpcklqdq.
985 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
986 ; SSE-LABEL: insert_reg_hi_v2i64:
988 ; SSE-NEXT: movd %rdi, %xmm1
989 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
992 ; AVX-LABEL: insert_reg_hi_v2i64:
994 ; AVX-NEXT: vmovq %rdi, %xmm1
995 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
997 %v = insertelement <2 x i64> undef, i64 %a, i32 0
998 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
999 ret <2 x i64> %shuffle
; Keep lane 0 of %b and place an i64 loaded from %ptr in lane 1 (mask <2,0>).
; Expected to load with (v)movq into xmm1 and combine with (v)punpcklqdq.
1002 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1003 ; SSE-LABEL: insert_mem_hi_v2i64:
1005 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1006 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1009 ; AVX-LABEL: insert_mem_hi_v2i64:
1011 ; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1012 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1014 %a = load i64, i64* %ptr
1015 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1016 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1017 ret <2 x i64> %shuffle
; Replace lane 0 of %b with a scalar double argument, keeping lane 1 (mask <0,3>).
; SSE is expected to use movsd into xmm1 plus a copy; AVX uses one vmovsd into xmm0.
1020 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1021 ; SSE-LABEL: insert_reg_lo_v2f64:
1023 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1024 ; SSE-NEXT: movapd %xmm1, %xmm0
1027 ; AVX-LABEL: insert_reg_lo_v2f64:
1029 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1031 %v = insertelement <2 x double> undef, double %a, i32 0
1032 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1033 ret <2 x double> %shuffle
; Replace lane 0 of %b with a double loaded from %ptr, keeping lane 1 (mask <0,3>).
; Expected to fold into a single (v)movlpd with a memory operand.
1036 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1037 ; SSE-LABEL: insert_mem_lo_v2f64:
1039 ; SSE-NEXT: movlpd (%rdi), %xmm0
1042 ; AVX-LABEL: insert_mem_lo_v2f64:
1044 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
1046 %a = load double, double* %ptr
1047 %v = insertelement <2 x double> undef, double %a, i32 0
1048 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1049 ret <2 x double> %shuffle
; Keep lane 0 of %b and place a scalar double argument in lane 1 (mask <2,0>).
; SSE is expected to use unpcklpd into xmm1 plus a copy; AVX uses one vunpcklpd into xmm0.
1052 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1053 ; SSE-LABEL: insert_reg_hi_v2f64:
1055 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1056 ; SSE-NEXT: movapd %xmm1, %xmm0
1059 ; AVX-LABEL: insert_reg_hi_v2f64:
1061 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1063 %v = insertelement <2 x double> undef, double %a, i32 0
1064 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1065 ret <2 x double> %shuffle
; Keep lane 0 of %b and place a double loaded from %ptr in lane 1 (mask <2,0>).
; Expected to fold into a single (v)movhpd with a memory operand.
1068 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1069 ; SSE-LABEL: insert_mem_hi_v2f64:
1071 ; SSE-NEXT: movhpd (%rdi), %xmm0
1074 ; AVX-LABEL: insert_mem_hi_v2f64:
1076 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1078 %a = load double, double* %ptr
1079 %v = insertelement <2 x double> undef, double %a, i32 0
1080 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1081 ret <2 x double> %shuffle
1084 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1085 ; Splat of a scalar double argument into both lanes. The checks below already expect (v)movddup for SSE3 and higher; only plain SSE2 falls back to movlhps.
1087 ; SSE2-LABEL: insert_dup_reg_v2f64:
1089 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1092 ; SSE3-LABEL: insert_dup_reg_v2f64:
1094 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1097 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1099 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1102 ; SSE41-LABEL: insert_dup_reg_v2f64:
1104 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1107 ; AVX-LABEL: insert_dup_reg_v2f64:
1109 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1111 %v = insertelement <2 x double> undef, double %a, i32 0
1112 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1113 ret <2 x double> %shuffle
; Splat of a double loaded from %ptr into both lanes.
; Plain SSE2 is expected to load with movsd and then duplicate with movlhps;
; SSE3 and later fold the load into a single (v)movddup from memory.
1115 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1116 ; SSE2-LABEL: insert_dup_mem_v2f64:
1118 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1119 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1122 ; SSE3-LABEL: insert_dup_mem_v2f64:
1124 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1127 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1129 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1132 ; SSE41-LABEL: insert_dup_mem_v2f64:
1134 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1137 ; AVX-LABEL: insert_dup_mem_v2f64:
1139 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1141 %a = load double, double* %ptr
1142 %v = insertelement <2 x double> undef, double %a, i32 0
1143 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1144 ret <2 x double> %shuffle
; Splat of an i64 loaded from %ptr (the load is marked align 1) into both lanes.
; SSE and AVX1 are expected to load with (v)movq and then duplicate with
; (v)pshufd; AVX2 folds the load into a single vpbroadcastq from memory.
; The splat result is returned so the function body ends in a terminator,
; matching every other test function in this file.
1147 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1148 ; SSE-LABEL: insert_dup_mem_v2i64:
1150 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1151 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1154 ; AVX1-LABEL: insert_dup_mem_v2i64:
1156 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1157 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1160 ; AVX2-LABEL: insert_dup_mem_v2i64:
1162 ; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
1164 %tmp = load i64, i64* %ptr, align 1
1165 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1166 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
1167 ret <2 x i64> %tmp2
; Load a <2 x double> from %ptr and swap its two elements (mask <1,0>).
; SSE is expected to load with movapd and then shufpd; AVX folds the load into
; vpermilpd, and the AVX512VL run checks the immediate form with a memory operand.
1170 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1171 ; SSE-LABEL: shuffle_mem_v2f64_10:
1173 ; SSE-NEXT: movapd (%rdi), %xmm0
1174 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1177 ; AVX-LABEL: shuffle_mem_v2f64_10:
1179 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1182 ; AVX512VL-LABEL: shuffle_mem_v2f64_10:
1184 ; AVX512VL-NEXT: vpermilpd $1, (%rdi), %xmm0
1185 ; AVX512VL-NEXT: retq
1186 %a = load <2 x double>, <2 x double>* %ptr
1187 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1188 ret <2 x double> %shuffle