1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10 target triple = "x86_64-unknown-unknown"
; Unary splat of the low lane. shufflevector mask <0, 0> gives <a[0], a[0]>, %b is never selected.
; The checks expect one pshufd/vpshufd, or a vpbroadcastq on the AVX2 and AVX512VL runs.
12 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
13 ; SSE-LABEL: shuffle_v2i64_00:
15 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
18 ; AVX1-LABEL: shuffle_v2i64_00:
20 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
23 ; AVX2-LABEL: shuffle_v2i64_00:
25 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
28 ; AVX512VL-LABEL: shuffle_v2i64_00:
30 ; AVX512VL-NEXT: vpbroadcastq %xmm0, %xmm0
32 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
33 ret <2 x i64> %shuffle
; Unary lane swap. Mask <1, 0> gives <a[1], a[0]>, %b is never selected.
; The checks expect a single pshufd/vpshufd that exchanges the two 64-bit halves of xmm0.
35 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
36 ; SSE-LABEL: shuffle_v2i64_10:
38 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
41 ; AVX-LABEL: shuffle_v2i64_10:
43 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
45 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
46 ret <2 x i64> %shuffle
; Unary splat of the high lane. Mask <1, 1> gives <a[1], a[1]>, %b is never selected.
; The checks expect a single pshufd/vpshufd duplicating the upper 64 bits of xmm0.
48 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
49 ; SSE-LABEL: shuffle_v2i64_11:
51 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
54 ; AVX-LABEL: shuffle_v2i64_11:
56 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
58 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
59 ret <2 x i64> %shuffle
; Splat of the low lane of the second operand. Mask indices 2 and 3 address %b, so <2, 2> gives <b[0], b[0]>.
; %a is never selected, which is why every check reads xmm1 and writes xmm0.
61 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
62 ; SSE-LABEL: shuffle_v2i64_22:
64 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
67 ; AVX1-LABEL: shuffle_v2i64_22:
69 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
72 ; AVX2-LABEL: shuffle_v2i64_22:
74 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
77 ; AVX512VL-LABEL: shuffle_v2i64_22:
79 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm0
81 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
82 ret <2 x i64> %shuffle
; Lane swap of the second operand. Mask <3, 2> gives <b[1], b[0]>, %a is never selected.
; The checks expect a single pshufd/vpshufd reading xmm1 and writing xmm0.
84 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
85 ; SSE-LABEL: shuffle_v2i64_32:
87 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
90 ; AVX-LABEL: shuffle_v2i64_32:
92 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
94 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
95 ret <2 x i64> %shuffle
; Splat of the high lane of the second operand. Mask <3, 3> gives <b[1], b[1]>, %a is never selected.
; The checks expect a single pshufd/vpshufd reading xmm1 and writing xmm0.
97 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
98 ; SSE-LABEL: shuffle_v2i64_33:
100 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
103 ; AVX-LABEL: shuffle_v2i64_33:
105 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
107 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
108 ret <2 x i64> %shuffle
; Floating-point splat of the low lane. Mask <0, 0> gives <a[0], a[0]>, %b is never selected.
; The checks expect movlhps on the baseline SSE2 run and movddup/vmovddup on every run from SSE3 upward.
111 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
112 ; SSE2-LABEL: shuffle_v2f64_00:
114 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
117 ; SSE3-LABEL: shuffle_v2f64_00:
119 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
122 ; SSSE3-LABEL: shuffle_v2f64_00:
124 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
127 ; SSE41-LABEL: shuffle_v2f64_00:
129 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
132 ; AVX-LABEL: shuffle_v2f64_00:
134 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
136 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
137 ret <2 x double> %shuffle
; Floating-point lane swap. Mask <1, 0> gives <a[1], a[0]>, %b is never selected.
; The checks expect shufpd on the SSE runs and vpermilpd on the AVX-family runs.
; The AVX512VL checks match the raw operand form (immediate 1) instead of the decoded shuffle comment.
139 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
140 ; SSE-LABEL: shuffle_v2f64_10:
142 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
145 ; AVX1-LABEL: shuffle_v2f64_10:
147 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
150 ; AVX2-LABEL: shuffle_v2f64_10:
152 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
155 ; AVX512VL-LABEL: shuffle_v2f64_10:
157 ; AVX512VL-NEXT: vpermilpd $1, %xmm0, %xmm0
158 ; AVX512VL-NEXT: retq
160 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
161 ret <2 x double> %shuffle
; Floating-point splat of the high lane. Mask <1, 1> gives <a[1], a[1]>, %b is never selected.
; The checks expect movhlps/vmovhlps with xmm0 as the only source register.
163 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
164 ; SSE-LABEL: shuffle_v2f64_11:
166 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
169 ; AVX1-LABEL: shuffle_v2f64_11:
171 ; AVX1-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
174 ; AVX2-LABEL: shuffle_v2f64_11:
176 ; AVX2-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
179 ; AVX512VL-LABEL: shuffle_v2f64_11:
181 ; AVX512VL-NEXT: vmovhlps %xmm0, %xmm0, %xmm0
182 ; AVX512VL-NEXT: retq
183 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
184 ret <2 x double> %shuffle
; Floating-point splat of the low lane of the second operand. Mask <2, 2> gives <b[0], b[0]>, %a is never selected.
; The baseline SSE2 checks show movlhps done in place on xmm1 followed by a movaps copy into xmm0.
; Runs from SSE3 upward expect one movddup/vmovddup from xmm1 into xmm0.
186 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
187 ; SSE2-LABEL: shuffle_v2f64_22:
189 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
190 ; SSE2-NEXT: movaps %xmm1, %xmm0
193 ; SSE3-LABEL: shuffle_v2f64_22:
195 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
198 ; SSSE3-LABEL: shuffle_v2f64_22:
200 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
203 ; SSE41-LABEL: shuffle_v2f64_22:
205 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
208 ; AVX-LABEL: shuffle_v2f64_22:
210 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
212 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
213 ret <2 x double> %shuffle
; Floating-point lane swap of the second operand. Mask <3, 2> gives <b[1], b[0]>, %a is never selected.
; The SSE checks show shufpd done in place on xmm1 plus a movapd copy into xmm0.
; The AVX-family checks expect one vpermilpd from xmm1 into xmm0.
215 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
216 ; SSE-LABEL: shuffle_v2f64_32:
218 ; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
219 ; SSE-NEXT: movapd %xmm1, %xmm0
222 ; AVX1-LABEL: shuffle_v2f64_32:
224 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
227 ; AVX2-LABEL: shuffle_v2f64_32:
229 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
232 ; AVX512VL-LABEL: shuffle_v2f64_32:
234 ; AVX512VL-NEXT: vpermilpd $1, %xmm1, %xmm0
235 ; AVX512VL-NEXT: retq
237 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
238 ret <2 x double> %shuffle
; Floating-point splat of the high lane of the second operand. Mask <3, 3> gives <b[1], b[1]>, %a is never selected.
; The SSE checks show movhlps done in place on xmm1 plus a movaps copy into xmm0.
; The AVX-family checks expect one vmovhlps from xmm1 into xmm0.
240 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
241 ; SSE-LABEL: shuffle_v2f64_33:
243 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
244 ; SSE-NEXT: movaps %xmm1, %xmm0
247 ; AVX1-LABEL: shuffle_v2f64_33:
249 ; AVX1-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
252 ; AVX2-LABEL: shuffle_v2f64_33:
254 ; AVX2-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
257 ; AVX512VL-LABEL: shuffle_v2f64_33:
259 ; AVX512VL-NEXT: vmovhlps %xmm1, %xmm1, %xmm0
260 ; AVX512VL-NEXT: retq
261 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
262 ret <2 x double> %shuffle
; Two-input floating-point blend. Mask <0, 3> gives <a[0], b[1]>, so each lane stays in its own position.
; Runs below SSE4.1 expect movsd into xmm1 plus a movapd copy into xmm0.
; The SSE4.1 and AVX-family runs expect a single blendpd/vblendpd.
264 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
265 ; SSE2-LABEL: shuffle_v2f64_03:
267 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
268 ; SSE2-NEXT: movapd %xmm1, %xmm0
271 ; SSE3-LABEL: shuffle_v2f64_03:
273 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
274 ; SSE3-NEXT: movapd %xmm1, %xmm0
277 ; SSSE3-LABEL: shuffle_v2f64_03:
279 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
280 ; SSSE3-NEXT: movapd %xmm1, %xmm0
283 ; SSE41-LABEL: shuffle_v2f64_03:
285 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
288 ; AVX-LABEL: shuffle_v2f64_03:
290 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
292 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
293 ret <2 x double> %shuffle
; Two-input floating-point blend. Mask <2, 1> gives <b[0], a[1]>, so each lane stays in its own position.
; Runs below SSE4.1 expect a single movsd directly into xmm0, with no extra register copy.
; The SSE4.1 and AVX-family runs expect a single blendpd/vblendpd.
295 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
296 ; SSE2-LABEL: shuffle_v2f64_21:
298 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
301 ; SSE3-LABEL: shuffle_v2f64_21:
303 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
306 ; SSSE3-LABEL: shuffle_v2f64_21:
308 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
311 ; SSE41-LABEL: shuffle_v2f64_21:
313 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
316 ; AVX-LABEL: shuffle_v2f64_21:
318 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
320 %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
321 ret <2 x double> %shuffle
; Two-input shuffle of the low lanes. Mask <0, 2> gives <a[0], b[0]>.
; The checks expect a single punpcklqdq/vpunpcklqdq of xmm0 and xmm1.
325 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
326 ; SSE-LABEL: shuffle_v2i64_02:
328 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
331 ; AVX-LABEL: shuffle_v2i64_02:
333 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
335 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
336 ret <2 x i64> %shuffle
; Same <0, 2> mask as the non-copy variant, result <a[0], b[0]>, but with an unused leading %nonce argument.
; The checks show the shuffle inputs in xmm1/xmm2 and the result delivered in xmm0.
; On SSE that costs an extra movdqa, while the three-operand AVX form writes xmm0 directly.
338 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
339 ; SSE-LABEL: shuffle_v2i64_02_copy:
341 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
342 ; SSE-NEXT: movdqa %xmm1, %xmm0
345 ; AVX-LABEL: shuffle_v2i64_02_copy:
347 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
349 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
350 ret <2 x i64> %shuffle
; Two-input integer blend. Mask <0, 3> gives <a[0], b[1]>, so each lane stays in its own position.
; Runs below SSE4.1 expect movsd into xmm1 plus a movapd copy into xmm0.
; SSE4.1 and AVX1 expect pblendw/vpblendw, while AVX2 and AVX512VL expect vpblendd.
352 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
353 ; SSE2-LABEL: shuffle_v2i64_03:
355 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
356 ; SSE2-NEXT: movapd %xmm1, %xmm0
359 ; SSE3-LABEL: shuffle_v2i64_03:
361 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
362 ; SSE3-NEXT: movapd %xmm1, %xmm0
365 ; SSSE3-LABEL: shuffle_v2i64_03:
367 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
368 ; SSSE3-NEXT: movapd %xmm1, %xmm0
371 ; SSE41-LABEL: shuffle_v2i64_03:
373 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
376 ; AVX1-LABEL: shuffle_v2i64_03:
378 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
381 ; AVX2-LABEL: shuffle_v2i64_03:
383 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
386 ; AVX512VL-LABEL: shuffle_v2i64_03:
388 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
389 ; AVX512VL-NEXT: retq
390 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
391 ret <2 x i64> %shuffle
; Same <0, 3> blend as the non-copy variant, result <a[0], b[1]>, but with an unused leading %nonce argument.
; The checks show the shuffle inputs in xmm1/xmm2 and the result delivered in xmm0.
; Every SSE-level run needs a trailing register copy, the AVX-family runs write xmm0 directly.
393 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
394 ; SSE2-LABEL: shuffle_v2i64_03_copy:
396 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
397 ; SSE2-NEXT: movapd %xmm2, %xmm0
400 ; SSE3-LABEL: shuffle_v2i64_03_copy:
402 ; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
403 ; SSE3-NEXT: movapd %xmm2, %xmm0
406 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
408 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
409 ; SSSE3-NEXT: movapd %xmm2, %xmm0
412 ; SSE41-LABEL: shuffle_v2i64_03_copy:
414 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
415 ; SSE41-NEXT: movdqa %xmm1, %xmm0
418 ; AVX1-LABEL: shuffle_v2i64_03_copy:
420 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
423 ; AVX2-LABEL: shuffle_v2i64_03_copy:
425 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
428 ; AVX512VL-LABEL: shuffle_v2i64_03_copy:
430 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
431 ; AVX512VL-NEXT: retq
432 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
433 ret <2 x i64> %shuffle
; Two-input shuffle that crosses lanes. Mask <1, 2> gives <a[1], b[0]>.
; The SSE2 and SSE3 runs expect shufpd, runs from SSSE3 upward expect palignr/vpalignr.
; The SSSE3 and SSE4.1 checks build the result in xmm1 and then copy it into xmm0 with movdqa.
435 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
436 ; SSE2-LABEL: shuffle_v2i64_12:
438 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
441 ; SSE3-LABEL: shuffle_v2i64_12:
443 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
446 ; SSSE3-LABEL: shuffle_v2i64_12:
448 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
449 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
452 ; SSE41-LABEL: shuffle_v2i64_12:
454 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
455 ; SSE41-NEXT: movdqa %xmm1, %xmm0
458 ; AVX-LABEL: shuffle_v2i64_12:
460 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
462 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
463 ret <2 x i64> %shuffle
; Same <1, 2> mask as the non-copy variant, result <a[1], b[0]>, but with an unused leading %nonce argument.
; The checks show the shuffle inputs in xmm1/xmm2 and the result delivered in xmm0.
; Every SSE-level run ends with a register copy into xmm0, the AVX-family runs write xmm0 directly.
465 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
466 ; SSE2-LABEL: shuffle_v2i64_12_copy:
468 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
469 ; SSE2-NEXT: movapd %xmm1, %xmm0
472 ; SSE3-LABEL: shuffle_v2i64_12_copy:
474 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
475 ; SSE3-NEXT: movapd %xmm1, %xmm0
478 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
480 ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
481 ; SSSE3-NEXT: movdqa %xmm2, %xmm0
484 ; SSE41-LABEL: shuffle_v2i64_12_copy:
486 ; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
487 ; SSE41-NEXT: movdqa %xmm2, %xmm0
490 ; AVX-LABEL: shuffle_v2i64_12_copy:
492 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
494 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
495 ret <2 x i64> %shuffle
; Two-input shuffle of the high lanes. Mask <1, 3> gives <a[1], b[1]>.
; The checks expect a single punpckhqdq/vpunpckhqdq of xmm0 and xmm1.
497 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
498 ; SSE-LABEL: shuffle_v2i64_13:
500 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
503 ; AVX-LABEL: shuffle_v2i64_13:
505 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
507 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
508 ret <2 x i64> %shuffle
; Same <1, 3> mask as the non-copy variant, result <a[1], b[1]>, but with an unused leading %nonce argument.
; The checks show the shuffle inputs in xmm1/xmm2 and the result delivered in xmm0.
; On SSE that costs an extra movdqa, while the three-operand AVX form writes xmm0 directly.
510 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
511 ; SSE-LABEL: shuffle_v2i64_13_copy:
513 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
514 ; SSE-NEXT: movdqa %xmm1, %xmm0
517 ; AVX-LABEL: shuffle_v2i64_13_copy:
519 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
521 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
522 ret <2 x i64> %shuffle
; Commuted shuffle of the low lanes. Mask <2, 0> gives <b[0], a[0]>.
; The SSE checks unpack into xmm1 and then copy to xmm0 with movdqa.
; The AVX checks expect one vpunpcklqdq with the sources swapped.
524 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
525 ; SSE-LABEL: shuffle_v2i64_20:
527 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
528 ; SSE-NEXT: movdqa %xmm1, %xmm0
531 ; AVX-LABEL: shuffle_v2i64_20:
533 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
535 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
536 ret <2 x i64> %shuffle
; Same <2, 0> mask as the non-copy variant, result <b[0], a[0]>, but with an unused leading %nonce argument.
; The checks show the shuffle inputs in xmm1/xmm2 and the result delivered in xmm0.
; On SSE that costs an extra movdqa, while the three-operand AVX form writes xmm0 directly.
538 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
539 ; SSE-LABEL: shuffle_v2i64_20_copy:
541 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
542 ; SSE-NEXT: movdqa %xmm2, %xmm0
545 ; AVX-LABEL: shuffle_v2i64_20_copy:
547 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
549 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
550 ret <2 x i64> %shuffle
; Two-input integer blend. Mask <2, 1> gives <b[0], a[1]>, so each lane stays in its own position.
; Runs below SSE4.1 expect a single movsd directly into xmm0, with no extra register copy.
; SSE4.1 and AVX1 expect pblendw/vpblendw, while AVX2 and AVX512VL expect vpblendd.
552 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
553 ; SSE2-LABEL: shuffle_v2i64_21:
555 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
558 ; SSE3-LABEL: shuffle_v2i64_21:
560 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
563 ; SSSE3-LABEL: shuffle_v2i64_21:
565 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
568 ; SSE41-LABEL: shuffle_v2i64_21:
570 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
573 ; AVX1-LABEL: shuffle_v2i64_21:
575 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
578 ; AVX2-LABEL: shuffle_v2i64_21:
580 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
583 ; AVX512VL-LABEL: shuffle_v2i64_21:
585 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
586 ; AVX512VL-NEXT: retq
587 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
588 ret <2 x i64> %shuffle
; Same <2, 1> blend as the non-copy variant, result <b[0], a[1]>, but with an unused leading %nonce argument.
; The checks show the shuffle inputs in xmm1/xmm2 and the result delivered in xmm0.
; Every SSE-level run ends with a register copy into xmm0, the AVX-family runs write xmm0 directly.
590 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
591 ; SSE2-LABEL: shuffle_v2i64_21_copy:
593 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
594 ; SSE2-NEXT: movapd %xmm1, %xmm0
597 ; SSE3-LABEL: shuffle_v2i64_21_copy:
599 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
600 ; SSE3-NEXT: movapd %xmm1, %xmm0
603 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
605 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
606 ; SSSE3-NEXT: movapd %xmm1, %xmm0
609 ; SSE41-LABEL: shuffle_v2i64_21_copy:
611 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
612 ; SSE41-NEXT: movdqa %xmm1, %xmm0
615 ; AVX1-LABEL: shuffle_v2i64_21_copy:
617 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
620 ; AVX2-LABEL: shuffle_v2i64_21_copy:
622 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
625 ; AVX512VL-LABEL: shuffle_v2i64_21_copy:
627 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
628 ; AVX512VL-NEXT: retq
629 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
630 ret <2 x i64> %shuffle
; Commuted lane-crossing shuffle. Mask <3, 0> gives <b[1], a[0]>.
; The SSE2 and SSE3 runs expect shufpd into xmm1 followed by a movapd copy into xmm0.
; Runs from SSSE3 upward expect a single palignr/vpalignr that writes xmm0 directly.
632 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
633 ; SSE2-LABEL: shuffle_v2i64_30:
635 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
636 ; SSE2-NEXT: movapd %xmm1, %xmm0
639 ; SSE3-LABEL: shuffle_v2i64_30:
641 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
642 ; SSE3-NEXT: movapd %xmm1, %xmm0
645 ; SSSE3-LABEL: shuffle_v2i64_30:
647 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
650 ; SSE41-LABEL: shuffle_v2i64_30:
652 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
655 ; AVX-LABEL: shuffle_v2i64_30:
657 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
659 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
660 ret <2 x i64> %shuffle
; Same <3, 0> mask as the non-copy variant, result <b[1], a[0]>, but with an unused leading %nonce argument.
; The checks show the shuffle inputs in xmm1/xmm2 and the result delivered in xmm0.
; Every SSE-level run ends with a register copy into xmm0, the AVX-family runs write xmm0 directly.
662 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
663 ; SSE2-LABEL: shuffle_v2i64_30_copy:
665 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
666 ; SSE2-NEXT: movapd %xmm2, %xmm0
669 ; SSE3-LABEL: shuffle_v2i64_30_copy:
671 ; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
672 ; SSE3-NEXT: movapd %xmm2, %xmm0
675 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
677 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
678 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
681 ; SSE41-LABEL: shuffle_v2i64_30_copy:
683 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
684 ; SSE41-NEXT: movdqa %xmm1, %xmm0
687 ; AVX-LABEL: shuffle_v2i64_30_copy:
689 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
691 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
692 ret <2 x i64> %shuffle
; Commuted shuffle of the high lanes. Mask <3, 1> gives <b[1], a[1]>.
; The SSE checks unpack into xmm1 and then copy to xmm0 with movdqa.
; The AVX checks expect one vpunpckhqdq with the sources swapped.
694 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
695 ; SSE-LABEL: shuffle_v2i64_31:
697 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
698 ; SSE-NEXT: movdqa %xmm1, %xmm0
701 ; AVX-LABEL: shuffle_v2i64_31:
703 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
705 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
706 ret <2 x i64> %shuffle
; Same <3, 1> mask as the non-copy variant, result <b[1], a[1]>, but with an unused leading %nonce argument.
; The checks show the shuffle inputs in xmm1/xmm2 and the result delivered in xmm0.
; On SSE that costs an extra movdqa, while the three-operand AVX form writes xmm0 directly.
708 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
709 ; SSE-LABEL: shuffle_v2i64_31_copy:
711 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
712 ; SSE-NEXT: movdqa %xmm2, %xmm0
715 ; AVX-LABEL: shuffle_v2i64_31_copy:
717 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
719 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
720 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand. Mask <0, 3> gives <a[0], 0>, keeping the low lane and zeroing the high lane.
; The checks expect a single movq/vmovq with no separately materialized zero register.
723 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
724 ; SSE-LABEL: shuffle_v2i64_0z:
726 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
729 ; AVX1-LABEL: shuffle_v2i64_0z:
731 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
734 ; AVX2-LABEL: shuffle_v2i64_0z:
736 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
739 ; AVX512VL-LABEL: shuffle_v2i64_0z:
741 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
742 ; AVX512VL-NEXT: retq
743 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
744 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand. Mask <1, 3> gives <a[1], 0>, moving the high lane down and zero-filling.
; The checks expect a single byte shift right (psrldq/vpsrldq) by 8 bytes.
747 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
748 ; SSE-LABEL: shuffle_v2i64_1z:
750 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
753 ; AVX-LABEL: shuffle_v2i64_1z:
755 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
757 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
758 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand. Mask <2, 0> gives <0, a[0]>, moving the low lane up and zero-filling.
; The checks expect a single byte shift left (pslldq/vpslldq) by 8 bytes.
761 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
762 ; SSE-LABEL: shuffle_v2i64_z0:
764 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
767 ; AVX-LABEL: shuffle_v2i64_z0:
769 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
771 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
772 ret <2 x i64> %shuffle
; Shuffle against an all-zero second operand. Mask <2, 1> gives <0, a[1]>, zeroing the low lane and keeping the high lane.
; Every run materializes a zero register in xmm1 and then merges it into xmm0.
; Runs below SSE4.1 merge with movsd, SSE4.1 and AVX1 with pblendw/vpblendw, AVX2 and AVX512VL with vpblendd.
775 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
776 ; SSE2-LABEL: shuffle_v2i64_z1:
778 ; SSE2-NEXT: xorpd %xmm1, %xmm1
779 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
782 ; SSE3-LABEL: shuffle_v2i64_z1:
784 ; SSE3-NEXT: xorpd %xmm1, %xmm1
785 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
788 ; SSSE3-LABEL: shuffle_v2i64_z1:
790 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
791 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
794 ; SSE41-LABEL: shuffle_v2i64_z1:
796 ; SSE41-NEXT: pxor %xmm1, %xmm1
797 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
800 ; AVX1-LABEL: shuffle_v2i64_z1:
802 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
803 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
806 ; AVX2-LABEL: shuffle_v2i64_z1:
808 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
809 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
812 ; AVX512VL-LABEL: shuffle_v2i64_z1:
814 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
815 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
816 ; AVX512VL-NEXT: retq
817 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
818 ret <2 x i64> %shuffle
; Floating-point shuffle against an all-zero second operand. Mask <0, 3> gives <a[0], 0.0>.
; The checks expect a single movq/vmovq, which keeps the low 64 bits and zeroes the upper half.
821 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
822 ; SSE-LABEL: shuffle_v2f64_0z:
824 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
827 ; AVX1-LABEL: shuffle_v2f64_0z:
829 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
832 ; AVX2-LABEL: shuffle_v2f64_0z:
834 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
837 ; AVX512VL-LABEL: shuffle_v2f64_0z:
839 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
840 ; AVX512VL-NEXT: retq
841 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
842 ret <2 x double> %shuffle
; Floating-point shuffle against an all-zero second operand. Mask <1, 3> gives <a[1], 0.0>.
; The checks expect a zeroed xmm1 followed by unpckhpd/vunpckhpd of xmm0 with that zero register.
; The AVX512VL run zeroes with vxorps where the other AVX runs use vxorpd.
845 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
846 ; SSE-LABEL: shuffle_v2f64_1z:
848 ; SSE-NEXT: xorpd %xmm1, %xmm1
849 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
852 ; AVX1-LABEL: shuffle_v2f64_1z:
854 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
855 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
858 ; AVX2-LABEL: shuffle_v2f64_1z:
860 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
861 ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
864 ; AVX512VL-LABEL: shuffle_v2f64_1z:
866 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
867 ; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
868 ; AVX512VL-NEXT: retq
869 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
870 ret <2 x double> %shuffle
; Floating-point shuffle against an all-zero second operand. Mask <2, 0> gives <0.0, a[0]>.
; The checks expect a zeroed xmm1 followed by unpcklpd/vunpcklpd with the zero register as first source.
; The SSE run unpacks in place into xmm1 and therefore needs a trailing movapd into xmm0.
873 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
874 ; SSE-LABEL: shuffle_v2f64_z0:
876 ; SSE-NEXT: xorpd %xmm1, %xmm1
877 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
878 ; SSE-NEXT: movapd %xmm1, %xmm0
881 ; AVX1-LABEL: shuffle_v2f64_z0:
883 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
884 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
887 ; AVX2-LABEL: shuffle_v2f64_z0:
889 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
890 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
893 ; AVX512VL-LABEL: shuffle_v2f64_z0:
895 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
896 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
897 ; AVX512VL-NEXT: retq
898 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
899 ret <2 x double> %shuffle
; Floating-point shuffle against an all-zero second operand. Mask <2, 1> gives <0.0, a[1]>.
; Every run zeroes xmm1 first and then merges it into the low lane of xmm0.
; Runs below SSE4.1 merge with movsd, the SSE4.1 and AVX-family runs with blendpd/vblendpd.
902 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
903 ; SSE2-LABEL: shuffle_v2f64_z1:
905 ; SSE2-NEXT: xorpd %xmm1, %xmm1
906 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
909 ; SSE3-LABEL: shuffle_v2f64_z1:
911 ; SSE3-NEXT: xorpd %xmm1, %xmm1
912 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
915 ; SSSE3-LABEL: shuffle_v2f64_z1:
917 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
918 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
921 ; SSE41-LABEL: shuffle_v2f64_z1:
923 ; SSE41-NEXT: xorpd %xmm1, %xmm1
924 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
927 ; AVX-LABEL: shuffle_v2f64_z1:
929 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
930 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
932 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
933 ret <2 x double> %shuffle
; Two chained shuffles separated by bitcasts. The first, on <2 x double> with mask <2, 1>, gives <0.0, a[1]>.
; The second, on the <4 x float> view with mask <2, 3, 0, 1>, swaps the two 64-bit halves, so the final value is <a[1], 0.0>.
; The checks expect the pair to be lowered as one zeroing instruction plus one shufpd/vshufpd.
936 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
937 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
939 ; SSE-NEXT: xorpd %xmm1, %xmm1
940 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
943 ; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
945 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
946 ; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
949 ; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
951 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
952 ; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
955 ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
957 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
958 ; AVX512VL-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0
959 ; AVX512VL-NEXT: retq
960 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
961 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
962 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
963 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
964 ret <2 x double> %bitcast64
; %x is viewed as <4 x float>, and mask <4, 1, 2, 3> replaces 32-bit element 0 with element 0 of a constant vector (1.0).
; After casting back, the AND with <i64 -4294967296, i64 -1> clears the low 32 bits of 64-bit lane 0, so that element ends up zero.
; The checks expect a scalar load of the constant, a low-element merge (movss or blendps), and then a mask or zero-blend.
; NOTE(review) - no ret of %and is visible in this listing (the embedded numbering jumps from 1023 to 1027), confirm the function still returns %and.
967 define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
968 ; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
970 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
971 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
972 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
975 ; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
977 ; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
978 ; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
979 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
982 ; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
984 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
985 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
986 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
989 ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
991 ; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
992 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
993 ; SSE41-NEXT: xorps %xmm1, %xmm1
994 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
997 ; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
999 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1000 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1001 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1002 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
1005 ; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
1007 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1008 ; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1009 ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
1010 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1013 ; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
1015 ; AVX512VL-NEXT: vmovss {{.*}}(%rip), %xmm1
1016 ; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1017 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
1018 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1019 ; AVX512VL-NEXT: retq
1020 %bitcast32 = bitcast <2 x i64> %x to <4 x float>
1021 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1022 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
1023 %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
; Scalar i64 %a is inserted into lane 0 of an undef vector, then mask <0, 3> against zero gives <%a, 0>.
; The checks expect one GPR-to-XMM move from rdi (spelled movd on SSE, vmovq on AVX) with no separate zeroing.
1027 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
1028 ; SSE-LABEL: insert_reg_and_zero_v2i64:
1030 ; SSE-NEXT: movd %rdi, %xmm0
1033 ; AVX-LABEL: insert_reg_and_zero_v2i64:
1035 ; AVX-NEXT: vmovq %rdi, %xmm0
1037 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1038 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1039 ret <2 x i64> %shuffle
; An i64 loaded from %ptr is inserted into lane 0 of an undef vector, then mask <0, 3> against zero gives <loaded, 0>.
; The checks expect the load, insert and zeroing to fold into a single movq/vmovq from memory.
1042 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
1043 ; SSE-LABEL: insert_mem_and_zero_v2i64:
1045 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1048 ; AVX1-LABEL: insert_mem_and_zero_v2i64:
1050 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1053 ; AVX2-LABEL: insert_mem_and_zero_v2i64:
1055 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1058 ; AVX512VL-LABEL: insert_mem_and_zero_v2i64:
1060 ; AVX512VL-NEXT: vmovq (%rdi), %xmm0
1061 ; AVX512VL-NEXT: retq
1062 %a = load i64, i64* %ptr
1063 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1064 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
1065 ret <2 x i64> %shuffle
; Scalar double %a is inserted into lane 0 of an undef vector, then mask <0, 3> against zero gives <%a, 0.0>.
; The checks show %a already in xmm0, so they expect only one movq/vmovq that clears the upper 64 bits.
1068 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
1069 ; SSE-LABEL: insert_reg_and_zero_v2f64:
1071 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1074 ; AVX1-LABEL: insert_reg_and_zero_v2f64:
1076 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1079 ; AVX2-LABEL: insert_reg_and_zero_v2f64:
1081 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1084 ; AVX512VL-LABEL: insert_reg_and_zero_v2f64:
1086 ; AVX512VL-NEXT: vmovq %xmm0, %xmm0
1087 ; AVX512VL-NEXT: retq
1088 %v = insertelement <2 x double> undef, double %a, i32 0
1089 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1090 ret <2 x double> %shuffle
; A double loaded from %ptr is inserted into lane 0 of an undef vector, then mask <0, 3> against zero gives <loaded, 0.0>.
; The checks expect the load, insert and zeroing to fold into a single movsd/vmovsd from memory.
1093 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
1094 ; SSE-LABEL: insert_mem_and_zero_v2f64:
1096 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1099 ; AVX1-LABEL: insert_mem_and_zero_v2f64:
1101 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1104 ; AVX2-LABEL: insert_mem_and_zero_v2f64:
1106 ; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1109 ; AVX512VL-LABEL: insert_mem_and_zero_v2f64:
1111 ; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
1112 ; AVX512VL-NEXT: retq
1113 %a = load double, double* %ptr
1114 %v = insertelement <2 x double> undef, double %a, i32 0
1115 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
1116 ret <2 x double> %shuffle
; Scalar i64 %a replaces the low lane of %b. %a goes into lane 0 of an undef vector, then mask <0, 3> gives <%a, b[1]>.
; Every run first moves rdi into xmm1 and then merges it into xmm0, which holds %b.
; Runs below SSE4.1 merge with movsd, SSE4.1 and AVX1 with pblendw/vpblendw, AVX2 and AVX512VL with vpblendd.
1119 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
1120 ; SSE2-LABEL: insert_reg_lo_v2i64:
1122 ; SSE2-NEXT: movd %rdi, %xmm1
1123 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1126 ; SSE3-LABEL: insert_reg_lo_v2i64:
1128 ; SSE3-NEXT: movd %rdi, %xmm1
1129 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1132 ; SSSE3-LABEL: insert_reg_lo_v2i64:
1134 ; SSSE3-NEXT: movd %rdi, %xmm1
1135 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1138 ; SSE41-LABEL: insert_reg_lo_v2i64:
1140 ; SSE41-NEXT: movd %rdi, %xmm1
1141 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1144 ; AVX1-LABEL: insert_reg_lo_v2i64:
1146 ; AVX1-NEXT: vmovq %rdi, %xmm1
1147 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1150 ; AVX2-LABEL: insert_reg_lo_v2i64:
1152 ; AVX2-NEXT: vmovq %rdi, %xmm1
1153 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1156 ; AVX512VL-LABEL: insert_reg_lo_v2i64:
1158 ; AVX512VL-NEXT: vmovq %rdi, %xmm1
1159 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1160 ; AVX512VL-NEXT: retq
1161 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1162 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1163 ret <2 x i64> %shuffle
; Memory-operand variant of the low-element i64 insert: the scalar is loaded
; from %ptr, placed in element 0 of an undef vector, and the shuffle mask
; <0, 3> combines it with element 1 of %b.
; The checks expect the load to fold into movlpd on the SSE2, SSE3 and SSSE3
; runs; the remaining runs expect a separate 64-bit load into xmm1 followed
; by a blend (pblendw / vpblendw / vpblendd).
1166 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
1167 ; SSE2-LABEL: insert_mem_lo_v2i64:
1169 ; SSE2-NEXT: movlpd (%rdi), %xmm0
1172 ; SSE3-LABEL: insert_mem_lo_v2i64:
1174 ; SSE3-NEXT: movlpd (%rdi), %xmm0
1177 ; SSSE3-LABEL: insert_mem_lo_v2i64:
1179 ; SSSE3-NEXT: movlpd (%rdi), %xmm0
1182 ; SSE41-LABEL: insert_mem_lo_v2i64:
1184 ; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1185 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1188 ; AVX1-LABEL: insert_mem_lo_v2i64:
1190 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1191 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1194 ; AVX2-LABEL: insert_mem_lo_v2i64:
1196 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1197 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1200 ; AVX512VL-LABEL: insert_mem_lo_v2i64:
1202 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1203 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1204 ; AVX512VL-NEXT: retq
1205 %a = load i64, i64* %ptr
1206 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1207 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
1208 ret <2 x i64> %shuffle
; Inserts the scalar i64 argument %a into the high element of %b.
; The shuffle mask <2, 0> takes element 0 of %b for the low half and
; element 0 of the vector holding %a for the high half.
; The checks expect a GPR-to-XMM move followed by punpcklqdq / vpunpcklqdq.
1211 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
1212 ; SSE-LABEL: insert_reg_hi_v2i64:
1214 ; SSE-NEXT: movd %rdi, %xmm1
1215 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1218 ; AVX-LABEL: insert_reg_hi_v2i64:
1220 ; AVX-NEXT: vmovq %rdi, %xmm1
1221 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1223 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1224 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1225 ret <2 x i64> %shuffle
; Memory-operand variant of the high-element i64 insert: the scalar is loaded
; from %ptr and the shuffle mask <2, 0> yields element 0 of %b in the low
; half and the loaded value in the high half.
; Every run is expected to load the 64-bit value into xmm1 and then combine
; it with punpcklqdq / vpunpcklqdq.
1228 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
1229 ; SSE-LABEL: insert_mem_hi_v2i64:
1231 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1232 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1235 ; AVX1-LABEL: insert_mem_hi_v2i64:
1237 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1238 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1241 ; AVX2-LABEL: insert_mem_hi_v2i64:
1243 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1244 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1247 ; AVX512VL-LABEL: insert_mem_hi_v2i64:
1249 ; AVX512VL-NEXT: vmovq (%rdi), %xmm1
1250 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1251 ; AVX512VL-NEXT: retq
1252 %a = load i64, i64* %ptr
1253 %v = insertelement <2 x i64> undef, i64 %a, i32 0
1254 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
1255 ret <2 x i64> %shuffle
; Inserts the scalar double argument %a into the low element of %b.
; The shuffle mask <0, 3> keeps %a in element 0 and takes element 1 from %b.
; The SSE runs expect movsd into xmm1 followed by a movapd copy to xmm0;
; the AVX runs expect a single vmovsd writing xmm0 directly.
1258 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
1259 ; SSE-LABEL: insert_reg_lo_v2f64:
1261 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1262 ; SSE-NEXT: movapd %xmm1, %xmm0
1265 ; AVX1-LABEL: insert_reg_lo_v2f64:
1267 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1270 ; AVX2-LABEL: insert_reg_lo_v2f64:
1272 ; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1275 ; AVX512VL-LABEL: insert_reg_lo_v2f64:
1277 ; AVX512VL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
1278 ; AVX512VL-NEXT: retq
1279 %v = insertelement <2 x double> undef, double %a, i32 0
1280 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1281 ret <2 x double> %shuffle
; Memory-operand variant of the low-element double insert: the scalar is
; loaded from %ptr and the shuffle mask <0, 3> combines it with element 1
; of %b. The checks expect the load to fold into movlpd / vmovlpd.
1284 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
1285 ; SSE-LABEL: insert_mem_lo_v2f64:
1287 ; SSE-NEXT: movlpd (%rdi), %xmm0
1290 ; AVX-LABEL: insert_mem_lo_v2f64:
1292 ; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
1294 %a = load double, double* %ptr
1295 %v = insertelement <2 x double> undef, double %a, i32 0
1296 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
1297 ret <2 x double> %shuffle
; Inserts the scalar double argument %a into the high element of %b.
; The shuffle mask <2, 0> takes element 0 of %b for the low half and %a for
; the high half. The SSE runs expect unpcklpd into xmm1 plus a movapd copy
; to xmm0; the AVX runs expect a single vunpcklpd.
1300 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
1301 ; SSE-LABEL: insert_reg_hi_v2f64:
1303 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1304 ; SSE-NEXT: movapd %xmm1, %xmm0
1307 ; AVX-LABEL: insert_reg_hi_v2f64:
1309 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1311 %v = insertelement <2 x double> undef, double %a, i32 0
1312 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1313 ret <2 x double> %shuffle
; Memory-operand variant of the high-element double insert: the scalar is
; loaded from %ptr and the shuffle mask <2, 0> yields element 0 of %b in the
; low half and the loaded value in the high half.
; The checks expect the load to fold into movhpd / vmovhpd.
1316 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
1317 ; SSE-LABEL: insert_mem_hi_v2f64:
1319 ; SSE-NEXT: movhpd (%rdi), %xmm0
1322 ; AVX-LABEL: insert_mem_hi_v2f64:
1324 ; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
1326 %a = load double, double* %ptr
1327 %v = insertelement <2 x double> undef, double %a, i32 0
1328 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
1329 ret <2 x double> %shuffle
; Splats the scalar double argument %a into both elements of the result:
; %a is inserted into element 0 of an undef vector and the shuffle mask
; <0, 0> duplicates that element.
; The plain SSE2 run expects movlhps; the SSE3, SSSE3 and SSE4.1 runs expect
; movddup, and the AVX runs expect vmovddup.
1332 define <2 x double> @insert_dup_reg_v2f64(double %a) {
1333 ; SSE2-LABEL: insert_dup_reg_v2f64:
1335 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1338 ; SSE3-LABEL: insert_dup_reg_v2f64:
1340 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1343 ; SSSE3-LABEL: insert_dup_reg_v2f64:
1345 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1348 ; SSE41-LABEL: insert_dup_reg_v2f64:
1350 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
1353 ; AVX-LABEL: insert_dup_reg_v2f64:
1355 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1357 %v = insertelement <2 x double> undef, double %a, i32 0
1358 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1359 ret <2 x double> %shuffle
; Splats a double loaded from %ptr into both elements of the result
; (insert into element 0 of undef, then shuffle with mask <0, 0>).
; The plain SSE2 run expects a movsd load followed by movlhps. The SSE3,
; SSSE3, SSE4.1, AVX1 and AVX2 runs expect the load folded into
; movddup / vmovddup. The AVX512VL run currently expects a separate vmovsd
; load followed by a register vmovddup, i.e. the load is not folded there.
1362 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
1363 ; SSE2-LABEL: insert_dup_mem_v2f64:
1365 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1366 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1369 ; SSE3-LABEL: insert_dup_mem_v2f64:
1371 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1374 ; SSSE3-LABEL: insert_dup_mem_v2f64:
1376 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1379 ; SSE41-LABEL: insert_dup_mem_v2f64:
1381 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
1384 ; AVX1-LABEL: insert_dup_mem_v2f64:
1386 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1389 ; AVX2-LABEL: insert_dup_mem_v2f64:
1391 ; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1394 ; AVX512VL-LABEL: insert_dup_mem_v2f64:
1396 ; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
1397 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1398 ; AVX512VL-NEXT: retq
1399 %a = load double, double* %ptr
1400 %v = insertelement <2 x double> undef, double %a, i32 0
1401 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1402 ret <2 x double> %shuffle
; Splats an i64 loaded from %ptr (with alignment 1) into both elements of
; the result: the value is inserted into element 0 of an undef vector and a
; zeroinitializer shuffle mask duplicates element 0.
; The SSE and AVX1 runs expect a 64-bit load followed by pshufd / vpshufd;
; the AVX2 and AVX512VL runs expect the load folded into vpbroadcastq.
; The splat in %tmp2 is the function result and is returned by the final
; ret, which also gives the block its required terminator.
1405 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
1406 ; SSE-LABEL: insert_dup_mem_v2i64:
1408 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1409 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1412 ; AVX1-LABEL: insert_dup_mem_v2i64:
1414 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1415 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1418 ; AVX2-LABEL: insert_dup_mem_v2i64:
1420 ; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
1423 ; AVX512VL-LABEL: insert_dup_mem_v2i64:
1425 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %xmm0
1426 ; AVX512VL-NEXT: retq
1427 %tmp = load i64, i64* %ptr, align 1
1428 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1429 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
1430 ret <2 x i64> %tmp2
; Loads a whole <2 x double> vector from %ptr and swaps its two elements
; (shuffle mask <1, 0>, second operand undef).
; The SSE runs expect a movapd load followed by shufpd; the AVX runs expect
; the load folded into a single vpermilpd with a memory operand.
1433 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
1434 ; SSE-LABEL: shuffle_mem_v2f64_10:
1436 ; SSE-NEXT: movapd (%rdi), %xmm0
1437 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1440 ; AVX1-LABEL: shuffle_mem_v2f64_10:
1442 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1445 ; AVX2-LABEL: shuffle_mem_v2f64_10:
1447 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1450 ; AVX512VL-LABEL: shuffle_mem_v2f64_10:
1452 ; AVX512VL-NEXT: vpermilpd $1, (%rdi), %xmm0
1453 ; AVX512VL-NEXT: retq
1455 %a = load <2 x double>, <2 x double>* %ptr
1456 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1457 ret <2 x double> %shuffle