1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
3 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
4 ; RUN: llc < %s -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
6 target triple = "x86_64-unknown-unknown"
; Tests for lowering of <4 x double> shufflevector masks. Each function name
; encodes its shuffle mask digit by digit: indices 0-3 select lanes of %a,
; indices 4-7 select lanes of %b, and 'u' marks an undef lane. The check
; lines were autogenerated by utils/update_llc_test_checks.py and pin the
; expected instruction sequence per target (AVX1, AVX2, AVX512VL; the ALL
; prefix covers cases where every target emits the same code). Regenerate
; them with that script rather than editing by hand.
8 define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
9 ; AVX1-LABEL: shuffle_v4f64_0000:
11 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
12 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
15 ; AVX2-LABEL: shuffle_v4f64_0000:
17 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
20 ; AVX512VL-LABEL: shuffle_v4f64_0000:
22 ; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
24 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
25 ret <4 x double> %shuffle
28 define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
29 ; AVX1-LABEL: shuffle_v4f64_0001:
31 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
32 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
35 ; AVX2-LABEL: shuffle_v4f64_0001:
37 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
40 ; AVX512VL-LABEL: shuffle_v4f64_0001:
42 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
44 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
45 ret <4 x double> %shuffle
48 define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
49 ; AVX1-LABEL: shuffle_v4f64_0020:
51 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
52 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
53 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
54 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
57 ; AVX2-LABEL: shuffle_v4f64_0020:
59 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
62 ; AVX512VL-LABEL: shuffle_v4f64_0020:
64 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
66 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
67 ret <4 x double> %shuffle
70 define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
71 ; AVX1-LABEL: shuffle_v4f64_0300:
73 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
74 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
75 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
78 ; AVX2-LABEL: shuffle_v4f64_0300:
80 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
83 ; AVX512VL-LABEL: shuffle_v4f64_0300:
85 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
87 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
88 ret <4 x double> %shuffle
91 define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
92 ; AVX1-LABEL: shuffle_v4f64_1000:
94 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
95 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
96 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
99 ; AVX2-LABEL: shuffle_v4f64_1000:
101 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
104 ; AVX512VL-LABEL: shuffle_v4f64_1000:
106 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
107 ; AVX512VL-NEXT: retq
108 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
109 ret <4 x double> %shuffle
112 define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
113 ; AVX1-LABEL: shuffle_v4f64_2200:
115 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
116 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
119 ; AVX2-LABEL: shuffle_v4f64_2200:
121 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
124 ; AVX512VL-LABEL: shuffle_v4f64_2200:
126 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
127 ; AVX512VL-NEXT: retq
128 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
129 ret <4 x double> %shuffle
132 define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
133 ; AVX1-LABEL: shuffle_v4f64_3330:
135 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
136 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
137 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
140 ; AVX2-LABEL: shuffle_v4f64_3330:
142 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
145 ; AVX512VL-LABEL: shuffle_v4f64_3330:
147 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
148 ; AVX512VL-NEXT: retq
149 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
150 ret <4 x double> %shuffle
153 define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
154 ; AVX1-LABEL: shuffle_v4f64_3210:
156 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
157 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
160 ; AVX2-LABEL: shuffle_v4f64_3210:
162 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
165 ; AVX512VL-LABEL: shuffle_v4f64_3210:
167 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
168 ; AVX512VL-NEXT: retq
169 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
170 ret <4 x double> %shuffle
; The following in-lane masks lower to a single vpermilpd/vmovddup on every
; target, so one ALL-prefixed check body covers all three run lines.
173 define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
174 ; ALL-LABEL: shuffle_v4f64_0023:
176 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
179 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
180 ret <4 x double> %shuffle
183 define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
184 ; ALL-LABEL: shuffle_v4f64_0022:
186 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
188 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
189 ret <4 x double> %shuffle
; Same 0022 mask but with %a loaded from memory; the load is expected to
; fold into the vmovddup (note the mem[0,0,2,2] operand in the check line).
192 define <4 x double> @shuffle_v4f64mem_0022(<4 x double>* %ptr, <4 x double> %b) {
193 ; ALL-LABEL: shuffle_v4f64mem_0022:
195 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
197 %a = load <4 x double>, <4 x double>* %ptr
198 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
199 ret <4 x double> %shuffle
202 define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
203 ; ALL-LABEL: shuffle_v4f64_1032:
205 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
207 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
208 ret <4 x double> %shuffle
211 define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
212 ; ALL-LABEL: shuffle_v4f64_1133:
214 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
216 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
217 ret <4 x double> %shuffle
220 define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
221 ; ALL-LABEL: shuffle_v4f64_1023:
223 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
225 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
226 ret <4 x double> %shuffle
229 define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
230 ; ALL-LABEL: shuffle_v4f64_1022:
232 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
234 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
235 ret <4 x double> %shuffle
; Two-input masks below (indices 4-7 pull lanes from %b); these should lower
; to blends, unpacks, or shufpd without any cross-lane permute.
238 define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
239 ; ALL-LABEL: shuffle_v4f64_0423:
241 ; ALL-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
242 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
244 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
245 ret <4 x double> %shuffle
248 define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
249 ; ALL-LABEL: shuffle_v4f64_0462:
251 ; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
252 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
253 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
255 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
256 ret <4 x double> %shuffle
259 define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
260 ; ALL-LABEL: shuffle_v4f64_0426:
262 ; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
264 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
265 ret <4 x double> %shuffle
268 define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
269 ; ALL-LABEL: shuffle_v4f64_1537:
271 ; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
273 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
274 ret <4 x double> %shuffle
277 define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
278 ; ALL-LABEL: shuffle_v4f64_4062:
280 ; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
282 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
283 ret <4 x double> %shuffle
286 define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
287 ; ALL-LABEL: shuffle_v4f64_5173:
289 ; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
291 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
292 ret <4 x double> %shuffle
295 define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
296 ; ALL-LABEL: shuffle_v4f64_5163:
298 ; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
300 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
301 ret <4 x double> %shuffle
304 define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
305 ; ALL-LABEL: shuffle_v4f64_0527:
307 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
309 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
310 ret <4 x double> %shuffle
313 define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
314 ; ALL-LABEL: shuffle_v4f64_4163:
316 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
318 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
319 ret <4 x double> %shuffle
; 128-bit-lane-granular masks: these exercise vinsertf128 / vperm2f128
; selection (AVX512VL uses the vinsertf32x4 encoding for the same insert).
322 define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
323 ; AVX1-LABEL: shuffle_v4f64_0145:
325 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
328 ; AVX2-LABEL: shuffle_v4f64_0145:
330 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
333 ; AVX512VL-LABEL: shuffle_v4f64_0145:
335 ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
336 ; AVX512VL-NEXT: retq
337 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
338 ret <4 x double> %shuffle
341 define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
342 ; AVX1-LABEL: shuffle_v4f64_4501:
344 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
347 ; AVX2-LABEL: shuffle_v4f64_4501:
349 ; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
352 ; AVX512VL-LABEL: shuffle_v4f64_4501:
354 ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm1, %ymm0
355 ; AVX512VL-NEXT: retq
356 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
357 ret <4 x double> %shuffle
360 define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
361 ; ALL-LABEL: shuffle_v4f64_0167:
363 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
365 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
366 ret <4 x double> %shuffle
369 define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
370 ; AVX1-LABEL: shuffle_v4f64_1054:
372 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
373 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
376 ; AVX2-LABEL: shuffle_v4f64_1054:
378 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
379 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
382 ; AVX512VL-LABEL: shuffle_v4f64_1054:
384 ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
385 ; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
386 ; AVX512VL-NEXT: retq
387 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
388 ret <4 x double> %shuffle
391 define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
392 ; AVX1-LABEL: shuffle_v4f64_3254:
394 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
395 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
398 ; AVX2-LABEL: shuffle_v4f64_3254:
400 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
401 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
404 ; AVX512VL-LABEL: shuffle_v4f64_3254:
406 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
407 ; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
408 ; AVX512VL-NEXT: retq
409 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
410 ret <4 x double> %shuffle
413 define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
414 ; AVX1-LABEL: shuffle_v4f64_3276:
416 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
417 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
420 ; AVX2-LABEL: shuffle_v4f64_3276:
422 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
423 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
426 ; AVX512VL-LABEL: shuffle_v4f64_3276:
428 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
429 ; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
430 ; AVX512VL-NEXT: retq
431 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
432 ret <4 x double> %shuffle
435 define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
436 ; ALL-LABEL: shuffle_v4f64_1076:
438 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
439 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
441 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
442 ret <4 x double> %shuffle
445 define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
446 ; AVX1-LABEL: shuffle_v4f64_0415:
448 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
449 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
450 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
453 ; AVX2-LABEL: shuffle_v4f64_0415:
455 ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
456 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
457 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
460 ; AVX512VL-LABEL: shuffle_v4f64_0415:
462 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
463 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
464 ; AVX512VL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
465 ; AVX512VL-NEXT: retq
466 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
467 ret <4 x double> %shuffle
; Masks with undef ('u') elements: the lowering is free to pick any value
; for those lanes, which should enable narrower (xmm) instructions below.
470 define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
471 ; ALL-LABEL: shuffle_v4f64_u062:
473 ; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
475 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 undef, i32 0, i32 6, i32 2>
476 ret <4 x double> %shuffle
479 define <4 x double> @shuffle_v4f64_15uu(<4 x double> %a, <4 x double> %b) {
480 ; ALL-LABEL: shuffle_v4f64_15uu:
482 ; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
484 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
485 ret <4 x double> %shuffle
488 define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {
489 ; ALL-LABEL: shuffle_v4f64_11uu:
491 ; ALL-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
493 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
494 ret <4 x double> %shuffle
497 define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
498 ; AVX1-LABEL: shuffle_v4f64_22uu:
500 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
501 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
504 ; AVX2-LABEL: shuffle_v4f64_22uu:
506 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
509 ; AVX512VL-LABEL: shuffle_v4f64_22uu:
511 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
512 ; AVX512VL-NEXT: retq
513 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
514 ret <4 x double> %shuffle
517 define <4 x double> @shuffle_v4f64_3333(<4 x double> %a, <4 x double> %b) {
518 ; AVX1-LABEL: shuffle_v4f64_3333:
520 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
521 ; AVX1-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
522 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
525 ; AVX2-LABEL: shuffle_v4f64_3333:
527 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
530 ; AVX512VL-LABEL: shuffle_v4f64_3333:
532 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
533 ; AVX512VL-NEXT: retq
534 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
535 ret <4 x double> %shuffle
538 define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
539 ; AVX1-LABEL: shuffle_v4i64_0000:
541 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
542 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
545 ; AVX2-LABEL: shuffle_v4i64_0000:
547 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
550 ; AVX512VL-LABEL: shuffle_v4i64_0000:
552 ; AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0
553 ; AVX512VL-NEXT: retq
554 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
555 ret <4 x i64> %shuffle
558 define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
559 ; AVX1-LABEL: shuffle_v4i64_0001:
561 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
562 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
565 ; AVX2-LABEL: shuffle_v4i64_0001:
567 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
570 ; AVX512VL-LABEL: shuffle_v4i64_0001:
572 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
573 ; AVX512VL-NEXT: retq
574 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
575 ret <4 x i64> %shuffle
578 define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
579 ; AVX1-LABEL: shuffle_v4i64_0020:
581 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
582 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
583 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
584 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
587 ; AVX2-LABEL: shuffle_v4i64_0020:
589 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
592 ; AVX512VL-LABEL: shuffle_v4i64_0020:
594 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
595 ; AVX512VL-NEXT: retq
596 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
597 ret <4 x i64> %shuffle
600 define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
601 ; AVX1-LABEL: shuffle_v4i64_0112:
603 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
604 ; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0]
605 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
608 ; AVX2-LABEL: shuffle_v4i64_0112:
610 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
613 ; AVX512VL-LABEL: shuffle_v4i64_0112:
615 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
616 ; AVX512VL-NEXT: retq
617 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
618 ret <4 x i64> %shuffle
621 define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
622 ; AVX1-LABEL: shuffle_v4i64_0300:
624 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
625 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
626 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
629 ; AVX2-LABEL: shuffle_v4i64_0300:
631 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
634 ; AVX512VL-LABEL: shuffle_v4i64_0300:
636 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
637 ; AVX512VL-NEXT: retq
638 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
639 ret <4 x i64> %shuffle
642 define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
643 ; AVX1-LABEL: shuffle_v4i64_1000:
645 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
646 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
647 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
650 ; AVX2-LABEL: shuffle_v4i64_1000:
652 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
655 ; AVX512VL-LABEL: shuffle_v4i64_1000:
657 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
658 ; AVX512VL-NEXT: retq
659 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
660 ret <4 x i64> %shuffle
663 define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
664 ; AVX1-LABEL: shuffle_v4i64_2200:
666 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
667 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
670 ; AVX2-LABEL: shuffle_v4i64_2200:
672 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
675 ; AVX512VL-LABEL: shuffle_v4i64_2200:
677 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
678 ; AVX512VL-NEXT: retq
679 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
680 ret <4 x i64> %shuffle
683 define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
684 ; AVX1-LABEL: shuffle_v4i64_3330:
686 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
687 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
688 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
691 ; AVX2-LABEL: shuffle_v4i64_3330:
693 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
696 ; AVX512VL-LABEL: shuffle_v4i64_3330:
698 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
699 ; AVX512VL-NEXT: retq
700 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
701 ret <4 x i64> %shuffle
704 define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
705 ; AVX1-LABEL: shuffle_v4i64_3210:
707 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
708 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
711 ; AVX2-LABEL: shuffle_v4i64_3210:
713 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
716 ; AVX512VL-LABEL: shuffle_v4i64_3210:
718 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
719 ; AVX512VL-NEXT: retq
720 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
721 ret <4 x i64> %shuffle
724 define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
725 ; AVX1-LABEL: shuffle_v4i64_0124:
727 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
728 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
729 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
732 ; AVX2-LABEL: shuffle_v4i64_0124:
734 ; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
735 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
738 ; AVX512VL-LABEL: shuffle_v4i64_0124:
740 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
741 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
742 ; AVX512VL-NEXT: retq
743 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
744 ret <4 x i64> %shuffle
747 define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
748 ; AVX1-LABEL: shuffle_v4i64_0142:
750 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
751 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
752 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
755 ; AVX2-LABEL: shuffle_v4i64_0142:
757 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
758 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
759 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
762 ; AVX512VL-LABEL: shuffle_v4i64_0142:
764 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm1
765 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
766 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
767 ; AVX512VL-NEXT: retq
768 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
769 ret <4 x i64> %shuffle
772 define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
773 ; AVX1-LABEL: shuffle_v4i64_0412:
775 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
776 ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
777 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
778 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
779 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
782 ; AVX2-LABEL: shuffle_v4i64_0412:
784 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
785 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
786 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
789 ; AVX512VL-LABEL: shuffle_v4i64_0412:
791 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
792 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
793 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
794 ; AVX512VL-NEXT: retq
795 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
796 ret <4 x i64> %shuffle
799 define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
800 ; AVX1-LABEL: shuffle_v4i64_4012:
802 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
803 ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
804 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
805 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
806 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
809 ; AVX2-LABEL: shuffle_v4i64_4012:
811 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
812 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
815 ; AVX512VL-LABEL: shuffle_v4i64_4012:
817 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
818 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
819 ; AVX512VL-NEXT: retq
820 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
821 ret <4 x i64> %shuffle
824 define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
825 ; AVX1-LABEL: shuffle_v4i64_0145:
827 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
830 ; AVX2-LABEL: shuffle_v4i64_0145:
832 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
835 ; AVX512VL-LABEL: shuffle_v4i64_0145:
837 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
838 ; AVX512VL-NEXT: retq
839 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
840 ret <4 x i64> %shuffle
843 define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
844 ; AVX1-LABEL: shuffle_v4i64_0451:
846 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
847 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
848 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
851 ; AVX2-LABEL: shuffle_v4i64_0451:
853 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
854 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
855 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
858 ; AVX512VL-LABEL: shuffle_v4i64_0451:
860 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
861 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
862 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
863 ; AVX512VL-NEXT: retq
864 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
865 ret <4 x i64> %shuffle
868 define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
869 ; AVX1-LABEL: shuffle_v4i64_4501:
871 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
874 ; AVX2-LABEL: shuffle_v4i64_4501:
876 ; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
879 ; AVX512VL-LABEL: shuffle_v4i64_4501:
881 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0
882 ; AVX512VL-NEXT: retq
883 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
884 ret <4 x i64> %shuffle
887 define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
888 ; AVX1-LABEL: shuffle_v4i64_4015:
890 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
891 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
892 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
895 ; AVX2-LABEL: shuffle_v4i64_4015:
897 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
898 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
899 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
902 ; AVX512VL-LABEL: shuffle_v4i64_4015:
904 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm1, %ymm1
905 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
906 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
907 ; AVX512VL-NEXT: retq
908 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
909 ret <4 x i64> %shuffle
912 define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
913 ; AVX1-LABEL: shuffle_v4i64_2u35:
915 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
916 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm0[1],xmm1[1]
917 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
920 ; AVX2-LABEL: shuffle_v4i64_2u35:
922 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
923 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
926 ; AVX512VL-LABEL: shuffle_v4i64_2u35:
928 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
929 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
930 ; AVX512VL-NEXT: retq
931 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
932 ret <4 x i64> %shuffle
935 define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
936 ; AVX1-LABEL: shuffle_v4i64_1251:
938 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
939 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[2],ymm2[3]
940 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
941 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
942 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
945 ; AVX2-LABEL: shuffle_v4i64_1251:
947 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
948 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
949 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
952 ; AVX512VL-LABEL: shuffle_v4i64_1251:
954 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
955 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
956 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
957 ; AVX512VL-NEXT: retq
958 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
959 ret <4 x i64> %shuffle
962 define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
963 ; AVX1-LABEL: shuffle_v4i64_1054:
965 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
966 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
969 ; AVX2-LABEL: shuffle_v4i64_1054:
971 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
972 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
975 ; AVX512VL-LABEL: shuffle_v4i64_1054:
977 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
978 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
979 ; AVX512VL-NEXT: retq
980 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
981 ret <4 x i64> %shuffle
; Mask <3,2,5,4>: high half of %a concatenated with low half of %b, each
; 128-bit lane with its two i64s swapped. Expected lowering: one cross-lane
; vperm2f128/vperm2i128 followed by one in-lane permute.
984 define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
985 ; AVX1-LABEL: shuffle_v4i64_3254:
987 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
988 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
991 ; AVX2-LABEL: shuffle_v4i64_3254:
993 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
994 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
997 ; AVX512VL-LABEL: shuffle_v4i64_3254:
999 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
1000 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1001 ; AVX512VL-NEXT: retq
1002 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
1003 ret <4 x i64> %shuffle
; Mask <3,2,7,6>: high halves of both inputs, elements swapped within each
; 128-bit lane. Same two-instruction pattern as 3254 but selecting lane [2,3]
; of both sources.
1006 define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
1007 ; AVX1-LABEL: shuffle_v4i64_3276:
1009 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1010 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
1013 ; AVX2-LABEL: shuffle_v4i64_3276:
1015 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1016 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1019 ; AVX512VL-LABEL: shuffle_v4i64_3276:
1021 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1022 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1023 ; AVX512VL-NEXT: retq
1024 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
1025 ret <4 x i64> %shuffle
; Mask <1,0,7,6>: low half of %a plus high half of %b (a lane-aligned blend,
; cheaper than vperm2*128), then an in-lane element swap.
1028 define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) {
1029 ; AVX1-LABEL: shuffle_v4i64_1076:
1031 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1032 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
1035 ; AVX2-LABEL: shuffle_v4i64_1076:
1037 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1038 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1041 ; AVX512VL-LABEL: shuffle_v4i64_1076:
1043 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1044 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1045 ; AVX512VL-NEXT: retq
1046 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
1047 ret <4 x i64> %shuffle
; Mask <0,4,1,5>: interleave the two low i64s of %a with the two low i64s of
; %b. AVX1 builds both halves with xmm unpacks and inserts; AVX2/AVX512VL use
; two cross-lane vpermq permutes feeding one vpblendd.
1050 define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
1051 ; AVX1-LABEL: shuffle_v4i64_0415:
1053 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
1054 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1055 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1058 ; AVX2-LABEL: shuffle_v4i64_0415:
1060 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
1061 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
1062 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
1065 ; AVX512VL-LABEL: shuffle_v4i64_0415:
1067 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
1068 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
1069 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
1070 ; AVX512VL-NEXT: retq
1071 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1072 ret <4 x i64> %shuffle
; First operand is zeroinitializer, mask <0,4,0,6>: result is <0, a0, 0, a2>.
; AVX1 unpacks against a zeroed register; AVX2/AVX512VL fold the whole thing
; into a single per-128-bit-lane byte shift left (vpslldq).
1075 define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
1076 ; AVX1-LABEL: shuffle_v4i64_z4z6:
1078 ; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
1079 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1082 ; AVX2-LABEL: shuffle_v4i64_z4z6:
1084 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
1087 ; AVX512VL-LABEL: shuffle_v4i64_z4z6:
1089 ; AVX512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
1090 ; AVX512VL-NEXT: retq
1091 %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 0, i32 4, i32 0, i32 6>
1092 ret <4 x i64> %shuffle
; Zero first operand, mask <5,0,undef,0>: result is <a1, 0, undef, 0>.
; Mirror of z4z6 — AVX2/AVX512VL use a per-lane byte shift right (vpsrldq);
; AVX1 unpacks the high elements against a zeroed register.
1095 define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
1096 ; AVX1-LABEL: shuffle_v4i64_5zuz:
1098 ; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
1099 ; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
1102 ; AVX2-LABEL: shuffle_v4i64_5zuz:
1104 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
1107 ; AVX512VL-LABEL: shuffle_v4i64_5zuz:
1109 ; AVX512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
1110 ; AVX512VL-NEXT: retq
1111 %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 5, i32 0, i32 undef, i32 0>
1112 ret <4 x i64> %shuffle
; Mask <4,0,undef,2>: with the undef at index 2 this is exactly an unpack of
; the low i64 of each 128-bit lane, sources reversed — a single
; vunpcklpd/vpunpcklqdq of %b with %a.
1115 define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
1116 ; AVX1-LABEL: shuffle_v4i64_40u2:
1118 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1121 ; AVX2-LABEL: shuffle_v4i64_40u2:
1123 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1126 ; AVX512VL-LABEL: shuffle_v4i64_40u2:
1128 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1129 ; AVX512VL-NEXT: retq
1130 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>
1131 ret <4 x i64> %shuffle
; Mask <1,5,undef,undef>: only the low 128 bits are defined, so all targets
; can use a single 128-bit vpunpckhqdq (upper ymm lane left undefined).
1134 define <4 x i64> @shuffle_v4i64_15uu(<4 x i64> %a, <4 x i64> %b) {
1135 ; ALL-LABEL: shuffle_v4i64_15uu:
1137 ; ALL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1139 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
1140 ret <4 x i64> %shuffle
; Mask <1,1,undef,undef>: splat element 1 into the defined low half — one
; xmm-wide vpshufd duplicating the high i64 of xmm0.
1143 define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) {
1144 ; ALL-LABEL: shuffle_v4i64_11uu:
1146 ; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1148 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
1149 ret <4 x i64> %shuffle
; Mask <2,2,undef,undef>: duplicate element 2 (low i64 of the high lane).
; AVX1 must extract the high xmm first; AVX2/AVX512VL do it with one
; cross-lane vpermq.
1152 define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) {
1153 ; AVX1-LABEL: shuffle_v4i64_22uu:
1155 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1156 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1159 ; AVX2-LABEL: shuffle_v4i64_22uu:
1161 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
1164 ; AVX512VL-LABEL: shuffle_v4i64_22uu:
1166 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
1167 ; AVX512VL-NEXT: retq
1168 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
1169 ret <4 x i64> %shuffle
; Full splat of element 3. AVX1 needs extract + in-lane dup + reinsert;
; AVX2/AVX512VL collapse to a single vpermq.
1172 define <4 x i64> @shuffle_v4i64_3333(<4 x i64> %a, <4 x i64> %b) {
1173 ; AVX1-LABEL: shuffle_v4i64_3333:
1175 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1176 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
1177 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1180 ; AVX2-LABEL: shuffle_v4i64_3333:
1182 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
1185 ; AVX512VL-LABEL: shuffle_v4i64_3333:
1187 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
1188 ; AVX512VL-NEXT: retq
1189 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1190 ret <4 x i64> %shuffle
; Stress test: a chain of single-source and two-source shuffles that the
; shuffle combiner should fold together. Only the LABEL is checked — the test
; asserts the chain compiles, not a specific instruction sequence.
; NOTE(review): the function's return is outside this excerpt.
1193 define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
1194 ; ALL-LABEL: stress_test1:
1196 %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
1197 %d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
1198 %e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
1199 %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>
; Insert a scalar i64 into lane 0 with all other lanes zero: a plain vmovq
; suffices on every target, since it zeroes the rest of the register.
1204 define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
1205 ; ALL-LABEL: insert_reg_and_zero_v4i64:
1207 ; ALL-NEXT: vmovq %rdi, %xmm0
1209 %v = insertelement <4 x i64> undef, i64 %a, i64 0
1210 %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1211 ret <4 x i64> %shuffle
; Same as insert_reg_and_zero_v4i64 but the scalar comes from memory: a
; single zero-extending vmovq load on all targets.
1214 define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
1215 ; AVX1-LABEL: insert_mem_and_zero_v4i64:
1217 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1220 ; AVX2-LABEL: insert_mem_and_zero_v4i64:
1222 ; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1225 ; AVX512VL-LABEL: insert_mem_and_zero_v4i64:
1227 ; AVX512VL-NEXT: vmovq (%rdi), %xmm0
1228 ; AVX512VL-NEXT: retq
1229 %a = load i64, i64* %ptr
1230 %v = insertelement <4 x i64> undef, i64 %a, i64 0
1231 %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1232 ret <4 x i64> %shuffle
; FP variant: scalar double into lane 0, other lanes zero. AVX1/AVX2 blend
; against a zeroed ymm; AVX512VL uses vmovsd to merge into a zeroed xmm.
1235 define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
1236 ; AVX1-LABEL: insert_reg_and_zero_v4f64:
1238 ; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
1239 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
1242 ; AVX2-LABEL: insert_reg_and_zero_v4f64:
1244 ; AVX2-NEXT: vxorpd %ymm1, %ymm1, %ymm1
1245 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
1248 ; AVX512VL-LABEL: insert_reg_and_zero_v4f64:
1250 ; AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1251 ; AVX512VL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
1252 ; AVX512VL-NEXT: retq
1253 %v = insertelement <4 x double> undef, double %a, i32 0
1254 %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1255 ret <4 x double> %shuffle
; FP scalar loaded from memory into lane 0, rest zero: a single vmovsd load
; on every target (vmovsd from memory zeroes the upper bits).
1258 define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
1259 ; AVX1-LABEL: insert_mem_and_zero_v4f64:
1261 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1264 ; AVX2-LABEL: insert_mem_and_zero_v4f64:
1266 ; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1269 ; AVX512VL-LABEL: insert_mem_and_zero_v4f64:
1271 ; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
1272 ; AVX512VL-NEXT: retq
1273 %a = load double, double* %ptr
1274 %v = insertelement <4 x double> undef, double %a, i32 0
1275 %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1276 ret <4 x double> %shuffle
; Load + splat of a double: should fold into a single vbroadcastsd from
; memory on all targets.
1279 define <4 x double> @splat_mem_v4f64(double* %ptr) {
1280 ; ALL-LABEL: splat_mem_v4f64:
1282 ; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
1284 %a = load double, double* %ptr
1285 %v = insertelement <4 x double> undef, double %a, i32 0
1286 %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1287 ret <4 x double> %shuffle
; Load + splat of an i64: one broadcast from memory (FP-domain vbroadcastsd
; on AVX1/AVX2, integer vpbroadcastq on AVX512VL).
1290 define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
1291 ; AVX1-LABEL: splat_mem_v4i64:
1293 ; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
1296 ; AVX2-LABEL: splat_mem_v4i64:
1298 ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
1301 ; AVX512VL-LABEL: splat_mem_v4i64:
1303 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
1304 ; AVX512VL-NEXT: retq
1305 %a = load i64, i64* %ptr
1306 %v = insertelement <4 x i64> undef, i64 %a, i64 0
1307 %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1308 ret <4 x i64> %shuffle
; Splat built through an intermediate <2 x double>: the widening shuffle must
; still be recognized as a memory broadcast (single vbroadcastsd).
; NOTE(review): the function's return is outside this excerpt.
1311 define <4 x double> @splat_mem_v4f64_2(double* %p) {
1312 ; ALL-LABEL: splat_mem_v4f64_2:
1314 ; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
1316 %1 = load double, double* %p
1317 %2 = insertelement <2 x double> undef, double %1, i32 0
1318 %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
; Register splat widening <2 x double> to <4 x double>. AVX1 has no 256-bit
; register broadcast, so it dups + inserts; AVX2/AVX512VL use vbroadcastsd
; from a register.
; NOTE(review): the function's return is outside this excerpt.
1322 define <4 x double> @splat_v4f64(<2 x double> %r) {
1323 ; AVX1-LABEL: splat_v4f64:
1325 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1326 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1329 ; AVX2-LABEL: splat_v4f64:
1331 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
1334 ; AVX512VL-LABEL: splat_v4f64:
1336 ; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
1337 ; AVX512VL-NEXT: retq
1338 %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
; Splat element 0 of a <2 x i64> loaded from memory. AVX1 narrows the load to
; a movddup + insert; AVX2/AVX512VL fold it to a single 64-bit broadcast.
1342 define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
1343 ; AVX1-LABEL: splat_mem_v4i64_from_v2i64:
1345 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1346 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1349 ; AVX2-LABEL: splat_mem_v4i64_from_v2i64:
1351 ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
1354 ; AVX512VL-LABEL: splat_mem_v4i64_from_v2i64:
1356 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
1357 ; AVX512VL-NEXT: retq
1358 %v = load <2 x i64>, <2 x i64>* %ptr
1359 %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1360 ret <4 x i64> %shuffle
; FP version of the previous test: element-0 splat of a loaded <2 x double>
; folds to a single vbroadcastsd on every target.
1363 define <4 x double> @splat_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
1364 ; ALL-LABEL: splat_mem_v4f64_from_v2f64:
1366 ; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
1368 %v = load <2 x double>, <2 x double>* %ptr
1369 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1370 ret <4 x double> %shuffle
; Duplicate an entire 128-bit <2 x i64> into both ymm lanes (mask <0,1,0,1>):
; load the xmm once, then insert it into the upper lane.
1373 define <4 x i64> @splat128_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
1374 ; AVX1-LABEL: splat128_mem_v4i64_from_v2i64:
1376 ; AVX1-NEXT: vmovaps (%rdi), %xmm0
1377 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1380 ; AVX2-LABEL: splat128_mem_v4i64_from_v2i64:
1382 ; AVX2-NEXT: vmovaps (%rdi), %xmm0
1383 ; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1386 ; AVX512VL-LABEL: splat128_mem_v4i64_from_v2i64:
1388 ; AVX512VL-NEXT: vmovdqa64 (%rdi), %xmm0
1389 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
1390 ; AVX512VL-NEXT: retq
1391 %v = load <2 x i64>, <2 x i64>* %ptr
1392 %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1393 ret <4 x i64> %shuffle
; FP version of the 128-bit lane splat: load the <2 x double> once and insert
; it into the upper lane (EVEX vmovapd/vinsertf32x4 on AVX512VL).
1396 define <4 x double> @splat128_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
1397 ; AVX1-LABEL: splat128_mem_v4f64_from_v2f64:
1399 ; AVX1-NEXT: vmovaps (%rdi), %xmm0
1400 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1403 ; AVX2-LABEL: splat128_mem_v4f64_from_v2f64:
1405 ; AVX2-NEXT: vmovaps (%rdi), %xmm0
1406 ; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1409 ; AVX512VL-LABEL: splat128_mem_v4f64_from_v2f64:
1411 ; AVX512VL-NEXT: vmovapd (%rdi), %xmm0
1412 ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
1413 ; AVX512VL-NEXT: retq
1414 %v = load <2 x double>, <2 x double>* %ptr
1415 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1416 ret <4 x double> %shuffle
; Shuffle-through-bitcast combining: an f64 shuffle, an f32 reverse-per-lane,
; and an i16 word swap composed through bitcasts should cancel down to a
; single 64-bit unpack-low of %a and %b.
1419 define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
1420 ; AVX1-LABEL: bitcast_v4f64_0426:
1422 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
1425 ; AVX2-LABEL: bitcast_v4f64_0426:
1427 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
1430 ; AVX512VL-LABEL: bitcast_v4f64_0426:
1432 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
1433 ; AVX512VL-NEXT: retq
1434 %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
1435 %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
1436 %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1437 %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
1438 %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
1439 %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
1440 ret <4 x double> %bitcast64
; Concatenation built from two extract-subvector shuffles: low half of %a0
; with high half of %a1. Should collapse to a single lane-aligned blend.
1443 define <4 x i64> @concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) {
1444 ; AVX1-LABEL: concat_v4i64_0167:
1446 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1449 ; AVX2-LABEL: concat_v4i64_0167:
1451 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1454 ; AVX512VL-LABEL: concat_v4i64_0167:
1456 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1457 ; AVX512VL-NEXT: retq
1458 %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
1459 %a1hi = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 6, i32 7>
1460 %shuffle64 = shufflevector <2 x i64> %a0lo, <2 x i64> %a1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1461 ret <4 x i64> %shuffle64
; Concatenation of the two low halves, obscured by bitcasts to <4 x i32>:
; the combiner should still see it as concat(a0.lo, a1.lo) and emit a single
; 128-bit insert.
1464 define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) {
1465 ; AVX1-LABEL: concat_v4i64_0145_bc:
1467 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1470 ; AVX2-LABEL: concat_v4i64_0145_bc:
1472 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1475 ; AVX512VL-LABEL: concat_v4i64_0145_bc:
1477 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
1478 ; AVX512VL-NEXT: retq
1479 %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
1480 %a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5>
1481 %bc0lo = bitcast <2 x i64> %a0lo to <4 x i32>
1482 %bc1lo = bitcast <2 x i64> %a1lo to <4 x i32>
1483 %shuffle32 = shufflevector <4 x i32> %bc0lo, <4 x i32> %bc1lo, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1484 %shuffle64 = bitcast <8 x i32> %shuffle32 to <4 x i64>
1485 ret <4 x i64> %shuffle64
1488 define <4 x i64> @insert_dup_mem_v4i64(i64* %ptr) {
1489 ; AVX1-LABEL: insert_dup_mem_v4i64:
1491 ; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
1494 ; AVX2-LABEL: insert_dup_mem_v4i64:
1496 ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
1499 ; AVX512VL-LABEL: insert_dup_mem_v4i64:
1501 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
1502 ; AVX512VL-NEXT: retq
1503 %tmp = load i64, i64* %ptr, align 1
1504 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
1505 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <4 x i32> zeroinitializer