; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

target triple = "x86_64-unknown-unknown"
; <4 x double> shuffles that use only the first input vector. These must be
; lowered with cross-lane extract/insert sequences on AVX1 (no vpermpd).
; NOTE(review): the first check after each LABEL is a plain check, not -NEXT,
; because llc emits a "# BB#0:" block header between the label and the first
; instruction.

define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_0001
; ALL:      vunpcklpd {{.*}} # xmm1 = xmm0[0,0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_0020
; ALL:      vextractf128 $1, %ymm0, %xmm1
; ALL-NEXT: vunpcklpd {{.*}} # xmm1 = xmm1[0],xmm0[0]
; ALL-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_0300
; ALL:      vextractf128 $1, %ymm0, %xmm1
; ALL-NEXT: vblendpd {{.*}} # xmm1 = xmm0[0],xmm1[1]
; ALL-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_1000
; ALL:      vpermilpd {{.*}} # xmm1 = xmm0[1,0]
; ALL-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_2200
; ALL:      vextractf128 $1, %ymm0, %xmm1
; ALL-NEXT: vunpcklpd {{.*}} # xmm1 = xmm1[0,0]
; ALL-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_3330
; ALL:      vextractf128 $1, %ymm0, %xmm1
; ALL-NEXT: vshufpd {{.*}} # xmm0 = xmm1[1],xmm0[0]
; ALL-NEXT: vmovhlps {{.*}} # xmm1 = xmm1[1,1]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_3210
; ALL:      vextractf128 $1, %ymm0, %xmm1
; ALL-NEXT: vpermilpd {{.*}} # xmm1 = xmm1[1,0]
; ALL-NEXT: vpermilpd {{.*}} # xmm0 = xmm0[1,0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %shuffle
}
; In-lane <4 x double> shuffles that lower to a single vpermilpd on the whole
; ymm register.

define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_0023
; ALL:      vpermilpd {{.*}} # ymm0 = ymm0[0,0,2,3]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_0022
; ALL:      vpermilpd {{.*}} # ymm0 = ymm0[0,0,2,2]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_1032
; ALL:      vpermilpd {{.*}} # ymm0 = ymm0[1,0,3,2]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_1133
; ALL:      vpermilpd {{.*}} # ymm0 = ymm0[1,1,3,3]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_1023
; ALL:      vpermilpd {{.*}} # ymm0 = ymm0[1,0,2,3]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_1022
; ALL:      vpermilpd {{.*}} # ymm0 = ymm0[1,0,2,2]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}
; Two-input <4 x double> shuffles: unpck/shuf/blend lowerings and whole-lane
; vinsertf128 cases.

define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_0423
; ALL:      vpermilpd {{.*}} # ymm1 = ymm1[0,0,2,2]
; ALL-NEXT: vblendpd {{.*}} # ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_0462
; ALL:      vpermilpd {{.*}} # ymm1 = ymm1[0,0,2,2]
; ALL-NEXT: vpermilpd {{.*}} # ymm0 = ymm0[0,0,2,2]
; ALL-NEXT: vblendpd {{.*}} # ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_0426
; ALL:      vunpcklpd {{.*}} # ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_1537
; ALL:      vunpckhpd {{.*}} # ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_4062
; ALL:      vunpcklpd {{.*}} # ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_5173
; ALL:      vunpckhpd {{.*}} # ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_5163
; ALL:      vshufpd {{.*}} # ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_0527
; ALL:      vblendpd {{.*}} # ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_4163
; ALL:      vblendpd {{.*}} # ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_0145
; ALL:      vinsertf128 $1, %xmm1, %ymm0, %ymm0
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_4501
; ALL:      vinsertf128 $1, %xmm0, %ymm1, %ymm0
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: @shuffle_v4f64_0167
; ALL:      vblendpd {{.*}} # ymm0 = ymm0[0,1],ymm1[2,3]
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %shuffle
}
; Single-input <4 x i64> shuffles: AVX1 must use FP-domain extract/insert
; sequences, while AVX2 can use a single vpermq.

define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0001
; AVX1:      vunpcklpd {{.*}} # xmm1 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_0001
; AVX2:      vpermq {{.*}} # ymm0 = ymm0[0,0,0,1]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0020
; AVX1:      vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*}} # xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_0020
; AVX2:      vpermq {{.*}} # ymm0 = ymm0[0,0,2,0]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0112
; AVX1:      vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm0[1],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_0112
; AVX2:      vpermq {{.*}} # ymm0 = ymm0[0,1,1,2]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0300
; AVX1:      vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm0[0],xmm1[1]
; AVX1-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_0300
; AVX2:      vpermq {{.*}} # ymm0 = ymm0[0,3,0,0]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_1000
; AVX1:      vpermilpd {{.*}} # xmm1 = xmm0[1,0]
; AVX1-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_1000
; AVX2:      vpermq {{.*}} # ymm0 = ymm0[1,0,0,0]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_2200
; AVX1:      vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*}} # xmm1 = xmm1[0,0]
; AVX1-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_2200
; AVX2:      vpermq {{.*}} # ymm0 = ymm0[2,2,0,0]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_3330
; AVX1:      vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[1],xmm0[0]
; AVX1-NEXT: vmovhlps {{.*}} # xmm1 = xmm1[1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_3330
; AVX2:      vpermq {{.*}} # ymm0 = ymm0[3,3,3,0]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_3210
; AVX1:      vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpermilpd {{.*}} # xmm1 = xmm1[1,0]
; AVX1-NEXT: vpermilpd {{.*}} # xmm0 = xmm0[1,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_3210
; AVX2:      vpermq {{.*}} # ymm0 = ymm0[3,2,1,0]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %shuffle
}
; Two-input <4 x i64> shuffles: AVX2 lowers these to vpermq/vpshufd feeding
; vpblendd; AVX1 must stay in the FP domain.

define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0124
; AVX1:      vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vunpcklpd {{.*}} # xmm1 = xmm1[0,0]
; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm2[0],xmm1[1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_0124
; AVX2:      vpermq {{.*}} # ymm1 = ymm1[0,1,2,0]
; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0142
; AVX1:      vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vunpcklpd {{.*}} # xmm2 = xmm2[0,0]
; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_0142
; AVX2:      vpermq {{.*}} # ymm1 = ymm1[0,1,0,3]
; AVX2-NEXT: vpermq {{.*}} # ymm0 = ymm0[0,1,2,2]
; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0412
; AVX1:      vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT: vunpcklpd {{.*}} # xmm1 = xmm1[0,0]
; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_0412
; AVX2:      vpshufd {{.*}} # ymm1 = ymm1[0,1,0,1,4,5,4,5]
; AVX2-NEXT: vpermq {{.*}} # ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_4012
; AVX1:      vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_4012
; AVX2:      vpermq {{.*}} # ymm0 = ymm0[0,0,1,2]
; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0145
; AVX1:      vinsertf128 $1, %xmm1, %ymm0, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_0145
; AVX2:      vpermq {{.*}} # ymm1 = ymm1[0,1,0,1]
; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_0451
; AVX1:      vpermilpd {{.*}} # xmm2 = xmm1[1,0]
; AVX1-NEXT: vblendpd {{.*}} # xmm2 = xmm2[0],xmm0[1]
; AVX1-NEXT: vunpcklpd {{.*}} # xmm1 = xmm1[0,0]
; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_0451
; AVX2:      vpermq {{.*}} # ymm1 = ymm1[0,0,1,3]
; AVX2-NEXT: vpermq {{.*}} # ymm0 = ymm0[0,1,2,1]
; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_4501
; AVX1:      vinsertf128 $1, %xmm0, %ymm1, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_4501
; AVX2:      vpermq {{.*}} # ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @shuffle_v4i64_4015
; AVX1:      vpermilpd {{.*}} # xmm2 = xmm0[1,0]
; AVX1-NEXT: vblendpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
; AVX1-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
; AVX1-NEXT: vblendpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
;
; AVX2-LABEL: @shuffle_v4i64_4015
; AVX2:      vpermq {{.*}} # ymm1 = ymm1[0,1,2,1]
; AVX2-NEXT: vpermq {{.*}} # ymm0 = ymm0[0,0,1,3]
; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
  ret <4 x i64> %shuffle
}
; Chained shuffles that stress the shuffle combiner. The extracted source
; dropped the terminator; %f is the final combined shuffle, so return it.
; NOTE(review): `ret <4 x i64> %f` is reconstructed — confirm against the
; upstream test.
define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: @stress_test1
; AVX1:      vpermilpd {{.*}} # xmm0 = xmm1[1,0]
; AVX1-NEXT: vpermilpd {{.*}} # xmm0 = xmm0[1,0]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vmovhlps {{.*}} # xmm1 = xmm1[1,1]
; AVX1-NEXT: vpermilpd {{.*}} # xmm1 = xmm1[1,0]
; AVX1-NEXT: vblendpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
; AVX1-NEXT: vpermilpd {{.*}} # xmm0 = xmm0[1,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
;
; AVX2-LABEL: @stress_test1
; AVX2:      vpermq {{.*}} # ymm0 = ymm1[3,1,1,0]
; AVX2-NEXT: vpermq {{.*}} # ymm0 = ymm0[3,1,2,3]
; AVX2-NEXT: vpermq {{.*}} # ymm1 = ymm1[3,3,1,3]
; AVX2-NEXT: vpshufd {{.*}} # ymm1 = ymm1[2,3,2,3,6,7,6,7]
; AVX2-NEXT: vpermq {{.*}} # ymm0 = ymm0[0,1,1,0]
; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
  %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
  %d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
  %e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
  %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>
  ret <4 x i64> %f
}
; Insert a scalar i64 into lane 0 with the remaining lanes zeroed.
define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
; AVX1-LABEL: @insert_reg_and_zero_v4i64
; AVX1:      vmovq %rdi, %xmm0
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendpd {{.*}} # ymm0 = ymm0[0],ymm1[1,2,3]
;
; AVX2-LABEL: @insert_reg_and_zero_v4i64
; AVX2:      vmovq %rdi, %xmm0
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}
; Load a scalar i64 and insert it into lane 0 with the remaining lanes zeroed.
; Bug fix: the body used %a without ever defining it — the load from %ptr was
; missing (compare @insert_mem_and_zero_v4f64, which loads before inserting).
define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
; AVX1-LABEL: @insert_mem_and_zero_v4i64
; AVX1:      vmovq (%rdi), %xmm0
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendpd {{.*}} # ymm0 = ymm0[0],ymm1[1,2,3]
;
; AVX2-LABEL: @insert_mem_and_zero_v4i64
; AVX2:      vmovq (%rdi), %xmm0
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
  %a = load i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}
; Insert a scalar double into lane 0 with the remaining lanes zeroed, from a
; register and from memory.

define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; ALL-LABEL: @insert_reg_and_zero_v4f64
; ALL:      vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT: vblendpd {{.*}} # ymm0 = ymm0[0],ymm1[1,2,3]
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
; ALL-LABEL: @insert_mem_and_zero_v4f64
; ALL:      vmovsd (%rdi), %xmm0
; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT: vblendpd {{.*}} # ymm0 = ymm0[0],ymm1[1,2,3]
  %a = load double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}