1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
3 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
5 target triple = "x86_64-unknown-unknown"
; Splat of %a[0] into all eight result lanes.
; The AVX1 checks expect an xmm vpermilps followed by vinsertf128 to duplicate
; the low half; the AVX2 checks expect a single vbroadcastss.
7 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
8 ; AVX1-LABEL: shuffle_v8f32_00000000:
10 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
11 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
14 ; AVX2-LABEL: shuffle_v8f32_00000000:
16 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
18 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19 ret <8 x float> %shuffle
; Every lane takes %a[0] except lane 6, which takes %a[1].
; The AVX1 checks build each 128-bit half with its own xmm vpermilps and join
; them with vinsertf128; the AVX2 checks use a constant-index vpermps.
22 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
23 ; AVX1-LABEL: shuffle_v8f32_00000010:
25 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
26 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
27 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
30 ; AVX2-LABEL: shuffle_v8f32_00000010:
32 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
33 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
35 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
36 ret <8 x float> %shuffle
; Every lane takes %a[0] except lane 5, which takes %a[2].
; The AVX1 checks build each 128-bit half with its own xmm vpermilps and join
; them with vinsertf128; the AVX2 checks use a constant-index vpermps.
39 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
40 ; AVX1-LABEL: shuffle_v8f32_00000200:
42 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
43 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
44 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
47 ; AVX2-LABEL: shuffle_v8f32_00000200:
49 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
50 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
52 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
53 ret <8 x float> %shuffle
; Every lane takes %a[0] except lane 4, which takes %a[3].
; The AVX1 checks build each 128-bit half with its own xmm vpermilps and join
; them with vinsertf128; the AVX2 checks use a constant-index vpermps.
56 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
57 ; AVX1-LABEL: shuffle_v8f32_00003000:
59 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
60 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
61 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
64 ; AVX2-LABEL: shuffle_v8f32_00003000:
66 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
67 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
69 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
70 ret <8 x float> %shuffle
; Every lane takes %a[0] except lane 3, which takes %a[4] (an element from the
; upper 128-bit half placed into the lower half).
; The AVX1 checks expect a half-swapping vperm2f128, two ymm vpermilps and a
; vblendps; the AVX2 checks use a constant-index vpermps.
73 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
74 ; AVX1-LABEL: shuffle_v8f32_00040000:
76 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
77 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
78 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
79 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
82 ; AVX2-LABEL: shuffle_v8f32_00040000:
84 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
85 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
87 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
88 ret <8 x float> %shuffle
; Every lane takes %a[0] except lane 2, which takes %a[5].
; The AVX1 checks expect vperm2f128 (swap halves), a vblendps and one ymm
; vpermilps; the AVX2 checks use a constant-index vpermps.
91 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
92 ; AVX1-LABEL: shuffle_v8f32_00500000:
94 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
95 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
96 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
99 ; AVX2-LABEL: shuffle_v8f32_00500000:
101 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
102 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
104 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
105 ret <8 x float> %shuffle
; Every lane takes %a[0] except lane 1, which takes %a[6].
; The AVX1 checks expect vperm2f128 (swap halves), a vblendpd and one ymm
; vpermilps; the AVX2 checks use a constant-index vpermps.
108 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
109 ; AVX1-LABEL: shuffle_v8f32_06000000:
111 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
112 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
113 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
116 ; AVX2-LABEL: shuffle_v8f32_06000000:
118 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
119 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
121 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
122 ret <8 x float> %shuffle
; Lane 0 takes %a[7]; every other lane takes %a[0].
; The AVX1 checks expect vperm2f128 (swap halves), a vblendpd and one ymm
; vpermilps. The AVX2 checks expect the vpermps index vector to be built from
; an immediate (movl $7 + vmovd) rather than loaded from memory.
125 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
126 ; AVX1-LABEL: shuffle_v8f32_70000000:
128 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
129 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
130 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
133 ; AVX2-LABEL: shuffle_v8f32_70000000:
135 ; AVX2-NEXT: movl $7, %eax
136 ; AVX2-NEXT: vmovd %eax, %xmm1
137 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
139 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
140 ret <8 x float> %shuffle
; Repeats the first float pair of each 128-bit half of %a (0,1,0,1 / 4,5,4,5).
; Both run configurations expect a single ymm vmovddup.
143 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
144 ; ALL-LABEL: shuffle_v8f32_01014545:
146 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
148 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
149 ret <8 x float> %shuffle
; Duplicates each of %a[0..3] into two adjacent lanes, spreading the low
; 128-bit half over the whole 256-bit result.
; The AVX1 checks expect xmm vunpcklps/vunpckhps joined by vinsertf128; the
; AVX2 checks use a constant-index vpermps.
152 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
153 ; AVX1-LABEL: shuffle_v8f32_00112233:
155 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
156 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
157 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
160 ; AVX2-LABEL: shuffle_v8f32_00112233:
162 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
163 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
165 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
166 ret <8 x float> %shuffle
; Low half is a splat of %a[0], high half is a splat of %a[1].
; The AVX1 checks expect two xmm vpermilps joined by vinsertf128; the AVX2
; checks use a constant-index vpermps.
169 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
170 ; AVX1-LABEL: shuffle_v8f32_00001111:
172 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
173 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
174 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
177 ; AVX2-LABEL: shuffle_v8f32_00001111:
179 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
180 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
182 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
183 ret <8 x float> %shuffle
; Position-preserving blend: even lanes come from %b (indices 8,10,12,14) and
; odd lanes from %a. Both run configurations expect a single vblendps.
186 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
187 ; ALL-LABEL: shuffle_v8f32_81a3c5e7:
189 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
191 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
192 ret <8 x float> %shuffle
; Alternates %a[0] (even lanes) with %b[0] (odd lanes) across all eight lanes.
; The AVX1 checks widen each source with an xmm shuffle plus vinsertf128 and
; then vblendps; the AVX2 checks use vbroadcastss/vbroadcastsd and vblendps.
195 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
196 ; AVX1-LABEL: shuffle_v8f32_08080808:
198 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
199 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
200 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
201 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
202 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
205 ; AVX2-LABEL: shuffle_v8f32_08080808:
207 ; AVX2-NEXT: vbroadcastss %xmm1, %ymm1
208 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
209 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
211 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
212 ret <8 x float> %shuffle
; Within each 128-bit half, alternates the first element of %a's half with the
; first element of %b's half (0,8,0,8 / 4,12,4,12).
; Both run configurations expect two vshufps: one to gather, one to reorder.
215 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
216 ; ALL-LABEL: shuffle_v8f32_08084c4c:
218 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
219 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
221 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
222 ret <8 x float> %shuffle
; Per 128-bit half: two copies of %b's first element, then %a's upper pair.
; Both run configurations expect a single two-source vshufps.
225 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
226 ; ALL-LABEL: shuffle_v8f32_8823cc67:
228 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
230 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
231 ret <8 x float> %shuffle
; Per 128-bit half: %b's lower pair swapped, then %a's upper pair swapped.
; Both run configurations expect a single two-source vshufps.
234 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
235 ; ALL-LABEL: shuffle_v8f32_9832dc76:
237 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
239 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
240 ret <8 x float> %shuffle
; Per 128-bit half: %b's lower pair swapped, then %a's lower pair swapped.
; Both run configurations expect a single two-source vshufps.
243 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
244 ; ALL-LABEL: shuffle_v8f32_9810dc54:
246 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
248 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
249 ret <8 x float> %shuffle
; Interleaves the lower pair of each 128-bit half of %a with the matching pair
; of %b. Both run configurations expect a single ymm vunpcklps.
252 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
253 ; ALL-LABEL: shuffle_v8f32_08194c5d:
255 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
257 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
258 ret <8 x float> %shuffle
; Interleaves the upper pair of each 128-bit half of %a with the matching pair
; of %b. Both run configurations expect a single ymm vunpckhps.
261 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
262 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
264 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
266 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
267 ret <8 x float> %shuffle
; Interleaves %a[0..3] with %b[0..3] across the full 256-bit result, so only
; the low 128-bit half of each input is read.
; The AVX1 checks expect xmm vunpckhps/vunpcklps joined by vinsertf128; the
; AVX2 checks expect one vpermps per input followed by a vblendps.
270 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
271 ; AVX1-LABEL: shuffle_v8f32_08192a3b:
273 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
274 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
275 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
278 ; AVX2-LABEL: shuffle_v8f32_08192a3b:
280 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
281 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
282 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
283 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
284 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
286 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
287 ret <8 x float> %shuffle
; Lanes 0 and 4 take %a[0] and %a[1]; the remaining lanes take %b[0..3] with
; %b[1] and %b[3] repeated. Only the low half of each input is read.
; The AVX1 checks assemble each 128-bit half from xmm shuffles and join them
; with vinsertf128; the AVX2 checks expect one vpermps per input and a vblendps.
290 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
291 ; AVX1-LABEL: shuffle_v8f32_08991abb:
293 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
294 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
295 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
296 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
297 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
300 ; AVX2-LABEL: shuffle_v8f32_08991abb:
302 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
303 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
304 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
305 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
306 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
308 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
309 ret <8 x float> %shuffle
; Even lanes take %a[0..3] spread across the result; odd lanes keep %b's
; elements in their original positions (9,11,13,15).
; Both configurations shuffle only %a (xmm vpermilps + vinsertf128 on AVX1,
; vpermps on AVX2) and then vblendps with the untouched %b.
312 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
313 ; AVX1-LABEL: shuffle_v8f32_091b2d3f:
315 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
316 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
317 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
318 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
321 ; AVX2-LABEL: shuffle_v8f32_091b2d3f:
323 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
324 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
325 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
327 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
328 ret <8 x float> %shuffle
; Lanes 0 and 4 take %a[0] and %a[1]; all other lanes keep %b's elements in
; their original positions.
; Both configurations move %a[1] into lane 4 (vmovshdup + vinsertf128 on AVX1,
; vpermps on AVX2) and then vblendps with the untouched %b.
331 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
332 ; AVX1-LABEL: shuffle_v8f32_09ab1def:
334 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
335 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
336 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
339 ; AVX2-LABEL: shuffle_v8f32_09ab1def:
341 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
342 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
343 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
345 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
346 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half and applies
; the same pattern (0,0,0,1) to both halves. Expects a single ymm vpermilps.
349 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
350 ; ALL-LABEL: shuffle_v8f32_00014445:
352 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
354 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
355 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half and applies
; the same pattern (0,0,2,0) to both halves. Expects a single ymm vpermilps.
358 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
359 ; ALL-LABEL: shuffle_v8f32_00204464:
361 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
363 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
364 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half and applies
; the same pattern (0,3,0,0) to both halves. Expects a single ymm vpermilps.
367 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
368 ; ALL-LABEL: shuffle_v8f32_03004744:
370 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
372 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
373 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half and applies
; the same pattern (1,0,0,0) to both halves. Expects a single ymm vpermilps.
376 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
377 ; ALL-LABEL: shuffle_v8f32_10005444:
379 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
381 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
382 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half and applies
; the same pattern (2,2,0,0) to both halves. Expects a single ymm vpermilps.
385 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
386 ; ALL-LABEL: shuffle_v8f32_22006644:
388 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
390 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
391 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half and applies
; the same pattern (3,3,3,0) to both halves. Expects a single ymm vpermilps.
394 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
395 ; ALL-LABEL: shuffle_v8f32_33307774:
397 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
399 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
400 ret <8 x float> %shuffle
; Reverses the four elements of each 128-bit half of %a independently.
; Expects a single ymm vpermilps.
403 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
404 ; ALL-LABEL: shuffle_v8f32_32107654:
406 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
408 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
409 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half and applies
; the same pattern (0,0,2,3) to both halves. Expects a single ymm vpermilps.
412 define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
413 ; ALL-LABEL: shuffle_v8f32_00234467:
415 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
417 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
418 ret <8 x float> %shuffle
; Duplicates every even-indexed element of %a into the following odd lane.
; Expects the dedicated vmovsldup instruction rather than a generic vpermilps.
421 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
422 ; ALL-LABEL: shuffle_v8f32_00224466:
424 ; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
426 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
427 ret <8 x float> %shuffle
; Swaps the two elements of every adjacent pair of %a.
; Expects a single ymm vpermilps.
430 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
431 ; ALL-LABEL: shuffle_v8f32_10325476:
433 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
435 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
436 ret <8 x float> %shuffle
; Duplicates every odd-indexed element of %a into the preceding even lane.
; Expects the dedicated vmovshdup instruction rather than a generic vpermilps.
439 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
440 ; ALL-LABEL: shuffle_v8f32_11335577:
442 ; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
444 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
445 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half and applies
; the same pattern (1,0,2,3) to both halves. Expects a single ymm vpermilps.
448 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
449 ; ALL-LABEL: shuffle_v8f32_10235467:
451 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
453 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
454 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half and applies
; the same pattern (1,0,2,2) to both halves. Expects a single ymm vpermilps.
457 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
458 ; ALL-LABEL: shuffle_v8f32_10225466:
460 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
462 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
463 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half but uses a
; different pattern per half (0,0,0,1 low; 1,0,0,0 high).
; Still expects a single ymm vpermilps.
466 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
467 ; ALL-LABEL: shuffle_v8f32_00015444:
469 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
471 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
472 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half but uses a
; different pattern per half (0,0,2,0 low; 0,2,0,0 high).
; Still expects a single ymm vpermilps.
475 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
476 ; ALL-LABEL: shuffle_v8f32_00204644:
478 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
480 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
481 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half but uses a
; different pattern per half (0,3,0,0 low; 0,0,3,0 high).
; Still expects a single ymm vpermilps.
484 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
485 ; ALL-LABEL: shuffle_v8f32_03004474:
487 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
489 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
490 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half but uses a
; different pattern per half (1,0,0,0 low; splat of element 4 high).
; Still expects a single ymm vpermilps.
493 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
494 ; ALL-LABEL: shuffle_v8f32_10004444:
496 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
498 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
499 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half but uses a
; different pattern per half (2,2,0,0 low; 2,0,0,2 high).
; Still expects a single ymm vpermilps.
502 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
503 ; ALL-LABEL: shuffle_v8f32_22006446:
505 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
507 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
508 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half but uses a
; different pattern per half (3,3,3,0 low; 3,0,3,0 high).
; Still expects a single ymm vpermilps.
511 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
512 ; ALL-LABEL: shuffle_v8f32_33307474:
514 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
516 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
517 ret <8 x float> %shuffle
; Reverses the low 128-bit half of %a and leaves the high half unchanged.
; Expects a single ymm vpermilps.
520 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
521 ; ALL-LABEL: shuffle_v8f32_32104567:
523 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
525 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
526 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half but uses a
; different pattern per half (0,0,2,3 low; 2,3,0,0 high).
; Still expects a single ymm vpermilps.
529 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
530 ; ALL-LABEL: shuffle_v8f32_00236744:
532 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
534 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
535 ret <8 x float> %shuffle
; Single-input shuffle of %a that stays within each 128-bit half but uses a
; different pattern per half (0,0,2,2 low; 2,2,0,0 high).
; Still expects a single ymm vpermilps.
538 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
539 ; ALL-LABEL: shuffle_v8f32_00226644:
541 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
543 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
544 ret <8 x float> %shuffle
; Swaps adjacent pairs in the low 128-bit half of %a and leaves the high half
; unchanged. Expects a single ymm vpermilps.
547 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
548 ; ALL-LABEL: shuffle_v8f32_10324567:
550 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
552 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
553 ret <8 x float> %shuffle
; Duplicates the odd elements in the low 128-bit half of %a (1,1,3,3) and
; leaves the high half unchanged. Expects a single ymm vpermilps.
556 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
557 ; ALL-LABEL: shuffle_v8f32_11334567:
559 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
561 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
562 ret <8 x float> %shuffle
; Leaves the low 128-bit half of %a unchanged and swaps elements 4 and 5 in
; the high half. Expects a single ymm vpermilps.
565 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
566 ; ALL-LABEL: shuffle_v8f32_01235467:
568 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
570 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
571 ret <8 x float> %shuffle
; Leaves the low 128-bit half of %a unchanged and applies 5,4,6,6 to the high
; half. Expects a single ymm vpermilps.
574 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
575 ; ALL-LABEL: shuffle_v8f32_01235466:
577 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
579 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
580 ret <8 x float> %shuffle
; In-half single-input shuffle of %a with undef mask entries in lanes 3 and 5.
; Expects a single ymm vpermilps whose printed mask keeps those lanes as 'u'.
583 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
584 ; ALL-LABEL: shuffle_v8f32_002u6u44:
586 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
588 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
589 ret <8 x float> %shuffle
; In-half single-input shuffle of %a with undef mask entries in lanes 2, 3, 6
; and 7. Expects a single ymm vpermilps whose printed mask keeps them as 'u'.
592 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
593 ; ALL-LABEL: shuffle_v8f32_00uu66uu:
595 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
597 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
598 ret <8 x float> %shuffle
; In-half single-input shuffle of %a with undef mask entries in lanes 6 and 7.
; Expects a single ymm vpermilps whose printed mask keeps those lanes as 'u'.
601 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
602 ; ALL-LABEL: shuffle_v8f32_103245uu:
604 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
606 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
607 ret <8 x float> %shuffle
; In-half single-input shuffle of %a with undef mask entries in lanes 4 and 5.
; Expects a single ymm vpermilps whose printed mask keeps those lanes as 'u'.
610 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
611 ; ALL-LABEL: shuffle_v8f32_1133uu67:
613 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
615 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
616 ret <8 x float> %shuffle
; In-half single-input shuffle of %a with undef mask entries in lanes 1, 2, 6
; and 7. Expects a single ymm vpermilps whose printed mask keeps them as 'u'.
619 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
620 ; ALL-LABEL: shuffle_v8f32_0uu354uu:
622 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
624 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
625 ret <8 x float> %shuffle
; Mostly-undef in-half shuffle of %a: only lanes 3, 6 and 7 are defined.
; Expects a single ymm vpermilps whose printed mask keeps the rest as 'u'.
628 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
629 ; ALL-LABEL: shuffle_v8f32_uuu3uu66:
631 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
633 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
634 ret <8 x float> %shuffle
; Irregular two-input mask mixing elements from both halves of %a and %b
; (b4,a3,a4,b0,b4,b5,b2,a0).
; The AVX1 checks expect each input to be rearranged separately (vperm2f128
; plus in-half shuffles/blends) before a final vblendps; the AVX2 checks expect
; one vpermps per input followed by the same vblendps.
637 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
638 ; AVX1-LABEL: shuffle_v8f32_c348cda0:
640 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
641 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
642 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
643 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
644 ; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
645 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
648 ; AVX2-LABEL: shuffle_v8f32_c348cda0:
650 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0>
651 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
652 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
653 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
654 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
656 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
657 ret <8 x float> %shuffle
; Irregular two-input mask: lanes 0 and 7 come from %b (b7, b2), lanes 1-6 from
; %a (a5,a1,a1,a2,a3,a5).
; The AVX1 checks expect each input to be rearranged separately (vperm2f128
; plus in-half shuffles/blends) before a final vblendps; the AVX2 checks expect
; one vpermps per input followed by the same vblendps.
660 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
661 ; AVX1-LABEL: shuffle_v8f32_f511235a:
663 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
664 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
665 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
666 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
667 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
668 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
669 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
672 ; AVX2-LABEL: shuffle_v8f32_f511235a:
674 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2>
675 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
676 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
677 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
678 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
680 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
681 ret <8 x float> %shuffle
; Places the reversed low 128-bit half of %a (3,2,1,0) in both result halves.
; The AVX1 checks expect an xmm vpermilps followed by vinsertf128; the AVX2
; checks use a constant-index vpermps.
684 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
685 ; AVX1-LABEL: shuffle_v8f32_32103210:
687 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
688 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
691 ; AVX2-LABEL: shuffle_v8f32_32103210:
693 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
694 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
696 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
697 ret <8 x float> %shuffle
; Places the reversed high 128-bit half of %a (7,6,5,4) in both result halves.
; The AVX1 checks expect vextractf128, an xmm vpermilps and vinsertf128; the
; AVX2 checks use a constant-index vpermps.
700 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
701 ; AVX1-LABEL: shuffle_v8f32_76547654:
703 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
704 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
705 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
708 ; AVX2-LABEL: shuffle_v8f32_76547654:
710 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
711 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
713 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
714 ret <8 x float> %shuffle
; Full reversal of all eight elements of %a.
; The AVX1 checks expect vperm2f128 to swap the halves followed by an in-half
; reversing vpermilps; the AVX2 checks use a constant-index vpermps.
717 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
718 ; AVX1-LABEL: shuffle_v8f32_76543210:
720 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
721 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
724 ; AVX2-LABEL: shuffle_v8f32_76543210:
726 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
727 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
729 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
730 ret <8 x float> %shuffle
; Result low half is %a's low half reversed; result high half is %b's low half
; reversed. Both run configurations expect vinsertf128 to combine the two low
; halves, then an in-half reversing vpermilps.
733 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
734 ; ALL-LABEL: shuffle_v8f32_3210ba98:
736 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
737 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
739 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
740 ret <8 x float> %shuffle
; Result low half is %a's low half reversed; result high half is %b's high half
; reversed. Both run configurations expect a vblendpd (no half needs to move),
; then an in-half reversing vpermilps.
743 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
744 ; ALL-LABEL: shuffle_v8f32_3210fedc:
746 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
747 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
749 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
750 ret <8 x float> %shuffle
; Result low half is %a's high half reversed; result high half is %b's high
; half reversed. Both run configurations expect a two-source vperm2f128 to
; gather the high halves, then an in-half reversing vpermilps.
753 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
754 ; ALL-LABEL: shuffle_v8f32_7654fedc:
756 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
757 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
759 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
760 ret <8 x float> %shuffle
; Result low half is %b's high half reversed; result high half is %a's high
; half reversed. Both run configurations expect a two-source vperm2f128 with
; the operands in %b,%a order, then an in-half reversing vpermilps.
763 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
764 ; ALL-LABEL: shuffle_v8f32_fedc7654:
766 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
767 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
769 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
770 ret <8 x float> %shuffle
; Gathers the odd-indexed elements of %truc into the low half of the result and
; the odd-indexed elements of %tchose into the high half.
; NOTE(review): the name suggests a regression test for bug PR21138 - confirm
; against the bug tracker.
; The AVX1 checks expect vextractf128 + xmm vshufps per input, combined with
; vinsertf128 and vblendpd; the AVX2 checks expect one vpermps per input and a
; vblendpd.
773 define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
774 ; AVX1-LABEL: PR21138:
776 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
777 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
778 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
779 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
780 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
781 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
784 ; AVX2-LABEL: PR21138:
786 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,u,u,1,3,5,7>
787 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
788 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <1,3,5,7,u,u,u,u>
789 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
790 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
792 %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
793 ret <8 x float> %shuffle
; Result low half is %b's low half reversed; result high half is %a's high half
; reversed. Both run configurations expect a vblendpd (no half needs to move),
; then an in-half reversing vpermilps.
796 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
797 ; ALL-LABEL: shuffle_v8f32_ba987654:
799 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
800 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
802 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
803 ret <8 x float> %shuffle
; Result low half is %b's low half reversed (b,a,9,8); result high half is
; %a's low half reversed (3,2,1,0), as the function name encodes.
; The mask previously read 11,10,9,8,7,6,5,4, which duplicated the ba987654
; test and left this pattern untested.
; Expected lowering: vinsertf128 places %a's low half above %b's low half, then
; an in-half reversing vpermilps.
; NOTE(review): the two -NEXT lines were derived by hand from the 3210ba98
; test with the operands swapped; regenerate with
; utils/update_llc_test_checks.py to confirm.
806 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
807 ; ALL-LABEL: shuffle_v8f32_ba983210:
809 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
810 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
812 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
813 ret <8 x float> %shuffle
; Low-pair interleave with %b first and %a second; lanes 2 and 6 are undef in
; the mask. Both run configurations expect a single vunpcklps with the operands
; in %b,%a order.
816 define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
817 ; ALL-LABEL: shuffle_v8f32_80u1c4u5:
819 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
821 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5>
822 ret <8 x float> %shuffle
; High-pair interleave with %b first and %a second; lane 2 is undef in the
; mask. Both run configurations expect a single vunpckhps with the operands in
; %b,%a order.
825 define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
826 ; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
828 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
830 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>
831 ret <8 x float> %shuffle
; Result lanes 4-7 all take element 1 of %a; lanes 0-3 are undef in the mask.
834 define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {
835 ; AVX1-LABEL: shuffle_v8f32_uuuu1111:
837 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
838 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
841 ; AVX2-LABEL: shuffle_v8f32_uuuu1111:
843 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
844 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
846 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
847 ret <8 x float> %shuffle
; Broadcasts element 4 of %a (first element of the high 128-bit half) to all
; eight result lanes.
850 define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) {
851 ; AVX1-LABEL: shuffle_v8f32_44444444:
853 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
854 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
855 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
858 ; AVX2-LABEL: shuffle_v8f32_44444444:
860 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
861 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
863 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
864 ret <8 x float> %shuffle
; Result lanes 0-3 all take element 5 of %a; lanes 4-7 are undef in the mask.
867 define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {
868 ; AVX1-LABEL: shuffle_v8f32_5555uuuu:
870 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
871 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
874 ; AVX2-LABEL: shuffle_v8f32_5555uuuu:
876 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
877 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
879 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
880 ret <8 x float> %shuffle
; Integer variant: broadcasts element 0 of %a to all eight result lanes.
883 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
884 ; AVX1-LABEL: shuffle_v8i32_00000000:
886 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
887 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
890 ; AVX2-LABEL: shuffle_v8i32_00000000:
892 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
894 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
895 ret <8 x i32> %shuffle
; Every result lane takes element 0 of %a except lane 6, which takes element 1.
898 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
899 ; AVX1-LABEL: shuffle_v8i32_00000010:
901 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
902 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
903 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
906 ; AVX2-LABEL: shuffle_v8i32_00000010:
908 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
909 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
911 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
912 ret <8 x i32> %shuffle
; Every result lane takes element 0 of %a except lane 5, which takes element 2.
915 define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
916 ; AVX1-LABEL: shuffle_v8i32_00000200:
918 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
919 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
920 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
923 ; AVX2-LABEL: shuffle_v8i32_00000200:
925 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
926 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
928 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
929 ret <8 x i32> %shuffle
; Every result lane takes element 0 of %a except lane 4, which takes element 3.
932 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
933 ; AVX1-LABEL: shuffle_v8i32_00003000:
935 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
936 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
937 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
940 ; AVX2-LABEL: shuffle_v8i32_00003000:
942 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
943 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
945 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
946 ret <8 x i32> %shuffle
; Every result lane takes element 0 of %a except lane 3, which takes element 4
; (a lane-crossing element from the high 128-bit half).
949 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
950 ; AVX1-LABEL: shuffle_v8i32_00040000:
952 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
953 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
954 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
955 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
958 ; AVX2-LABEL: shuffle_v8i32_00040000:
960 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
961 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
963 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
964 ret <8 x i32> %shuffle
; Every result lane takes element 0 of %a except lane 2, which takes element 5.
967 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
968 ; AVX1-LABEL: shuffle_v8i32_00500000:
970 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
971 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
972 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
975 ; AVX2-LABEL: shuffle_v8i32_00500000:
977 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
978 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
980 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
981 ret <8 x i32> %shuffle
; Every result lane takes element 0 of %a except lane 1, which takes element 6.
984 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
985 ; AVX1-LABEL: shuffle_v8i32_06000000:
987 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
988 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
989 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
992 ; AVX2-LABEL: shuffle_v8i32_06000000:
994 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
995 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
997 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
998 ret <8 x i32> %shuffle
; Result lane 0 takes element 7 of %a; every other lane takes element 0.
1001 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
1002 ; AVX1-LABEL: shuffle_v8i32_70000000:
1004 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
1005 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
1006 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
1009 ; AVX2-LABEL: shuffle_v8i32_70000000:
1011 ; AVX2-NEXT: movl $7, %eax
1012 ; AVX2-NEXT: vmovd %eax, %xmm1
1013 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1015 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1016 ret <8 x i32> %shuffle
; Repeats the low element pair of each 128-bit lane of %a (0,1 and 4,5) twice
; within that lane.
1019 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
1020 ; AVX1-LABEL: shuffle_v8i32_01014545:
1022 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
1025 ; AVX2-LABEL: shuffle_v8i32_01014545:
1027 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1029 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
1030 ret <8 x i32> %shuffle
; Duplicates each of elements 0-3 of %a into two adjacent result lanes.
1033 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
1034 ; AVX1-LABEL: shuffle_v8i32_00112233:
1036 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
1037 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
1038 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1041 ; AVX2-LABEL: shuffle_v8i32_00112233:
1043 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
1044 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1046 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
1047 ret <8 x i32> %shuffle
; Result lanes 0-3 take element 0 of %a and lanes 4-7 take element 1 of %a.
1050 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
1051 ; AVX1-LABEL: shuffle_v8i32_00001111:
1053 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
1054 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1055 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1058 ; AVX2-LABEL: shuffle_v8i32_00001111:
1060 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
1061 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1063 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
1064 ret <8 x i32> %shuffle
; Pure blend: even result lanes keep %b's element, odd lanes keep %a's element;
; no element changes position.
1067 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
1068 ; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
1070 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1073 ; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
1075 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1077 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
1078 ret <8 x i32> %shuffle
; Alternates %a[0] (even lanes) and %b[0] (odd lanes) across all eight lanes.
1081 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
1082 ; AVX1-LABEL: shuffle_v8i32_08080808:
1084 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
1085 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
1086 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1087 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1088 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1091 ; AVX2-LABEL: shuffle_v8i32_08080808:
1093 ; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
1094 ; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
1095 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1097 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
1098 ret <8 x i32> %shuffle
; Alternates %a[0]/%b[0] in result lanes 0-3 and %a[4]/%b[4] in lanes 4-7
; (the same pattern applied within each 128-bit lane).
1101 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
1102 ; AVX1-LABEL: shuffle_v8i32_08084c4c:
1104 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
1105 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1108 ; AVX2-LABEL: shuffle_v8i32_08084c4c:
1110 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
1111 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1112 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1114 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
1115 ret <8 x i32> %shuffle
; Within each 128-bit lane: the lane's first element of %b twice, followed by
; the lane's last two elements of %a in place.
1118 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
1119 ; AVX1-LABEL: shuffle_v8i32_8823cc67:
1121 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
1124 ; AVX2-LABEL: shuffle_v8i32_8823cc67:
1126 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
1127 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1129 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
1130 ret <8 x i32> %shuffle
; Within each 128-bit lane: the lane's first two elements of %b swapped,
; followed by the lane's last two elements of %a swapped.
1133 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
1134 ; AVX1-LABEL: shuffle_v8i32_9832dc76:
1136 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
1139 ; AVX2-LABEL: shuffle_v8i32_9832dc76:
1141 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1142 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1144 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
1145 ret <8 x i32> %shuffle
; Within each 128-bit lane: the lane's first two elements of %b swapped,
; followed by the lane's first two elements of %a swapped.
1148 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
1149 ; AVX1-LABEL: shuffle_v8i32_9810dc54:
1151 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
1154 ; AVX2-LABEL: shuffle_v8i32_9810dc54:
1156 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
1157 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
1158 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1160 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
1161 ret <8 x i32> %shuffle
; Interleaves the low two elements of each 128-bit lane of %a and %b, %a first.
1164 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
1165 ; AVX1-LABEL: shuffle_v8i32_08194c5d:
1167 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1170 ; AVX2-LABEL: shuffle_v8i32_08194c5d:
1172 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1174 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
1175 ret <8 x i32> %shuffle
; Interleaves the high two elements of each 128-bit lane of %a and %b, %a first.
1178 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
1179 ; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
1181 ; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1184 ; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
1186 ; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1188 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1189 ret <8 x i32> %shuffle
; Interleaves elements 0-3 of %a with elements 0-3 of %b across the whole
; 256-bit result, so elements 2 and 3 of each input cross into the high lane.
1192 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
1193 ; AVX1-LABEL: shuffle_v8i32_08192a3b:
1195 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1196 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1197 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1200 ; AVX2-LABEL: shuffle_v8i32_08192a3b:
1202 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
1203 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1204 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1205 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1207 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1208 ret <8 x i32> %shuffle
; Result lane 0 is %a[0] and lane 4 is %a[1]; the other lanes come from the low
; four elements of %b (%b[0],%b[1],%b[1] then %b[2],%b[3],%b[3]).
1211 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
1212 ; AVX1-LABEL: shuffle_v8i32_08991abb:
1214 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
1215 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
1216 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1217 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
1218 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1221 ; AVX2-LABEL: shuffle_v8i32_08991abb:
1223 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1224 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1225 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
1226 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1227 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1229 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
1230 ret <8 x i32> %shuffle
; Even result lanes take %a[0..3] in order; odd lanes keep %b's element at the
; same position.
1233 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
1234 ; AVX1-LABEL: shuffle_v8i32_091b2d3f:
1236 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
1237 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
1238 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1239 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1242 ; AVX2-LABEL: shuffle_v8i32_091b2d3f:
1244 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1245 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1247 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1248 ret <8 x i32> %shuffle
; Result lane 0 is %a[0] and lane 4 is %a[1]; every other lane keeps %b's
; element at the same position.
1251 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
1252 ; AVX1-LABEL: shuffle_v8i32_09ab1def:
1254 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
1255 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1256 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1259 ; AVX2-LABEL: shuffle_v8i32_09ab1def:
1261 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1262 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1263 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1265 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1266 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [0,0,0,1] to both
; 128-bit lanes of %a.
1269 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
1270 ; AVX1-LABEL: shuffle_v8i32_00014445:
1272 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1275 ; AVX2-LABEL: shuffle_v8i32_00014445:
1277 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1279 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
1280 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [0,0,2,0] to both
; 128-bit lanes of %a.
1283 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
1284 ; AVX1-LABEL: shuffle_v8i32_00204464:
1286 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1289 ; AVX2-LABEL: shuffle_v8i32_00204464:
1291 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1293 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
1294 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [0,3,0,0] to both
; 128-bit lanes of %a.
1297 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
1298 ; AVX1-LABEL: shuffle_v8i32_03004744:
1300 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1303 ; AVX2-LABEL: shuffle_v8i32_03004744:
1305 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1307 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
1308 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [1,0,0,0] to both
; 128-bit lanes of %a.
1311 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
1312 ; AVX1-LABEL: shuffle_v8i32_10005444:
1314 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1317 ; AVX2-LABEL: shuffle_v8i32_10005444:
1319 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1321 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
1322 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [2,2,0,0] to both
; 128-bit lanes of %a.
1325 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
1326 ; AVX1-LABEL: shuffle_v8i32_22006644:
1328 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1331 ; AVX2-LABEL: shuffle_v8i32_22006644:
1333 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1335 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
1336 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [3,3,3,0] to both
; 128-bit lanes of %a.
1339 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
1340 ; AVX1-LABEL: shuffle_v8i32_33307774:
1342 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1345 ; AVX2-LABEL: shuffle_v8i32_33307774:
1347 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1349 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
1350 ret <8 x i32> %shuffle
; Reverses the four elements within each 128-bit lane of %a (in-lane pattern
; [3,2,1,0] on both lanes).
1353 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
1354 ; AVX1-LABEL: shuffle_v8i32_32107654:
1356 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1359 ; AVX2-LABEL: shuffle_v8i32_32107654:
1361 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1363 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1364 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [0,0,2,3] to both
; 128-bit lanes of %a.
1367 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
1368 ; AVX1-LABEL: shuffle_v8i32_00234467:
1370 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1373 ; AVX2-LABEL: shuffle_v8i32_00234467:
1375 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1377 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
1378 ret <8 x i32> %shuffle
; Duplicates the even-indexed elements of %a into each adjacent odd lane
; (in-lane pattern [0,0,2,2] on both 128-bit lanes).
1381 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
1382 ; AVX1-LABEL: shuffle_v8i32_00224466:
1384 ; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1387 ; AVX2-LABEL: shuffle_v8i32_00224466:
1389 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1391 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1392 ret <8 x i32> %shuffle
; Swaps each adjacent pair of elements of %a (in-lane pattern [1,0,3,2] on both
; 128-bit lanes).
1395 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
1396 ; AVX1-LABEL: shuffle_v8i32_10325476:
1398 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1401 ; AVX2-LABEL: shuffle_v8i32_10325476:
1403 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1405 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
1406 ret <8 x i32> %shuffle
; Duplicates the odd-indexed elements of %a into each adjacent even lane
; (in-lane pattern [1,1,3,3] on both 128-bit lanes).
1409 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
1410 ; AVX1-LABEL: shuffle_v8i32_11335577:
1412 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1415 ; AVX2-LABEL: shuffle_v8i32_11335577:
1417 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1419 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
1420 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [1,0,2,3] to both
; 128-bit lanes of %a.
1423 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
1424 ; AVX1-LABEL: shuffle_v8i32_10235467:
1426 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1429 ; AVX2-LABEL: shuffle_v8i32_10235467:
1431 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1433 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1434 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [1,0,2,2] to both
; 128-bit lanes of %a.
1437 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
1438 ; AVX1-LABEL: shuffle_v8i32_10225466:
1440 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1443 ; AVX2-LABEL: shuffle_v8i32_10225466:
1445 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1447 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
1448 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit lane of %a but uses a
; different pattern per lane ([0,0,0,1] low, [1,0,0,0] high).
1451 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
1452 ; AVX1-LABEL: shuffle_v8i32_00015444:
1454 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
1457 ; AVX2-LABEL: shuffle_v8i32_00015444:
1459 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
1460 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1462 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
1463 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit lane of %a but uses a
; different pattern per lane ([0,0,2,0] low, [0,2,0,0] high).
1466 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
1467 ; AVX1-LABEL: shuffle_v8i32_00204644:
1469 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
1472 ; AVX2-LABEL: shuffle_v8i32_00204644:
1474 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
1475 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1477 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
1478 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit lane of %a but uses a
; different pattern per lane ([0,3,0,0] low, [0,0,3,0] high).
1481 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
1482 ; AVX1-LABEL: shuffle_v8i32_03004474:
1484 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
1487 ; AVX2-LABEL: shuffle_v8i32_03004474:
1489 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
1490 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1492 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
1493 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit lane of %a but uses a
; different pattern per lane ([1,0,0,0] low, [0,0,0,0] high).
1496 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
1497 ; AVX1-LABEL: shuffle_v8i32_10004444:
1499 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
1502 ; AVX2-LABEL: shuffle_v8i32_10004444:
1504 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
1505 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1507 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1508 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit lane of %a but uses a
; different pattern per lane ([2,2,0,0] low, [2,0,0,2] high).
1511 define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
1512 ; AVX1-LABEL: shuffle_v8i32_22006446:
1514 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
1517 ; AVX2-LABEL: shuffle_v8i32_22006446:
1519 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
1520 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1522 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
1523 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit lane of %a but uses a
; different pattern per lane ([3,3,3,0] low, [3,0,3,0] high).
1526 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
1527 ; AVX1-LABEL: shuffle_v8i32_33307474:
1529 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
1532 ; AVX2-LABEL: shuffle_v8i32_33307474:
1534 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
1535 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1537 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
1538 ret <8 x i32> %shuffle
; Reverses the low four elements of %a and leaves the high four in place.
1541 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
1542 ; AVX1-LABEL: shuffle_v8i32_32104567:
1544 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
1547 ; AVX2-LABEL: shuffle_v8i32_32104567:
1549 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
1550 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1552 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
1553 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit lane of %a but uses a
; different pattern per lane ([0,0,2,3] low, [2,3,0,0] high).
1556 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
1557 ; AVX1-LABEL: shuffle_v8i32_00236744:
1559 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
1562 ; AVX2-LABEL: shuffle_v8i32_00236744:
1564 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
1565 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1567 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
1568 ret <8 x i32> %shuffle
; Single-input shuffle that stays within each 128-bit lane of %a but uses a
; different pattern per lane ([0,0,2,2] low, [2,2,0,0] high).
1571 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
1572 ; AVX1-LABEL: shuffle_v8i32_00226644:
1574 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
1577 ; AVX2-LABEL: shuffle_v8i32_00226644:
1579 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
1580 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1582 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
1583 ret <8 x i32> %shuffle
; Swaps adjacent element pairs in the low 128-bit lane of %a and leaves the
; high lane in place.
1586 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
1587 ; AVX1-LABEL: shuffle_v8i32_10324567:
1589 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
1592 ; AVX2-LABEL: shuffle_v8i32_10324567:
1594 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
1595 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1597 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1598 ret <8 x i32> %shuffle
; Applies the in-lane pattern [1,1,3,3] to the low 128-bit lane of %a and leaves
; the high lane in place.
1601 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
1602 ; AVX1-LABEL: shuffle_v8i32_11334567:
1604 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
1607 ; AVX2-LABEL: shuffle_v8i32_11334567:
1609 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
1610 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1612 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
1613 ret <8 x i32> %shuffle
; Leaves the low 128-bit lane of %a in place and swaps elements 4 and 5 in the
; high lane.
1616 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
1617 ; AVX1-LABEL: shuffle_v8i32_01235467:
1619 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
1622 ; AVX2-LABEL: shuffle_v8i32_01235467:
1624 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
1625 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1627 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1628 ret <8 x i32> %shuffle
; Leaves the low 128-bit lane of %a in place; the high lane becomes elements
; 5,4,6,6 of %a.
1631 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
1632 ; AVX1-LABEL: shuffle_v8i32_01235466:
1634 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
1637 ; AVX2-LABEL: shuffle_v8i32_01235466:
1639 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
1640 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1642 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
1643 ret <8 x i32> %shuffle
; Single-input in-lane shuffle of %a with undef mask entries at result lanes 3
; and 5; defined lanes select elements 0,0,2 (low) and 6,4,4 (high).
1646 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
1647 ; AVX1-LABEL: shuffle_v8i32_002u6u44:
1649 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
1652 ; AVX2-LABEL: shuffle_v8i32_002u6u44:
1654 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
1655 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1657 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
1658 ret <8 x i32> %shuffle
; Result lanes 0-1 take element 0 of %a and lanes 4-5 take element 6; lanes
; 2, 3, 6 and 7 are undef in the mask.
1661 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
1662 ; AVX1-LABEL: shuffle_v8i32_00uu66uu:
1664 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
1667 ; AVX2-LABEL: shuffle_v8i32_00uu66uu:
1669 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
1670 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1672 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
1673 ret <8 x i32> %shuffle
; Swaps adjacent element pairs in the low 128-bit lane of %a, keeps elements 4
; and 5 in place; result lanes 6 and 7 are undef in the mask.
1676 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
1677 ; AVX1-LABEL: shuffle_v8i32_103245uu:
1679 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
1682 ; AVX2-LABEL: shuffle_v8i32_103245uu:
1684 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
1685 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1687 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
1688 ret <8 x i32> %shuffle
; Applies the in-lane pattern [1,1,3,3] to the low 128-bit lane of %a, keeps
; elements 6 and 7 in place; result lanes 4 and 5 are undef in the mask.
1691 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
1692 ; AVX1-LABEL: shuffle_v8i32_1133uu67:
1694 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
1697 ; AVX2-LABEL: shuffle_v8i32_1133uu67:
1699 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
1700 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1702 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
1703 ret <8 x i32> %shuffle
; Keeps elements 0 and 3 of %a in place and swaps elements 4 and 5; result
; lanes 1, 2, 6 and 7 are undef in the mask.
1706 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
1707 ; AVX1-LABEL: shuffle_v8i32_0uu354uu:
1709 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
1712 ; AVX2-LABEL: shuffle_v8i32_0uu354uu:
1714 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
1715 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1717 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
1718 ret <8 x i32> %shuffle
; Result lane 3 keeps element 3 of %a and lanes 6-7 both take element 6; all
; other lanes are undef in the mask.
1721 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
1722 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
1724 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
1727 ; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
1729 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
1730 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1732 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
1733 ret <8 x i32> %shuffle
; Irregular two-input shuffle: result lanes 0, 5 and 7 take %a[6], %a[7] and
; %a[5]; lanes 1, 2, 3, 4 and 6 take %b[4], %b[2], %b[2], %b[0] and %b[6].
; Elements from both inputs cross 128-bit lanes.
1736 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
1737 ; AVX1-LABEL: shuffle_v8i32_6caa87e5:
1739 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
1740 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
1741 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1742 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1743 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1744 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1747 ; AVX2-LABEL: shuffle_v8i32_6caa87e5:
1749 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u>
1750 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1751 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
1752 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1754 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
1755 ret <8 x i32> %shuffle
; Reverses the low four elements of %a and places that reversed group in both
; halves of the result (mask <3,2,1,0,3,2,1,0>).
; The AVX1 run expects a 128-bit permute followed by vinsertf128; the AVX2 run
; expects a constant-indexed vpermd.
1758 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
1759 ; AVX1-LABEL: shuffle_v8i32_32103210:
1761 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1762 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1765 ; AVX2-LABEL: shuffle_v8i32_32103210:
1767 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
1768 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1770 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
1771 ret <8 x i32> %shuffle
; Reverses the high four elements of %a and places that reversed group in both
; halves of the result (mask <7,6,5,4,7,6,5,4>).
; The AVX1 run expects extract-high, a 128-bit permute, then vinsertf128; the
; AVX2 run expects a constant-indexed vpermd.
1774 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
1775 ; AVX1-LABEL: shuffle_v8i32_76547654:
1777 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1778 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1779 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1782 ; AVX2-LABEL: shuffle_v8i32_76547654:
1784 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
1785 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1787 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
1788 ret <8 x i32> %shuffle
; Full reversal of the eight elements of %a (mask <7,6,5,4,3,2,1,0>).
; The AVX1 run expects the 128-bit halves to be swapped and then each half
; reversed in place; the AVX2 run expects a constant-indexed vpermd.
1791 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
1792 ; AVX1-LABEL: shuffle_v8i32_76543210:
1794 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1795 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1798 ; AVX2-LABEL: shuffle_v8i32_76543210:
1800 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
1801 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1803 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1804 ret <8 x i32> %shuffle
; Low four elements of %a reversed into result elements 0-3, low four elements
; of %b reversed into elements 4-7 (mask <3,2,1,0,11,10,9,8>).
; Both runs expect the low half of %b to be inserted above the low half of %a,
; followed by a reversal inside each 128-bit half.
1807 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
1808 ; AVX1-LABEL: shuffle_v8i32_3210ba98:
1810 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1811 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1814 ; AVX2-LABEL: shuffle_v8i32_3210ba98:
1816 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1817 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1819 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
1820 ret <8 x i32> %shuffle
; Low four elements of %a reversed into result elements 0-3, high four
; elements of %b reversed into elements 4-7 (mask <3,2,1,0,15,14,13,12>).
; Both runs expect a blend of the low half of %a with the high half of %b,
; followed by a reversal inside each 128-bit half.
1823 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
1824 ; AVX1-LABEL: shuffle_v8i32_3210fedc:
1826 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1827 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1830 ; AVX2-LABEL: shuffle_v8i32_3210fedc:
1832 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1833 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1835 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
1836 ret <8 x i32> %shuffle
; High four elements of %a reversed into result elements 0-3, high four
; elements of %b reversed into elements 4-7 (mask <7,6,5,4,15,14,13,12>).
; Both runs expect a 128-bit permute that gathers the two high halves,
; followed by a reversal inside each 128-bit half.
1839 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
1840 ; AVX1-LABEL: shuffle_v8i32_7654fedc:
1842 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1843 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1846 ; AVX2-LABEL: shuffle_v8i32_7654fedc:
1848 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1849 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1851 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
1852 ret <8 x i32> %shuffle
; High four elements of %b reversed into result elements 0-3, high four
; elements of %a reversed into elements 4-7 (mask <15,14,13,12,7,6,5,4>).
; Same shape as shuffle_v8i32_7654fedc with the two inputs exchanged in the
; expected 128-bit permute.
1855 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
1856 ; AVX1-LABEL: shuffle_v8i32_fedc7654:
1858 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1859 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1862 ; AVX2-LABEL: shuffle_v8i32_fedc7654:
1864 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1865 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1867 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
1868 ret <8 x i32> %shuffle
; Low four elements of %b reversed into result elements 0-3, high four
; elements of %a reversed into elements 4-7 (mask <11,10,9,8,7,6,5,4>).
; Both runs expect a blend of the low half of %b with the high half of %a,
; followed by a reversal inside each 128-bit half.
1871 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
1872 ; AVX1-LABEL: shuffle_v8i32_ba987654:
1874 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
1875 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1878 ; AVX2-LABEL: shuffle_v8i32_ba987654:
1880 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1881 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1883 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
1884 ret <8 x i32> %shuffle
; Low four elements of %b reversed into result elements 0-3, low four elements
; of %a reversed into elements 4-7 (mask <11,10,9,8,3,2,1,0>), as the function
; name states. The mask previously duplicated shuffle_v8i32_ba987654, so this
; pattern was never exercised.
; The expected instructions mirror shuffle_v8i32_3210ba98 with the two inputs
; exchanged; re-run utils/update_llc_test_checks.py to confirm the exact text.
1887 define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
1888 ; AVX1-LABEL: shuffle_v8i32_ba983210:
1890 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1891 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1894 ; AVX2-LABEL: shuffle_v8i32_ba983210:
1896 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1897 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1899 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
1900 ret <8 x i32> %shuffle
; Shuffle of a zero vector (first operand) with %a (second operand), mask
; <0,u,u,8,0,u,u,12>: elements 0 and 4 are zero, element 3 is %a[0] and
; element 7 is %a[4].
; The AVX2 run expects this to be matched as a byte shift left within each
; 128-bit half (vpslldq); the AVX1 run expects a vshufps against a zeroed
; register.
1903 define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
1904 ; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
1906 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1907 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
1910 ; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
1912 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
1914 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
1915 ret <8 x i32> %shuffle
; Shuffle of a zero vector (first operand) with %a (second operand), mask
; <9,u,11,0,13,14,15,0>: each 128-bit half takes the upper elements of the
; matching half of %a and ends with a zero element.
; The AVX2 run expects this to be matched as a byte shift right within each
; 128-bit half (vpsrldq); the AVX1 run expects two vshufps against a zeroed
; register.
1918 define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
1919 ; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
1921 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1922 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
1923 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
1926 ; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
1928 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
1930 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
1931 ret <8 x i32> %shuffle
; Two-input shuffle with mask <8,0,u,1,12,4,u,u>, which (with the undef slots
; filled in) is the low-element interleave of %b and %a within each 128-bit
; half. Both runs expect a single unpack-low instruction.
; NOTE(review): the function name spells slot 4 as 'b' (11) while the mask
; uses 12 ('c'); the mask and the expected unpack agree with each other, so
; only the name is off.
1934 define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
1935 ; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
1937 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1940 ; AVX2-LABEL: shuffle_v8i32_80u1b4uu:
1942 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1944 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
1945 ret <8 x i32> %shuffle
; Only the upper four result elements are defined, and all of them are %a[1]
; (mask <u,u,u,u,1,1,1,1>).
; The AVX1 run expects a 128-bit splat of element 1 inserted into the upper
; half; the AVX2 run expects a vpermd whose index vector is broadcast from
; memory.
1948 define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {
1949 ; AVX1-LABEL: shuffle_v8i32_uuuu1111:
1951 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1952 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1955 ; AVX2-LABEL: shuffle_v8i32_uuuu1111:
1957 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
1958 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1960 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
1961 ret <8 x i32> %shuffle
; Splat of %a[4], the first element of the upper 128-bit half, to all eight
; result elements.
; The AVX1 run expects extract-high, a 128-bit splat, then vinsertf128; the
; AVX2 run expects a vpermd whose index vector is broadcast from memory.
1964 define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
1965 ; AVX1-LABEL: shuffle_v8i32_44444444:
1967 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1968 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1969 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1972 ; AVX2-LABEL: shuffle_v8i32_44444444:
1974 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
1975 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1977 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
1978 ret <8 x i32> %shuffle
; Only the lower four result elements are defined, and all of them are %a[5]
; (mask <5,5,5,5,u,u,u,u>).
; The AVX1 run expects extract-high followed by a 128-bit splat, with no
; re-insert because the upper half is undef; the AVX2 run expects a vpermd
; whose index vector is broadcast from memory.
1981 define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) {
1982 ; AVX1-LABEL: shuffle_v8i32_5555uuuu:
1984 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1985 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1988 ; AVX2-LABEL: shuffle_v8i32_5555uuuu:
1990 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
1991 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1993 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
1994 ret <8 x i32> %shuffle
; Loads one float from %p, inserts it into element 0 of a <4 x float>, and
; widens it to <8 x float> with an all-zero shuffle mask (a splat of element
; 0). Both run configurations expect a single vbroadcastss straight from
; memory.
1997 define <8 x float> @splat_mem_v8f32_2(float* %p) {
1998 ; ALL-LABEL: splat_mem_v8f32_2:
2000 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0
2002 %1 = load float, float* %p
2003 %2 = insertelement <4 x float> undef, float %1, i32 0
2004 %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
; Splats element 0 of the <4 x float> argument %r to all eight elements of a
; <8 x float> (all-zero shuffle mask with a widened result type).
; The AVX1 run expects a 128-bit splat followed by vinsertf128; the AVX2 run
; expects a register-to-register vbroadcastss.
2008 define <8 x float> @splat_v8f32(<4 x float> %r) {
2009 ; AVX1-LABEL: splat_v8f32:
2011 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
2012 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2015 ; AVX2-LABEL: splat_v8f32:
2017 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
2019 %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
2024 ; Shuffle to logical bit shifts
; Shuffle of %a with a zero vector, mask <8,0,u,2,8,u,8,6>: in every 64-bit
; pair the low element is zero (or undef) and the high element is the
; even-indexed element of %a from that pair (or undef).
; The AVX2 run expects this to be matched as a 64-bit logical shift left by
; 32 bits; the AVX1 run expects two vshufps against a zeroed register.
2027 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
2028 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
2030 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
2031 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
2032 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
2035 ; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
2037 ; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
2039 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
2040 ret <8 x i32> %shuffle
; Shuffle of %a with a zero vector, mask <1,u,3,8,5,8,u,u>: in every 64-bit
; pair the low element is the odd-indexed element of %a from that pair (or
; undef) and the high element is zero (or undef).
; The AVX2 run expects this to be matched as a 64-bit logical shift right by
; 32 bits; the AVX1 run expects two vshufps against a zeroed register.
2043 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
2044 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
2046 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
2047 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
2048 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
2051 ; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
2053 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
2055 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
2056 ret <8 x i32> %shuffle
; Two-input shuffle with mask <11,0,1,2,15,4,5,6>: within each 128-bit half,
; the last element of %b's half is followed by the first three elements of
; %a's half.
; The AVX2 run expects a single vpalignr; the AVX1 run expects two vshufps.
2059 define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
2060 ; AVX1-LABEL: shuffle_v8i32_B012F456:
2062 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
2063 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
2066 ; AVX2-LABEL: shuffle_v8i32_B012F456:
2068 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
2070 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
2071 ret <8 x i32> %shuffle
; Two-input shuffle with mask <1,2,3,8,5,6,7,12>: within each 128-bit half,
; the last three elements of %a's half are followed by the first element of
; %b's half.
; The AVX2 run expects a single vpalignr; the AVX1 run expects two vshufps.
2074 define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
2075 ; AVX1-LABEL: shuffle_v8i32_1238567C:
2077 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
2078 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
2081 ; AVX2-LABEL: shuffle_v8i32_1238567C:
2083 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
2085 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
2086 ret <8 x i32> %shuffle
; Two-input shuffle with mask <9,10,11,0,13,14,15,4>: within each 128-bit
; half, the last three elements of %b's half are followed by the first element
; of %a's half (shuffle_v8i32_1238567C with the inputs exchanged).
; The AVX2 run expects a single vpalignr; the AVX1 run expects two vshufps.
2089 define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
2090 ; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
2092 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
2093 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
2096 ; AVX2-LABEL: shuffle_v8i32_9AB0DEF4:
2098 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
2100 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
2101 ret <8 x i32> %shuffle
; Two-input shuffle with mask <3,8,9,10,7,12,13,14>: within each 128-bit half,
; the last element of %a's half is followed by the first three elements of
; %b's half (shuffle_v8i32_B012F456 with the inputs exchanged).
; The AVX2 run expects a single vpalignr; the AVX1 run expects two vshufps.
2104 define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
2105 ; AVX1-LABEL: shuffle_v8i32_389A7CDE:
2107 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
2108 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
2111 ; AVX2-LABEL: shuffle_v8i32_389A7CDE:
2113 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
2115 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
2116 ret <8 x i32> %shuffle
; Single-input rotation within each 128-bit half of %a: the last element of
; the half moves to the front (mask <3,0,1,2,7,4,5,6>).
; Both runs expect one in-lane permute (vpermilps for AVX1, vpshufd for AVX2).
2119 define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
2120 ; AVX1-LABEL: shuffle_v8i32_30127456:
2122 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2125 ; AVX2-LABEL: shuffle_v8i32_30127456:
2127 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2129 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
2130 ret <8 x i32> %shuffle
; Single-input rotation within each 128-bit half of %a: the first element of
; the half moves to the back (mask <1,2,3,0,5,6,7,4>).
; Both runs expect one in-lane permute (vpermilps for AVX1, vpshufd for AVX2).
2133 define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
2134 ; AVX1-LABEL: shuffle_v8i32_12305674:
2136 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2139 ; AVX2-LABEL: shuffle_v8i32_12305674:
2141 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2143 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
2144 ret <8 x i32> %shuffle
; Concatenates two <2 x float> values loaded from memory into the low four
; elements of a <8 x float>; the upper four elements are undef.
; Here each loaded value is first widened to <8 x float> with undef padding,
; then the two widened vectors are combined with mask <0,1,8,9,u,u,u,u>.
; Both run configurations expect one 64-bit load into the low half of xmm0
; followed by a vmovhpd load into its high half.
2147 define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2148 ; ALL-LABEL: concat_v2f32_1:
2149 ; ALL: # BB#0: # %entry
2150 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2151 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
2154 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2155 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2156 %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2157 %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2158 %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
2159 ret <8 x float> %tmp76
; Same concatenation as concat_v2f32_1, expressed as a single widening shuffle
; of the two loaded <2 x float> values with mask <0,1,2,3,u,u,u,u>.
; Both run configurations expect the same two-instruction load sequence
; (64-bit load, then vmovhpd).
2162 define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2163 ; ALL-LABEL: concat_v2f32_2:
2164 ; ALL: # BB#0: # %entry
2165 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2166 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
2169 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2170 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2171 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2172 ret <8 x float> %tmp76
; Same concatenation as concat_v2f32_1, expressed in two steps: the loaded
; <2 x float> values are first joined into a <4 x float>, which is then
; widened to <8 x float> with undef upper elements.
; Both run configurations expect the same two-instruction load sequence
; (64-bit load, then vmovhpd).
2175 define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2176 ; ALL-LABEL: concat_v2f32_3:
2177 ; ALL: # BB#0: # %entry
2178 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2179 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
2182 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2183 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2184 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2185 %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2186 ret <8 x float> %res
; Loads an i32 from %ptr into element 0 and takes elements 1-7 from a zero
; vector (mask <0,9,10,11,12,13,14,15>).
; Both run configurations expect the whole pattern to fold into a single
; vmovd load.
2189 define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
2190 ; ALL-LABEL: insert_mem_and_zero_v8i32:
2192 ; ALL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2194 %a = load i32, i32* %ptr
2195 %v = insertelement <8 x i32> undef, i32 %a, i32 0
2196 %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2197 ret <8 x i32> %shuffle
; Extracts the low four elements of %a and the high four elements of %b as
; <4 x i32> values, then concatenates them back into a <8 x i32>.
; Both runs expect the extract/concat chain to collapse into a single blend
; (vblendpd for AVX1, vpblendd for AVX2).
2200 define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) {
2201 ; AVX1-LABEL: concat_v8i32_0123CDEF:
2203 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
2206 ; AVX2-LABEL: concat_v8i32_0123CDEF:
2208 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2210 %alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2211 %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2212 %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Extracts the high four elements of %a0 and of %a1, bitcasts each to
; <2 x i64>, concatenates them as a <4 x i64>, and bitcasts the result back to
; <8 x i32>.
; Both run configurations expect the bitcasts to be looked through so that
; the whole chain becomes one vperm2f128 selecting the two upper halves.
2216 define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
2217 ; ALL-LABEL: concat_v8i32_4567CDEF_bc:
2219 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2221 %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2222 %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
2223 %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64>
2224 %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
2225 %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2226 %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32>
2227 ret <8 x i32> %shuffle32
; Float variant of concat_v8i32_4567CDEF_bc with mixed element widths: %f0 is
; viewed as <4 x i64> and %f1 as <8 x i32> before their upper 128-bit halves
; are extracted, joined as a <4 x i64>, and bitcast back to <8 x float>.
; The %bc0hi bitcast is an identity cast (<2 x i64> to <2 x i64>).
; Both run configurations expect the whole chain to become one vperm2f128
; selecting the two upper halves.
2230 define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
2231 ; ALL-LABEL: concat_v8f32_4567CDEF_bc:
2233 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2235 %a0 = bitcast <8 x float> %f0 to <4 x i64>
2236 %a1 = bitcast <8 x float> %f1 to <8 x i32>
2237 %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
2238 %a1hi = shufflevector <8 x i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2239 %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64>
2240 %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
2241 %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2242 %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x float>
2243 ret <8 x float> %shuffle32
2246 define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) {
2247 ; ALL-LABEL: insert_dup_mem_v8i32:
2249 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0
2251 %tmp = load i32, i32* %ptr, align 4
2252 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2253 %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer