1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-legality | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-legality | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
4 target triple = "x86_64-unknown-unknown"
; Splat: every result lane takes element 0 of %a (%b is not referenced by the mask).
; The AVX-only checks expect an xmm vpermilps plus vinsertf128; the AVX2 checks expect one vbroadcastss.
6 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
7 ; AVX1-LABEL: shuffle_v8f32_00000000:
9 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
10 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
13 ; AVX2-LABEL: shuffle_v8f32_00000000:
15 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
17 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
18 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 6, which takes element 1.
; The AVX-only checks build the two 128-bit halves separately; the AVX2 checks use a constant-index vpermps.
21 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
22 ; AVX1-LABEL: shuffle_v8f32_00000010:
24 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
25 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
26 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
29 ; AVX2-LABEL: shuffle_v8f32_00000010:
31 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
32 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
34 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
35 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 5, which takes element 2.
; The AVX-only checks build the two 128-bit halves separately; the AVX2 checks use a constant-index vpermps.
38 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
39 ; AVX1-LABEL: shuffle_v8f32_00000200:
41 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
42 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
43 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
46 ; AVX2-LABEL: shuffle_v8f32_00000200:
48 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
49 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
51 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
52 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 4, which takes element 3.
; The AVX-only checks build the two 128-bit halves separately; the AVX2 checks use a constant-index vpermps.
55 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
56 ; AVX1-LABEL: shuffle_v8f32_00003000:
58 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
59 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
60 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
63 ; AVX2-LABEL: shuffle_v8f32_00003000:
65 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
66 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
68 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
69 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 3, which takes element 4 from the upper half of %a,
; so data has to cross the 128-bit halves. The AVX-only checks swap halves with vperm2f128,
; permute both copies in-lane and blend; the AVX2 checks use a constant-index vpermps.
72 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
73 ; AVX1-LABEL: shuffle_v8f32_00040000:
75 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
76 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
77 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
78 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
81 ; AVX2-LABEL: shuffle_v8f32_00040000:
83 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
84 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
86 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
87 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 2, which takes element 5 from the upper half of %a.
; The AVX-only checks swap halves, blend, then permute in-lane; the AVX2 checks use a constant-index vpermps.
90 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
91 ; AVX1-LABEL: shuffle_v8f32_00500000:
93 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
94 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
95 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
98 ; AVX2-LABEL: shuffle_v8f32_00500000:
100 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
101 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
103 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
104 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 1, which takes element 6 from the upper half of %a.
; The AVX-only checks swap halves, blend with vblendpd, then permute in-lane; the AVX2 checks use vpermps.
107 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
108 ; AVX1-LABEL: shuffle_v8f32_06000000:
110 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
111 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
112 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
115 ; AVX2-LABEL: shuffle_v8f32_06000000:
117 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
118 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
120 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
121 ret <8 x float> %shuffle
; Lane 0 takes element 7 (upper half of %a); every other lane takes element 0.
; The AVX2 checks pin the index vector being materialised at run time (vpxor/movl/vpinsrd/vinserti128)
; before the vpermps, rather than being loaded as a constant as in the neighbouring tests.
124 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
125 ; AVX1-LABEL: shuffle_v8f32_70000000:
127 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
128 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
129 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
132 ; AVX2-LABEL: shuffle_v8f32_70000000:
134 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
135 ; AVX2-NEXT: movl $7, %eax
136 ; AVX2-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
137 ; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
138 ; AVX2-NEXT: vinserti128 $0, %xmm1, %ymm2, %ymm1
139 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
141 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
142 ret <8 x float> %shuffle
; Repeats the element pair (0,1) across the low half and (4,5) across the high half of %a.
; Both runs expect a single vmovddup, i.e. the pair is treated as one 64-bit element.
145 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
146 ; ALL-LABEL: shuffle_v8f32_01014545:
148 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
150 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
151 ret <8 x float> %shuffle
; Duplicates each of elements 0-3 of %a into two adjacent lanes (only the low half of %a is read).
; The AVX-only checks use xmm unpacks plus vinsertf128; the AVX2 checks use a constant-index vpermps.
154 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
155 ; AVX1-LABEL: shuffle_v8f32_00112233:
157 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
158 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
159 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
162 ; AVX2-LABEL: shuffle_v8f32_00112233:
164 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
165 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
167 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
168 ret <8 x float> %shuffle
; Low half is a splat of element 0 of %a, high half a splat of element 1.
; The AVX-only checks splat each half in an xmm register and combine with vinsertf128; AVX2 uses vpermps.
171 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
172 ; AVX1-LABEL: shuffle_v8f32_00001111:
174 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
175 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
176 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
179 ; AVX2-LABEL: shuffle_v8f32_00001111:
181 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
182 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
184 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
185 ret <8 x float> %shuffle
; Position-preserving blend: even lanes come from %b (mask indices 8,10,12,14), odd lanes from %a.
; Both runs expect a single vblendps.
188 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
189 ; ALL-LABEL: shuffle_v8f32_81a3c5e7:
191 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
193 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
194 ret <8 x float> %shuffle
; Alternates element 0 of %a (even lanes) with element 0 of %b (odd lanes) across the whole vector.
; Both runs splat each input first and then merge with vblendps; AVX2 uses broadcasts for the splats.
197 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
198 ; AVX1-LABEL: shuffle_v8f32_08080808:
200 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
201 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
202 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
203 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
204 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
207 ; AVX2-LABEL: shuffle_v8f32_08080808:
209 ; AVX2-NEXT: vbroadcastss %xmm1, %ymm1
210 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
211 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
213 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
214 ret <8 x float> %shuffle
; Alternates a[0],b[0] in the low half and a[4],b[4] in the high half, so each half only reads its own half
; of the inputs. Both runs expect a two-input vshufps followed by an in-lane reorder.
217 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
218 ; ALL-LABEL: shuffle_v8f32_08084c4c:
220 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
221 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
223 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
224 ret <8 x float> %shuffle
; Per 128-bit half: two copies of the first element of %b, then the last two elements of %a.
; Both runs expect a single two-input vshufps.
227 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
228 ; ALL-LABEL: shuffle_v8f32_8823cc67:
230 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
232 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
233 ret <8 x float> %shuffle
; Per 128-bit half: the first two elements of %b swapped, then the last two elements of %a swapped.
; Both runs expect a single two-input vshufps.
236 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
237 ; ALL-LABEL: shuffle_v8f32_9832dc76:
239 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
241 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
242 ret <8 x float> %shuffle
; Per 128-bit half: the first two elements of %b swapped, then the first two elements of %a swapped.
; Both runs expect a single two-input vshufps.
245 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
246 ; ALL-LABEL: shuffle_v8f32_9810dc54:
248 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
250 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
251 ret <8 x float> %shuffle
; Interleaves the first two elements of each 128-bit half of %a with those of %b.
; Both runs expect a single vunpcklps.
254 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
255 ; ALL-LABEL: shuffle_v8f32_08194c5d:
257 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
259 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
260 ret <8 x float> %shuffle
; Interleaves the last two elements of each 128-bit half of %a with those of %b.
; Both runs expect a single vunpckhps.
263 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
264 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
266 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
268 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
269 ret <8 x float> %shuffle
; Full-width interleave of the low halves of %a and %b (a0,b0,a1,b1,a2,b2,a3,b3); the high halves are unused.
; The AVX-only checks unpack in xmm registers and join with vinsertf128; the AVX2 checks permute each
; input with vpermps and blend.
272 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
273 ; AVX1-LABEL: shuffle_v8f32_08192a3b:
275 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
276 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
277 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
280 ; AVX2-LABEL: shuffle_v8f32_08192a3b:
282 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
283 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
284 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
285 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
286 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
288 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
289 ret <8 x float> %shuffle
; Lanes 0 and 4 take a[0] and a[1]; the remaining lanes take b[0],b[1],b[1] and b[2],b[3],b[3].
; Only the low halves of both inputs are read. The AVX-only checks work in xmm registers and join with
; vinsertf128; the AVX2 checks permute each input with vpermps and blend.
292 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
293 ; AVX1-LABEL: shuffle_v8f32_08991abb:
295 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
296 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
297 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
298 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
299 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
302 ; AVX2-LABEL: shuffle_v8f32_08991abb:
304 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
305 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
306 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
307 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
308 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
310 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
311 ret <8 x float> %shuffle
; Even lanes take a[0..3] spread out; odd lanes keep the elements of %b that already sit in those positions
; (mask indices 9,11,13,15). Only %a needs permuting, so both runs end with the same vblendps.
314 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
315 ; AVX1-LABEL: shuffle_v8f32_091b2d3f:
317 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
318 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
319 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
320 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
323 ; AVX2-LABEL: shuffle_v8f32_091b2d3f:
325 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
326 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
327 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
329 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
330 ret <8 x float> %shuffle
; Lanes 0 and 4 take a[0] and a[1]; all other lanes keep the elements of %b already in those positions.
; Only %a needs permuting, so both runs end with the same vblendps.
333 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
334 ; AVX1-LABEL: shuffle_v8f32_09ab1def:
336 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
337 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
338 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
341 ; AVX2-LABEL: shuffle_v8f32_09ab1def:
343 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
344 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
345 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
347 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
348 ret <8 x float> %shuffle
; Single-input, in-lane permute that applies the same pattern [0,0,0,1] to each 128-bit half of %a.
; Both runs expect one vpermilps.
351 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
352 ; ALL-LABEL: shuffle_v8f32_00014445:
354 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
356 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
357 ret <8 x float> %shuffle
; Single-input, in-lane permute that applies the same pattern [0,0,2,0] to each 128-bit half of %a.
; Both runs expect one vpermilps.
360 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
361 ; ALL-LABEL: shuffle_v8f32_00204464:
363 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
365 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
366 ret <8 x float> %shuffle
; Single-input, in-lane permute that applies the same pattern [0,3,0,0] to each 128-bit half of %a.
; Both runs expect one vpermilps.
369 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
370 ; ALL-LABEL: shuffle_v8f32_03004744:
372 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
374 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
375 ret <8 x float> %shuffle
; Single-input, in-lane permute that applies the same pattern [1,0,0,0] to each 128-bit half of %a.
; Both runs expect one vpermilps.
378 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
379 ; ALL-LABEL: shuffle_v8f32_10005444:
381 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
383 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
384 ret <8 x float> %shuffle
; Single-input, in-lane permute that applies the same pattern [2,2,0,0] to each 128-bit half of %a.
; Both runs expect one vpermilps.
387 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
388 ; ALL-LABEL: shuffle_v8f32_22006644:
390 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
392 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
393 ret <8 x float> %shuffle
; Single-input, in-lane permute that applies the same pattern [3,3,3,0] to each 128-bit half of %a.
; Both runs expect one vpermilps.
396 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
397 ; ALL-LABEL: shuffle_v8f32_33307774:
399 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
401 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
402 ret <8 x float> %shuffle
; Reverses the four elements inside each 128-bit half of %a without moving data between halves.
; Both runs expect one vpermilps.
405 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
406 ; ALL-LABEL: shuffle_v8f32_32107654:
408 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
410 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
411 ret <8 x float> %shuffle
; Single-input, in-lane permute that applies the same pattern [0,0,2,3] to each 128-bit half of %a.
; Both runs expect one vpermilps.
414 define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
415 ; ALL-LABEL: shuffle_v8f32_00234467:
417 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
419 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
420 ret <8 x float> %shuffle
; Duplicates every even-indexed element of %a into the following odd lane.
; Both runs expect the dedicated vmovsldup instruction instead of a generic vpermilps.
423 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
424 ; ALL-LABEL: shuffle_v8f32_00224466:
426 ; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
428 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
429 ret <8 x float> %shuffle
; Swaps each adjacent pair of elements of %a (pattern [1,0,3,2] in each 128-bit half).
; Both runs expect one vpermilps.
432 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
433 ; ALL-LABEL: shuffle_v8f32_10325476:
435 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
437 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
438 ret <8 x float> %shuffle
; Duplicates every odd-indexed element of %a into the preceding even lane.
; Both runs expect the dedicated vmovshdup instruction instead of a generic vpermilps.
441 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
442 ; ALL-LABEL: shuffle_v8f32_11335577:
444 ; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
446 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
447 ret <8 x float> %shuffle
; Single-input, in-lane permute that applies the same pattern [1,0,2,3] to each 128-bit half of %a.
; Both runs expect one vpermilps.
450 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
451 ; ALL-LABEL: shuffle_v8f32_10235467:
453 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
455 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
456 ret <8 x float> %shuffle
; Single-input, in-lane permute that applies the same pattern [1,0,2,2] to each 128-bit half of %a.
; Both runs expect one vpermilps.
459 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
460 ; ALL-LABEL: shuffle_v8f32_10225466:
462 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
464 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
465 ret <8 x float> %shuffle
; Single-input, in-lane permute with a different pattern per 128-bit half: [0,0,0,1] low, [1,0,0,0] high.
; Both runs still expect one vpermilps.
468 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
469 ; ALL-LABEL: shuffle_v8f32_00015444:
471 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
473 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
474 ret <8 x float> %shuffle
; Single-input, in-lane permute with a different pattern per 128-bit half: [0,0,2,0] low, [0,2,0,0] high.
; Both runs still expect one vpermilps.
477 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
478 ; ALL-LABEL: shuffle_v8f32_00204644:
480 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
482 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
483 ret <8 x float> %shuffle
; Single-input, in-lane permute with a different pattern per 128-bit half: [0,3,0,0] low, [0,0,3,0] high.
; Both runs still expect one vpermilps.
486 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
487 ; ALL-LABEL: shuffle_v8f32_03004474:
489 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
491 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
492 ret <8 x float> %shuffle
; Single-input, in-lane permute with a different pattern per 128-bit half: [1,0,0,0] low, [0,0,0,0] high.
; Both runs still expect one vpermilps.
495 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
496 ; ALL-LABEL: shuffle_v8f32_10004444:
498 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
500 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
501 ret <8 x float> %shuffle
; Single-input, in-lane permute with a different pattern per 128-bit half: [2,2,0,0] low, [2,0,0,2] high.
; Both runs still expect one vpermilps.
504 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
505 ; ALL-LABEL: shuffle_v8f32_22006446:
507 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
509 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
510 ret <8 x float> %shuffle
; Single-input, in-lane permute with a different pattern per 128-bit half: [3,3,3,0] low, [3,0,3,0] high.
; Both runs still expect one vpermilps.
513 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
514 ; ALL-LABEL: shuffle_v8f32_33307474:
516 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
518 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
519 ret <8 x float> %shuffle
; Reverses the low 128-bit half of %a and leaves the high half in its original order.
; Both runs expect one vpermilps.
522 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
523 ; ALL-LABEL: shuffle_v8f32_32104567:
525 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
527 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
528 ret <8 x float> %shuffle
; Single-input, in-lane permute with a different pattern per 128-bit half: [0,0,2,3] low, [2,3,0,0] high.
; Both runs still expect one vpermilps.
531 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
532 ; ALL-LABEL: shuffle_v8f32_00236744:
534 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
536 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
537 ret <8 x float> %shuffle
; Single-input, in-lane permute with a different pattern per 128-bit half: [0,0,2,2] low, [2,2,0,0] high.
; Both runs still expect one vpermilps.
540 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
541 ; ALL-LABEL: shuffle_v8f32_00226644:
543 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
545 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
546 ret <8 x float> %shuffle
; Swaps adjacent pairs in the low 128-bit half of %a and leaves the high half in its original order.
; Both runs expect one vpermilps.
549 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
550 ; ALL-LABEL: shuffle_v8f32_10324567:
552 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
554 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
555 ret <8 x float> %shuffle
; Duplicates the odd elements in the low 128-bit half of %a and leaves the high half in its original order.
; Because the halves differ, the checks expect vpermilps here, not vmovshdup.
558 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
559 ; ALL-LABEL: shuffle_v8f32_11334567:
561 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
563 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
564 ret <8 x float> %shuffle
; Leaves the low 128-bit half of %a untouched and swaps the first two elements of the high half.
; Both runs expect one vpermilps.
567 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
568 ; ALL-LABEL: shuffle_v8f32_01235467:
570 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
572 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
573 ret <8 x float> %shuffle
; Leaves the low 128-bit half of %a untouched and applies the pattern [1,0,2,2] to the high half.
; Both runs expect one vpermilps.
576 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
577 ; ALL-LABEL: shuffle_v8f32_01235466:
579 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
581 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
582 ret <8 x float> %shuffle
; In-lane permute of %a with undef mask entries in lanes 3 and 5 (the 'u' digits in the test name).
; Both runs expect one vpermilps whose printed mask keeps those lanes as 'u'.
585 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
586 ; ALL-LABEL: shuffle_v8f32_002u6u44:
588 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
590 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
591 ret <8 x float> %shuffle
; In-lane permute of %a where the upper two lanes of each 128-bit half are undef.
; Both runs expect one vpermilps whose printed mask keeps those lanes as 'u'.
594 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
595 ; ALL-LABEL: shuffle_v8f32_00uu66uu:
597 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
599 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
600 ret <8 x float> %shuffle
; Swaps adjacent pairs in the low half of %a, keeps elements 4 and 5 in place, and leaves lanes 6-7 undef.
; Both runs expect one vpermilps whose printed mask keeps the undef lanes as 'u'.
603 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
604 ; ALL-LABEL: shuffle_v8f32_103245uu:
606 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
608 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
609 ret <8 x float> %shuffle
; Duplicates the odd elements in the low half of %a, leaves lanes 4-5 undef and keeps elements 6 and 7 in place.
; Both runs expect one vpermilps whose printed mask keeps the undef lanes as 'u'.
612 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
613 ; ALL-LABEL: shuffle_v8f32_1133uu67:
615 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
617 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
618 ret <8 x float> %shuffle
; In-lane permute of %a with undef entries in lanes 1, 2, 6 and 7; the defined lanes are 0, 3, 5, 4.
; Both runs expect one vpermilps whose printed mask keeps the undef lanes as 'u'.
621 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
622 ; ALL-LABEL: shuffle_v8f32_0uu354uu:
624 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
626 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
627 ret <8 x float> %shuffle
; Mostly-undef in-lane permute of %a: only lane 3 (element 3) and lanes 6-7 (element 6 twice) are defined.
; Both runs expect one vpermilps whose printed mask keeps the undef lanes as 'u'.
630 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
631 ; ALL-LABEL: shuffle_v8f32_uuu3uu66:
633 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
635 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
636 ret <8 x float> %shuffle
; Irregular two-input shuffle (mask 12,3,4,8,12,13,10,0) that moves elements of both %a and %b across
; the 128-bit halves. The AVX-only checks need half swaps, in-lane permutes and blends per input; the
; AVX2 checks permute each input with vpermps and finish with one vblendps.
639 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
640 ; AVX1-LABEL: shuffle_v8f32_c348cda0:
642 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
643 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
644 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
645 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
646 ; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
647 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
650 ; AVX2-LABEL: shuffle_v8f32_c348cda0:
652 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0>
653 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
654 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
655 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
656 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
658 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
659 ret <8 x float> %shuffle
; Irregular two-input shuffle (mask 15,5,1,1,2,3,5,10): lanes 0 and 7 come from %b, lanes 1-6 from %a,
; with elements crossing the 128-bit halves. The AVX-only checks need half swaps, in-lane permutes and
; blends per input; the AVX2 checks permute each input with vpermps and finish with one vblendps.
662 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
663 ; AVX1-LABEL: shuffle_v8f32_f511235a:
665 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
666 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
667 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
668 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
669 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
670 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
671 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
674 ; AVX2-LABEL: shuffle_v8f32_f511235a:
676 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2>
677 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
678 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
679 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
680 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
682 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
683 ret <8 x float> %shuffle
; Writes the reversed low half of %a (elements 3,2,1,0) into both 128-bit halves of the result.
; The AVX-only checks reverse in an xmm register and duplicate with vinsertf128; AVX2 uses vpermps.
686 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
687 ; AVX1-LABEL: shuffle_v8f32_32103210:
689 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
690 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
693 ; AVX2-LABEL: shuffle_v8f32_32103210:
695 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
696 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
698 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
699 ret <8 x float> %shuffle
; Writes the reversed high half of %a (elements 7,6,5,4) into both 128-bit halves of the result.
; The AVX-only checks extract the high half, reverse it and duplicate with vinsertf128; AVX2 uses vpermps.
702 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
703 ; AVX1-LABEL: shuffle_v8f32_76547654:
705 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
706 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
707 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
710 ; AVX2-LABEL: shuffle_v8f32_76547654:
712 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
713 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
715 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
716 ret <8 x float> %shuffle
; Full reversal of all eight elements of %a.
; The AVX-only checks swap the 128-bit halves and then reverse within each half; AVX2 uses one vpermps.
719 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
720 ; AVX1-LABEL: shuffle_v8f32_76543210:
722 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
723 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
726 ; AVX2-LABEL: shuffle_v8f32_76543210:
728 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
729 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
731 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
732 ret <8 x float> %shuffle
; Result low half = reversed low half of %a, result high half = reversed low half of %b.
; Both runs expect vinsertf128 to place the low half of %b above the low half of %a, then an in-lane reversal.
735 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
736 ; ALL-LABEL: shuffle_v8f32_3210ba98:
738 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
739 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
741 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
742 ret <8 x float> %shuffle
; Result low half = reversed low half of %a, result high half = reversed high half of %b.
; Each half already sits in the right position, so both runs expect a vblendpd followed by an in-lane reversal.
745 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
746 ; ALL-LABEL: shuffle_v8f32_3210fedc:
748 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
749 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
751 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
752 ret <8 x float> %shuffle
; Result low half = reversed high half of %a, result high half = reversed high half of %b.
; Both runs expect vperm2f128 to gather the two high halves, then an in-lane reversal.
755 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
756 ; ALL-LABEL: shuffle_v8f32_7654fedc:
758 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
759 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
761 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
762 ret <8 x float> %shuffle
; Result low half = reversed high half of %b, result high half = reversed high half of %a.
; Both runs expect vperm2f128 to gather the two high halves (with %b first), then an in-lane reversal.
765 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
766 ; ALL-LABEL: shuffle_v8f32_fedc7654:
768 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
769 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
771 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
772 ret <8 x float> %shuffle
; Result low half = reversed low half of %b, result high half = reversed high half of %a.
; Each half already sits in the right position, so both runs expect a vblendpd followed by an in-lane reversal.
775 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
776 ; ALL-LABEL: shuffle_v8f32_ba987654:
778 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
779 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
781 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
782 ret <8 x float> %shuffle
; Result low half = reversed low half of %b, result high half = reversed low half of %a,
; i.e. mask <11,10,9,8,3,2,1,0> as spelled by the hex digits in the test name.
; The mask previously read <11,10,9,8,7,6,5,4>, which duplicated shuffle_v8f32_ba987654 and left this
; combination untested. The expected vinsertf128 mirrors the operand-swapped shuffle_v8f32_3210ba98 case:
; the low half of %a is inserted above the low half of %b, then each half is reversed in-lane.
; NOTE(review): the first check line was derived by symmetry, not captured from llc; regenerate to confirm.
785 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
786 ; ALL-LABEL: shuffle_v8f32_ba983210:
788 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
789 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
791 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
792 ret <8 x float> %shuffle
; Integer (<8 x i32>) counterpart of the element-0 splat test.
; The expected instructions are the same floating-point-domain ones as in the <8 x float> version.
795 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
796 ; AVX1-LABEL: shuffle_v8i32_00000000:
798 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
799 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
802 ; AVX2-LABEL: shuffle_v8i32_00000000:
804 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
806 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
807 ret <8 x i32> %shuffle
; Integer counterpart: element 0 of %a in every lane except lane 6, which takes element 1.
; The AVX-only checks match the float version; the AVX2 checks use the integer forms vmovdqa + vpermd.
810 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
811 ; AVX1-LABEL: shuffle_v8i32_00000010:
813 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
814 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
815 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
818 ; AVX2-LABEL: shuffle_v8i32_00000010:
820 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
821 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
823 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
824 ret <8 x i32> %shuffle
; Integer counterpart: element 0 of %a in every lane except lane 5, which takes element 2.
; The AVX-only checks match the float version; the AVX2 checks use the integer forms vmovdqa + vpermd.
827 define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
828 ; AVX1-LABEL: shuffle_v8i32_00000200:
830 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
831 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
832 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
835 ; AVX2-LABEL: shuffle_v8i32_00000200:
837 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
838 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
840 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
841 ret <8 x i32> %shuffle
; Lane 4 reads element 3 of %a; every other lane reads element 0.
; AVX1 expects two xmm vpermilps joined by vinsertf128; AVX2 expects vpermd
; driven by a constant index vector.
844 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
845 ; AVX1-LABEL: shuffle_v8i32_00003000:
847 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
848 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
849 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
852 ; AVX2-LABEL: shuffle_v8i32_00003000:
854 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
855 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
857 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
858 ret <8 x i32> %shuffle
; Lane 3 reads element 4 of %a (taken from the upper 128-bit half); every
; other lane reads element 0.
; AVX1 expects vperm2f128, two ymm vpermilps and a vblendps; AVX2 expects
; vpermd driven by a constant index vector.
861 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
862 ; AVX1-LABEL: shuffle_v8i32_00040000:
864 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
865 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
866 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
867 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
870 ; AVX2-LABEL: shuffle_v8i32_00040000:
872 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
873 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
875 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
876 ret <8 x i32> %shuffle
; Lane 2 reads element 5 of %a (taken from the upper 128-bit half); every
; other lane reads element 0.
; AVX1 expects vperm2f128, vblendps and a ymm vpermilps; AVX2 expects vpermd
; driven by a constant index vector.
879 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
880 ; AVX1-LABEL: shuffle_v8i32_00500000:
882 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
883 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
884 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
887 ; AVX2-LABEL: shuffle_v8i32_00500000:
889 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
890 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
892 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
893 ret <8 x i32> %shuffle
; Lane 1 reads element 6 of %a (taken from the upper 128-bit half); every
; other lane reads element 0.
; AVX1 expects vperm2f128, vblendpd and a ymm vpermilps; AVX2 expects vpermd
; driven by a constant index vector.
896 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
897 ; AVX1-LABEL: shuffle_v8i32_06000000:
899 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
900 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
901 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
904 ; AVX2-LABEL: shuffle_v8i32_06000000:
906 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
907 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
909 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
910 ret <8 x i32> %shuffle
; Lane 0 reads element 7 of %a (taken from the upper 128-bit half); every
; other lane reads element 0.
; AVX1 expects vperm2f128, vblendpd and a ymm vpermilps. AVX2 expects the
; vpermd index vector to be built in registers (vpxor, movl $7, vpinsrd,
; vinserti128) rather than loaded as a constant.
913 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
914 ; AVX1-LABEL: shuffle_v8i32_70000000:
916 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
917 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
918 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
921 ; AVX2-LABEL: shuffle_v8i32_70000000:
923 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
924 ; AVX2-NEXT: movl $7, %eax
925 ; AVX2-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
926 ; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
927 ; AVX2-NEXT: vinserti128 $0, %xmm1, %ymm2, %ymm1
928 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
930 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
931 ret <8 x i32> %shuffle
; Each 128-bit half of %a repeats its own first two elements (0,1 and 4,5).
; AVX1 expects a single vmovddup; AVX2 expects a single vpshufd.
934 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
935 ; AVX1-LABEL: shuffle_v8i32_01014545:
937 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
940 ; AVX2-LABEL: shuffle_v8i32_01014545:
942 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
944 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
945 ret <8 x i32> %shuffle
; Duplicates each of elements 0..3 of %a into two adjacent result lanes.
; AVX1 expects two xmm vpermilps joined by vinsertf128; AVX2 expects vpermd
; driven by a constant index vector.
948 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
949 ; AVX1-LABEL: shuffle_v8i32_00112233:
951 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
952 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
953 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
956 ; AVX2-LABEL: shuffle_v8i32_00112233:
958 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
959 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
961 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
962 ret <8 x i32> %shuffle
; Low four lanes read element 0 of %a; high four lanes read element 1.
; AVX1 expects two xmm vpermilps joined by vinsertf128; AVX2 expects vpermd
; driven by a constant index vector.
965 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
966 ; AVX1-LABEL: shuffle_v8i32_00001111:
968 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
969 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
970 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
973 ; AVX2-LABEL: shuffle_v8i32_00001111:
975 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
976 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
978 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
979 ret <8 x i32> %shuffle
; Position-preserving blend: even lanes come from %b, odd lanes from %a.
; AVX1 expects a single vblendps; AVX2 expects a single vpblendd.
982 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
983 ; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
985 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
988 ; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
990 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
992 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
993 ret <8 x i32> %shuffle
; Alternates element 0 of %a (even lanes) with element 0 of %b (odd lanes).
; AVX1 expects each input to be splatted via xmm shuffles and vinsertf128,
; then a vblendps; AVX2 expects vpbroadcastd/vpbroadcastq plus vpblendd.
996 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
997 ; AVX1-LABEL: shuffle_v8i32_08080808:
999 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
1000 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
1001 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1002 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1003 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1006 ; AVX2-LABEL: shuffle_v8i32_08080808:
1008 ; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
1009 ; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
1010 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1012 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
1013 ret <8 x i32> %shuffle
; Within each 128-bit half, alternates that half's first element of %a with
; that half's first element of %b.
; AVX1 expects two vshufps; AVX2 expects two vpshufd followed by vpblendd.
1016 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
1017 ; AVX1-LABEL: shuffle_v8i32_08084c4c:
1019 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
1020 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1023 ; AVX2-LABEL: shuffle_v8i32_08084c4c:
1025 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
1026 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1027 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1029 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
1030 ret <8 x i32> %shuffle
; Per 128-bit half: the half's first element of %b twice, then the half's
; last two elements of %a in their original positions.
; AVX1 expects a single vshufps; AVX2 expects vpshufd plus vpblendd.
1033 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
1034 ; AVX1-LABEL: shuffle_v8i32_8823cc67:
1036 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
1039 ; AVX2-LABEL: shuffle_v8i32_8823cc67:
1041 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
1042 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1044 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
1045 ret <8 x i32> %shuffle
; Per 128-bit half: the half's first two elements of %b swapped, then the
; half's last two elements of %a swapped.
; AVX1 expects a single vshufps; AVX2 expects vpblendd followed by vpshufd.
1048 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
1049 ; AVX1-LABEL: shuffle_v8i32_9832dc76:
1051 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
1054 ; AVX2-LABEL: shuffle_v8i32_9832dc76:
1056 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1057 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1059 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
1060 ret <8 x i32> %shuffle
; Per 128-bit half: the half's first two elements of %b swapped, then the
; half's first two elements of %a swapped.
; AVX1 expects a single vshufps; AVX2 expects two vpshufd plus vpblendd.
1063 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
1064 ; AVX1-LABEL: shuffle_v8i32_9810dc54:
1066 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
1069 ; AVX2-LABEL: shuffle_v8i32_9810dc54:
1071 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
1072 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
1073 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1075 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
1076 ret <8 x i32> %shuffle
; Per 128-bit half, interleaves the half's first two elements of %a with the
; matching elements of %b.
; AVX1 expects a single vunpcklps; AVX2 expects a single vpunpckldq.
1079 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
1080 ; AVX1-LABEL: shuffle_v8i32_08194c5d:
1082 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1085 ; AVX2-LABEL: shuffle_v8i32_08194c5d:
1087 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1089 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
1090 ret <8 x i32> %shuffle
; Per 128-bit half, interleaves the half's last two elements of %a with the
; matching elements of %b.
; AVX1 expects a single vunpckhps; AVX2 expects a single vpunpckhdq.
1093 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
1094 ; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
1096 ; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1099 ; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
1101 ; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1103 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1104 ret <8 x i32> %shuffle
; Interleaves elements 0..3 of %a with elements 0..3 of %b across the whole
; 256-bit result, so elements 2 and 3 move into the upper 128-bit half.
; AVX1 expects xmm vunpckhps/vunpcklps joined by vinsertf128; AVX2 expects
; vpermd on %b, vpmovzxdq on %a, then vpblendd.
1107 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
1108 ; AVX1-LABEL: shuffle_v8i32_08192a3b:
1110 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1111 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1112 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1115 ; AVX2-LABEL: shuffle_v8i32_08192a3b:
1117 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
1118 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1119 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1120 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1122 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1123 ret <8 x i32> %shuffle
; Two-input mask: lanes 0 and 4 read elements 0 and 1 of %a; the remaining
; lanes read elements 0,1,1 (low half) and 2,3,3 (high half) of %b.
; AVX1 expects xmm vshufps/vblendpd/vpermilps joined by vinsertf128; AVX2
; expects one vpermd per input followed by vpblendd.
1126 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
1127 ; AVX1-LABEL: shuffle_v8i32_08991abb:
1129 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
1130 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
1131 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1132 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
1133 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1136 ; AVX2-LABEL: shuffle_v8i32_08991abb:
1138 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1139 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1140 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
1141 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1142 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1144 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
1145 ret <8 x i32> %shuffle
; Even lanes read elements 0..3 of %a in order; odd lanes keep the odd
; elements of %b in their original positions.
; AVX1 expects two xmm vpermilps, vinsertf128 and vblendps; AVX2 expects
; vpmovzxdq on %a followed by vpblendd.
1148 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
1149 ; AVX1-LABEL: shuffle_v8i32_091b2d3f:
1151 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
1152 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
1153 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1154 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1157 ; AVX2-LABEL: shuffle_v8i32_091b2d3f:
1159 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1160 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1162 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1163 ret <8 x i32> %shuffle
; Lanes 0 and 4 read elements 0 and 1 of %a; every other lane keeps the
; element of %b already at that position.
; AVX1 expects vmovshdup, vinsertf128 and vblendps; AVX2 expects vpermd on %a
; followed by vpblendd.
1166 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
1167 ; AVX1-LABEL: shuffle_v8i32_09ab1def:
1169 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
1170 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1171 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1174 ; AVX2-LABEL: shuffle_v8i32_09ab1def:
1176 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1177 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1178 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1180 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1181 ret <8 x i32> %shuffle
; In-lane shuffle of %a: both 128-bit halves use the pattern [0,0,0,1]
; relative to their own half.
; AVX1 expects a single vpermilps; AVX2 expects a single vpshufd.
1184 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
1185 ; AVX1-LABEL: shuffle_v8i32_00014445:
1187 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1190 ; AVX2-LABEL: shuffle_v8i32_00014445:
1192 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1194 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
1195 ret <8 x i32> %shuffle
; In-lane shuffle of %a: both 128-bit halves use the pattern [0,0,2,0]
; relative to their own half.
; AVX1 expects a single vpermilps; AVX2 expects a single vpshufd.
1198 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
1199 ; AVX1-LABEL: shuffle_v8i32_00204464:
1201 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1204 ; AVX2-LABEL: shuffle_v8i32_00204464:
1206 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1208 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
1209 ret <8 x i32> %shuffle
; In-lane shuffle of %a: both 128-bit halves use the pattern [0,3,0,0]
; relative to their own half.
; AVX1 expects a single vpermilps; AVX2 expects a single vpshufd.
1212 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
1213 ; AVX1-LABEL: shuffle_v8i32_03004744:
1215 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1218 ; AVX2-LABEL: shuffle_v8i32_03004744:
1220 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1222 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
1223 ret <8 x i32> %shuffle
; In-lane shuffle of %a: both 128-bit halves use the pattern [1,0,0,0]
; relative to their own half.
; AVX1 expects a single vpermilps; AVX2 expects a single vpshufd.
1226 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
1227 ; AVX1-LABEL: shuffle_v8i32_10005444:
1229 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1232 ; AVX2-LABEL: shuffle_v8i32_10005444:
1234 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1236 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
1237 ret <8 x i32> %shuffle
; In-lane shuffle of %a: both 128-bit halves use the pattern [2,2,0,0]
; relative to their own half.
; AVX1 expects a single vpermilps; AVX2 expects a single vpshufd.
1240 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
1241 ; AVX1-LABEL: shuffle_v8i32_22006644:
1243 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1246 ; AVX2-LABEL: shuffle_v8i32_22006644:
1248 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1250 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
1251 ret <8 x i32> %shuffle
; In-lane shuffle of %a: both 128-bit halves use the pattern [3,3,3,0]
; relative to their own half.
; AVX1 expects a single vpermilps; AVX2 expects a single vpshufd.
1254 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
1255 ; AVX1-LABEL: shuffle_v8i32_33307774:
1257 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1260 ; AVX2-LABEL: shuffle_v8i32_33307774:
1262 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1264 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
1265 ret <8 x i32> %shuffle
; Reverses the four elements of %a inside each 128-bit half independently.
; AVX1 expects a single vpermilps; AVX2 expects a single vpshufd.
1268 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
1269 ; AVX1-LABEL: shuffle_v8i32_32107654:
1271 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1274 ; AVX2-LABEL: shuffle_v8i32_32107654:
1276 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1278 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1279 ret <8 x i32> %shuffle
; In-lane shuffle of %a: both 128-bit halves use the pattern [0,0,2,3]
; relative to their own half.
; AVX1 expects a single vpermilps; AVX2 expects a single vpshufd.
1282 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
1283 ; AVX1-LABEL: shuffle_v8i32_00234467:
1285 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1288 ; AVX2-LABEL: shuffle_v8i32_00234467:
1290 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1292 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
1293 ret <8 x i32> %shuffle
; Duplicates every even-indexed element of %a into the following odd lane.
; AVX1 expects a single vmovsldup; AVX2 expects a single vpshufd.
1296 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
1297 ; AVX1-LABEL: shuffle_v8i32_00224466:
1299 ; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1302 ; AVX2-LABEL: shuffle_v8i32_00224466:
1304 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1306 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1307 ret <8 x i32> %shuffle
; Swaps each adjacent pair of elements of %a (0<->1, 2<->3, 4<->5, 6<->7).
; AVX1 expects a single vpermilps; AVX2 expects a single vpshufd.
1310 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
1311 ; AVX1-LABEL: shuffle_v8i32_10325476:
1313 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1316 ; AVX2-LABEL: shuffle_v8i32_10325476:
1318 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1320 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
1321 ret <8 x i32> %shuffle
; Duplicates every odd-indexed element of %a into the preceding even lane.
; AVX1 expects a single vmovshdup; AVX2 expects a single vpshufd.
1324 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
1325 ; AVX1-LABEL: shuffle_v8i32_11335577:
1327 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1330 ; AVX2-LABEL: shuffle_v8i32_11335577:
1332 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1334 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
1335 ret <8 x i32> %shuffle
; In-lane shuffle of %a: both 128-bit halves use the pattern [1,0,2,3]
; relative to their own half.
; AVX1 expects a single vpermilps; AVX2 expects a single vpshufd.
1338 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
1339 ; AVX1-LABEL: shuffle_v8i32_10235467:
1341 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1344 ; AVX2-LABEL: shuffle_v8i32_10235467:
1346 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1348 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1349 ret <8 x i32> %shuffle
; In-lane shuffle of %a: both 128-bit halves use the pattern [1,0,2,2]
; relative to their own half.
; AVX1 expects a single vpermilps; AVX2 expects a single vpshufd.
1352 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
1353 ; AVX1-LABEL: shuffle_v8i32_10225466:
1355 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1358 ; AVX2-LABEL: shuffle_v8i32_10225466:
1360 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1362 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
1363 ret <8 x i32> %shuffle
; In-lane shuffle of %a whose halves use different patterns: [0,0,0,1] in the
; low half and [1,0,0,0] in the high half (relative to each half).
; AVX1 expects a single ymm vpermilps; AVX2 expects vpermd driven by a
; constant index vector.
1366 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
1367 ; AVX1-LABEL: shuffle_v8i32_00015444:
1369 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
1372 ; AVX2-LABEL: shuffle_v8i32_00015444:
1374 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
1375 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1377 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
1378 ret <8 x i32> %shuffle
; In-lane shuffle of %a whose halves use different patterns: [0,0,2,0] in the
; low half and [0,2,0,0] in the high half (relative to each half).
; AVX1 expects a single ymm vpermilps; AVX2 expects vpermd driven by a
; constant index vector.
1381 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
1382 ; AVX1-LABEL: shuffle_v8i32_00204644:
1384 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
1387 ; AVX2-LABEL: shuffle_v8i32_00204644:
1389 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
1390 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1392 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
1393 ret <8 x i32> %shuffle
; In-lane shuffle of %a whose halves use different patterns: [0,3,0,0] in the
; low half and [0,0,3,0] in the high half (relative to each half).
; AVX1 expects a single ymm vpermilps; AVX2 expects vpermd driven by a
; constant index vector.
1396 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
1397 ; AVX1-LABEL: shuffle_v8i32_03004474:
1399 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
1402 ; AVX2-LABEL: shuffle_v8i32_03004474:
1404 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
1405 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1407 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
1408 ret <8 x i32> %shuffle
; In-lane shuffle of %a whose halves use different patterns: [1,0,0,0] in the
; low half and a splat of the half's first element in the high half.
; AVX1 expects a single ymm vpermilps; AVX2 expects vpermd driven by a
; constant index vector.
1411 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
1412 ; AVX1-LABEL: shuffle_v8i32_10004444:
1414 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
1417 ; AVX2-LABEL: shuffle_v8i32_10004444:
1419 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
1420 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1422 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1423 ret <8 x i32> %shuffle
; In-lane shuffle of %a whose halves use different patterns: [2,2,0,0] in the
; low half and [2,0,0,2] in the high half (relative to each half).
; AVX1 expects a single ymm vpermilps; AVX2 expects vpermd driven by a
; constant index vector.
1426 define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
1427 ; AVX1-LABEL: shuffle_v8i32_22006446:
1429 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
1432 ; AVX2-LABEL: shuffle_v8i32_22006446:
1434 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
1435 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1437 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
1438 ret <8 x i32> %shuffle
; In-lane shuffle of %a whose halves use different patterns: [3,3,3,0] in the
; low half and [3,0,3,0] in the high half (relative to each half).
; AVX1 expects a single ymm vpermilps; AVX2 expects vpermd driven by a
; constant index vector.
1441 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
1442 ; AVX1-LABEL: shuffle_v8i32_33307474:
1444 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
1447 ; AVX2-LABEL: shuffle_v8i32_33307474:
1449 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
1450 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1452 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
1453 ret <8 x i32> %shuffle
; Reverses the low four elements of %a and leaves the high four in place.
; AVX1 expects a single ymm vpermilps; AVX2 expects vpermd driven by a
; constant index vector.
1456 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
1457 ; AVX1-LABEL: shuffle_v8i32_32104567:
1459 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
1462 ; AVX2-LABEL: shuffle_v8i32_32104567:
1464 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
1465 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1467 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
1468 ret <8 x i32> %shuffle
; In-lane shuffle of %a whose halves use different patterns: [0,0,2,3] in the
; low half and [2,3,0,0] in the high half (relative to each half).
; AVX1 expects a single ymm vpermilps; AVX2 expects vpermd driven by a
; constant index vector.
1471 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
1472 ; AVX1-LABEL: shuffle_v8i32_00236744:
1474 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
1477 ; AVX2-LABEL: shuffle_v8i32_00236744:
1479 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
1480 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1482 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
1483 ret <8 x i32> %shuffle
; In-lane shuffle of %a whose halves use different patterns: [0,0,2,2] in the
; low half and [2,2,0,0] in the high half (relative to each half).
; AVX1 expects a single ymm vpermilps; AVX2 expects vpermd driven by a
; constant index vector.
1486 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
1487 ; AVX1-LABEL: shuffle_v8i32_00226644:
1489 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
1492 ; AVX2-LABEL: shuffle_v8i32_00226644:
1494 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
1495 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1497 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
1498 ret <8 x i32> %shuffle
; Swaps adjacent pairs in the low four elements of %a and leaves the high
; four in place.
; AVX1 expects a single ymm vpermilps; AVX2 expects vpermd driven by a
; constant index vector.
1501 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
1502 ; AVX1-LABEL: shuffle_v8i32_10324567:
1504 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
1507 ; AVX2-LABEL: shuffle_v8i32_10324567:
1509 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
1510 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1512 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1513 ret <8 x i32> %shuffle
; Low half of %a uses the pattern [1,1,3,3]; the high four elements stay in
; place.
; AVX1 expects a single ymm vpermilps; AVX2 expects vpermd driven by a
; constant index vector.
1516 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
1517 ; AVX1-LABEL: shuffle_v8i32_11334567:
1519 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
1522 ; AVX2-LABEL: shuffle_v8i32_11334567:
1524 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
1525 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1527 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
1528 ret <8 x i32> %shuffle
; Low four elements of %a stay in place; the high half swaps its first two
; elements (5,4) and keeps the last two.
; AVX1 expects a single ymm vpermilps; AVX2 expects vpermd driven by a
; constant index vector.
1531 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
1532 ; AVX1-LABEL: shuffle_v8i32_01235467:
1534 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
1537 ; AVX2-LABEL: shuffle_v8i32_01235467:
1539 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
1540 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1542 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1543 ret <8 x i32> %shuffle
; Low four elements of %a stay in place; the high half uses the pattern
; [1,0,2,2] relative to that half.
; AVX1 expects a single ymm vpermilps; AVX2 expects vpermd driven by a
; constant index vector.
1546 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
1547 ; AVX1-LABEL: shuffle_v8i32_01235466:
1549 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
1552 ; AVX2-LABEL: shuffle_v8i32_01235466:
1554 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
1555 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1557 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
1558 ret <8 x i32> %shuffle
; In-lane shuffle of %a with undef lanes 3 and 5; the defined lanes follow
; [0,0,2,u] in the low half and [2,u,0,0] in the high half.
; The expected output prints undef lanes as "u". AVX1 expects a single ymm
; vpermilps; AVX2 expects vpermd driven by a constant index vector.
1561 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
1562 ; AVX1-LABEL: shuffle_v8i32_002u6u44:
1564 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
1567 ; AVX2-LABEL: shuffle_v8i32_002u6u44:
1569 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
1570 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1572 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
1573 ret <8 x i32> %shuffle
; In-lane shuffle of %a with undef lanes 2, 3, 6 and 7; lanes 0,1 read
; element 0 and lanes 4,5 read element 6.
; The expected output prints undef lanes as "u". AVX1 expects a single ymm
; vpermilps; AVX2 expects vpermd driven by a constant index vector.
1576 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
1577 ; AVX1-LABEL: shuffle_v8i32_00uu66uu:
1579 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
1582 ; AVX2-LABEL: shuffle_v8i32_00uu66uu:
1584 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
1585 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1587 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
1588 ret <8 x i32> %shuffle
; In-lane shuffle of %a with undef lanes 6 and 7; the low half swaps adjacent
; pairs and lanes 4,5 keep elements 4,5 in place.
; The expected output prints undef lanes as "u". AVX1 expects a single ymm
; vpermilps; AVX2 expects vpermd driven by a constant index vector.
1591 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
1592 ; AVX1-LABEL: shuffle_v8i32_103245uu:
1594 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
1597 ; AVX2-LABEL: shuffle_v8i32_103245uu:
1599 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
1600 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1602 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
1603 ret <8 x i32> %shuffle
; In-lane shuffle of %a with undef lanes 4 and 5; the low half uses
; [1,1,3,3] and lanes 6,7 keep elements 6,7 in place.
; The expected output prints undef lanes as "u". AVX1 expects a single ymm
; vpermilps; AVX2 expects vpermd driven by a constant index vector.
1606 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
1607 ; AVX1-LABEL: shuffle_v8i32_1133uu67:
1609 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
1612 ; AVX2-LABEL: shuffle_v8i32_1133uu67:
1614 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
1615 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1617 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
1618 ret <8 x i32> %shuffle
; In-lane shuffle of %a with undef lanes 1, 2, 6 and 7; lanes 0 and 3 keep
; elements 0 and 3, and lanes 4,5 read elements 5,4.
; The expected output prints undef lanes as "u". AVX1 expects a single ymm
; vpermilps; AVX2 expects vpermd driven by a constant index vector.
1621 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
1622 ; AVX1-LABEL: shuffle_v8i32_0uu354uu:
1624 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
1627 ; AVX2-LABEL: shuffle_v8i32_0uu354uu:
1629 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
1630 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1632 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
1633 ret <8 x i32> %shuffle
; Mostly-undef in-lane shuffle of %a: lane 3 keeps element 3 and lanes 6,7
; both read element 6; all other lanes are undef.
; The expected output prints undef lanes as "u". AVX1 expects a single ymm
; vpermilps; AVX2 expects vpermd driven by a constant index vector.
1636 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
1637 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
1639 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
1642 ; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
1644 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
1645 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1647 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
1648 ret <8 x i32> %shuffle
; Irregular two-input mask: lanes 0, 5 and 7 read elements 6, 7 and 5 of %a;
; lanes 1, 2, 3, 4 and 6 read elements 4, 2, 2, 0 and 6 of %b.
; AVX1 expects a six-instruction sequence ending in vblendps; AVX2 expects
; vpermd on %b, vpermq on %a, then vpblendd.
1651 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
1652 ; AVX1-LABEL: shuffle_v8i32_6caa87e5:
1654 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
1655 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
1656 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1657 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1658 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1659 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1662 ; AVX2-LABEL: shuffle_v8i32_6caa87e5:
1664 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u>
1665 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1666 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
1667 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1669 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
1670 ret <8 x i32> %shuffle
; Both 128-bit halves of the result hold elements 3..0 of %a (the low half of
; %a reversed).
; AVX1 expects an xmm vpermilps plus vinsertf128; AVX2 expects vpermd driven
; by a constant index vector.
1673 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
1674 ; AVX1-LABEL: shuffle_v8i32_32103210:
1676 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1677 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1680 ; AVX2-LABEL: shuffle_v8i32_32103210:
1682 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
1683 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1685 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
1686 ret <8 x i32> %shuffle
; Both 128-bit halves of the result hold elements 7..4 of %a (the high half
; of %a reversed).
; AVX1 expects vextractf128, an xmm vpermilps and vinsertf128; AVX2 expects
; vpermd driven by a constant index vector.
1689 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
1690 ; AVX1-LABEL: shuffle_v8i32_76547654:
1692 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1693 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1694 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1697 ; AVX2-LABEL: shuffle_v8i32_76547654:
1699 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
1700 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1702 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
1703 ret <8 x i32> %shuffle
; Full reversal of the eight elements of %a.
; AVX1 expects vperm2f128 (swap halves) followed by an in-lane vpermilps;
; AVX2 expects vpermd driven by a constant index vector.
1706 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
1707 ; AVX1-LABEL: shuffle_v8i32_76543210:
1709 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1710 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1713 ; AVX2-LABEL: shuffle_v8i32_76543210:
1715 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
1716 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1718 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1719 ret <8 x i32> %shuffle
; Low half of the result is elements 3..0 of %a; high half is elements 3..0
; of %b (mask indices 11..8).
; AVX1 expects vinsertf128 followed by vpermilps; AVX2 expects vinserti128
; followed by vpshufd.
1722 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
1723 ; AVX1-LABEL: shuffle_v8i32_3210ba98:
1725 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1726 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1729 ; AVX2-LABEL: shuffle_v8i32_3210ba98:
1731 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1732 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1734 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
1735 ret <8 x i32> %shuffle
; Low half of the result is elements 3..0 of %a; high half is elements 7..4
; of %b (mask indices 15..12).
; AVX1 expects vblendpd followed by vpermilps; AVX2 expects vpblendd followed
; by vpshufd.
1738 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
1739 ; AVX1-LABEL: shuffle_v8i32_3210fedc:
1741 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1742 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1745 ; AVX2-LABEL: shuffle_v8i32_3210fedc:
1747 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1748 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1750 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
1751 ret <8 x i32> %shuffle
; Result = high half of %a reversed (7,6,5,4) followed by high half of %b
; reversed (mask indices 15,14,13,12 address elements 7..4 of %b).
; The checks expect a 128-bit lane permute that gathers the upper half of each
; source (vperm2f128 / vperm2i128), then an in-lane reversal (vpermilps with
; AVX1, vpshufd with AVX2).
1754 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
1755 ; AVX1-LABEL: shuffle_v8i32_7654fedc:
1757 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1758 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1761 ; AVX2-LABEL: shuffle_v8i32_7654fedc:
1763 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1764 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1766 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
1767 ret <8 x i32> %shuffle
; Result = high half of %b reversed (mask indices 15,14,13,12) followed by high
; half of %a reversed (7,6,5,4); the operand-swapped counterpart of
; shuffle_v8i32_7654fedc.
; The checks expect the same two-step sequence with the sources exchanged: a
; 128-bit lane permute taking the upper half of ymm1 then of ymm0, followed by
; an in-lane reversal.
1770 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
1771 ; AVX1-LABEL: shuffle_v8i32_fedc7654:
1773 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1774 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1777 ; AVX2-LABEL: shuffle_v8i32_fedc7654:
1779 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1780 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1782 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
1783 ret <8 x i32> %shuffle
; Result = low half of %b reversed (mask indices 11,10,9,8 address elements
; 3..0 of %b) followed by high half of %a reversed (7,6,5,4).
; Each source half is already in its destination lane, so the checks expect a
; blend (low half from ymm1, high half from ymm0) followed by an in-lane
; reversal (vpermilps with AVX1, vpshufd with AVX2).
1786 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
1787 ; AVX1-LABEL: shuffle_v8i32_ba987654:
1789 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
1790 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1793 ; AVX2-LABEL: shuffle_v8i32_ba987654:
1795 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1796 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1798 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
1799 ret <8 x i32> %shuffle
; Result = low half of %b reversed (mask indices 11,10,9,8 address elements
; 3..0 of %b) followed by low half of %a reversed (3,2,1,0).
; The mask previously duplicated shuffle_v8i32_ba987654 (upper four indices
; 7,6,5,4), so the pattern named by this function was never exercised; it now
; matches the name.
; Expected lowering mirrors shuffle_v8i32_3210ba98 with the sources exchanged:
; insert the low 128 bits of %a into the upper half of %b, then reverse within
; each 128-bit lane.
; NOTE(review): the expectations below were derived by symmetry with
; shuffle_v8i32_3210ba98; regenerate them with the check-update script to
; confirm.
1802 define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
1803 ; AVX1-LABEL: shuffle_v8i32_ba983210:
1805 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1806 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1809 ; AVX2-LABEL: shuffle_v8i32_ba983210:
1811 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1812 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1814 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
1815 ret <8 x i32> %shuffle
; Splat of a scalar float to all eight lanes: %1 is inserted into lane 0 of an
; undef <4 x float>, and an all-zero shuffle mask then replicates lane 0 into
; an 8-element result.
; NOTE(review): %1 is expected to be the float loaded from %p - confirm against
; the full function body.
; The checks use the shared ALL prefix, i.e. both run lines are expected to
; fold the whole pattern into one vbroadcastss with a memory operand.
1818 define <8 x float> @splat_mem_v8f32_2(float* %p) {
1819 ; ALL-LABEL: splat_mem_v8f32_2:
1821 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0
1824 %2 = insertelement <4 x float> undef, float %1, i32 0
1825 %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
; Splat of element 0 of the 4-element register argument %r to all eight lanes
; of the result (all-zero shuffle mask with an 8-element result type).
; The AVX1 checks expect an in-register splat of the xmm value (vpermilps)
; followed by vinsertf128 to duplicate it into the upper half. The AVX2 checks
; expect a single register-source vbroadcastss.
1829 define <8 x float> @splat_v8f32(<4 x float> %r) {
1830 ; AVX1-LABEL: splat_v8f32:
1832 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1833 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1836 ; AVX2-LABEL: splat_v8f32:
1838 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
1840 %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
1845 ; Shuffle to logical bit shifts
; Shuffle of %a with an all-zero vector: mask index 8 selects a zero element
; and "undef" leaves the lane unconstrained, giving z,a0,U,a2,z,U,z,a6
; (z = zero, U = undef).
; Every defined odd lane holds the even element below it and every defined
; even lane is zero, which matches a left shift of each 64-bit element by 32
; bits; the AVX2 checks expect exactly that (vpsllq $32).
; The AVX1 checks instead expect a zeroed register plus two vshufps.
1848 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
1849 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
1851 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1852 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
1853 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
1856 ; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
1858 ; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
1860 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
1861 ret <8 x i32> %shuffle
; Shuffle of %a with an all-zero vector: mask index 8 selects a zero element
; and "undef" leaves the lane unconstrained, giving a1,U,a3,z,a5,z,U,U
; (z = zero, U = undef).
; Every defined even lane holds the odd element above it and every defined odd
; lane is zero, which matches a logical right shift of each 64-bit element by
; 32 bits; the AVX2 checks expect exactly that (vpsrlq $32).
; The AVX1 checks instead expect a zeroed register plus two vshufps.
1864 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
1865 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
1867 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1868 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
1869 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1872 ; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
1874 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
1876 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
1877 ret <8 x i32> %shuffle
; Concatenation of two <2 x float> values loaded from %tmp64 and %tmp65 into
; the low four lanes of an 8-element result; the upper four lanes are undef.
; This variant first widens each 2-element value to 8 lanes (%tmp73, %tmp75)
; and then picks lanes 0,1 of the first and lanes 0,1 of the second (mask
; indices 8,9).
; The checks use the shared ALL prefix: both run lines are expected to emit a
; 64-bit load (vmovq) followed by vmovhpd from (%rsi) for the second value.
1880 define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
1881 ; ALL-LABEL: concat_v2f32_1:
1882 ; ALL: # BB#0: # %entry
1883 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1884 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
1887 %tmp74 = load <2 x float>* %tmp65, align 8
1888 %tmp72 = load <2 x float>* %tmp64, align 8
1889 %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1890 %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1891 %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
1892 ret <8 x float> %tmp76
; Same concatenation as concat_v2f32_1, expressed as a single shufflevector
; that takes the two loaded <2 x float> values directly and produces an
; 8-element result (lanes 0..3 defined, lanes 4..7 undef).
; The checks use the shared ALL prefix and expect the same two-instruction
; sequence: a 64-bit load (vmovq) followed by vmovhpd from (%rsi).
1895 define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
1896 ; ALL-LABEL: concat_v2f32_2:
1897 ; ALL: # BB#0: # %entry
1898 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1899 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
1902 %tmp74 = load <2 x float>* %tmp65, align 8
1903 %tmp72 = load <2 x float>* %tmp64, align 8
1904 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1905 ret <8 x float> %tmp76
; Same concatenation as concat_v2f32_1, done in two steps: the two loaded
; <2 x float> values are first joined into a <4 x float> (%tmp76), which is
; then widened to 8 lanes with the upper four lanes undef (%res).
; The checks use the shared ALL prefix and expect the same two-instruction
; sequence: a 64-bit load (vmovq) followed by vmovhpd from (%rsi).
1908 define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
1909 ; ALL-LABEL: concat_v2f32_3:
1910 ; ALL: # BB#0: # %entry
1911 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1912 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
1915 %tmp74 = load <2 x float>* %tmp65, align 8
1916 %tmp72 = load <2 x float>* %tmp64, align 8
1917 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1918 %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1919 ret <8 x float> %res