1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-legality | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-legality | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
4 target triple = "x86_64-unknown-unknown"
; Broadcast element 0 of %a into all eight result lanes; the mask never selects from %b.
; The +avx run expects an in-lane splat plus vinsertf128, the +avx2 run a single vbroadcastss.
6 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
7 ; AVX1-LABEL: shuffle_v8f32_00000000:
9 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
10 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
13 ; AVX2-LABEL: shuffle_v8f32_00000000:
15 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
17 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
18 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 6, which takes element 1.
21 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
22 ; AVX1-LABEL: shuffle_v8f32_00000010:
24 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
25 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
26 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
29 ; AVX2-LABEL: shuffle_v8f32_00000010:
31 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
32 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
34 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
35 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 5, which takes element 2.
38 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
39 ; AVX1-LABEL: shuffle_v8f32_00000200:
41 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
42 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
43 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
46 ; AVX2-LABEL: shuffle_v8f32_00000200:
48 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
49 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
51 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
52 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 4, which takes element 3.
55 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
56 ; AVX1-LABEL: shuffle_v8f32_00003000:
58 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
59 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
60 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
63 ; AVX2-LABEL: shuffle_v8f32_00003000:
65 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
66 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
68 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
69 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 3, which takes element 4 from the upper 128-bit half.
72 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
73 ; AVX1-LABEL: shuffle_v8f32_00040000:
75 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
76 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
77 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
78 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
81 ; AVX2-LABEL: shuffle_v8f32_00040000:
83 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
84 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
86 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
87 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 2, which takes element 5 from the upper 128-bit half.
90 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
91 ; AVX1-LABEL: shuffle_v8f32_00500000:
93 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
94 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
95 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
98 ; AVX2-LABEL: shuffle_v8f32_00500000:
100 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
101 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
103 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
104 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 1, which takes element 6 from the upper 128-bit half.
107 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
108 ; AVX1-LABEL: shuffle_v8f32_06000000:
110 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
111 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
112 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
115 ; AVX2-LABEL: shuffle_v8f32_06000000:
117 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
118 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
120 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
121 ret <8 x float> %shuffle
; Element 0 of %a in every lane except lane 0, which takes element 7 from the upper 128-bit half.
; The +avx2 expectations build the vpermps index vector in registers (movl/vpinsrd/vinserti128)
; instead of loading it as a constant like the neighbouring tests.
124 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
125 ; AVX1-LABEL: shuffle_v8f32_70000000:
127 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
128 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
129 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
132 ; AVX2-LABEL: shuffle_v8f32_70000000:
134 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
135 ; AVX2-NEXT: movl $7, %eax
136 ; AVX2-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
137 ; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
138 ; AVX2-NEXT: vinserti128 $0, %xmm1, %ymm2, %ymm1
139 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
141 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
142 ret <8 x float> %shuffle
; Repeat the low element pair of each 128-bit half of %a (0,1 and 4,5); expected as one vmovddup in both runs.
145 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
146 ; ALL-LABEL: shuffle_v8f32_01014545:
148 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
150 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
151 ret <8 x float> %shuffle
; Duplicate each of elements 0-3 of %a into two adjacent result lanes.
154 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
155 ; AVX1-LABEL: shuffle_v8f32_00112233:
157 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
158 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
159 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
162 ; AVX2-LABEL: shuffle_v8f32_00112233:
164 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
165 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
167 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
168 ret <8 x float> %shuffle
; Lanes 0-3 splat element 0 of %a; lanes 4-7 splat element 1 of %a.
171 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
172 ; AVX1-LABEL: shuffle_v8f32_00001111:
174 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
175 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
176 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
179 ; AVX2-LABEL: shuffle_v8f32_00001111:
181 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
182 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
184 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
185 ret <8 x float> %shuffle
; Pure per-lane blend: even lanes come from %b and odd lanes from %a, each element staying in its own lane.
188 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
189 ; ALL-LABEL: shuffle_v8f32_81a3c5e7:
191 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
193 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
194 ret <8 x float> %shuffle
; Alternate element 0 of %a (even lanes) with element 0 of %b (odd lanes) across the whole vector.
197 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
198 ; AVX1-LABEL: shuffle_v8f32_08080808:
200 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
201 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
202 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
203 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
204 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
207 ; AVX2-LABEL: shuffle_v8f32_08080808:
209 ; AVX2-NEXT: vbroadcastss %xmm1, %ymm1
210 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
211 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
213 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
214 ret <8 x float> %shuffle
; Within each 128-bit half, alternate that half's first element of %a with that half's first element of %b.
217 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
218 ; ALL-LABEL: shuffle_v8f32_08084c4c:
220 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
221 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
223 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
224 ret <8 x float> %shuffle
; Per 128-bit half: the half's first element of %b twice, then the half's upper two elements of %a in place.
227 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
228 ; ALL-LABEL: shuffle_v8f32_8823cc67:
230 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
232 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
233 ret <8 x float> %shuffle
; Per 128-bit half: the low pair of %b swapped, followed by the high pair of %a swapped.
236 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
237 ; ALL-LABEL: shuffle_v8f32_9832dc76:
239 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
241 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
242 ret <8 x float> %shuffle
; Per 128-bit half: the low pair of %b swapped, followed by the low pair of %a swapped.
245 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
246 ; ALL-LABEL: shuffle_v8f32_9810dc54:
248 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
250 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
251 ret <8 x float> %shuffle
; Interleave the low element pair of each 128-bit half of %a with the same pair of %b; expected as vunpcklps.
254 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
255 ; ALL-LABEL: shuffle_v8f32_08194c5d:
257 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
259 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
260 ret <8 x float> %shuffle
; Interleave the high element pair of each 128-bit half of %a with the same pair of %b; expected as vunpckhps.
263 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
264 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
266 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
268 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
269 ret <8 x float> %shuffle
; Interleave elements 0-3 of %a with elements 0-3 of %b across the full 256-bit result (a0,b0,a1,b1,a2,b2,a3,b3).
272 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
273 ; AVX1-LABEL: shuffle_v8f32_08192a3b:
275 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
276 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
277 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
280 ; AVX2-LABEL: shuffle_v8f32_08192a3b:
282 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
283 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
284 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
285 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
286 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
288 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
289 ret <8 x float> %shuffle
; Result is a0,b0,b1,b1 in the low half and a1,b2,b3,b3 in the high half; only the low halves of both inputs are read.
292 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
293 ; AVX1-LABEL: shuffle_v8f32_08991abb:
295 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
296 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
297 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
298 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
299 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
302 ; AVX2-LABEL: shuffle_v8f32_08991abb:
304 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
305 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
306 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
307 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
308 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
310 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
311 ret <8 x float> %shuffle
; Even lanes take elements 0-3 of %a in order; odd lanes keep the odd elements of %b in their own lanes.
314 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
315 ; AVX1-LABEL: shuffle_v8f32_091b2d3f:
317 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
318 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
319 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
320 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
323 ; AVX2-LABEL: shuffle_v8f32_091b2d3f:
325 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
326 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
327 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
329 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
330 ret <8 x float> %shuffle
; Lane 0 takes a0 and lane 4 takes a1; every other lane keeps the element of %b already in that lane.
333 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
334 ; AVX1-LABEL: shuffle_v8f32_09ab1def:
336 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
337 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
338 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
341 ; AVX2-LABEL: shuffle_v8f32_09ab1def:
343 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
344 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
345 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
347 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
348 ret <8 x float> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern [0,0,0,1].
351 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
352 ; ALL-LABEL: shuffle_v8f32_00014445:
354 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
356 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
357 ret <8 x float> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern [0,0,2,0].
360 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
361 ; ALL-LABEL: shuffle_v8f32_00204464:
363 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
365 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
366 ret <8 x float> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern [0,3,0,0].
369 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
370 ; ALL-LABEL: shuffle_v8f32_03004744:
372 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
374 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
375 ret <8 x float> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern [1,0,0,0].
378 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
379 ; ALL-LABEL: shuffle_v8f32_10005444:
381 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
383 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
384 ret <8 x float> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern [2,2,0,0].
387 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
388 ; ALL-LABEL: shuffle_v8f32_22006644:
390 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
392 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
393 ret <8 x float> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern [3,3,3,0].
396 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
397 ; ALL-LABEL: shuffle_v8f32_33307774:
399 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
401 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
402 ret <8 x float> %shuffle
; Single-input in-lane permute of %a; each 128-bit half is reversed in place (pattern [3,2,1,0]).
405 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
406 ; ALL-LABEL: shuffle_v8f32_32107654:
408 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
410 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
411 ret <8 x float> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern [0,0,2,3].
414 define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
415 ; ALL-LABEL: shuffle_v8f32_00234467:
417 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
419 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
420 ret <8 x float> %shuffle
; Duplicate the even elements of %a into the adjacent odd lanes (pattern [0,0,2,2] per half); expected as vmovsldup.
423 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
424 ; ALL-LABEL: shuffle_v8f32_00224466:
426 ; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
428 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
429 ret <8 x float> %shuffle
; Single-input in-lane permute of %a; swaps the two elements of every adjacent pair (pattern [1,0,3,2] per half).
432 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
433 ; ALL-LABEL: shuffle_v8f32_10325476:
435 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
437 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
438 ret <8 x float> %shuffle
; Duplicate the odd elements of %a into the adjacent even lanes (pattern [1,1,3,3] per half); expected as vmovshdup.
441 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
442 ; ALL-LABEL: shuffle_v8f32_11335577:
444 ; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
446 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
447 ret <8 x float> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern [1,0,2,3].
450 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
451 ; ALL-LABEL: shuffle_v8f32_10235467:
453 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
455 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
456 ret <8 x float> %shuffle
; Single-input in-lane permute of %a; both 128-bit halves use the same pattern [1,0,2,2].
459 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
460 ; ALL-LABEL: shuffle_v8f32_10225466:
462 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
464 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
465 ret <8 x float> %shuffle
; Single-input in-lane permute of %a with a different pattern per half: [0,0,0,1] low, [1,0,0,0] high.
468 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
469 ; ALL-LABEL: shuffle_v8f32_00015444:
471 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
473 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
474 ret <8 x float> %shuffle
; Single-input in-lane permute of %a with a different pattern per half: [0,0,2,0] low, [0,2,0,0] high.
477 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
478 ; ALL-LABEL: shuffle_v8f32_00204644:
480 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
482 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
483 ret <8 x float> %shuffle
; Single-input in-lane permute of %a with a different pattern per half: [0,3,0,0] low, [0,0,3,0] high.
486 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
487 ; ALL-LABEL: shuffle_v8f32_03004474:
489 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
491 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
492 ret <8 x float> %shuffle
; Single-input in-lane permute of %a with a different pattern per half: [1,0,0,0] low, splat of element 4 high.
495 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
496 ; ALL-LABEL: shuffle_v8f32_10004444:
498 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
500 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
501 ret <8 x float> %shuffle
; Single-input in-lane permute of %a with a different pattern per half: [2,2,0,0] low, [2,0,0,2] high.
504 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
505 ; ALL-LABEL: shuffle_v8f32_22006446:
507 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
509 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
510 ret <8 x float> %shuffle
; Single-input in-lane permute of %a with a different pattern per half: [3,3,3,0] low, [3,0,3,0] high.
513 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
514 ; ALL-LABEL: shuffle_v8f32_33307474:
516 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
518 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
519 ret <8 x float> %shuffle
; Single-input in-lane permute of %a: the low 128-bit half is reversed, the high half is left in place.
522 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
523 ; ALL-LABEL: shuffle_v8f32_32104567:
525 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
527 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
528 ret <8 x float> %shuffle
; Single-input in-lane permute of %a with a different pattern per half: [0,0,2,3] low, [2,3,0,0] high.
531 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
532 ; ALL-LABEL: shuffle_v8f32_00236744:
534 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
536 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
537 ret <8 x float> %shuffle
; Single-input in-lane permute of %a with a different pattern per half: [0,0,2,2] low, [2,2,0,0] high.
540 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
541 ; ALL-LABEL: shuffle_v8f32_00226644:
543 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
545 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
546 ret <8 x float> %shuffle
; Single-input in-lane permute of %a: adjacent pairs are swapped in the low half ([1,0,3,2]); the high half is left in place.
549 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
550 ; ALL-LABEL: shuffle_v8f32_10324567:
552 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
554 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
555 ret <8 x float> %shuffle
; Single-input in-lane permute of %a: the low half uses [1,1,3,3]; the high half is left in place.
558 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
559 ; ALL-LABEL: shuffle_v8f32_11334567:
561 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
563 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
564 ret <8 x float> %shuffle
; Single-input in-lane permute of %a: the low half is left in place; the high half swaps its first two elements ([1,0,2,3]).
567 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
568 ; ALL-LABEL: shuffle_v8f32_01235467:
570 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
572 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
573 ret <8 x float> %shuffle
; Single-input in-lane permute of %a: the low half is left in place; the high half uses [1,0,2,2].
576 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
577 ; ALL-LABEL: shuffle_v8f32_01235466:
579 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
581 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
582 ret <8 x float> %shuffle
; In-lane permute of %a with undef lanes 3 and 5: [0,0,2,u] low, [2,u,0,0] high; the undef lanes are expected to print as 'u'.
585 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
586 ; ALL-LABEL: shuffle_v8f32_002u6u44:
588 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
590 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
591 ret <8 x float> %shuffle
; In-lane permute of %a with the upper pair of each half undef: [0,0,u,u] low, [2,2,u,u] high.
594 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
595 ; ALL-LABEL: shuffle_v8f32_00uu66uu:
597 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
599 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
600 ret <8 x float> %shuffle
; In-lane permute of %a: [1,0,3,2] in the low half; the high half keeps elements 4,5 in place and leaves lanes 6,7 undef.
603 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
604 ; ALL-LABEL: shuffle_v8f32_103245uu:
606 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
608 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
609 ret <8 x float> %shuffle
; In-lane permute of %a: [1,1,3,3] in the low half; the high half leaves lanes 4,5 undef and keeps elements 6,7 in place.
612 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
613 ; ALL-LABEL: shuffle_v8f32_1133uu67:
615 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
617 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
618 ret <8 x float> %shuffle
; In-lane permute of %a with four undef lanes: [0,u,u,3] low, [1,0,u,u] high.
621 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
622 ; ALL-LABEL: shuffle_v8f32_0uu354uu:
624 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
626 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
627 ret <8 x float> %shuffle
; Mostly-undef in-lane permute of %a: only lane 3 (element 3) and lanes 6,7 (both element 6) are defined.
630 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
631 ; ALL-LABEL: shuffle_v8f32_uuu3uu66:
633 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
635 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
636 ret <8 x float> %shuffle
; Irregular two-input shuffle producing b4,a3,a4,b0,b4,b5,b2,a0; elements of both inputs cross 128-bit halves.
; The +avx2 run expects one vpermps per input followed by a blend; the +avx run a longer in-lane sequence.
639 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
640 ; AVX1-LABEL: shuffle_v8f32_c348cda0:
642 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
643 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
644 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
645 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
646 ; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
647 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
650 ; AVX2-LABEL: shuffle_v8f32_c348cda0:
652 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0>
653 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
654 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
655 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
656 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
658 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
659 ret <8 x float> %shuffle
; Irregular two-input shuffle producing b7,a5,a1,a1,a2,a3,a5,b2; only lanes 0 and 7 come from %b.
662 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
663 ; AVX1-LABEL: shuffle_v8f32_f511235a:
665 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
666 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
667 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
668 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
669 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
670 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
671 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
674 ; AVX2-LABEL: shuffle_v8f32_f511235a:
676 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2>
677 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
678 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
679 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
680 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
682 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
683 ret <8 x float> %shuffle
; Elements 3..0 of %a (the low half reversed), repeated in both 128-bit halves of the result.
686 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
687 ; AVX1-LABEL: shuffle_v8f32_32103210:
689 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
690 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
693 ; AVX2-LABEL: shuffle_v8f32_32103210:
695 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
696 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
698 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
699 ret <8 x float> %shuffle
; Elements 7..4 of %a (the high half reversed), repeated in both 128-bit halves of the result.
702 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
703 ; AVX1-LABEL: shuffle_v8f32_76547654:
705 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
706 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
707 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
710 ; AVX2-LABEL: shuffle_v8f32_76547654:
712 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
713 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
715 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
716 ret <8 x float> %shuffle
; Full reversal of the eight elements of %a.
719 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
720 ; AVX1-LABEL: shuffle_v8f32_76543210:
722 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
723 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
726 ; AVX2-LABEL: shuffle_v8f32_76543210:
728 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
729 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
731 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
732 ret <8 x float> %shuffle
; Low result half: the low half of %a reversed (a3..a0); high result half: the low half of %b reversed (b3..b0).
735 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
736 ; ALL-LABEL: shuffle_v8f32_3210ba98:
738 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
739 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
741 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
742 ret <8 x float> %shuffle
; Low result half: the low half of %a reversed (a3..a0); high result half: the high half of %b reversed (b7..b4).
745 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
746 ; ALL-LABEL: shuffle_v8f32_3210fedc:
748 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
749 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
751 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
752 ret <8 x float> %shuffle
; Low result half: the high half of %a reversed (a7..a4); high result half: the high half of %b reversed (b7..b4).
755 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
756 ; ALL-LABEL: shuffle_v8f32_7654fedc:
758 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
759 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
761 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
762 ret <8 x float> %shuffle
; Low result half: the high half of %b reversed (b7..b4); high result half: the high half of %a reversed (a7..a4).
765 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
766 ; ALL-LABEL: shuffle_v8f32_fedc7654:
768 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
769 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
771 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
772 ret <8 x float> %shuffle
; Gathers the odd-indexed elements of %truc into lanes 0-3 and the odd-indexed elements of %tchose into lanes 4-7.
; NOTE(review): the function name presumably refers to LLVM bug report PR21138 - confirm against the tracker.
775 define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
776 ; AVX1-LABEL: PR21138:
778 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
779 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
780 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
781 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
782 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
783 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
786 ; AVX2-LABEL: PR21138:
788 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,u,u,1,3,5,7>
789 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
790 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <1,3,5,7,u,u,u,u>
791 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
792 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
794 %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
795 ret <8 x float> %shuffle
; Low result half: the low half of %b reversed (b3..b0); high result half: the high half of %a reversed (a7..a4).
798 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
799 ; ALL-LABEL: shuffle_v8f32_ba987654:
801 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
802 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
804 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
805 ret <8 x float> %shuffle
; Low result half: the low half of %b reversed (b3..b0); high result half: the low half of %a
; reversed (a3..a0), i.e. mask 11,10,9,8,3,2,1,0 as the function name encodes.
; The expected lowering mirrors shuffle_v8f32_3210ba98 with the two inputs swapped (insert the low
; half of %a above the low half of %b, then reverse within each half); regenerate the expectations
; from llc output if the backend picks a different but equivalent sequence.
808 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
809 ; ALL-LABEL: shuffle_v8f32_ba983210:
811 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
812 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
814 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
815 ret <8 x float> %shuffle
; Two-input shuffle with undef lanes 2, 6 and 7: lanes 0,1 take b0,a0; lane 3 takes a1; lanes 4,5 take b4,a4.
; NOTE(review): the name spells lane 4 as 'b' (index 11), but the mask uses index 12 ('c'), and the
; expectations below match index 12. The function name looks stale; the mask and checks are consistent.
818 define <8 x float> @shuffle_v8f32_80u1b4uu(<8 x float> %a, <8 x float> %b) {
819 ; ALL-LABEL: shuffle_v8f32_80u1b4uu:
821 ; ALL-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[0,0],ymm1[4,4],ymm0[4,4]
822 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[2,1],ymm1[4,6],ymm0[6,5]
824 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
825 ret <8 x float> %shuffle
; Splat of element 0 of %a across a v8i32. The AVX1 run expects an xmm
; vpermilps splat copied into the upper half with vinsertf128; the AVX2 run
; expects a single vbroadcastss.
828 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
829 ; AVX1-LABEL: shuffle_v8i32_00000000:
831 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
832 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
835 ; AVX2-LABEL: shuffle_v8i32_00000000:
837 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
839 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
840 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle: element 0 everywhere except element 1 at
; position 6. The AVX1 run builds each 128-bit half with an xmm vpermilps and
; joins them with vinsertf128; the AVX2 run loads an index vector and uses vpermd.
843 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
844 ; AVX1-LABEL: shuffle_v8i32_00000010:
846 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
847 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
848 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
851 ; AVX2-LABEL: shuffle_v8i32_00000010:
853 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
854 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
856 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
857 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle: element 0 everywhere except element 2 at
; position 5. The AVX1 run builds each 128-bit half with an xmm vpermilps and
; joins them with vinsertf128; the AVX2 run loads an index vector and uses vpermd.
860 define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
861 ; AVX1-LABEL: shuffle_v8i32_00000200:
863 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
864 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
865 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
868 ; AVX2-LABEL: shuffle_v8i32_00000200:
870 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
871 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
873 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
874 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle: element 0 everywhere except element 3 at
; position 4. The AVX1 run builds each 128-bit half with an xmm vpermilps and
; joins them with vinsertf128; the AVX2 run loads an index vector and uses vpermd.
877 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
878 ; AVX1-LABEL: shuffle_v8i32_00003000:
880 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
881 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
882 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
885 ; AVX2-LABEL: shuffle_v8i32_00003000:
887 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
888 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
890 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
891 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that pulls element 4 (upper half) into position 3
; of the lower half. The AVX1 run swaps the 128-bit halves with vperm2f128,
; permutes both copies with vpermilps and merges them with vblendps; the AVX2
; run loads an index vector and uses vpermd.
894 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
895 ; AVX1-LABEL: shuffle_v8i32_00040000:
897 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
898 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
899 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
900 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
903 ; AVX2-LABEL: shuffle_v8i32_00040000:
905 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
906 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
908 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
909 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that pulls element 5 (upper half) into position 2.
; The AVX1 run swaps the 128-bit halves with vperm2f128, merges with vblendps
; and finishes with one vpermilps; the AVX2 run loads an index vector and uses
; vpermd.
912 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
913 ; AVX1-LABEL: shuffle_v8i32_00500000:
915 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
916 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
917 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
920 ; AVX2-LABEL: shuffle_v8i32_00500000:
922 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
923 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
925 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
926 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that pulls element 6 (upper half) into position 1.
; The AVX1 run swaps the 128-bit halves with vperm2f128, merges with vblendpd
; and finishes with one vpermilps; the AVX2 run loads an index vector and uses
; vpermd.
929 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
930 ; AVX1-LABEL: shuffle_v8i32_06000000:
932 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
933 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
934 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
937 ; AVX2-LABEL: shuffle_v8i32_06000000:
939 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
940 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
942 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
943 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that pulls element 7 (upper half) into position 0.
; The AVX1 run swaps the 128-bit halves with vperm2f128, merges with vblendpd
; and finishes with one vpermilps. The AVX2 run still ends in vpermd, but the
; index vector is assembled in registers (vpxor, movl $7, vpinsrd, vinserti128)
; instead of being loaded with vmovdqa as in the neighbouring tests.
946 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
947 ; AVX1-LABEL: shuffle_v8i32_70000000:
949 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
950 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
951 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
954 ; AVX2-LABEL: shuffle_v8i32_70000000:
956 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
957 ; AVX2-NEXT: movl $7, %eax
958 ; AVX2-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
959 ; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
960 ; AVX2-NEXT: vinserti128 $0, %xmm1, %ymm2, %ymm1
961 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
963 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
964 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that duplicates the low 64 bits of each 128-bit
; half (mask 0,1,0,1,4,5,4,5). The AVX1 run expects vmovddup; the AVX2 run
; expects vpshufd.
967 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
968 ; AVX1-LABEL: shuffle_v8i32_01014545:
970 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
973 ; AVX2-LABEL: shuffle_v8i32_01014545:
975 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
977 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
978 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that doubles each of the four low elements
; (mask 0,0,1,1,2,2,3,3). The AVX1 run builds both halves with xmm vpermilps
; and joins them with vinsertf128; the AVX2 run loads an index vector and uses
; vpermd.
981 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
982 ; AVX1-LABEL: shuffle_v8i32_00112233:
984 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
985 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
986 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
989 ; AVX2-LABEL: shuffle_v8i32_00112233:
991 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
992 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
994 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
995 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle: element 0 splat in the low half, element 1 splat
; in the high half. The AVX1 run builds both halves with xmm vpermilps and
; joins them with vinsertf128; the AVX2 run loads an index vector and uses
; vpermd.
998 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
999 ; AVX1-LABEL: shuffle_v8i32_00001111:
1001 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
1002 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1003 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1006 ; AVX2-LABEL: shuffle_v8i32_00001111:
1008 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
1009 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1011 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
1012 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle where every element stays in place and even
; positions come from %b, odd positions from %a. Expected as a single blend,
; vblendps on the AVX1 run and vpblendd on the AVX2 run.
1015 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
1016 ; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
1018 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1021 ; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
1023 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1025 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
1026 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle alternating element 0 of %a with element 0 of %b
; (mask 0,8,0,8,0,8,0,8). The AVX1 run widens each source with an xmm permute
; plus vinsertf128 and then blends; the AVX2 run uses vpbroadcastd and
; vpbroadcastq followed by vpblendd.
1029 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
1030 ; AVX1-LABEL: shuffle_v8i32_08080808:
1032 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
1033 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
1034 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1035 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1036 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1039 ; AVX2-LABEL: shuffle_v8i32_08080808:
1041 ; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
1042 ; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
1043 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1045 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
1046 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle with the same pattern in both 128-bit halves
; (mask 0,8,0,8,4,12,4,12). The AVX1 run expects two vshufps; the AVX2 run
; expects one vpshufd per source followed by vpblendd.
1049 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
1050 ; AVX1-LABEL: shuffle_v8i32_08084c4c:
1052 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
1053 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1056 ; AVX2-LABEL: shuffle_v8i32_08084c4c:
1058 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
1059 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1060 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1062 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
1063 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle (mask 8,8,2,3,12,12,6,7). The AVX1 run expects a
; single vshufps; the AVX2 run expects vpshufd on ymm1 followed by vpblendd.
1066 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
1067 ; AVX1-LABEL: shuffle_v8i32_8823cc67:
1069 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
1072 ; AVX2-LABEL: shuffle_v8i32_8823cc67:
1074 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
1075 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1077 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
1078 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle (mask 9,8,3,2,13,12,7,6). The AVX1 run expects a
; single vshufps; the AVX2 run expects vpblendd first and then one vpshufd
; that swaps adjacent elements.
1081 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
1082 ; AVX1-LABEL: shuffle_v8i32_9832dc76:
1084 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
1087 ; AVX2-LABEL: shuffle_v8i32_9832dc76:
1089 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1090 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1092 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
1093 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle (mask 9,8,1,0,13,12,5,4). The AVX1 run expects a
; single vshufps; the AVX2 run expects one vpshufd per source followed by
; vpblendd.
1096 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
1097 ; AVX1-LABEL: shuffle_v8i32_9810dc54:
1099 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
1102 ; AVX2-LABEL: shuffle_v8i32_9810dc54:
1104 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
1105 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
1106 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1108 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
1109 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle interleaving the low two elements of each 128-bit
; half of %a and %b (mask 0,8,1,9,4,12,5,13). Expected as vunpcklps on the
; AVX1 run and vpunpckldq on the AVX2 run.
1112 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
1113 ; AVX1-LABEL: shuffle_v8i32_08194c5d:
1115 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1118 ; AVX2-LABEL: shuffle_v8i32_08194c5d:
1120 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1122 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
1123 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle interleaving the high two elements of each 128-bit
; half of %a and %b (mask 2,10,3,11,6,14,7,15). Expected as vunpckhps on the
; AVX1 run and vpunpckhdq on the AVX2 run.
1126 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
1127 ; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
1129 ; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1132 ; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
1134 ; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1136 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1137 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle interleaving the low four elements of %a and %b
; across the whole vector (mask 0,8,1,9,2,10,3,11). The AVX1 run expects xmm
; vunpckhps and vunpcklps joined by vinsertf128; the AVX2 run expects vpermd
; on ymm1, vpmovzxdq on xmm0 and a final vpblendd.
1140 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
1141 ; AVX1-LABEL: shuffle_v8i32_08192a3b:
1143 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1144 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1145 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1148 ; AVX2-LABEL: shuffle_v8i32_08192a3b:
1150 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
1151 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1152 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1153 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1155 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1156 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle (mask 0,8,9,9,1,10,11,11) that only reads the low
; four elements of each source. The AVX1 run builds each half from xmm
; shuffles and joins them with vinsertf128; the AVX2 run expects one vpermd
; per source followed by vpblendd.
1159 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
1160 ; AVX1-LABEL: shuffle_v8i32_08991abb:
1162 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
1163 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
1164 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1165 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
1166 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1169 ; AVX2-LABEL: shuffle_v8i32_08991abb:
1171 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1172 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1173 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
1174 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1175 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1177 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
1178 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle (mask 0,9,1,11,2,13,3,15): the odd positions of %b
; stay in place while the low four elements of %a are spread over the even
; positions. The AVX1 run spreads %a with xmm vpermilps plus vinsertf128 and
; then blends; the AVX2 run uses vpmovzxdq followed by vpblendd.
1181 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
1182 ; AVX1-LABEL: shuffle_v8i32_091b2d3f:
1184 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
1185 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
1186 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1187 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1190 ; AVX2-LABEL: shuffle_v8i32_091b2d3f:
1192 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1193 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1195 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1196 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle (mask 0,9,10,11,1,13,14,15): %b stays in place except
; positions 0 and 4, which take elements 0 and 1 of %a. The AVX1 run expects
; vmovshdup, vinsertf128 and vblendps; the AVX2 run expects vpermd on ymm0
; followed by vpblendd.
1199 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
1200 ; AVX1-LABEL: shuffle_v8i32_09ab1def:
1202 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
1203 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1204 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1207 ; AVX2-LABEL: shuffle_v8i32_09ab1def:
1209 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1210 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1211 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1213 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1214 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle whose pattern 0,0,0,1 repeats in both 128-bit
; halves. Expected as one instruction, vpermilps on the AVX1 run and vpshufd on
; the AVX2 run.
1217 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
1218 ; AVX1-LABEL: shuffle_v8i32_00014445:
1220 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1223 ; AVX2-LABEL: shuffle_v8i32_00014445:
1225 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1227 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
1228 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle whose pattern 0,0,2,0 repeats in both 128-bit
; halves. Expected as one instruction, vpermilps on the AVX1 run and vpshufd on
; the AVX2 run.
1231 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
1232 ; AVX1-LABEL: shuffle_v8i32_00204464:
1234 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1237 ; AVX2-LABEL: shuffle_v8i32_00204464:
1239 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1241 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
1242 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle whose pattern 0,3,0,0 repeats in both 128-bit
; halves. Expected as one instruction, vpermilps on the AVX1 run and vpshufd on
; the AVX2 run.
1245 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
1246 ; AVX1-LABEL: shuffle_v8i32_03004744:
1248 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1251 ; AVX2-LABEL: shuffle_v8i32_03004744:
1253 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1255 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
1256 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle whose pattern 1,0,0,0 repeats in both 128-bit
; halves. Expected as one instruction, vpermilps on the AVX1 run and vpshufd on
; the AVX2 run.
1259 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
1260 ; AVX1-LABEL: shuffle_v8i32_10005444:
1262 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1265 ; AVX2-LABEL: shuffle_v8i32_10005444:
1267 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1269 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
1270 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle whose pattern 2,2,0,0 repeats in both 128-bit
; halves. Expected as one instruction, vpermilps on the AVX1 run and vpshufd on
; the AVX2 run.
1273 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
1274 ; AVX1-LABEL: shuffle_v8i32_22006644:
1276 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1279 ; AVX2-LABEL: shuffle_v8i32_22006644:
1281 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1283 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
1284 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle whose pattern 3,3,3,0 repeats in both 128-bit
; halves. Expected as one instruction, vpermilps on the AVX1 run and vpshufd on
; the AVX2 run.
1287 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
1288 ; AVX1-LABEL: shuffle_v8i32_33307774:
1290 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1293 ; AVX2-LABEL: shuffle_v8i32_33307774:
1295 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1297 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
1298 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that reverses the elements inside each 128-bit
; half. Expected as one instruction, vpermilps on the AVX1 run and vpshufd on
; the AVX2 run.
1301 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
1302 ; AVX1-LABEL: shuffle_v8i32_32107654:
1304 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1307 ; AVX2-LABEL: shuffle_v8i32_32107654:
1309 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1311 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1312 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle whose pattern 0,0,2,3 repeats in both 128-bit
; halves. Expected as one instruction, vpermilps on the AVX1 run and vpshufd on
; the AVX2 run.
1315 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
1316 ; AVX1-LABEL: shuffle_v8i32_00234467:
1318 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1321 ; AVX2-LABEL: shuffle_v8i32_00234467:
1323 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1325 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
1326 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that duplicates every even element into the
; following odd position. The AVX1 run expects vmovsldup; the AVX2 run expects
; vpshufd.
1329 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
1330 ; AVX1-LABEL: shuffle_v8i32_00224466:
1332 ; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1335 ; AVX2-LABEL: shuffle_v8i32_00224466:
1337 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1339 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1340 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that swaps each adjacent pair of elements.
; Expected as one instruction, vpermilps on the AVX1 run and vpshufd on the
; AVX2 run.
1343 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
1344 ; AVX1-LABEL: shuffle_v8i32_10325476:
1346 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1349 ; AVX2-LABEL: shuffle_v8i32_10325476:
1351 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1353 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
1354 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that duplicates every odd element into the
; preceding even position. The AVX1 run expects vmovshdup; the AVX2 run expects
; vpshufd.
1357 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
1358 ; AVX1-LABEL: shuffle_v8i32_11335577:
1360 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1363 ; AVX2-LABEL: shuffle_v8i32_11335577:
1365 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1367 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
1368 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle whose pattern 1,0,2,3 repeats in both 128-bit
; halves. Expected as one instruction, vpermilps on the AVX1 run and vpshufd on
; the AVX2 run.
1371 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
1372 ; AVX1-LABEL: shuffle_v8i32_10235467:
1374 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1377 ; AVX2-LABEL: shuffle_v8i32_10235467:
1379 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1381 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1382 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle whose pattern 1,0,2,2 repeats in both 128-bit
; halves. Expected as one instruction, vpermilps on the AVX1 run and vpshufd on
; the AVX2 run.
1385 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
1386 ; AVX1-LABEL: shuffle_v8i32_10225466:
1388 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1391 ; AVX2-LABEL: shuffle_v8i32_10225466:
1393 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1395 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
1396 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that stays within each 128-bit half but uses a
; different pattern per half (0,0,0,1 then 1,0,0,0). The AVX1 run expects one
; vpermilps; the AVX2 run loads an index vector and uses vpermd.
1399 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
1400 ; AVX1-LABEL: shuffle_v8i32_00015444:
1402 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
1405 ; AVX2-LABEL: shuffle_v8i32_00015444:
1407 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
1408 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1410 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
1411 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that stays within each 128-bit half but uses a
; different pattern per half (0,0,2,0 then 0,2,0,0). The AVX1 run expects one
; vpermilps; the AVX2 run loads an index vector and uses vpermd.
1414 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
1415 ; AVX1-LABEL: shuffle_v8i32_00204644:
1417 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
1420 ; AVX2-LABEL: shuffle_v8i32_00204644:
1422 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
1423 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1425 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
1426 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that stays within each 128-bit half but uses a
; different pattern per half (0,3,0,0 then 0,0,3,0). The AVX1 run expects one
; vpermilps; the AVX2 run loads an index vector and uses vpermd.
1429 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
1430 ; AVX1-LABEL: shuffle_v8i32_03004474:
1432 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
1435 ; AVX2-LABEL: shuffle_v8i32_03004474:
1437 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
1438 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1440 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
1441 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that stays within each 128-bit half but uses a
; different pattern per half (1,0,0,0 then a splat of element 4). The AVX1 run
; expects one vpermilps; the AVX2 run loads an index vector and uses vpermd.
1444 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
1445 ; AVX1-LABEL: shuffle_v8i32_10004444:
1447 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
1450 ; AVX2-LABEL: shuffle_v8i32_10004444:
1452 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
1453 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1455 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1456 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that stays within each 128-bit half but uses a
; different pattern per half (2,2,0,0 then 2,0,0,2). The AVX1 run expects one
; vpermilps; the AVX2 run loads an index vector and uses vpermd.
1459 define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
1460 ; AVX1-LABEL: shuffle_v8i32_22006446:
1462 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
1465 ; AVX2-LABEL: shuffle_v8i32_22006446:
1467 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
1468 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1470 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
1471 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that stays within each 128-bit half but uses a
; different pattern per half (3,3,3,0 then 3,0,3,0). The AVX1 run expects one
; vpermilps; the AVX2 run loads an index vector and uses vpermd.
1474 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
1475 ; AVX1-LABEL: shuffle_v8i32_33307474:
1477 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
1480 ; AVX2-LABEL: shuffle_v8i32_33307474:
1482 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
1483 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1485 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
1486 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that reverses the low 128-bit half and leaves the
; high half in place. The AVX1 run expects one vpermilps; the AVX2 run loads an
; index vector and uses vpermd.
1489 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
1490 ; AVX1-LABEL: shuffle_v8i32_32104567:
1492 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
1495 ; AVX2-LABEL: shuffle_v8i32_32104567:
1497 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
1498 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1500 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
1501 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that stays within each 128-bit half but uses a
; different pattern per half (0,0,2,3 then 2,3,0,0). The AVX1 run expects one
; vpermilps; the AVX2 run loads an index vector and uses vpermd.
1504 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
1505 ; AVX1-LABEL: shuffle_v8i32_00236744:
1507 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
1510 ; AVX2-LABEL: shuffle_v8i32_00236744:
1512 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
1513 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1515 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
1516 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that stays within each 128-bit half but uses a
; different pattern per half (0,0,2,2 then 2,2,0,0). The AVX1 run expects one
; vpermilps; the AVX2 run loads an index vector and uses vpermd.
1519 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
1520 ; AVX1-LABEL: shuffle_v8i32_00226644:
1522 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
1525 ; AVX2-LABEL: shuffle_v8i32_00226644:
1527 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
1528 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1530 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
1531 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that swaps adjacent pairs in the low 128-bit half
; and leaves the high half in place. The AVX1 run expects one vpermilps; the
; AVX2 run loads an index vector and uses vpermd.
1534 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
1535 ; AVX1-LABEL: shuffle_v8i32_10324567:
1537 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
1540 ; AVX2-LABEL: shuffle_v8i32_10324567:
1542 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
1543 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1545 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1546 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that duplicates the odd elements of the low
; 128-bit half and leaves the high half in place. The AVX1 run expects one
; vpermilps; the AVX2 run loads an index vector and uses vpermd.
1549 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
1550 ; AVX1-LABEL: shuffle_v8i32_11334567:
1552 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
1555 ; AVX2-LABEL: shuffle_v8i32_11334567:
1557 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
1558 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1560 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
1561 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that only swaps elements 4 and 5. The AVX1 run
; expects one vpermilps; the AVX2 run loads an index vector and uses vpermd.
1564 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
1565 ; AVX1-LABEL: shuffle_v8i32_01235467:
1567 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
1570 ; AVX2-LABEL: shuffle_v8i32_01235467:
1572 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
1573 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1575 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1576 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that keeps the low 128-bit half in place and
; permutes the high half as 5,4,6,6. The AVX1 run expects one vpermilps; the
; AVX2 run loads an index vector and uses vpermd.
1579 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
1580 ; AVX1-LABEL: shuffle_v8i32_01235466:
1582 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
1585 ; AVX2-LABEL: shuffle_v8i32_01235466:
1587 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
1588 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1590 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
1591 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle with undef lanes at positions 3 and 5; the defined
; indices stay within their own 128-bit half. The AVX1 run expects one
; vpermilps that keeps the undef lanes; the AVX2 run loads an index vector
; with undef entries and uses vpermd.
1594 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
1595 ; AVX1-LABEL: shuffle_v8i32_002u6u44:
1597 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
1600 ; AVX2-LABEL: shuffle_v8i32_002u6u44:
1602 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
1603 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1605 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
1606 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle with undef lanes at positions 2, 3, 6 and 7; the
; defined indices stay within their own 128-bit half. The AVX1 run expects one
; vpermilps that keeps the undef lanes; the AVX2 run loads an index vector
; with undef entries and uses vpermd.
1609 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
1610 ; AVX1-LABEL: shuffle_v8i32_00uu66uu:
1612 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
1615 ; AVX2-LABEL: shuffle_v8i32_00uu66uu:
1617 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
1618 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1620 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
1621 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle with undef lanes at positions 6 and 7; the defined
; indices stay within their own 128-bit half. The AVX1 run expects one
; vpermilps that keeps the undef lanes; the AVX2 run loads an index vector
; with undef entries and uses vpermd.
1624 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
1625 ; AVX1-LABEL: shuffle_v8i32_103245uu:
1627 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
1630 ; AVX2-LABEL: shuffle_v8i32_103245uu:
1632 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
1633 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1635 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
1636 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle with undef lanes at positions 4 and 5; the defined
; indices stay within their own 128-bit half. The AVX1 run expects one
; vpermilps that keeps the undef lanes; the AVX2 run loads an index vector
; with undef entries and uses vpermd.
1639 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
1640 ; AVX1-LABEL: shuffle_v8i32_1133uu67:
1642 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
1645 ; AVX2-LABEL: shuffle_v8i32_1133uu67:
1647 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
1648 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1650 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
1651 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle with undef lanes at positions 1, 2, 6 and 7; the
; defined indices stay within their own 128-bit half. The AVX1 run expects one
; vpermilps that keeps the undef lanes; the AVX2 run loads an index vector
; with undef entries and uses vpermd.
1654 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
1655 ; AVX1-LABEL: shuffle_v8i32_0uu354uu:
1657 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
1660 ; AVX2-LABEL: shuffle_v8i32_0uu354uu:
1662 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
1663 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1665 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
1666 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle where only positions 3, 6 and 7 are defined; the
; defined indices stay within their own 128-bit half. The AVX1 run expects one
; vpermilps that keeps the undef lanes; the AVX2 run loads an index vector
; with undef entries and uses vpermd.
1669 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
1670 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
1672 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
1675 ; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
1677 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
1678 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1680 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
1681 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle with an irregular mask (6,12,10,10,8,7,14,5) that
; moves elements across 128-bit halves in both sources. The AVX1 run shuffles
; each source separately (vperm2f128 plus vshufps for ymm1; vextractf128,
; vpermilpd and vinsertf128 for ymm0) and merges them with vblendps. The AVX2
; run uses vpermd on ymm1 and vpermq on ymm0 followed by vpblendd.
1684 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
1685 ; AVX1-LABEL: shuffle_v8i32_6caa87e5:
1687 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
1688 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
1689 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1690 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1691 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1692 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1695 ; AVX2-LABEL: shuffle_v8i32_6caa87e5:
1697 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u>
1698 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1699 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
1700 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1702 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
1703 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that places the reversed low 128-bit half in both
; halves of the result. The AVX1 run expects an xmm vpermilps copied into the
; upper half with vinsertf128; the AVX2 run loads an index vector and uses
; vpermd.
1706 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
1707 ; AVX1-LABEL: shuffle_v8i32_32103210:
1709 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1710 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1713 ; AVX2-LABEL: shuffle_v8i32_32103210:
1715 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
1716 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1718 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
1719 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that places the reversed high 128-bit half in both
; halves of the result. The AVX1 run extracts the high half with vextractf128,
; reverses it with an xmm vpermilps and duplicates it with vinsertf128; the
; AVX2 run loads an index vector and uses vpermd.
1722 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
1723 ; AVX1-LABEL: shuffle_v8i32_76547654:
1725 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1726 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1727 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1730 ; AVX2-LABEL: shuffle_v8i32_76547654:
1732 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
1733 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1735 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
1736 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle that reverses all eight elements. The AVX1 run
; swaps the 128-bit halves with vperm2f128 and then reverses inside each half
; with vpermilps; the AVX2 run loads an index vector and uses vpermd.
1739 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
1740 ; AVX1-LABEL: shuffle_v8i32_76543210:
1742 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1743 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1746 ; AVX2-LABEL: shuffle_v8i32_76543210:
1748 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
1749 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1751 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1752 ret <8 x i32> %shuffle
; Low half of the result is %a[3..0]; high half is %b[3..0] (mask indices
; 11,10,9,8 address the second operand). The checks expect the low 128 bits
; of %b to be inserted above the low 128 bits of %a, then an in-lane reverse.
1755 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
1756 ; AVX1-LABEL: shuffle_v8i32_3210ba98:
1758 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1759 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1762 ; AVX2-LABEL: shuffle_v8i32_3210ba98:
1764 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1765 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1767 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
1768 ret <8 x i32> %shuffle
; Low half of the result is %a[3..0]; high half is %b[7..4] (mask indices
; 15,14,13,12). Each source half stays in its own 128-bit lane, so the checks
; expect a blend of the two inputs followed by an in-lane reverse.
1771 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
1772 ; AVX1-LABEL: shuffle_v8i32_3210fedc:
1774 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1775 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1778 ; AVX2-LABEL: shuffle_v8i32_3210fedc:
1780 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1781 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1783 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
1784 ret <8 x i32> %shuffle
; Low half of the result is %a[7..4]; high half is %b[7..4]. The checks
; expect a 128-bit lane permute that gathers the high halves of both inputs,
; followed by an in-lane reverse.
1787 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
1788 ; AVX1-LABEL: shuffle_v8i32_7654fedc:
1790 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1791 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1794 ; AVX2-LABEL: shuffle_v8i32_7654fedc:
1796 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1797 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1799 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
1800 ret <8 x i32> %shuffle
; Operand-swapped counterpart of the 7654fedc pattern: low half of the result
; is %b[7..4], high half is %a[7..4]. The checks expect the same lane permute
; with the two source registers exchanged, followed by an in-lane reverse.
1803 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
1804 ; AVX1-LABEL: shuffle_v8i32_fedc7654:
1806 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1807 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1810 ; AVX2-LABEL: shuffle_v8i32_fedc7654:
1812 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1813 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1815 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
1816 ret <8 x i32> %shuffle
; Low half of the result is %b[3..0]; high half is %a[7..4]. Each source half
; stays in its own 128-bit lane, so the checks expect a blend (low lane from
; %b, high lane from %a) followed by an in-lane reverse.
1819 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
1820 ; AVX1-LABEL: shuffle_v8i32_ba987654:
1822 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
1823 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1826 ; AVX2-LABEL: shuffle_v8i32_ba987654:
1828 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1829 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1831 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
1832 ret <8 x i32> %shuffle
; Low half of the result is %b[3..0]; high half is %a[3..0], i.e. the mask is
; 11,10,9,8,3,2,1,0 as encoded in the test name. With a mask ending in
; 7,6,5,4 this test would merely duplicate shuffle_v8i32_ba987654.
; The expected lowering is the operand-swapped form of the 3210ba98 pattern:
; insert the low 128 bits of %a above the low 128 bits of %b, then reverse
; within each lane. These expectations were derived by symmetry; regenerate
; them from llc output if they do not match.
1835 define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
1836 ; AVX1-LABEL: shuffle_v8i32_ba983210:
1838 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1839 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1842 ; AVX2-LABEL: shuffle_v8i32_ba983210:
1844 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1845 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1847 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
1848 ret <8 x i32> %shuffle
; The first shuffle operand is zeroinitializer and the second is %a, so mask
; index 0 selects a zero and indices 8 and 12 select %a[0] and %a[4].
; Result lanes: zero, undef, undef, a0, zero, undef, undef, a4.
; The AVX2 checks expect this to become a single per-lane byte shift left.
1851 define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
1852 ; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
1854 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1855 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
1858 ; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
1860 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
1862 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
1863 ret <8 x i32> %shuffle
; The first shuffle operand is zeroinitializer and the second is %a, so mask
; index 0 selects a zero and indices 9..15 select %a[1]..%a[7].
; Result lanes: a1, undef, a3, zero, a5, a6, a7, zero.
; The AVX2 checks expect this to become a single per-lane byte shift right.
1866 define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
1867 ; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
1869 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1870 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
1871 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
1874 ; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
1876 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
1878 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
1879 ret <8 x i32> %shuffle
; Two-input shuffle with undef lanes.
; Result lanes: b0, a0, undef, a1, b4, a4, undef, undef.
; NOTE(review): the test name spells lane 4 as 'b' (index 11), but the mask
; below uses index 12 ('c'), and the expected blend takes ymm1[4]. The checks
; agree with the mask, so only the name is misleading.
1882 define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
1883 ; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
1885 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[0,0],ymm1[4,4],ymm0[4,4]
1886 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[2,1],ymm1[4,6],ymm0[6,5]
1889 ; AVX2-LABEL: shuffle_v8i32_80u1b4uu:
1891 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,1,4,4,6,5]
1892 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5,6,7]
1894 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
1895 ret <8 x i32> %shuffle
; Splat of a scalar float into all eight lanes: %1 is placed in lane 0 of a
; <4 x float> and an all-zero shuffle mask broadcasts that lane to 8 lanes.
; Both run configurations are expected to emit one vbroadcastss from (%rdi).
; NOTE(review): the definition of %1 is not visible in this excerpt;
; presumably it is a float load from %p -- confirm against the full file.
1898 define <8 x float> @splat_mem_v8f32_2(float* %p) {
1899 ; ALL-LABEL: splat_mem_v8f32_2:
1901 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0
1904 %2 = insertelement <4 x float> undef, float %1, i32 0
1905 %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
; Splat of element 0 of the <4 x float> argument %r into all eight lanes of
; an <8 x float> (all-zero shuffle mask). The AVX1 checks expect an in-lane
; splat followed by a 128-bit insert; the AVX2 checks expect a
; register-source vbroadcastss.
1909 define <8 x float> @splat_v8f32(<4 x float> %r) {
1910 ; AVX1-LABEL: splat_v8f32:
1912 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1913 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1916 ; AVX2-LABEL: splat_v8f32:
1918 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
1920 %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
1925 ; Shuffle to logical bit shifts
; The second shuffle operand is zeroinitializer, so mask index 8 selects a
; zero. Result lanes: zero, a0, undef, a2, zero, undef, zero, a6.
; Every defined odd lane holds the even lane of %a below it and every defined
; even lane is zero, so the AVX2 checks expect a 64-bit logical shift left by
; 32 bits.
1928 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
1929 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
1931 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1932 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
1933 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
1936 ; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
1938 ; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
1940 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
1941 ret <8 x i32> %shuffle
; The second shuffle operand is zeroinitializer, so mask index 8 selects a
; zero. Result lanes: a1, undef, a3, zero, a5, zero, undef, undef.
; Every defined even lane holds the odd lane of %a above it and every defined
; odd lane is zero, so the AVX2 checks expect a 64-bit logical shift right by
; 32 bits.
1944 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
1945 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
1947 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1948 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
1949 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1952 ; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
1954 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
1956 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
1957 ret <8 x i32> %shuffle
; Result lanes: b3, a0, a1, a2, b7, a4, a5, a6. Within each 128-bit lane this
; is the last element of %b followed by the first three elements of %a, which
; the AVX2 checks expect as a single two-source byte alignment (vpalignr).
1960 define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
1961 ; AVX1-LABEL: shuffle_v8i32_B012F456:
1963 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
1964 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
1967 ; AVX2-LABEL: shuffle_v8i32_B012F456:
1969 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
1971 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
1972 ret <8 x i32> %shuffle
; Result lanes: a1, a2, a3, b0, a5, a6, a7, b4. Within each 128-bit lane this
; is the last three elements of %a followed by the first element of %b, which
; the AVX2 checks expect as a single two-source byte alignment (vpalignr).
1975 define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
1976 ; AVX1-LABEL: shuffle_v8i32_1238567C:
1978 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
1979 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
1982 ; AVX2-LABEL: shuffle_v8i32_1238567C:
1984 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
1986 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
1987 ret <8 x i32> %shuffle
; Operand-swapped counterpart of the 1238567C pattern.
; Result lanes: b1, b2, b3, a0, b5, b6, b7, a4. The AVX2 checks expect the
; same byte alignment with the two source registers exchanged.
1990 define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
1991 ; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
1993 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
1994 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
1997 ; AVX2-LABEL: shuffle_v8i32_9AB0DEF4:
1999 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
2001 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
2002 ret <8 x i32> %shuffle
; Operand-swapped counterpart of the B012F456 pattern.
; Result lanes: a3, b0, b1, b2, a7, b4, b5, b6. The AVX2 checks expect the
; same byte alignment with the two source registers exchanged.
2005 define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
2006 ; AVX1-LABEL: shuffle_v8i32_389A7CDE:
2008 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
2009 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
2012 ; AVX2-LABEL: shuffle_v8i32_389A7CDE:
2014 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
2016 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
2017 ret <8 x i32> %shuffle
; Single-input rotation within each 128-bit lane: result lanes are
; a3, a0, a1, a2, a7, a4, a5, a6; %b is never selected by the mask.
; Both configurations are expected to use one in-lane permute.
2020 define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
2021 ; AVX1-LABEL: shuffle_v8i32_30127456:
2023 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2026 ; AVX2-LABEL: shuffle_v8i32_30127456:
2028 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2030 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
2031 ret <8 x i32> %shuffle
; Single-input rotation within each 128-bit lane, in the opposite direction
; to the 30127456 pattern: result lanes are a1, a2, a3, a0, a5, a6, a7, a4;
; %b is never selected by the mask.
2034 define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
2035 ; AVX1-LABEL: shuffle_v8i32_12305674:
2037 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2040 ; AVX2-LABEL: shuffle_v8i32_12305674:
2042 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2044 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
2045 ret <8 x i32> %shuffle
; Concatenation of two loaded <2 x float> values into the low four lanes of
; an <8 x float>, written as three shuffles: each input is first widened to
; eight lanes, then lanes 0,1 of the first and lanes 0,1 of the second
; (mask indices 8,9) are combined. The upper four result lanes are undef.
; Both run configurations are expected to emit a 64-bit load followed by a
; high-half load from (%rsi).
2048 define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2049 ; ALL-LABEL: concat_v2f32_1:
2050 ; ALL: # BB#0: # %entry
2051 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2052 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
2055 %tmp74 = load <2 x float>* %tmp65, align 8
2056 %tmp72 = load <2 x float>* %tmp64, align 8
2057 %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2058 %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2059 %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
2060 ret <8 x float> %tmp76
; Same concatenation as concat_v2f32_1, expressed as one shuffle that takes
; both <2 x float> inputs directly and widens to eight lanes (mask 0,1,2,3
; followed by undef). The expected code is identical: a 64-bit load followed
; by a high-half load from (%rsi).
2063 define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2064 ; ALL-LABEL: concat_v2f32_2:
2065 ; ALL: # BB#0: # %entry
2066 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2067 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
2070 %tmp74 = load <2 x float>* %tmp65, align 8
2071 %tmp72 = load <2 x float>* %tmp64, align 8
2072 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2073 ret <8 x float> %tmp76
; Same concatenation as concat_v2f32_1, expressed in two steps: the two
; <2 x float> inputs are first joined into a <4 x float>, which is then
; widened to <8 x float> with undef upper lanes. The expected code is
; identical: a 64-bit load followed by a high-half load from (%rsi).
2076 define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2077 ; ALL-LABEL: concat_v2f32_3:
2078 ; ALL: # BB#0: # %entry
2079 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2080 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
2083 %tmp74 = load <2 x float>* %tmp65, align 8
2084 %tmp72 = load <2 x float>* %tmp64, align 8
2085 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2086 %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2087 ret <8 x float> %res