1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
4 target triple = "x86_64-unknown-unknown"
; Splat test: every result lane is element 0 of %a (%b is unused).
; AVX1 expects an xmm permute plus vinsertf128; AVX2 expects a vbroadcastss.
6 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
7 ; AVX1-LABEL: shuffle_v8f32_00000000:
9 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
10 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
13 ; AVX2-LABEL: shuffle_v8f32_00000000:
15 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
17 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
18 ret <8 x float> %shuffle
; Single-input shuffle of %a: lane 6 takes element 1, every other lane element 0.
; AVX1 builds the two 128-bit halves separately; AVX2 uses a constant-index vpermps.
21 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
22 ; AVX1-LABEL: shuffle_v8f32_00000010:
24 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
25 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
26 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
29 ; AVX2-LABEL: shuffle_v8f32_00000010:
31 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
32 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
34 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
35 ret <8 x float> %shuffle
; Single-input shuffle of %a: lane 5 takes element 2, every other lane element 0.
; AVX1 builds the two 128-bit halves separately; AVX2 uses a constant-index vpermps.
38 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
39 ; AVX1-LABEL: shuffle_v8f32_00000200:
41 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
42 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
43 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
46 ; AVX2-LABEL: shuffle_v8f32_00000200:
48 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
49 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
51 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
52 ret <8 x float> %shuffle
; Single-input shuffle of %a: lane 4 takes element 3, every other lane element 0.
; AVX1 builds the two 128-bit halves separately; AVX2 uses a constant-index vpermps.
55 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
56 ; AVX1-LABEL: shuffle_v8f32_00003000:
58 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
59 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
60 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
63 ; AVX2-LABEL: shuffle_v8f32_00003000:
65 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
66 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
68 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
69 ret <8 x float> %shuffle
; Single-input shuffle of %a: lane 3 takes element 4 (from the upper 128-bit half),
; every other lane element 0. AVX1 expects a half swap, two in-lane permutes and a
; blend; AVX2 expects a constant-index vpermps.
72 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
73 ; AVX1-LABEL: shuffle_v8f32_00040000:
75 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
76 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
77 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
78 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
81 ; AVX2-LABEL: shuffle_v8f32_00040000:
83 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
84 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
86 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
87 ret <8 x float> %shuffle
; Single-input shuffle of %a: lane 2 takes element 5 (from the upper 128-bit half),
; every other lane element 0. AVX1 expects half swap + blend + in-lane permute;
; AVX2 expects a constant-index vpermps.
90 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
91 ; AVX1-LABEL: shuffle_v8f32_00500000:
93 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
94 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
95 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
98 ; AVX2-LABEL: shuffle_v8f32_00500000:
100 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
101 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
103 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
104 ret <8 x float> %shuffle
; Single-input shuffle of %a: lane 1 takes element 6 (from the upper 128-bit half),
; every other lane element 0. AVX1 expects half swap + vblendpd + in-lane permute;
; AVX2 expects a constant-index vpermps.
107 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
108 ; AVX1-LABEL: shuffle_v8f32_06000000:
110 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
111 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
112 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
115 ; AVX2-LABEL: shuffle_v8f32_06000000:
117 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
118 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
120 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
121 ret <8 x float> %shuffle
; Single-input shuffle of %a: lane 0 takes element 7, every other lane element 0.
; On AVX2 the vpermps index vector is expected to be built from an immediate
; (movl $7 / vmovd) rather than loaded as a constant.
124 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
125 ; AVX1-LABEL: shuffle_v8f32_70000000:
127 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
128 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
129 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
132 ; AVX2-LABEL: shuffle_v8f32_70000000:
134 ; AVX2-NEXT: movl $7, %eax
135 ; AVX2-NEXT: vmovd %eax, %xmm1
136 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
138 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
139 ret <8 x float> %shuffle
; Repeats the low element pair of each 128-bit half of %a (0,1 and 4,5).
; Both run configurations expect a vmovddup.
142 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
143 ; ALL-LABEL: shuffle_v8f32_01014545:
145 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
147 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
148 ret <8 x float> %shuffle
; Duplicates each of elements 0..3 of %a into two adjacent result lanes.
; AVX1 expects xmm unpacks joined by vinsertf128; AVX2 a constant-index vpermps.
151 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
152 ; AVX1-LABEL: shuffle_v8f32_00112233:
154 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
155 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
156 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
159 ; AVX2-LABEL: shuffle_v8f32_00112233:
161 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
162 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
164 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
165 ret <8 x float> %shuffle
; Low half is a splat of %a[0], high half a splat of %a[1].
; AVX1 expects two xmm permutes joined by vinsertf128; AVX2 a constant-index vpermps.
168 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
169 ; AVX1-LABEL: shuffle_v8f32_00001111:
171 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
172 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
173 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
176 ; AVX2-LABEL: shuffle_v8f32_00001111:
178 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
179 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
181 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
182 ret <8 x float> %shuffle
; Position-preserving blend: even lanes come from %b, odd lanes from %a.
; Both run configurations expect a vblendps.
185 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
186 ; ALL-LABEL: shuffle_v8f32_81a3c5e7:
188 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
190 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
191 ret <8 x float> %shuffle
; Alternates %a[0] (even lanes) with %b[0] (odd lanes) across the whole vector.
; Both configurations splat each input and then blend; AVX2 uses broadcasts for
; the splats.
194 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
195 ; AVX1-LABEL: shuffle_v8f32_08080808:
197 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
198 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
199 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
200 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
201 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
204 ; AVX2-LABEL: shuffle_v8f32_08080808:
206 ; AVX2-NEXT: vbroadcastss %xmm1, %ymm1
207 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
208 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
210 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
211 ret <8 x float> %shuffle
; Within each 128-bit half, alternates the lowest element of %a with the lowest
; element of %b (0/8 in the low half, 4/12 in the high half).
; Both run configurations expect two vshufps.
214 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
215 ; ALL-LABEL: shuffle_v8f32_08084c4c:
217 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
218 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
220 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
221 ret <8 x float> %shuffle
; Per 128-bit half: the lowest element of %b twice, then the upper two elements
; of %a in place. Both run configurations expect a vshufps.
224 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
225 ; ALL-LABEL: shuffle_v8f32_8823cc67:
227 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
229 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
230 ret <8 x float> %shuffle
; Per 128-bit half: the low pair of %b swapped, then the high pair of %a swapped.
; Both run configurations expect a vshufps.
233 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
234 ; ALL-LABEL: shuffle_v8f32_9832dc76:
236 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
238 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
239 ret <8 x float> %shuffle
; Per 128-bit half: the low pair of %b swapped, then the low pair of %a swapped.
; Both run configurations expect a vshufps.
242 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
243 ; ALL-LABEL: shuffle_v8f32_9810dc54:
245 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
247 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
248 ret <8 x float> %shuffle
; Interleaves the low pairs of %a and %b within each 128-bit half.
; Both run configurations expect a vunpcklps.
251 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
252 ; ALL-LABEL: shuffle_v8f32_08194c5d:
254 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
256 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
257 ret <8 x float> %shuffle
; Interleaves the high pairs of %a and %b within each 128-bit half.
; Both run configurations expect a vunpckhps.
260 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
261 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
263 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
265 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
266 ret <8 x float> %shuffle
; Full-width interleave of elements 0..3 of %a with elements 0..3 of %b, so the
; result's high half is fed from the inputs' low halves.
; AVX1 expects xmm unpacks joined by vinsertf128; AVX2 two vpermps plus a blend.
269 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
270 ; AVX1-LABEL: shuffle_v8f32_08192a3b:
272 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
273 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
274 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
277 ; AVX2-LABEL: shuffle_v8f32_08192a3b:
279 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
280 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
281 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
282 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
283 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
285 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
286 ret <8 x float> %shuffle
; Lanes 0 and 4 take %a[0] and %a[1]; lanes 1-3 take %b[0],%b[1],%b[1] and lanes
; 5-7 take %b[2],%b[3],%b[3]. Only the low halves of both inputs are read.
289 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
290 ; AVX1-LABEL: shuffle_v8f32_08991abb:
292 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
293 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
294 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
295 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
296 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
299 ; AVX2-LABEL: shuffle_v8f32_08991abb:
301 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
302 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
303 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
304 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
305 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
307 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
308 ret <8 x float> %shuffle
; Even lanes take %a[0..3]; odd lanes keep %b's own odd elements in place.
; Expected lowering permutes only %a and then blends with %b unchanged.
311 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
312 ; AVX1-LABEL: shuffle_v8f32_091b2d3f:
314 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
315 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
316 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
317 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
320 ; AVX2-LABEL: shuffle_v8f32_091b2d3f:
322 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
323 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
324 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
326 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
327 ret <8 x float> %shuffle
; Lanes 0 and 4 take %a[0] and %a[1]; every other lane keeps %b in place.
; Expected lowering moves %a[1] into the high half and then blends with %b.
330 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
331 ; AVX1-LABEL: shuffle_v8f32_09ab1def:
333 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
334 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
335 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
338 ; AVX2-LABEL: shuffle_v8f32_09ab1def:
340 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
341 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
342 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
344 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
345 ret <8 x float> %shuffle
; In-lane shuffle of %a applying the pattern [0,0,0,1] to both 128-bit halves.
; Both run configurations expect a vpermilps.
348 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
349 ; ALL-LABEL: shuffle_v8f32_00014445:
351 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
353 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
354 ret <8 x float> %shuffle
; In-lane shuffle of %a applying the pattern [0,0,2,0] to both 128-bit halves.
; Both run configurations expect a vpermilps.
357 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
358 ; ALL-LABEL: shuffle_v8f32_00204464:
360 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
362 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
363 ret <8 x float> %shuffle
; In-lane shuffle of %a applying the pattern [0,3,0,0] to both 128-bit halves.
; Both run configurations expect a vpermilps.
366 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
367 ; ALL-LABEL: shuffle_v8f32_03004744:
369 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
371 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
372 ret <8 x float> %shuffle
; In-lane shuffle of %a applying the pattern [1,0,0,0] to both 128-bit halves.
; Both run configurations expect a vpermilps.
375 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
376 ; ALL-LABEL: shuffle_v8f32_10005444:
378 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
380 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
381 ret <8 x float> %shuffle
; In-lane shuffle of %a applying the pattern [2,2,0,0] to both 128-bit halves.
; Both run configurations expect a vpermilps.
384 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
385 ; ALL-LABEL: shuffle_v8f32_22006644:
387 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
389 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
390 ret <8 x float> %shuffle
; In-lane shuffle of %a applying the pattern [3,3,3,0] to both 128-bit halves.
; Both run configurations expect a vpermilps.
393 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
394 ; ALL-LABEL: shuffle_v8f32_33307774:
396 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
398 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
399 ret <8 x float> %shuffle
; Reverses the four elements inside each 128-bit half of %a.
; Both run configurations expect a vpermilps.
402 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
403 ; ALL-LABEL: shuffle_v8f32_32107654:
405 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
407 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
408 ret <8 x float> %shuffle
; In-lane shuffle of %a applying the pattern [0,0,2,3] to both 128-bit halves.
; Both run configurations expect a vpermilps.
411 define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
412 ; ALL-LABEL: shuffle_v8f32_00234467:
414 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
416 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
417 ret <8 x float> %shuffle
; Duplicates each even element of %a into the following odd lane.
; Both run configurations expect a vmovsldup.
420 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
421 ; ALL-LABEL: shuffle_v8f32_00224466:
423 ; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
425 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
426 ret <8 x float> %shuffle
; Swaps each adjacent pair of elements of %a.
; Both run configurations expect a vpermilps.
429 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
430 ; ALL-LABEL: shuffle_v8f32_10325476:
432 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
434 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
435 ret <8 x float> %shuffle
; Duplicates each odd element of %a into the preceding even lane.
; Both run configurations expect a vmovshdup.
438 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
439 ; ALL-LABEL: shuffle_v8f32_11335577:
441 ; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
443 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
444 ret <8 x float> %shuffle
; In-lane shuffle of %a applying the pattern [1,0,2,3] to both 128-bit halves.
; Both run configurations expect a vpermilps.
447 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
448 ; ALL-LABEL: shuffle_v8f32_10235467:
450 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
452 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
453 ret <8 x float> %shuffle
; In-lane shuffle of %a applying the pattern [1,0,2,2] to both 128-bit halves.
; Both run configurations expect a vpermilps.
456 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
457 ; ALL-LABEL: shuffle_v8f32_10225466:
459 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
461 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
462 ret <8 x float> %shuffle
; In-lane shuffle of %a with a different pattern per 128-bit half:
; [0,0,0,1] in the low half, [1,0,0,0] in the high half.
; Both run configurations expect a vpermilps.
465 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
466 ; ALL-LABEL: shuffle_v8f32_00015444:
468 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
470 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
471 ret <8 x float> %shuffle
; In-lane shuffle of %a with a different pattern per 128-bit half:
; [0,0,2,0] in the low half, [0,2,0,0] in the high half.
; Both run configurations expect a vpermilps.
474 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
475 ; ALL-LABEL: shuffle_v8f32_00204644:
477 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
479 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
480 ret <8 x float> %shuffle
; In-lane shuffle of %a with a different pattern per 128-bit half:
; [0,3,0,0] in the low half, [0,0,3,0] in the high half.
; Both run configurations expect a vpermilps.
483 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
484 ; ALL-LABEL: shuffle_v8f32_03004474:
486 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
488 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
489 ret <8 x float> %shuffle
; In-lane shuffle of %a with a different pattern per 128-bit half:
; [1,0,0,0] in the low half, a splat of the half's first element in the high half.
; Both run configurations expect a vpermilps.
492 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
493 ; ALL-LABEL: shuffle_v8f32_10004444:
495 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
497 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
498 ret <8 x float> %shuffle
; In-lane shuffle of %a with a different pattern per 128-bit half:
; [2,2,0,0] in the low half, [2,0,0,2] in the high half.
; Both run configurations expect a vpermilps.
501 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
502 ; ALL-LABEL: shuffle_v8f32_22006446:
504 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
506 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
507 ret <8 x float> %shuffle
; In-lane shuffle of %a with a different pattern per 128-bit half:
; [3,3,3,0] in the low half, [3,0,3,0] in the high half.
; Both run configurations expect a vpermilps.
510 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
511 ; ALL-LABEL: shuffle_v8f32_33307474:
513 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
515 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
516 ret <8 x float> %shuffle
; Reverses the low 128-bit half of %a and leaves the high half in place.
; Both run configurations expect a vpermilps.
519 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
520 ; ALL-LABEL: shuffle_v8f32_32104567:
522 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
524 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
525 ret <8 x float> %shuffle
; In-lane shuffle of %a with a different pattern per 128-bit half:
; [0,0,2,3] in the low half, [2,3,0,0] in the high half.
; Both run configurations expect a vpermilps.
528 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
529 ; ALL-LABEL: shuffle_v8f32_00236744:
531 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
533 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
534 ret <8 x float> %shuffle
; In-lane shuffle of %a with a different pattern per 128-bit half:
; [0,0,2,2] in the low half, [2,2,0,0] in the high half.
; Both run configurations expect a vpermilps.
537 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
538 ; ALL-LABEL: shuffle_v8f32_00226644:
540 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
542 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
543 ret <8 x float> %shuffle
; Swaps adjacent pairs in the low 128-bit half of %a; the high half stays in place.
; Both run configurations expect a vpermilps.
546 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
547 ; ALL-LABEL: shuffle_v8f32_10324567:
549 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
551 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
552 ret <8 x float> %shuffle
; Duplicates the odd elements in the low 128-bit half of %a; the high half stays
; in place. Both run configurations expect a vpermilps.
555 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
556 ; ALL-LABEL: shuffle_v8f32_11334567:
558 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
560 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
561 ret <8 x float> %shuffle
; Leaves the low 128-bit half of %a in place and swaps the first two elements of
; the high half. Both run configurations expect a vpermilps.
564 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
565 ; ALL-LABEL: shuffle_v8f32_01235467:
567 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
569 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
570 ret <8 x float> %shuffle
; Leaves the low 128-bit half of %a in place and applies [1,0,2,2] to the high
; half. Both run configurations expect a vpermilps.
573 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
574 ; ALL-LABEL: shuffle_v8f32_01235466:
576 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
578 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
579 ret <8 x float> %shuffle
; In-lane shuffle of %a with undef mask entries in lanes 3 and 5.
; Both run configurations expect a vpermilps that keeps those lanes undefined.
582 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
583 ; ALL-LABEL: shuffle_v8f32_002u6u44:
585 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
587 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
588 ret <8 x float> %shuffle
; In-lane shuffle of %a with undef mask entries in lanes 2, 3, 6 and 7.
; Both run configurations expect a vpermilps that keeps those lanes undefined.
591 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
592 ; ALL-LABEL: shuffle_v8f32_00uu66uu:
594 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
596 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
597 ret <8 x float> %shuffle
; In-lane shuffle of %a with undef mask entries in lanes 6 and 7.
; Both run configurations expect a vpermilps that keeps those lanes undefined.
600 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
601 ; ALL-LABEL: shuffle_v8f32_103245uu:
603 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
605 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
606 ret <8 x float> %shuffle
; In-lane shuffle of %a with undef mask entries in lanes 4 and 5.
; Both run configurations expect a vpermilps that keeps those lanes undefined.
609 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
610 ; ALL-LABEL: shuffle_v8f32_1133uu67:
612 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
614 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
615 ret <8 x float> %shuffle
; In-lane shuffle of %a with undef mask entries in lanes 1, 2, 6 and 7.
; Both run configurations expect a vpermilps that keeps those lanes undefined.
618 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
619 ; ALL-LABEL: shuffle_v8f32_0uu354uu:
621 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
623 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
624 ret <8 x float> %shuffle
; In-lane shuffle of %a where only lanes 3, 6 and 7 are defined (3, 6, 6).
; Both run configurations expect a vpermilps that keeps the other lanes undefined.
627 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
628 ; ALL-LABEL: shuffle_v8f32_uuu3uu66:
630 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
632 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
633 ret <8 x float> %shuffle
; Irregular two-input mask: lanes 0,3,4,5,6 take %b elements 4,0,4,5,2 and lanes
; 1,2,7 take %a elements 3,4,0, so both inputs are read across their halves.
; AVX2 expects one vpermps per input followed by a blend.
636 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
637 ; AVX1-LABEL: shuffle_v8f32_c348cda0:
639 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
640 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
641 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
642 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
643 ; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
644 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
647 ; AVX2-LABEL: shuffle_v8f32_c348cda0:
649 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0>
650 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
651 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
652 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
653 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
655 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
656 ret <8 x float> %shuffle
; Irregular two-input mask: lanes 0 and 7 take %b elements 7 and 2, lanes 1-6
; take %a elements 5,1,1,2,3,5.
; AVX2 expects one vpermps per input followed by a blend.
659 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
660 ; AVX1-LABEL: shuffle_v8f32_f511235a:
662 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
663 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
664 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
665 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
666 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
667 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
668 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
671 ; AVX2-LABEL: shuffle_v8f32_f511235a:
673 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2>
674 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
675 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
676 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
677 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
679 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
680 ret <8 x float> %shuffle
; Both result halves are the reversed low 128-bit half of %a.
; AVX1 expects an xmm permute plus vinsertf128; AVX2 a constant-index vpermps.
683 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
684 ; AVX1-LABEL: shuffle_v8f32_32103210:
686 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
687 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
690 ; AVX2-LABEL: shuffle_v8f32_32103210:
692 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
693 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
695 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
696 ret <8 x float> %shuffle
; Both result halves are the reversed high 128-bit half of %a.
; AVX1 expects extract + xmm permute + vinsertf128; AVX2 a constant-index vpermps.
699 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
700 ; AVX1-LABEL: shuffle_v8f32_76547654:
702 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
703 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
704 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
707 ; AVX2-LABEL: shuffle_v8f32_76547654:
709 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
710 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
712 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
713 ret <8 x float> %shuffle
; Full reversal of the eight elements of %a.
; AVX1 expects a half swap plus an in-lane reverse; AVX2 a constant-index vpermps.
716 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
717 ; AVX1-LABEL: shuffle_v8f32_76543210:
719 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
720 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
723 ; AVX2-LABEL: shuffle_v8f32_76543210:
725 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
726 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
728 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
729 ret <8 x float> %shuffle
; Low result half = low half of %a reversed; high result half = low half of %b
; reversed. Both run configurations expect vinsertf128 then an in-lane reverse.
732 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
733 ; ALL-LABEL: shuffle_v8f32_3210ba98:
735 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
736 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
738 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
739 ret <8 x float> %shuffle
; Low result half = low half of %a reversed; high result half = high half of %b
; reversed. Both run configurations expect a vblendpd then an in-lane reverse.
742 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
743 ; ALL-LABEL: shuffle_v8f32_3210fedc:
745 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
746 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
748 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
749 ret <8 x float> %shuffle
; Low result half = high half of %a reversed; high result half = high half of %b
; reversed. Both run configurations expect a vperm2f128 then an in-lane reverse.
752 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
753 ; ALL-LABEL: shuffle_v8f32_7654fedc:
755 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
756 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
758 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
759 ret <8 x float> %shuffle
; Low result half = high half of %b reversed; high result half = high half of %a
; reversed. Both run configurations expect a vperm2f128 then an in-lane reverse.
762 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
763 ; ALL-LABEL: shuffle_v8f32_fedc7654:
765 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
766 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
768 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
769 ret <8 x float> %shuffle
; Regression test named after a bug report (PR21138): gathers the odd elements of
; %truc into the low result half and the odd elements of %tchose into the high half.
772 define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
773 ; AVX1-LABEL: PR21138:
775 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
776 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
777 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
778 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
779 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
780 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
783 ; AVX2-LABEL: PR21138:
785 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,u,u,1,3,5,7>
786 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
787 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <1,3,5,7,u,u,u,u>
788 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
789 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
791 %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
792 ret <8 x float> %shuffle
; Low result half = low half of %b reversed; high result half = high half of %a
; reversed. Both run configurations expect a vblendpd then an in-lane reverse.
795 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
796 ; ALL-LABEL: shuffle_v8f32_ba987654:
798 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
799 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
801 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
802 ret <8 x float> %shuffle
; Low result half = low half of %b reversed (11,10,9,8); high result half = low
; half of %a reversed (3,2,1,0), as the function name spells out. The mask used
; to repeat the one from shuffle_v8f32_ba987654, making this a duplicate test.
; NOTE(review): the expected instructions mirror shuffle_v8f32_3210ba98 with the
; two inputs swapped; regenerate the check lines from llc output to confirm.
805 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
806 ; ALL-LABEL: shuffle_v8f32_ba983210:
808 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
809 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
811 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
812 ret <8 x float> %shuffle
; Low-pair interleave per 128-bit half with %b first and %a second; lanes 2 and 6
; are undef in the mask. Both run configurations expect a vunpcklps with the
; operands swapped.
815 define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
816 ; ALL-LABEL: shuffle_v8f32_80u1c4u5:
818 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
820 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5>
821 ret <8 x float> %shuffle
; High-pair interleave per 128-bit half with %b first and %a second; lane 2 is
; undef in the mask. Both run configurations expect a vunpckhps with the
; operands swapped.
824 define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
825 ; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
827 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
829 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>
830 ret <8 x float> %shuffle
; Integer splat of %a element 0 to all eight lanes; %b is unused.
; The AVX1 run expects an in-lane splat plus a 128-bit insert; the AVX2 run
; expects a single broadcast.
833 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
834 ; AVX1-LABEL: shuffle_v8i32_00000000:
836 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
837 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
840 ; AVX2-LABEL: shuffle_v8i32_00000000:
842 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
844 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
845 ret <8 x i32> %shuffle
; Splat of %a element 0, except result element 6 which takes element 1.
; All sources live in the low 128 bits of %a. The AVX1 run builds each half
; separately; the AVX2 run uses a constant-index vpermd.
848 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
849 ; AVX1-LABEL: shuffle_v8i32_00000010:
851 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
852 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
853 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
856 ; AVX2-LABEL: shuffle_v8i32_00000010:
858 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
859 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
861 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
862 ret <8 x i32> %shuffle
; Splat of %a element 0, except result element 5 which takes element 2.
; The AVX1 run builds each half separately; the AVX2 run uses a
; constant-index vpermd.
865 define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
866 ; AVX1-LABEL: shuffle_v8i32_00000200:
868 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
869 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
870 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
873 ; AVX2-LABEL: shuffle_v8i32_00000200:
875 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
876 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
878 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
879 ret <8 x i32> %shuffle
; Splat of %a element 0, except result element 4 which takes element 3.
; The AVX1 run builds each half separately; the AVX2 run uses a
; constant-index vpermd.
882 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
883 ; AVX1-LABEL: shuffle_v8i32_00003000:
885 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
886 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
887 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
890 ; AVX2-LABEL: shuffle_v8i32_00003000:
892 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
893 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
895 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
896 ret <8 x i32> %shuffle
; Splat of %a element 0, except result element 3 which takes element 4,
; i.e. a value from the high 128-bit half lands in the low half.
; The AVX1 run expects a half swap, two in-lane permutes and a blend; the
; AVX2 run expects a constant-index vpermd.
899 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
900 ; AVX1-LABEL: shuffle_v8i32_00040000:
902 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
903 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
904 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
905 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
908 ; AVX2-LABEL: shuffle_v8i32_00040000:
910 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
911 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
913 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
914 ret <8 x i32> %shuffle
; Splat of %a element 0, except result element 2 which takes element 5
; (a cross-half move). The AVX1 run expects half swap + blend + in-lane
; permute; the AVX2 run expects a constant-index vpermd.
917 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
918 ; AVX1-LABEL: shuffle_v8i32_00500000:
920 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
921 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
922 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
925 ; AVX2-LABEL: shuffle_v8i32_00500000:
927 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
928 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
930 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
931 ret <8 x i32> %shuffle
; Splat of %a element 0, except result element 1 which takes element 6
; (a cross-half move). The AVX1 run expects half swap + 64-bit blend +
; in-lane permute; the AVX2 run expects a constant-index vpermd.
934 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
935 ; AVX1-LABEL: shuffle_v8i32_06000000:
937 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
938 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
939 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
942 ; AVX2-LABEL: shuffle_v8i32_06000000:
944 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
945 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
947 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
948 ret <8 x i32> %shuffle
; Splat of %a element 0, except result element 0 which takes element 7.
; On the AVX2 run only the lowest index is non-zero, so the index vector is
; expected to be built with movl/vmovd instead of a constant-pool load.
951 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
952 ; AVX1-LABEL: shuffle_v8i32_70000000:
954 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
955 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
956 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
959 ; AVX2-LABEL: shuffle_v8i32_70000000:
961 ; AVX2-NEXT: movl $7, %eax
962 ; AVX2-NEXT: vmovd %eax, %xmm1
963 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
965 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
966 ret <8 x i32> %shuffle
; Repeats the low 64 bits (two i32 elements) of each 128-bit lane of %a.
; The AVX1 run expects a 64-bit duplicate (vmovddup); the AVX2 run expects
; the integer in-lane shuffle vpshufd.
969 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
970 ; AVX1-LABEL: shuffle_v8i32_01014545:
972 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
975 ; AVX2-LABEL: shuffle_v8i32_01014545:
977 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
979 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
980 ret <8 x i32> %shuffle
; Duplicates each of the low four elements of %a in order, spreading the low
; 128 bits across the whole result. The AVX1 run builds each half separately;
; the AVX2 run uses a constant-index vpermd.
983 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
984 ; AVX1-LABEL: shuffle_v8i32_00112233:
986 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
987 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
988 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
991 ; AVX2-LABEL: shuffle_v8i32_00112233:
993 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
994 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
996 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
997 ret <8 x i32> %shuffle
; Low half is a splat of %a element 0, high half is a splat of %a element 1.
; The AVX1 run builds each half separately; the AVX2 run uses a
; constant-index vpermd.
1000 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
1001 ; AVX1-LABEL: shuffle_v8i32_00001111:
1003 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
1004 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1005 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1008 ; AVX2-LABEL: shuffle_v8i32_00001111:
1010 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
1011 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1013 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
1014 ret <8 x i32> %shuffle
; Pure blend: even result elements come from %b, odd ones from %a, and every
; element stays in its own position. Expected to be a single blend
; instruction (vblendps on the AVX1 run, vpblendd on the AVX2 run).
1017 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
1018 ; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
1020 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1023 ; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
1025 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1027 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
1028 ret <8 x i32> %shuffle
; Alternates %a element 0 and %b element 0 across the whole result.
; The AVX1 run widens each input with permute + 128-bit insert before
; blending; the AVX2 run broadcasts each input and blends.
1031 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
1032 ; AVX1-LABEL: shuffle_v8i32_08080808:
1034 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
1035 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
1036 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1037 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1038 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1041 ; AVX2-LABEL: shuffle_v8i32_08080808:
1043 ; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
1044 ; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
1045 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1047 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
1048 ret <8 x i32> %shuffle
; Within each 128-bit lane, alternates that lane's first element of %a and
; of %b. The AVX1 run expects two vshufps; the AVX2 run expects two in-lane
; vpshufd permutes followed by a blend.
1051 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
1052 ; AVX1-LABEL: shuffle_v8i32_08084c4c:
1054 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
1055 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1058 ; AVX2-LABEL: shuffle_v8i32_08084c4c:
1060 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
1061 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1062 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1064 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
1065 ret <8 x i32> %shuffle
; Per 128-bit lane: the lane's first element of %b twice, then elements 2 and
; 3 of %a in place. The AVX1 run expects one two-input vshufps; the AVX2 run
; expects an in-lane vpshufd of %b followed by a blend.
1068 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
1069 ; AVX1-LABEL: shuffle_v8i32_8823cc67:
1071 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
1074 ; AVX2-LABEL: shuffle_v8i32_8823cc67:
1076 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
1077 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1079 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
1080 ret <8 x i32> %shuffle
; Per 128-bit lane: the swapped low pair of %b, then the swapped high pair of
; %a. The AVX1 run expects one two-input vshufps; the AVX2 run expects a
; blend followed by an in-lane pair swap.
1083 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
1084 ; AVX1-LABEL: shuffle_v8i32_9832dc76:
1086 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
1089 ; AVX2-LABEL: shuffle_v8i32_9832dc76:
1091 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1092 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1094 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
1095 ret <8 x i32> %shuffle
; Per 128-bit lane: the swapped low pair of %b, then the swapped low pair of
; %a. The AVX1 run expects one two-input vshufps; the AVX2 run expects each
; input permuted in-lane and then blended.
1098 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
1099 ; AVX1-LABEL: shuffle_v8i32_9810dc54:
1101 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
1104 ; AVX2-LABEL: shuffle_v8i32_9810dc54:
1106 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
1107 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
1108 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1110 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
1111 ret <8 x i32> %shuffle
; Per-lane interleave of the low two elements of %a and %b (%a first).
; Expected to be a single unpack-low: vunpcklps on the AVX1 run, vpunpckldq
; on the AVX2 run.
1114 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
1115 ; AVX1-LABEL: shuffle_v8i32_08194c5d:
1117 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1120 ; AVX2-LABEL: shuffle_v8i32_08194c5d:
1122 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1124 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
1125 ret <8 x i32> %shuffle
; Per-lane interleave of the high two elements of %a and %b (%a first).
; Expected to be a single unpack-high: vunpckhps on the AVX1 run, vpunpckhdq
; on the AVX2 run.
1128 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
1129 ; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
1131 ; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1134 ; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
1136 ; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1138 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1139 ret <8 x i32> %shuffle
; Full interleave of the low four elements of %a and %b (%a first), which
; spreads the low 128 bits of both inputs across the whole result.
; The AVX1 run expects two 128-bit unpacks joined by an insert; the AVX2 run
; expects a vpermd of %b, a zero-extension of %a and a blend.
1142 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
1143 ; AVX1-LABEL: shuffle_v8i32_08192a3b:
1145 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1146 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1147 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1150 ; AVX2-LABEL: shuffle_v8i32_08192a3b:
1152 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
1153 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1154 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1155 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1157 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1158 ret <8 x i32> %shuffle
; Result elements 0 and 4 take %a[0] and %a[1]; the remaining elements take
; %b[0],%b[1],%b[1] in the low half and %b[2],%b[3],%b[3] in the high half.
; All sources come from the low 128 bits of each input.
1161 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
1162 ; AVX1-LABEL: shuffle_v8i32_08991abb:
1164 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
1165 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
1166 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1167 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
1168 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1171 ; AVX2-LABEL: shuffle_v8i32_08991abb:
1173 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1174 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1175 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
1176 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1177 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1179 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
1180 ret <8 x i32> %shuffle
; Even result elements are %a[0..3] in order; odd result elements are the odd
; elements of %b left in place (9, 11, 13, 15). On the AVX2 run the %a part
; is expected to be formed by a dword-to-qword zero extension before blending.
1183 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
1184 ; AVX1-LABEL: shuffle_v8i32_091b2d3f:
1186 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
1187 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
1188 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1189 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1192 ; AVX2-LABEL: shuffle_v8i32_091b2d3f:
1194 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1195 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1197 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1198 ret <8 x i32> %shuffle
; %b stays in place except result elements 0 and 4, which take %a[0] and
; %a[1]. Both runs first move %a[1] into element 4 and then blend with %b.
1201 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
1202 ; AVX1-LABEL: shuffle_v8i32_09ab1def:
1204 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
1205 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1206 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1209 ; AVX2-LABEL: shuffle_v8i32_09ab1def:
1211 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1212 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1213 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1215 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1216 ret <8 x i32> %shuffle
; Single-input in-lane shuffle with the same pattern [0,0,0,1] in both
; 128-bit lanes. Expected to be one instruction: vpermilps on the AVX1 run,
; vpshufd on the AVX2 run.
1219 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
1220 ; AVX1-LABEL: shuffle_v8i32_00014445:
1222 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1225 ; AVX2-LABEL: shuffle_v8i32_00014445:
1227 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1229 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
1230 ret <8 x i32> %shuffle
; Single-input in-lane shuffle with the same pattern [0,0,2,0] in both
; 128-bit lanes. Expected to be one instruction: vpermilps on the AVX1 run,
; vpshufd on the AVX2 run.
1233 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
1234 ; AVX1-LABEL: shuffle_v8i32_00204464:
1236 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1239 ; AVX2-LABEL: shuffle_v8i32_00204464:
1241 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1243 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
1244 ret <8 x i32> %shuffle
; Single-input in-lane shuffle with the same pattern [0,3,0,0] in both
; 128-bit lanes. Expected to be one instruction: vpermilps on the AVX1 run,
; vpshufd on the AVX2 run.
1247 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
1248 ; AVX1-LABEL: shuffle_v8i32_03004744:
1250 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1253 ; AVX2-LABEL: shuffle_v8i32_03004744:
1255 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1257 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
1258 ret <8 x i32> %shuffle
; Single-input in-lane shuffle with the same pattern [1,0,0,0] in both
; 128-bit lanes. Expected to be one instruction: vpermilps on the AVX1 run,
; vpshufd on the AVX2 run.
1261 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
1262 ; AVX1-LABEL: shuffle_v8i32_10005444:
1264 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1267 ; AVX2-LABEL: shuffle_v8i32_10005444:
1269 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1271 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
1272 ret <8 x i32> %shuffle
; Single-input in-lane shuffle with the same pattern [2,2,0,0] in both
; 128-bit lanes. Expected to be one instruction: vpermilps on the AVX1 run,
; vpshufd on the AVX2 run.
1275 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
1276 ; AVX1-LABEL: shuffle_v8i32_22006644:
1278 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1281 ; AVX2-LABEL: shuffle_v8i32_22006644:
1283 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1285 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
1286 ret <8 x i32> %shuffle
; Single-input in-lane shuffle with the same pattern [3,3,3,0] in both
; 128-bit lanes. Expected to be one instruction: vpermilps on the AVX1 run,
; vpshufd on the AVX2 run.
1289 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
1290 ; AVX1-LABEL: shuffle_v8i32_33307774:
1292 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1295 ; AVX2-LABEL: shuffle_v8i32_33307774:
1297 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1299 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
1300 ret <8 x i32> %shuffle
; Reverses the four elements inside each 128-bit lane of %a (pattern
; [3,2,1,0] in both lanes). Expected to be one instruction: vpermilps on the
; AVX1 run, vpshufd on the AVX2 run.
1303 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
1304 ; AVX1-LABEL: shuffle_v8i32_32107654:
1306 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1309 ; AVX2-LABEL: shuffle_v8i32_32107654:
1311 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1313 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1314 ret <8 x i32> %shuffle
; Single-input in-lane shuffle with the same pattern [0,0,2,3] in both
; 128-bit lanes. Expected to be one instruction: vpermilps on the AVX1 run,
; vpshufd on the AVX2 run.
1317 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
1318 ; AVX1-LABEL: shuffle_v8i32_00234467:
1320 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1323 ; AVX2-LABEL: shuffle_v8i32_00234467:
1325 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1327 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
1328 ret <8 x i32> %shuffle
; Duplicates the even elements of %a into the following odd positions
; (pattern [0,0,2,2] in both lanes). The AVX1 run expects the dedicated
; vmovsldup; the AVX2 run expects vpshufd.
1331 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
1332 ; AVX1-LABEL: shuffle_v8i32_00224466:
1334 ; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1337 ; AVX2-LABEL: shuffle_v8i32_00224466:
1339 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1341 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1342 ret <8 x i32> %shuffle
; Swaps every adjacent pair of elements of %a (pattern [1,0,3,2] in both
; lanes). Expected to be one instruction: vpermilps on the AVX1 run, vpshufd
; on the AVX2 run.
1345 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
1346 ; AVX1-LABEL: shuffle_v8i32_10325476:
1348 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1351 ; AVX2-LABEL: shuffle_v8i32_10325476:
1353 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1355 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
1356 ret <8 x i32> %shuffle
; Duplicates the odd elements of %a into the preceding even positions
; (pattern [1,1,3,3] in both lanes). The AVX1 run expects the dedicated
; vmovshdup; the AVX2 run expects vpshufd.
1359 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
1360 ; AVX1-LABEL: shuffle_v8i32_11335577:
1362 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1365 ; AVX2-LABEL: shuffle_v8i32_11335577:
1367 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1369 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
1370 ret <8 x i32> %shuffle
; Single-input in-lane shuffle with the same pattern [1,0,2,3] in both
; 128-bit lanes. Expected to be one instruction: vpermilps on the AVX1 run,
; vpshufd on the AVX2 run.
1373 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
1374 ; AVX1-LABEL: shuffle_v8i32_10235467:
1376 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1379 ; AVX2-LABEL: shuffle_v8i32_10235467:
1381 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1383 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1384 ret <8 x i32> %shuffle
; Single-input in-lane shuffle with the same pattern [1,0,2,2] in both
; 128-bit lanes. Expected to be one instruction: vpermilps on the AVX1 run,
; vpshufd on the AVX2 run.
1387 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
1388 ; AVX1-LABEL: shuffle_v8i32_10225466:
1390 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1393 ; AVX2-LABEL: shuffle_v8i32_10225466:
1395 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1397 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
1398 ret <8 x i32> %shuffle
; In-lane shuffle whose two lanes use different patterns: [0,0,0,1] in the
; low lane, [1,0,0,0] in the high lane. The AVX1 run still expects a single
; vpermilps; the AVX2 run expects a constant-index vpermd.
1401 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
1402 ; AVX1-LABEL: shuffle_v8i32_00015444:
1404 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
1407 ; AVX2-LABEL: shuffle_v8i32_00015444:
1409 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
1410 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1412 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
1413 ret <8 x i32> %shuffle
; In-lane shuffle whose two lanes use different patterns: [0,0,2,0] in the
; low lane, [0,2,0,0] in the high lane. The AVX1 run expects a single
; vpermilps; the AVX2 run expects a constant-index vpermd.
1416 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
1417 ; AVX1-LABEL: shuffle_v8i32_00204644:
1419 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
1422 ; AVX2-LABEL: shuffle_v8i32_00204644:
1424 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
1425 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1427 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
1428 ret <8 x i32> %shuffle
; In-lane shuffle whose two lanes use different patterns: [0,3,0,0] in the
; low lane, [0,0,3,0] in the high lane. The AVX1 run expects a single
; vpermilps; the AVX2 run expects a constant-index vpermd.
1431 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
1432 ; AVX1-LABEL: shuffle_v8i32_03004474:
1434 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
1437 ; AVX2-LABEL: shuffle_v8i32_03004474:
1439 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
1440 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1442 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
1443 ret <8 x i32> %shuffle
; In-lane shuffle whose two lanes use different patterns: [1,0,0,0] in the
; low lane, a splat of the lane's first element in the high lane. The AVX1
; run expects a single vpermilps; the AVX2 run a constant-index vpermd.
1446 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
1447 ; AVX1-LABEL: shuffle_v8i32_10004444:
1449 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
1452 ; AVX2-LABEL: shuffle_v8i32_10004444:
1454 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
1455 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1457 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1458 ret <8 x i32> %shuffle
; In-lane shuffle whose two lanes use different patterns: [2,2,0,0] in the
; low lane, [2,0,0,2] in the high lane. The AVX1 run expects a single
; vpermilps; the AVX2 run expects a constant-index vpermd.
1461 define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
1462 ; AVX1-LABEL: shuffle_v8i32_22006446:
1464 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
1467 ; AVX2-LABEL: shuffle_v8i32_22006446:
1469 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
1470 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1472 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
1473 ret <8 x i32> %shuffle
; In-lane shuffle whose two lanes use different patterns: [3,3,3,0] in the
; low lane, [3,0,3,0] in the high lane. The AVX1 run expects a single
; vpermilps; the AVX2 run expects a constant-index vpermd.
1476 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
1477 ; AVX1-LABEL: shuffle_v8i32_33307474:
1479 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
1482 ; AVX2-LABEL: shuffle_v8i32_33307474:
1484 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
1485 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1487 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
1488 ret <8 x i32> %shuffle
; Reverses the low four elements of %a and leaves the high four in place.
; The AVX1 run expects a single vpermilps; the AVX2 run expects a
; constant-index vpermd.
1491 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
1492 ; AVX1-LABEL: shuffle_v8i32_32104567:
1494 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
1497 ; AVX2-LABEL: shuffle_v8i32_32104567:
1499 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
1500 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1502 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
1503 ret <8 x i32> %shuffle
; In-lane shuffle whose two lanes use different patterns: [0,0,2,3] in the
; low lane, [2,3,0,0] in the high lane. The AVX1 run expects a single
; vpermilps; the AVX2 run expects a constant-index vpermd.
1506 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
1507 ; AVX1-LABEL: shuffle_v8i32_00236744:
1509 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
1512 ; AVX2-LABEL: shuffle_v8i32_00236744:
1514 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
1515 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1517 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
1518 ret <8 x i32> %shuffle
; In-lane shuffle whose two lanes use different patterns: [0,0,2,2] in the
; low lane, [2,2,0,0] in the high lane. The AVX1 run expects a single
; vpermilps; the AVX2 run expects a constant-index vpermd.
1521 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
1522 ; AVX1-LABEL: shuffle_v8i32_00226644:
1524 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
1527 ; AVX2-LABEL: shuffle_v8i32_00226644:
1529 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
1530 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1532 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
1533 ret <8 x i32> %shuffle
; Swaps adjacent pairs in the low four elements of %a and leaves the high
; four in place. The AVX1 run expects a single vpermilps; the AVX2 run
; expects a constant-index vpermd.
1536 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
1537 ; AVX1-LABEL: shuffle_v8i32_10324567:
1539 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
1542 ; AVX2-LABEL: shuffle_v8i32_10324567:
1544 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
1545 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1547 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1548 ret <8 x i32> %shuffle
; Duplicates the odd elements in the low four elements of %a ([1,1,3,3]) and
; leaves the high four in place. The AVX1 run expects a single vpermilps;
; the AVX2 run expects a constant-index vpermd.
1551 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
1552 ; AVX1-LABEL: shuffle_v8i32_11334567:
1554 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
1557 ; AVX2-LABEL: shuffle_v8i32_11334567:
1559 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
1560 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1562 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
1563 ret <8 x i32> %shuffle
; Leaves the low four elements of %a in place and swaps elements 4 and 5 in
; the high lane. The AVX1 run expects a single vpermilps; the AVX2 run
; expects a constant-index vpermd.
1566 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
1567 ; AVX1-LABEL: shuffle_v8i32_01235467:
1569 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
1572 ; AVX2-LABEL: shuffle_v8i32_01235467:
1574 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
1575 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1577 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1578 ret <8 x i32> %shuffle
; Leaves the low four elements of %a in place; the high lane becomes
; [5,4,6,6]. The AVX1 run expects a single vpermilps; the AVX2 run expects a
; constant-index vpermd.
1581 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
1582 ; AVX1-LABEL: shuffle_v8i32_01235466:
1584 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
1587 ; AVX2-LABEL: shuffle_v8i32_01235466:
1589 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
1590 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1592 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
1593 ret <8 x i32> %shuffle
; In-lane, per-lane-different shuffle with undef result elements 3 and 5.
; The expected instructions keep those positions as 'u' in the decoded mask.
1596 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
1597 ; AVX1-LABEL: shuffle_v8i32_002u6u44:
1599 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
1602 ; AVX2-LABEL: shuffle_v8i32_002u6u44:
1604 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
1605 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1607 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
1608 ret <8 x i32> %shuffle
; Only result elements 0,1 (both %a[0]) and 4,5 (both %a[6]) are defined; the
; rest are undef. The expected instructions keep the undef positions as 'u'.
1611 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
1612 ; AVX1-LABEL: shuffle_v8i32_00uu66uu:
1614 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
1617 ; AVX2-LABEL: shuffle_v8i32_00uu66uu:
1619 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
1620 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1622 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
1623 ret <8 x i32> %shuffle
; Pair swap in the low lane, elements 4 and 5 in place, elements 6 and 7
; undef. The expected instructions keep the undef positions as 'u'.
1626 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
1627 ; AVX1-LABEL: shuffle_v8i32_103245uu:
1629 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
1632 ; AVX2-LABEL: shuffle_v8i32_103245uu:
1634 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
1635 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1637 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
1638 ret <8 x i32> %shuffle
; Odd-element duplication in the low lane, elements 6 and 7 in place,
; elements 4 and 5 undef. The expected instructions keep the undef positions
; as 'u'.
1641 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
1642 ; AVX1-LABEL: shuffle_v8i32_1133uu67:
1644 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
1647 ; AVX2-LABEL: shuffle_v8i32_1133uu67:
1649 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
1650 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1652 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
1653 ret <8 x i32> %shuffle
; Elements 0 and 3 stay in place, elements 4 and 5 are swapped, and the other
; four result elements are undef. The expected instructions keep the undef
; positions as 'u'.
1656 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
1657 ; AVX1-LABEL: shuffle_v8i32_0uu354uu:
1659 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
1662 ; AVX2-LABEL: shuffle_v8i32_0uu354uu:
1664 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
1665 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1667 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
1668 ret <8 x i32> %shuffle
; Only three result elements are defined: element 3 in place and %a[6] in
; positions 6 and 7. The expected instructions keep the undef positions as
; 'u'.
1671 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
1672 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
1674 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
1677 ; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
1679 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
1680 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1682 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
1683 ret <8 x i32> %shuffle
; Irregular two-input shuffle that moves elements across the 128-bit halves
; of both inputs: positions 0, 5 and 7 come from %a (elements 6, 7, 5), the
; rest from %b (elements 4, 2, 2, 0, 6). Both runs rearrange each input
; separately and finish with a blend.
1686 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
1687 ; AVX1-LABEL: shuffle_v8i32_6caa87e5:
1689 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
1690 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
1691 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1692 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1693 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1694 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1697 ; AVX2-LABEL: shuffle_v8i32_6caa87e5:
1699 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u>
1700 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1701 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
1702 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1704 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
1705 ret <8 x i32> %shuffle
; Reverses the low four elements of %a and repeats them in both halves of the
; result. The AVX1 run expects a 128-bit permute plus insert; the AVX2 run
; expects a constant-index vpermd.
1708 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
1709 ; AVX1-LABEL: shuffle_v8i32_32103210:
1711 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1712 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1715 ; AVX2-LABEL: shuffle_v8i32_32103210:
1717 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
1718 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1720 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
1721 ret <8 x i32> %shuffle
; Reverses the high four elements of %a and repeats them in both halves of
; the result. The AVX1 run expects extract + 128-bit permute + insert; the
; AVX2 run expects a constant-index vpermd.
1724 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
1725 ; AVX1-LABEL: shuffle_v8i32_76547654:
1727 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1728 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1729 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1732 ; AVX2-LABEL: shuffle_v8i32_76547654:
1734 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
1735 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1737 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
1738 ret <8 x i32> %shuffle
; Full reversal of all eight elements of %a. The AVX1 run expects a 128-bit
; half swap followed by an in-lane reversal; the AVX2 run expects a
; constant-index vpermd.
1741 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
1742 ; AVX1-LABEL: shuffle_v8i32_76543210:
1744 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1745 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1748 ; AVX2-LABEL: shuffle_v8i32_76543210:
1750 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
1751 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1753 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1754 ret <8 x i32> %shuffle
; Result low half = low four elements of %a reversed (3..0); result high half
; = low four elements of %b reversed (indices 11..8). Both runs expect the low
; 128 bits of %b to be inserted above the low 128 bits of %a, followed by one
; in-lane reversal.
1757 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
1758 ; AVX1-LABEL: shuffle_v8i32_3210ba98:
1760 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1761 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1764 ; AVX2-LABEL: shuffle_v8i32_3210ba98:
1766 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1767 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1769 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
1770 ret <8 x i32> %shuffle
; Result low half = low four elements of %a reversed (3..0); result high half
; = high four elements of %b reversed (indices 15..12). Each source half stays
; in its own 128-bit lane, so both runs expect a blend followed by one in-lane
; reversal.
1773 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
1774 ; AVX1-LABEL: shuffle_v8i32_3210fedc:
1776 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1777 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1780 ; AVX2-LABEL: shuffle_v8i32_3210fedc:
1782 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1783 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1785 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
1786 ret <8 x i32> %shuffle
; Result low half = high four elements of %a reversed (7..4); result high half
; = high four elements of %b reversed (indices 15..12). Both runs expect a
; 128-bit lane permute that gathers the two high halves, then one in-lane
; reversal.
1789 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
1790 ; AVX1-LABEL: shuffle_v8i32_7654fedc:
1792 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1793 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1796 ; AVX2-LABEL: shuffle_v8i32_7654fedc:
1798 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1799 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1801 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
1802 ret <8 x i32> %shuffle
; Result low half = high four elements of %b reversed (indices 15..12); result
; high half = high four elements of %a reversed (7..4). Same shape as
; shuffle_v8i32_7654fedc with the two sources swapped.
1805 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
1806 ; AVX1-LABEL: shuffle_v8i32_fedc7654:
1808 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1809 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1812 ; AVX2-LABEL: shuffle_v8i32_fedc7654:
1814 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1815 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1817 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
1818 ret <8 x i32> %shuffle
; Result low half = low four elements of %b reversed (indices 11..8); result
; high half = high four elements of %a reversed (7..4). Each source half stays
; in its own 128-bit lane, so both runs expect a blend followed by one in-lane
; reversal.
1821 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
1822 ; AVX1-LABEL: shuffle_v8i32_ba987654:
1824 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
1825 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1828 ; AVX2-LABEL: shuffle_v8i32_ba987654:
1830 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1831 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1833 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
1834 ret <8 x i32> %shuffle
; Result low half = low four elements of %b reversed (indices 11..8); result
; high half = low four elements of %a reversed (3..0). This is the
; operand-swapped counterpart of shuffle_v8i32_3210ba98: the low 128 bits of
; %a are inserted above the low 128 bits of %b, then one in-lane reversal.
; The mask previously repeated shuffle_v8i32_ba987654 (11,10,9,8,7,6,5,4), so
; this case was never exercised.
; NOTE(review): the expected instructions below mirror the 3210ba98 checks
; with the operands swapped; re-run llc to confirm the exact output.
1837 define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
1838 ; AVX1-LABEL: shuffle_v8i32_ba983210:
1840 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1841 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1844 ; AVX2-LABEL: shuffle_v8i32_ba983210:
1846 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1847 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1849 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
1850 ret <8 x i32> %shuffle
; The first shuffle operand is zeroinitializer, so index 0 selects a zero;
; indices 8 and 12 select %a[0] and %a[4]. Per 128-bit half the result is
; (zero, undef, undef, first element of that half of %a). The AVX2 run expects
; this to become a single per-lane byte shift left by 12 bytes.
1853 define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
1854 ; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
1856 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1857 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
1860 ; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
1862 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
1864 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
1865 ret <8 x i32> %shuffle
; The first shuffle operand is zeroinitializer, so index 0 selects a zero;
; indices 9..15 select %a[1]..%a[7]. Per 128-bit half the result is the last
; three elements of %a's half followed by a zero (one position is undef). The
; AVX2 run expects this to become a single per-lane byte shift right by 4.
1868 define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
1869 ; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
1871 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1872 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
1873 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
1876 ; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
1878 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
1880 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
1881 ret <8 x i32> %shuffle
; Interleaves the low two elements of each 128-bit half of %b and %a (%b
; first), with several positions left undef. Both runs expect one unpack-low.
; NOTE(review): the name spells position 4 as 'b' (index 11), but the mask
; uses index 12, which is what the unpack-low pattern in the checks requires.
; The name looks like a typo for "80u1c4uu"; it is left unchanged here so the
; symbol stays stable.
1884 define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
1885 ; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
1887 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1890 ; AVX2-LABEL: shuffle_v8i32_80u1b4uu:
1892 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1894 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
1895 ret <8 x i32> %shuffle
; Loads one float from %p, places it in element 0 of a 4-wide vector, and
; splats that element across an 8-wide result (all-zero shuffle mask). Both
; runs share the ALL prefix and expect a single broadcast straight from memory.
1898 define <8 x float> @splat_mem_v8f32_2(float* %p) {
1899 ; ALL-LABEL: splat_mem_v8f32_2:
1901 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0
1903 %1 = load float, float* %p
1904 %2 = insertelement <4 x float> undef, float %1, i32 0
1905 %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
; Splats element 0 of the 4-wide register argument %r across an 8-wide result
; (all-zero shuffle mask). The AVX1 run expects an in-lane splat followed by a
; 128-bit insert; the AVX2 run expects a single register-to-register broadcast.
1909 define <8 x float> @splat_v8f32(<4 x float> %r) {
1910 ; AVX1-LABEL: splat_v8f32:
1912 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1913 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1916 ; AVX2-LABEL: splat_v8f32:
1918 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
1920 %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
1925 ; Shuffle to logical bit shifts
; The second shuffle operand is zeroinitializer, so index 8 selects a zero.
; Every 64-bit pair of the result is (zero, even element of %a), with some
; positions undef. The AVX2 run expects a 64-bit logical left shift by 32.
1928 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
1929 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
1931 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1932 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
1933 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
1936 ; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
1938 ; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
1940 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
1941 ret <8 x i32> %shuffle
; The second shuffle operand is zeroinitializer, so index 8 selects a zero.
; Every 64-bit pair of the result is (odd element of %a, zero), with some
; positions undef. The AVX2 run expects a 64-bit logical right shift by 32.
1944 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
1945 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
1947 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1948 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
1949 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1952 ; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
1954 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
1956 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
1957 ret <8 x i32> %shuffle
; Per 128-bit half: the last element of %b's half followed by the first three
; elements of %a's half (mask 11,0,1,2 / 15,4,5,6). The AVX2 run expects a
; single vpalignr; the AVX1 run expects two vshufps.
1960 define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
1961 ; AVX1-LABEL: shuffle_v8i32_B012F456:
1963 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
1964 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
1967 ; AVX2-LABEL: shuffle_v8i32_B012F456:
1969 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
1971 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
1972 ret <8 x i32> %shuffle
; Per 128-bit half: the last three elements of %a's half followed by the first
; element of %b's half (mask 1,2,3,8 / 5,6,7,12). The AVX2 run expects a
; single vpalignr; the AVX1 run expects two vshufps.
1975 define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
1976 ; AVX1-LABEL: shuffle_v8i32_1238567C:
1978 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
1979 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
1982 ; AVX2-LABEL: shuffle_v8i32_1238567C:
1984 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
1986 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
1987 ret <8 x i32> %shuffle
; Per 128-bit half: the last three elements of %b's half followed by the first
; element of %a's half (mask 9,10,11,0 / 13,14,15,4). Same shape as
; shuffle_v8i32_1238567C with the two sources swapped.
1990 define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
1991 ; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
1993 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
1994 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
1997 ; AVX2-LABEL: shuffle_v8i32_9AB0DEF4:
1999 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
2001 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
2002 ret <8 x i32> %shuffle
; Per 128-bit half: the last element of %a's half followed by the first three
; elements of %b's half (mask 3,8,9,10 / 7,12,13,14). Same shape as
; shuffle_v8i32_B012F456 with the two sources swapped.
2005 define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
2006 ; AVX1-LABEL: shuffle_v8i32_389A7CDE:
2008 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
2009 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
2012 ; AVX2-LABEL: shuffle_v8i32_389A7CDE:
2014 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
2016 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
2017 ret <8 x i32> %shuffle
; Single-input rotation inside each 128-bit half of %a: the last element moves
; to the front (mask 3,0,1,2 / 7,4,5,6). %b is never selected. Both runs
; expect one in-lane permute.
2020 define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
2021 ; AVX1-LABEL: shuffle_v8i32_30127456:
2023 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2026 ; AVX2-LABEL: shuffle_v8i32_30127456:
2028 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2030 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
2031 ret <8 x i32> %shuffle
; Single-input rotation inside each 128-bit half of %a: the first element
; moves to the back (mask 1,2,3,0 / 5,6,7,4). %b is never selected. Both runs
; expect one in-lane permute.
2034 define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
2035 ; AVX1-LABEL: shuffle_v8i32_12305674:
2037 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2040 ; AVX2-LABEL: shuffle_v8i32_12305674:
2042 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2044 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
2045 ret <8 x i32> %shuffle
; Loads two <2 x float> values, widens each to 8 elements separately, then
; combines them so result elements 0-1 come from %tmp64 and elements 2-3 come
; from %tmp65; the upper four elements are undef. Both runs expect a 64-bit
; load followed by a high-half load from the second pointer.
2048 define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2049 ; ALL-LABEL: concat_v2f32_1:
2050 ; ALL: # BB#0: # %entry
2051 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2052 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
2055 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2056 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2057 %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2058 %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2059 %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
2060 ret <8 x float> %tmp76
; Same concatenation as concat_v2f32_1, written as one shufflevector that goes
; straight from the two <2 x float> loads to the 8-wide result (elements 0-3
; defined, upper four undef). The expected instructions are identical.
2063 define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2064 ; ALL-LABEL: concat_v2f32_2:
2065 ; ALL: # BB#0: # %entry
2066 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2067 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
2070 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2071 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2072 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2073 ret <8 x float> %tmp76
; Same concatenation as concat_v2f32_1, written in two steps: first join the
; two <2 x float> loads into a 4-wide vector, then widen that to 8 elements
; with the upper four undef. The expected instructions are identical.
2076 define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2077 ; ALL-LABEL: concat_v2f32_3:
2078 ; ALL: # BB#0: # %entry
2079 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2080 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
2083 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2084 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2085 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2086 %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2087 ret <8 x float> %res
; Loads an i32 from %ptr into element 0 and takes elements 1-7 from a zero
; vector (mask indices 9..15 select the zeroinitializer operand). Both runs
; expect the whole sequence to fold into one vmovd load, shown in the check
; line as mem[0] followed by zeros.
2090 define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
2091 ; ALL-LABEL: insert_mem_and_zero_v8i32:
2093 ; ALL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2095 %a = load i32, i32* %ptr
2096 %v = insertelement <8 x i32> undef, i32 %a, i32 0
2097 %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2098 ret <8 x i32> %shuffle