; Codegen test for x86 shuffles of 8-element 256-bit vectors. The input is compiled
; twice by llc: once with +avx (checked with prefixes ALL and AVX1) and once with
; +avx2 (checked with prefixes ALL and AVX2).
1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
4 target triple = "x86_64-unknown-unknown"
; Splat: every result lane is lane 0 of %a (%b is unused). The +avx run expects an
; xmm splat copied into the high half; the +avx2 run expects a single vbroadcastss.
6 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
7 ; AVX1-LABEL: shuffle_v8f32_00000000:
9 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
10 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
13 ; AVX2-LABEL: shuffle_v8f32_00000000:
15 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
17 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
18 ret <8 x float> %shuffle
; Lane 0 of %a everywhere except result lane 6, which takes lane 1 (%b is unused).
; The +avx2 run expects a constant index vector fed to vpermps.
21 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
22 ; AVX1-LABEL: shuffle_v8f32_00000010:
24 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
25 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
26 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
29 ; AVX2-LABEL: shuffle_v8f32_00000010:
31 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
32 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
34 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
35 ret <8 x float> %shuffle
; Lane 0 of %a everywhere except result lane 5, which takes lane 2 (%b is unused).
38 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
39 ; AVX1-LABEL: shuffle_v8f32_00000200:
41 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
42 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
43 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
46 ; AVX2-LABEL: shuffle_v8f32_00000200:
48 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
49 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
51 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
52 ret <8 x float> %shuffle
; Lane 0 of %a everywhere except result lane 4, which takes lane 3 (%b is unused).
55 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
56 ; AVX1-LABEL: shuffle_v8f32_00003000:
58 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
59 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
60 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
63 ; AVX2-LABEL: shuffle_v8f32_00003000:
65 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
66 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
68 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
69 ret <8 x float> %shuffle
; Lane 0 of %a everywhere except result lane 3, which takes lane 4 from the upper
; 128-bit half (%b is unused). The +avx run expects a half swap, two in-half
; permutes and a blend.
72 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
73 ; AVX1-LABEL: shuffle_v8f32_00040000:
75 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
76 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
77 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
78 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
81 ; AVX2-LABEL: shuffle_v8f32_00040000:
83 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
84 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
86 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
87 ret <8 x float> %shuffle
; Lane 0 of %a everywhere except result lane 2, which takes lane 5 from the upper
; 128-bit half (%b is unused).
90 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
91 ; AVX1-LABEL: shuffle_v8f32_00500000:
93 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
94 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
95 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
98 ; AVX2-LABEL: shuffle_v8f32_00500000:
100 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
101 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
103 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
104 ret <8 x float> %shuffle
; Lane 0 of %a everywhere except result lane 1, which takes lane 6 from the upper
; 128-bit half (%b is unused).
107 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
108 ; AVX1-LABEL: shuffle_v8f32_06000000:
110 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
111 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
112 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
115 ; AVX2-LABEL: shuffle_v8f32_06000000:
117 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
118 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
120 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
121 ret <8 x float> %shuffle
; Result lane 0 takes lane 7 of %a; all other result lanes take lane 0 (%b is unused).
; The +avx2 run expects the vpermps index vector <7,0,0,0,0,0,0,0> to be built in
; registers (movl/vmovd, then a blend against zero) rather than loaded as a constant.
124 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
125 ; AVX1-LABEL: shuffle_v8f32_70000000:
127 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
128 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
129 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
132 ; AVX2-LABEL: shuffle_v8f32_70000000:
134 ; AVX2-NEXT: movl $7, %eax
135 ; AVX2-NEXT: vmovd %eax, %xmm1
136 ; AVX2-NEXT: vxorps %ymm2, %ymm2, %ymm2
137 ; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
138 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
140 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
141 ret <8 x float> %shuffle
; Each 128-bit half of %a repeats its own low pair of floats (lanes 0,1 and 4,5).
; Both runs expect this to be treated as a 64-bit element duplicate (vmovddup).
144 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
145 ; ALL-LABEL: shuffle_v8f32_01014545:
147 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
149 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
150 ret <8 x float> %shuffle
; Duplicates each of lanes 0..3 of %a into an adjacent pair, spreading the low half
; over the whole result (%b is unused).
153 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
154 ; AVX1-LABEL: shuffle_v8f32_00112233:
156 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
157 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
158 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
161 ; AVX2-LABEL: shuffle_v8f32_00112233:
163 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
164 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
166 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
167 ret <8 x float> %shuffle
; Low half of the result is a splat of lane 0 of %a, high half is a splat of lane 1
; (%b is unused).
170 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
171 ; AVX1-LABEL: shuffle_v8f32_00001111:
173 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
174 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
175 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
178 ; AVX2-LABEL: shuffle_v8f32_00001111:
180 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
181 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
183 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
184 ret <8 x float> %shuffle
; Even result lanes come from %b and odd result lanes from %a, each from the same
; lane position, so both runs expect a single vblendps.
187 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
188 ; ALL-LABEL: shuffle_v8f32_81a3c5e7:
190 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
192 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
193 ret <8 x float> %shuffle
; Alternates lane 0 of %a (even result lanes) with lane 0 of %b (odd result lanes).
; Both runs expect each input to be spread across its register first, then a blend.
196 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
197 ; AVX1-LABEL: shuffle_v8f32_08080808:
199 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
200 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
201 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
202 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
203 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
206 ; AVX2-LABEL: shuffle_v8f32_08080808:
208 ; AVX2-NEXT: vbroadcastss %xmm1, %ymm1
209 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
210 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
212 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
213 ret <8 x float> %shuffle
; Within each 128-bit half, alternates that half's first lane of %a with the same
; lane of %b (0/8 in the low half, 4/12 in the high half). Expected as two vshufps.
216 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
217 ; ALL-LABEL: shuffle_v8f32_08084c4c:
219 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
220 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
222 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
223 ret <8 x float> %shuffle
; Per 128-bit half: two copies of %b's first lane of that half, followed by %a's
; upper two lanes of that half. Both runs expect a single two-source vshufps.
226 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
227 ; ALL-LABEL: shuffle_v8f32_8823cc67:
229 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
231 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
232 ret <8 x float> %shuffle
; Per 128-bit half: %b's low pair reversed, followed by %a's high pair reversed.
; Both runs expect a single two-source vshufps.
235 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
236 ; ALL-LABEL: shuffle_v8f32_9832dc76:
238 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
240 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
241 ret <8 x float> %shuffle
; Per 128-bit half: %b's low pair reversed, followed by %a's low pair reversed.
; Both runs expect a single two-source vshufps.
244 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
245 ; ALL-LABEL: shuffle_v8f32_9810dc54:
247 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
249 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
250 ret <8 x float> %shuffle
; Interleaves the low two lanes of each 128-bit half of %a with the matching lanes
; of %b; both runs expect a single vunpcklps.
253 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
254 ; ALL-LABEL: shuffle_v8f32_08194c5d:
256 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
258 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
259 ret <8 x float> %shuffle
; Interleaves the high two lanes of each 128-bit half of %a with the matching lanes
; of %b; both runs expect a single vunpckhps.
262 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
263 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
265 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
267 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
268 ret <8 x float> %shuffle
; Interleaves lanes 0..3 of %a with lanes 0..3 of %b across the full 256-bit result,
; so the result's high half is built from the inputs' low halves.
271 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
272 ; AVX1-LABEL: shuffle_v8f32_08192a3b:
274 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
275 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
276 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
279 ; AVX2-LABEL: shuffle_v8f32_08192a3b:
281 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
282 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
283 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
284 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
285 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
287 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
288 ret <8 x float> %shuffle
; Result is a0,b0,b1,b1,a1,b2,b3,b3 (aN/bN = lane N of %a/%b): only the low halves of
; both inputs are read, with one %a lane at the start of each result half.
291 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
292 ; AVX1-LABEL: shuffle_v8f32_08991abb:
294 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
295 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
296 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
297 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
298 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
301 ; AVX2-LABEL: shuffle_v8f32_08991abb:
303 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
304 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
305 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
306 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
307 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
309 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
310 ret <8 x float> %shuffle
; Even result lanes take lanes 0..3 of %a; odd result lanes keep %b's own odd lanes
; in place. Only %a needs permuting before the final blend.
313 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
314 ; AVX1-LABEL: shuffle_v8f32_091b2d3f:
316 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
317 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
318 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
319 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
322 ; AVX2-LABEL: shuffle_v8f32_091b2d3f:
324 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
325 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
326 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
328 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
329 ret <8 x float> %shuffle
; %b stays in place except result lanes 0 and 4, which take lanes 0 and 1 of %a.
332 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
333 ; AVX1-LABEL: shuffle_v8f32_09ab1def:
335 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
336 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
337 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
340 ; AVX2-LABEL: shuffle_v8f32_09ab1def:
342 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
343 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
344 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
346 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
347 ret <8 x float> %shuffle
; Single-input shuffle that stays within each 128-bit half and applies the same
; pattern [0,0,0,1] to both halves; both runs expect one vpermilps.
350 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
351 ; ALL-LABEL: shuffle_v8f32_00014445:
353 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
355 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
356 ret <8 x float> %shuffle
; Single-input shuffle applying the same in-half pattern [0,0,2,0] to both 128-bit
; halves of %a; both runs expect one vpermilps.
359 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
360 ; ALL-LABEL: shuffle_v8f32_00204464:
362 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
364 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
365 ret <8 x float> %shuffle
; Single-input shuffle applying the same in-half pattern [0,3,0,0] to both 128-bit
; halves of %a; both runs expect one vpermilps.
368 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
369 ; ALL-LABEL: shuffle_v8f32_03004744:
371 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
373 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
374 ret <8 x float> %shuffle
; Single-input shuffle applying the same in-half pattern [1,0,0,0] to both 128-bit
; halves of %a; both runs expect one vpermilps.
377 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
378 ; ALL-LABEL: shuffle_v8f32_10005444:
380 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
382 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
383 ret <8 x float> %shuffle
; Single-input shuffle applying the same in-half pattern [2,2,0,0] to both 128-bit
; halves of %a; both runs expect one vpermilps.
386 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
387 ; ALL-LABEL: shuffle_v8f32_22006644:
389 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
391 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
392 ret <8 x float> %shuffle
; Single-input shuffle applying the same in-half pattern [3,3,3,0] to both 128-bit
; halves of %a; both runs expect one vpermilps.
395 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
396 ; ALL-LABEL: shuffle_v8f32_33307774:
398 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
400 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
401 ret <8 x float> %shuffle
; Reverses the four lanes inside each 128-bit half of %a without moving data between
; halves; both runs expect one vpermilps.
404 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
405 ; ALL-LABEL: shuffle_v8f32_32107654:
407 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
409 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
410 ret <8 x float> %shuffle
; Single-input shuffle applying the same in-half pattern [0,0,2,3] to both 128-bit
; halves of %a; both runs expect one vpermilps.
413 define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
414 ; ALL-LABEL: shuffle_v8f32_00234467:
416 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
418 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
419 ret <8 x float> %shuffle
; Duplicates every even lane of %a into the following odd lane; both runs expect the
; dedicated vmovsldup instruction rather than a generic vpermilps.
422 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
423 ; ALL-LABEL: shuffle_v8f32_00224466:
425 ; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
427 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
428 ret <8 x float> %shuffle
; Swaps the two lanes of every adjacent pair of %a; both runs expect one vpermilps.
431 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
432 ; ALL-LABEL: shuffle_v8f32_10325476:
434 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
436 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
437 ret <8 x float> %shuffle
; Duplicates every odd lane of %a into the preceding even lane; both runs expect the
; dedicated vmovshdup instruction rather than a generic vpermilps.
440 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
441 ; ALL-LABEL: shuffle_v8f32_11335577:
443 ; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
445 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
446 ret <8 x float> %shuffle
; Single-input shuffle applying the same in-half pattern [1,0,2,3] to both 128-bit
; halves of %a; both runs expect one vpermilps.
449 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
450 ; ALL-LABEL: shuffle_v8f32_10235467:
452 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
454 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
455 ret <8 x float> %shuffle
; Single-input shuffle applying the same in-half pattern [1,0,2,2] to both 128-bit
; halves of %a; both runs expect one vpermilps.
458 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
459 ; ALL-LABEL: shuffle_v8f32_10225466:
461 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
463 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
464 ret <8 x float> %shuffle
; Single-input shuffle that stays within each 128-bit half but uses a different
; pattern per half ([0,0,0,1] low, [1,0,0,0] high); still expected as one vpermilps.
467 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
468 ; ALL-LABEL: shuffle_v8f32_00015444:
470 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
472 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
473 ret <8 x float> %shuffle
; In-half single-input shuffle with a different pattern per 128-bit half
; ([0,0,2,0] low, [0,2,0,0] high); both runs expect one vpermilps.
476 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
477 ; ALL-LABEL: shuffle_v8f32_00204644:
479 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
481 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
482 ret <8 x float> %shuffle
; In-half single-input shuffle with a different pattern per 128-bit half
; ([0,3,0,0] low, [0,0,3,0] high); both runs expect one vpermilps.
485 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
486 ; ALL-LABEL: shuffle_v8f32_03004474:
488 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
490 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
491 ret <8 x float> %shuffle
; In-half single-input shuffle with a different pattern per 128-bit half
; ([1,0,0,0] low, splat of the first lane high); both runs expect one vpermilps.
494 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
495 ; ALL-LABEL: shuffle_v8f32_10004444:
497 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
499 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
500 ret <8 x float> %shuffle
; In-half single-input shuffle with a different pattern per 128-bit half
; ([2,2,0,0] low, [2,0,0,2] high); both runs expect one vpermilps.
503 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
504 ; ALL-LABEL: shuffle_v8f32_22006446:
506 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
508 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
509 ret <8 x float> %shuffle
; In-half single-input shuffle with a different pattern per 128-bit half
; ([3,3,3,0] low, [3,0,3,0] high); both runs expect one vpermilps.
512 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
513 ; ALL-LABEL: shuffle_v8f32_33307474:
515 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
517 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
518 ret <8 x float> %shuffle
; Reverses the low 128-bit half of %a and leaves the high half untouched; both runs
; expect one vpermilps.
521 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
522 ; ALL-LABEL: shuffle_v8f32_32104567:
524 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
526 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
527 ret <8 x float> %shuffle
; In-half single-input shuffle with a different pattern per 128-bit half
; ([0,0,2,3] low, [2,3,0,0] high); both runs expect one vpermilps.
530 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
531 ; ALL-LABEL: shuffle_v8f32_00236744:
533 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
535 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
536 ret <8 x float> %shuffle
; In-half single-input shuffle with a different pattern per 128-bit half
; ([0,0,2,2] low, [2,2,0,0] high); both runs expect one vpermilps.
539 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
540 ; ALL-LABEL: shuffle_v8f32_00226644:
542 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
544 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
545 ret <8 x float> %shuffle
; Swaps adjacent lanes in the low 128-bit half of %a and leaves the high half
; untouched; both runs expect one vpermilps.
548 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
549 ; ALL-LABEL: shuffle_v8f32_10324567:
551 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
553 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
554 ret <8 x float> %shuffle
; Duplicates the odd lanes within the low 128-bit half of %a and leaves the high half
; untouched; both runs expect one vpermilps.
557 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
558 ; ALL-LABEL: shuffle_v8f32_11334567:
560 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
562 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
563 ret <8 x float> %shuffle
; Leaves the low 128-bit half of %a untouched and swaps the first two lanes of the
; high half; both runs expect one vpermilps.
566 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
567 ; ALL-LABEL: shuffle_v8f32_01235467:
569 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
571 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
572 ret <8 x float> %shuffle
; Leaves the low 128-bit half of %a untouched and applies [1,0,2,2] within the high
; half; both runs expect one vpermilps.
575 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
576 ; ALL-LABEL: shuffle_v8f32_01235466:
578 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
580 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
581 ret <8 x float> %shuffle
; In-half single-input shuffle where result lanes 3 and 5 are undef; the expected
; vpermilps comment shows those positions as 'u'.
584 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
585 ; ALL-LABEL: shuffle_v8f32_002u6u44:
587 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
589 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
590 ret <8 x float> %shuffle
; In-half single-input shuffle where the upper two lanes of each 128-bit half are
; undef; both runs expect one vpermilps with 'u' in those positions.
593 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
594 ; ALL-LABEL: shuffle_v8f32_00uu66uu:
596 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
598 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
599 ret <8 x float> %shuffle
; Swaps adjacent lanes in the low half of %a, keeps lanes 4 and 5 in place, and leaves
; result lanes 6 and 7 undef; both runs expect one vpermilps.
602 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
603 ; ALL-LABEL: shuffle_v8f32_103245uu:
605 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
607 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
608 ret <8 x float> %shuffle
; Duplicates the odd lanes in the low half of %a, leaves result lanes 4 and 5 undef,
; and keeps lanes 6 and 7 in place; both runs expect one vpermilps.
611 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
612 ; ALL-LABEL: shuffle_v8f32_1133uu67:
614 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
616 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
617 ret <8 x float> %shuffle
; In-half single-input shuffle with four undef result lanes (1, 2, 6, 7); the defined
; lanes keep 0 and 3 in place and swap 4 with 5. Both runs expect one vpermilps.
620 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
621 ; ALL-LABEL: shuffle_v8f32_0uu354uu:
623 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
625 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
626 ret <8 x float> %shuffle
; Mostly-undef single-input shuffle: only result lanes 3, 6 and 7 are defined (taking
; lanes 3, 6 and 6 of %a). Both runs expect one vpermilps.
629 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
630 ; ALL-LABEL: shuffle_v8f32_uuu3uu66:
632 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
634 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
635 ret <8 x float> %shuffle
; Irregular two-input shuffle that moves data across 128-bit halves: the result is
; b4,a3,a4,b0,b4,b5,b2,a0 (aN/bN = lane N of %a/%b). The expected code permutes each
; input separately and merges them with a final vblendps.
638 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
639 ; AVX1-LABEL: shuffle_v8f32_c348cda0:
641 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
642 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
643 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
644 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
645 ; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
646 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
649 ; AVX2-LABEL: shuffle_v8f32_c348cda0:
651 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0>
652 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
653 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
654 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
655 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
657 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
658 ret <8 x float> %shuffle
; Irregular two-input shuffle that moves data across 128-bit halves: the result is
; b7,a5,a1,a1,a2,a3,a5,b2 (aN/bN = lane N of %a/%b). Only result lanes 0 and 7 come
; from %b, which the final vblendps in both expectations reflects.
661 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
662 ; AVX1-LABEL: shuffle_v8f32_f511235a:
664 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
665 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
666 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
667 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
668 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
669 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
670 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
673 ; AVX2-LABEL: shuffle_v8f32_f511235a:
675 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2>
676 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
677 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
678 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
679 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
681 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
682 ret <8 x float> %shuffle
; Both halves of the result are the reversed low 128-bit half of %a (%b is unused).
685 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
686 ; AVX1-LABEL: shuffle_v8f32_32103210:
688 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
689 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
692 ; AVX2-LABEL: shuffle_v8f32_32103210:
694 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
695 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
697 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
698 ret <8 x float> %shuffle
; Both halves of the result are the reversed high 128-bit half of %a (%b is unused).
; The +avx run expects the high half to be extracted first.
701 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
702 ; AVX1-LABEL: shuffle_v8f32_76547654:
704 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
705 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
706 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
709 ; AVX2-LABEL: shuffle_v8f32_76547654:
711 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
712 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
714 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
715 ret <8 x float> %shuffle
; Full reversal of all eight lanes of %a (%b is unused). The +avx run expects a
; 128-bit half swap followed by an in-half reversal.
718 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
719 ; AVX1-LABEL: shuffle_v8f32_76543210:
721 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
722 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
725 ; AVX2-LABEL: shuffle_v8f32_76543210:
727 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
728 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
730 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
731 ret <8 x float> %shuffle
; Result low half is the reversed low half of %a; result high half is the reversed
; low half of %b. Both runs expect the two halves to be combined and then reversed.
734 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
735 ; ALL-LABEL: shuffle_v8f32_3210ba98:
737 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
738 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
740 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
741 ret <8 x float> %shuffle
; Result low half is the reversed low half of %a; result high half is the reversed
; high half of %b. Both halves stay in place, so a blend precedes the reversal.
744 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
745 ; ALL-LABEL: shuffle_v8f32_3210fedc:
747 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
748 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
750 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
751 ret <8 x float> %shuffle
; Result low half is the reversed high half of %a; result high half is the reversed
; high half of %b. Both runs expect a vperm2f128 followed by an in-half reversal.
754 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
755 ; ALL-LABEL: shuffle_v8f32_7654fedc:
757 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
758 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
760 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
761 ret <8 x float> %shuffle
; Result low half is the reversed high half of %b; result high half is the reversed
; high half of %a. Both runs expect a vperm2f128 followed by an in-half reversal.
764 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
765 ; ALL-LABEL: shuffle_v8f32_fedc7654:
767 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
768 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
770 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
771 ret <8 x float> %shuffle
; Gathers the odd lanes of %truc into the result's low half and the odd lanes of
; %tchose into its high half. NOTE(review): the name suggests a regression test for
; bug report PR21138; the report itself is not visible here.
774 define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
775 ; AVX1-LABEL: PR21138:
777 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
778 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
779 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
780 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
781 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
782 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
785 ; AVX2-LABEL: PR21138:
787 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,u,u,1,3,5,7>
788 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
789 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <1,3,5,7,u,u,u,u>
790 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
791 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
793 %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
794 ret <8 x float> %shuffle
; Result low half is the reversed low half of %b; result high half is the reversed
; high half of %a. Both halves stay in place, so a blend precedes the reversal.
797 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
798 ; ALL-LABEL: shuffle_v8f32_ba987654:
800 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
801 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
803 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
804 ret <8 x float> %shuffle
; Result low half is the reversed low half of %b (indices 11,10,9,8); result high
; half is the reversed low half of %a (indices 3,2,1,0), as the function name encodes.
; The high half of the result is built from the LOW half of %a, so the two low
; halves must be concatenated (%b low, then %a low) before the in-half reversal.
; NOTE(review): the expected instructions mirror shuffle_v8f32_3210ba98 with the
; sources swapped; regenerate the check lines from actual llc output to confirm.
807 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
808 ; ALL-LABEL: shuffle_v8f32_ba983210:
810 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
811 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
813 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
814 ret <8 x float> %shuffle
; Low-pair interleave with %b first and %a second, with result lanes 2 and 6 undef.
; Both runs expect the undef lanes to be filled in so that one vunpcklps (operands
; swapped) covers the whole mask.
817 define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
818 ; ALL-LABEL: shuffle_v8f32_80u1c4u5:
820 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
822 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5>
823 ret <8 x float> %shuffle
; High-pair interleave with %b first and %a second, with result lane 2 undef. Both
; runs expect one vunpckhps with the operands swapped.
826 define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
827 ; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
829 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
831 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>
832 ret <8 x float> %shuffle
; Integer splat of %a element 0 into all eight lanes (%b is unused by the mask).
; The AVX run expects a 128-bit splat widened with vinsertf128; the AVX2 run
; expects a single vbroadcastss.
835 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
836 ; AVX1-LABEL: shuffle_v8i32_00000000:
838 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
839 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
842 ; AVX2-LABEL: shuffle_v8i32_00000000:
844 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
846 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
847 ret <8 x i32> %shuffle
; Element 0 of %a everywhere except result lane 6, which takes element 1.
; The AVX run expects two 128-bit vpermilps on the low half joined by
; vinsertf128; the AVX2 run expects an index constant feeding vpermd.
850 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
851 ; AVX1-LABEL: shuffle_v8i32_00000010:
853 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
854 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
855 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
858 ; AVX2-LABEL: shuffle_v8i32_00000010:
860 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
861 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
863 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
864 ret <8 x i32> %shuffle
; Element 0 of %a everywhere except result lane 5, which takes element 2.
; The AVX run expects two 128-bit vpermilps on the low half joined by
; vinsertf128; the AVX2 run expects an index constant feeding vpermd.
867 define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
868 ; AVX1-LABEL: shuffle_v8i32_00000200:
870 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
871 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
872 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
875 ; AVX2-LABEL: shuffle_v8i32_00000200:
877 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
878 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
880 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
881 ret <8 x i32> %shuffle
; Element 0 of %a everywhere except result lane 4, which takes element 3.
; The AVX run expects two 128-bit vpermilps on the low half joined by
; vinsertf128; the AVX2 run expects an index constant feeding vpermd.
884 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
885 ; AVX1-LABEL: shuffle_v8i32_00003000:
887 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
888 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
889 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
892 ; AVX2-LABEL: shuffle_v8i32_00003000:
894 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
895 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
897 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
898 ret <8 x i32> %shuffle
; Element 0 of %a everywhere except result lane 3, which takes element 4 from
; the upper 128-bit half (a lane-crossing move). The AVX run expects a
; vperm2f128 half swap, two vpermilps and a vblendps; the AVX2 run expects an
; index constant feeding vpermd.
901 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
902 ; AVX1-LABEL: shuffle_v8i32_00040000:
904 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
905 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
906 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
907 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
910 ; AVX2-LABEL: shuffle_v8i32_00040000:
912 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
913 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
915 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
916 ret <8 x i32> %shuffle
; Element 0 of %a everywhere except result lane 2, which takes element 5 from
; the upper 128-bit half. The AVX run expects vperm2f128, vblendps and one
; vpermilps; the AVX2 run expects an index constant feeding vpermd.
919 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
920 ; AVX1-LABEL: shuffle_v8i32_00500000:
922 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
923 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
924 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
927 ; AVX2-LABEL: shuffle_v8i32_00500000:
929 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
930 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
932 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
933 ret <8 x i32> %shuffle
; Element 0 of %a everywhere except result lane 1, which takes element 6 from
; the upper 128-bit half. The AVX run expects vperm2f128, vblendpd and one
; vpermilps; the AVX2 run expects an index constant feeding vpermd.
936 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
937 ; AVX1-LABEL: shuffle_v8i32_06000000:
939 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
940 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
941 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
944 ; AVX2-LABEL: shuffle_v8i32_06000000:
946 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
947 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
949 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
950 ret <8 x i32> %shuffle
; Element 0 of %a everywhere except result lane 0, which takes element 7 from
; the upper 128-bit half. The AVX run expects vperm2f128, vblendpd and one
; vpermilps. The AVX2 run expects the vpermd index vector to be built in
; registers (movl $7 / vmovd / blend with zero) rather than loaded as a constant.
953 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
954 ; AVX1-LABEL: shuffle_v8i32_70000000:
956 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
957 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
958 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
961 ; AVX2-LABEL: shuffle_v8i32_70000000:
963 ; AVX2-NEXT: movl $7, %eax
964 ; AVX2-NEXT: vmovd %eax, %xmm1
965 ; AVX2-NEXT: vxorps %ymm2, %ymm2, %ymm2
966 ; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
967 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
969 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
970 ret <8 x i32> %shuffle
; Repeats the low element pair of each 128-bit lane of %a (0,1,0,1 | 4,5,4,5).
; The AVX run expects a 64-bit-granular vmovddup; the AVX2 run expects vpshufd.
973 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
974 ; AVX1-LABEL: shuffle_v8i32_01014545:
976 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
979 ; AVX2-LABEL: shuffle_v8i32_01014545:
981 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
983 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
984 ret <8 x i32> %shuffle
; Duplicates each of the low four elements of %a into adjacent result lanes, so
; elements 2 and 3 move into the upper 128-bit half. The AVX run expects two
; 128-bit vpermilps joined by vinsertf128; the AVX2 run expects an index
; constant feeding vpermd.
987 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
988 ; AVX1-LABEL: shuffle_v8i32_00112233:
990 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
991 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
992 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
995 ; AVX2-LABEL: shuffle_v8i32_00112233:
997 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
998 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1000 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
1001 ret <8 x i32> %shuffle
; Low result half is a splat of %a element 0, high result half a splat of %a
; element 1. The AVX run expects two 128-bit vpermilps joined by vinsertf128;
; the AVX2 run expects an index constant feeding vpermd.
1004 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
1005 ; AVX1-LABEL: shuffle_v8i32_00001111:
1007 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
1008 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1009 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1012 ; AVX2-LABEL: shuffle_v8i32_00001111:
1014 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
1015 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1017 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
1018 ret <8 x i32> %shuffle
; Pure blend with no element movement: even result lanes come from %b (mask
; indices 8,10,12,14) and odd lanes from %a. The AVX run expects a single
; vblendps; the AVX2 run expects the integer form vpblendd.
1021 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
1022 ; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
1024 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1027 ; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
1029 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1031 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
1032 ret <8 x i32> %shuffle
; Alternates %a element 0 and %b element 0 across all eight result lanes.
; The AVX run expects each input to be splatted in 128 bits, widened with
; vinsertf128 and merged by vblendps; the AVX2 run expects vpbroadcastd of %b,
; vpbroadcastq of %a and a vpblendd.
1035 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
1036 ; AVX1-LABEL: shuffle_v8i32_08080808:
1038 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
1039 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
1040 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1041 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1042 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1045 ; AVX2-LABEL: shuffle_v8i32_08080808:
1047 ; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
1048 ; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
1049 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1051 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
1052 ret <8 x i32> %shuffle
; Within each 128-bit lane, alternates the lane's first element of %a with the
; lane's first element of %b (a0,b0,a0,b0 | a4,b4,a4,b4). The AVX run expects
; two vshufps; the AVX2 run expects two vpshufd merged by vpblendd.
1055 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
1056 ; AVX1-LABEL: shuffle_v8i32_08084c4c:
1058 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
1059 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1062 ; AVX2-LABEL: shuffle_v8i32_08084c4c:
1064 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
1065 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1066 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1068 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
1069 ret <8 x i32> %shuffle
; Per 128-bit lane: the lane's first element of %b twice, then elements 2 and 3
; of %a's lane (b0,b0,a2,a3 | b4,b4,a6,a7). The AVX run expects one two-source
; vshufps; the AVX2 run expects vpshufd on %b followed by vpblendd.
1072 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
1073 ; AVX1-LABEL: shuffle_v8i32_8823cc67:
1075 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
1078 ; AVX2-LABEL: shuffle_v8i32_8823cc67:
1080 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
1081 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1083 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
1084 ret <8 x i32> %shuffle
; Per 128-bit lane: %b's low pair swapped, then %a's high pair swapped
; (b1,b0,a3,a2 | b5,b4,a7,a6). The AVX run expects one two-source vshufps;
; the AVX2 run expects vpblendd followed by a pair-swapping vpshufd.
1087 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
1088 ; AVX1-LABEL: shuffle_v8i32_9832dc76:
1090 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
1093 ; AVX2-LABEL: shuffle_v8i32_9832dc76:
1095 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1096 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1098 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
1099 ret <8 x i32> %shuffle
; Per 128-bit lane: %b's low pair swapped, then %a's low pair swapped
; (b1,b0,a1,a0 | b5,b4,a5,a4). The AVX run expects one two-source vshufps;
; the AVX2 run expects a vpshufd on each input merged by vpblendd.
1102 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
1103 ; AVX1-LABEL: shuffle_v8i32_9810dc54:
1105 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
1108 ; AVX2-LABEL: shuffle_v8i32_9810dc54:
1110 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
1111 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
1112 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1114 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
1115 ret <8 x i32> %shuffle
; Interleaves the low two elements of each 128-bit lane of %a and %b
; (a0,b0,a1,b1 | a4,b4,a5,b5). The AVX run expects vunpcklps; the AVX2 run
; expects the integer form vpunpckldq.
1118 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
1119 ; AVX1-LABEL: shuffle_v8i32_08194c5d:
1121 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1124 ; AVX2-LABEL: shuffle_v8i32_08194c5d:
1126 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1128 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
1129 ret <8 x i32> %shuffle
; Interleaves the high two elements of each 128-bit lane of %a and %b
; (a2,b2,a3,b3 | a6,b6,a7,b7). The AVX run expects vunpckhps; the AVX2 run
; expects the integer form vpunpckhdq.
1132 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
1133 ; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
1135 ; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1138 ; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
1140 ; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1142 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1143 ret <8 x i32> %shuffle
; Full-width interleave of the low four elements of %a and %b
; (a0,b0,a1,b1,a2,b2,a3,b3), which crosses 128-bit lanes. The AVX run expects
; 128-bit vunpckhps/vunpcklps joined by vinsertf128; the AVX2 run expects
; vpermd on %b, vpmovzxdq on %a and a vpblendd.
1146 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
1147 ; AVX1-LABEL: shuffle_v8i32_08192a3b:
1149 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1150 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1151 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1154 ; AVX2-LABEL: shuffle_v8i32_08192a3b:
1156 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
1157 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1158 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1159 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1161 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1162 ret <8 x i32> %shuffle
; Result is a0,b0,b1,b1,a1,b2,b3,b3: only the low four elements of each input
; are used, spread across both result halves. The AVX run expects a 128-bit
; vshufps/vblendpd/vpermilps sequence joined by vinsertf128; the AVX2 run
; expects a vpermd on each input merged by vpblendd.
1165 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
1166 ; AVX1-LABEL: shuffle_v8i32_08991abb:
1168 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
1169 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
1170 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1171 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
1172 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1175 ; AVX2-LABEL: shuffle_v8i32_08991abb:
1177 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1178 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1179 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
1180 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1181 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1183 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
1184 ret <8 x i32> %shuffle
; Even result lanes are %a elements 0..3 spread out; odd result lanes keep %b's
; own odd elements in place (indices 9,11,13,15). The AVX run expects two
; 128-bit vpermilps, vinsertf128 and a vblendps; the AVX2 run expects vpmovzxdq
; on %a followed by vpblendd.
1187 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
1188 ; AVX1-LABEL: shuffle_v8i32_091b2d3f:
1190 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
1191 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
1192 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1193 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1196 ; AVX2-LABEL: shuffle_v8i32_091b2d3f:
1198 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1199 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1201 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
1202 ret <8 x i32> %shuffle
; Result lanes 0 and 4 are %a elements 0 and 1; every other lane keeps %b's
; element in place. The AVX run expects vmovshdup, vinsertf128 and a vblendps;
; the AVX2 run expects a vpermd on %a followed by vpblendd.
1205 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
1206 ; AVX1-LABEL: shuffle_v8i32_09ab1def:
1208 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
1209 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1210 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1213 ; AVX2-LABEL: shuffle_v8i32_09ab1def:
1215 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1216 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1217 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1219 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
1220 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [0,0,0,1] to both
; 128-bit lanes of %a. The AVX run expects one vpermilps; the AVX2 run expects
; one vpshufd.
1223 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
1224 ; AVX1-LABEL: shuffle_v8i32_00014445:
1226 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1229 ; AVX2-LABEL: shuffle_v8i32_00014445:
1231 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1233 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
1234 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [0,0,2,0] to both
; 128-bit lanes of %a. The AVX run expects one vpermilps; the AVX2 run expects
; one vpshufd.
1237 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
1238 ; AVX1-LABEL: shuffle_v8i32_00204464:
1240 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1243 ; AVX2-LABEL: shuffle_v8i32_00204464:
1245 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1247 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
1248 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [0,3,0,0] to both
; 128-bit lanes of %a. The AVX run expects one vpermilps; the AVX2 run expects
; one vpshufd.
1251 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
1252 ; AVX1-LABEL: shuffle_v8i32_03004744:
1254 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1257 ; AVX2-LABEL: shuffle_v8i32_03004744:
1259 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1261 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
1262 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [1,0,0,0] to both
; 128-bit lanes of %a. The AVX run expects one vpermilps; the AVX2 run expects
; one vpshufd.
1265 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
1266 ; AVX1-LABEL: shuffle_v8i32_10005444:
1268 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1271 ; AVX2-LABEL: shuffle_v8i32_10005444:
1273 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1275 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
1276 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [2,2,0,0] to both
; 128-bit lanes of %a. The AVX run expects one vpermilps; the AVX2 run expects
; one vpshufd.
1279 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
1280 ; AVX1-LABEL: shuffle_v8i32_22006644:
1282 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1285 ; AVX2-LABEL: shuffle_v8i32_22006644:
1287 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1289 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
1290 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [3,3,3,0] to both
; 128-bit lanes of %a. The AVX run expects one vpermilps; the AVX2 run expects
; one vpshufd.
1293 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
1294 ; AVX1-LABEL: shuffle_v8i32_33307774:
1296 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1299 ; AVX2-LABEL: shuffle_v8i32_33307774:
1301 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1303 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
1304 ret <8 x i32> %shuffle
; Reverses the four elements inside each 128-bit lane of %a (pattern [3,2,1,0]
; in both lanes). The AVX run expects one vpermilps; the AVX2 run expects one
; vpshufd.
1307 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
1308 ; AVX1-LABEL: shuffle_v8i32_32107654:
1310 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1313 ; AVX2-LABEL: shuffle_v8i32_32107654:
1315 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1317 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1318 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [0,0,2,3] to both
; 128-bit lanes of %a. The AVX run expects one vpermilps; the AVX2 run expects
; one vpshufd.
1321 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
1322 ; AVX1-LABEL: shuffle_v8i32_00234467:
1324 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1327 ; AVX2-LABEL: shuffle_v8i32_00234467:
1329 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1331 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
1332 ret <8 x i32> %shuffle
; Duplicates the even elements of %a into the following odd lanes (pattern
; [0,0,2,2] in both 128-bit lanes). The AVX run expects vmovsldup; the AVX2 run
; expects vpshufd.
1335 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
1336 ; AVX1-LABEL: shuffle_v8i32_00224466:
1338 ; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1341 ; AVX2-LABEL: shuffle_v8i32_00224466:
1343 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1345 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1346 ret <8 x i32> %shuffle
; Swaps every adjacent element pair of %a (pattern [1,0,3,2] in both 128-bit
; lanes). The AVX run expects one vpermilps; the AVX2 run expects one vpshufd.
1349 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
1350 ; AVX1-LABEL: shuffle_v8i32_10325476:
1352 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1355 ; AVX2-LABEL: shuffle_v8i32_10325476:
1357 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1359 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
1360 ret <8 x i32> %shuffle
; Duplicates the odd elements of %a into the preceding even lanes (pattern
; [1,1,3,3] in both 128-bit lanes). The AVX run expects vmovshdup; the AVX2 run
; expects vpshufd.
1363 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
1364 ; AVX1-LABEL: shuffle_v8i32_11335577:
1366 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1369 ; AVX2-LABEL: shuffle_v8i32_11335577:
1371 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1373 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
1374 ret <8 x i32> %shuffle
; Swaps only the first element pair of each 128-bit lane of %a (pattern
; [1,0,2,3] in both lanes). The AVX run expects one vpermilps; the AVX2 run
; expects one vpshufd.
1377 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
1378 ; AVX1-LABEL: shuffle_v8i32_10235467:
1380 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1383 ; AVX2-LABEL: shuffle_v8i32_10235467:
1385 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1387 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1388 ret <8 x i32> %shuffle
; Single-input shuffle applying the same in-lane pattern [1,0,2,2] to both
; 128-bit lanes of %a. The AVX run expects one vpermilps; the AVX2 run expects
; one vpshufd.
1391 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
1392 ; AVX1-LABEL: shuffle_v8i32_10225466:
1394 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1397 ; AVX2-LABEL: shuffle_v8i32_10225466:
1399 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1401 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
1402 ret <8 x i32> %shuffle
; Single-input shuffle where no element crosses a 128-bit lane but the two
; lanes use different patterns. The AVX run expects one vpermilps; the AVX2 run
; expects an index constant feeding vpermd.
1405 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
1406 ; AVX1-LABEL: shuffle_v8i32_00015444:
1408 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
1411 ; AVX2-LABEL: shuffle_v8i32_00015444:
1413 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
1414 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1416 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
1417 ret <8 x i32> %shuffle
; Single-input shuffle where no element crosses a 128-bit lane but the two
; lanes use different patterns. The AVX run expects one vpermilps; the AVX2 run
; expects an index constant feeding vpermd.
1420 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
1421 ; AVX1-LABEL: shuffle_v8i32_00204644:
1423 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
1426 ; AVX2-LABEL: shuffle_v8i32_00204644:
1428 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
1429 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1431 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
1432 ret <8 x i32> %shuffle
; Single-input shuffle where no element crosses a 128-bit lane but the two
; lanes use different patterns. The AVX run expects one vpermilps; the AVX2 run
; expects an index constant feeding vpermd.
1435 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
1436 ; AVX1-LABEL: shuffle_v8i32_03004474:
1438 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
1441 ; AVX2-LABEL: shuffle_v8i32_03004474:
1443 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
1444 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1446 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
1447 ret <8 x i32> %shuffle
; Single-input shuffle where no element crosses a 128-bit lane but the two
; lanes use different patterns (the high lane is a splat of element 4).
; The AVX run expects one vpermilps; the AVX2 run expects an index constant
; feeding vpermd.
1450 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
1451 ; AVX1-LABEL: shuffle_v8i32_10004444:
1453 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
1456 ; AVX2-LABEL: shuffle_v8i32_10004444:
1458 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
1459 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1461 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1462 ret <8 x i32> %shuffle
; Single-input shuffle where no element crosses a 128-bit lane but the two
; lanes use different patterns. The AVX run expects one vpermilps; the AVX2 run
; expects an index constant feeding vpermd.
1465 define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
1466 ; AVX1-LABEL: shuffle_v8i32_22006446:
1468 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
1471 ; AVX2-LABEL: shuffle_v8i32_22006446:
1473 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
1474 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1476 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
1477 ret <8 x i32> %shuffle
; Single-input shuffle where no element crosses a 128-bit lane but the two
; lanes use different patterns. The AVX run expects one vpermilps; the AVX2 run
; expects an index constant feeding vpermd.
1480 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
1481 ; AVX1-LABEL: shuffle_v8i32_33307474:
1483 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
1486 ; AVX2-LABEL: shuffle_v8i32_33307474:
1488 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
1489 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1491 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
1492 ret <8 x i32> %shuffle
; Reverses the low 128-bit lane of %a and leaves the high lane unchanged.
; The AVX run expects one vpermilps; the AVX2 run expects an index constant
; feeding vpermd.
1495 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
1496 ; AVX1-LABEL: shuffle_v8i32_32104567:
1498 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
1501 ; AVX2-LABEL: shuffle_v8i32_32104567:
1503 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
1504 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1506 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
1507 ret <8 x i32> %shuffle
; Single-input shuffle where no element crosses a 128-bit lane but the two
; lanes use different patterns. The AVX run expects one vpermilps; the AVX2 run
; expects an index constant feeding vpermd.
1510 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
1511 ; AVX1-LABEL: shuffle_v8i32_00236744:
1513 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
1516 ; AVX2-LABEL: shuffle_v8i32_00236744:
1518 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
1519 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1521 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
1522 ret <8 x i32> %shuffle
; Single-input shuffle where no element crosses a 128-bit lane but the two
; lanes use different patterns. The AVX run expects one vpermilps; the AVX2 run
; expects an index constant feeding vpermd.
1525 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
1526 ; AVX1-LABEL: shuffle_v8i32_00226644:
1528 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
1531 ; AVX2-LABEL: shuffle_v8i32_00226644:
1533 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
1534 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1536 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
1537 ret <8 x i32> %shuffle
; Swaps adjacent element pairs in the low 128-bit lane of %a and leaves the
; high lane unchanged. The AVX run expects one vpermilps; the AVX2 run expects
; an index constant feeding vpermd.
1540 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
1541 ; AVX1-LABEL: shuffle_v8i32_10324567:
1543 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
1546 ; AVX2-LABEL: shuffle_v8i32_10324567:
1548 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
1549 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1551 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1552 ret <8 x i32> %shuffle
; Duplicates the odd elements within the low 128-bit lane of %a and leaves the
; high lane unchanged. The AVX run expects one vpermilps; the AVX2 run expects
; an index constant feeding vpermd.
1555 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
1556 ; AVX1-LABEL: shuffle_v8i32_11334567:
1558 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
1561 ; AVX2-LABEL: shuffle_v8i32_11334567:
1563 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
1564 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1566 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
1567 ret <8 x i32> %shuffle
; Leaves the low 128-bit lane of %a unchanged and swaps elements 4 and 5 in the
; high lane. The AVX run expects one vpermilps; the AVX2 run expects an index
; constant feeding vpermd.
1570 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
1571 ; AVX1-LABEL: shuffle_v8i32_01235467:
1573 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
1576 ; AVX2-LABEL: shuffle_v8i32_01235467:
1578 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
1579 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1581 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
1582 ret <8 x i32> %shuffle
; Leaves the low 128-bit lane of %a unchanged and applies 5,4,6,6 to the high
; lane. The AVX run expects one vpermilps; the AVX2 run expects an index
; constant feeding vpermd.
1585 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
1586 ; AVX1-LABEL: shuffle_v8i32_01235466:
1588 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
1591 ; AVX2-LABEL: shuffle_v8i32_01235466:
1593 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
1594 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1596 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
1597 ret <8 x i32> %shuffle
; In-lane single-input shuffle with two undef (u) result lanes. The undef
; entries are expected to stay undef in the vpermilps mask (AVX run) and in the
; vpermd index constant (AVX2 run).
1600 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
1601 ; AVX1-LABEL: shuffle_v8i32_002u6u44:
1603 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
1606 ; AVX2-LABEL: shuffle_v8i32_002u6u44:
1608 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
1609 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1611 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
1612 ret <8 x i32> %shuffle
; In-lane single-input shuffle with four undef (u) result lanes. The undef
; entries are expected to stay undef in the vpermilps mask (AVX run) and in the
; vpermd index constant (AVX2 run).
1615 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
1616 ; AVX1-LABEL: shuffle_v8i32_00uu66uu:
1618 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
1621 ; AVX2-LABEL: shuffle_v8i32_00uu66uu:
1623 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
1624 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1626 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
1627 ret <8 x i32> %shuffle
; In-lane single-input shuffle whose last two result lanes are undef (u).
; The undef entries are expected to stay undef in the vpermilps mask (AVX run)
; and in the vpermd index constant (AVX2 run).
1630 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
1631 ; AVX1-LABEL: shuffle_v8i32_103245uu:
1633 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
1636 ; AVX2-LABEL: shuffle_v8i32_103245uu:
1638 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
1639 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1641 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
1642 ret <8 x i32> %shuffle
; In-lane single-input shuffle whose result lanes 4 and 5 are undef (u).
; The undef entries are expected to stay undef in the vpermilps mask (AVX run)
; and in the vpermd index constant (AVX2 run).
1645 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
1646 ; AVX1-LABEL: shuffle_v8i32_1133uu67:
1648 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
1651 ; AVX2-LABEL: shuffle_v8i32_1133uu67:
1653 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
1654 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1656 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
1657 ret <8 x i32> %shuffle
; In-lane single-input shuffle with four undef (u) result lanes. The undef
; entries are expected to stay undef in the vpermilps mask (AVX run) and in the
; vpermd index constant (AVX2 run).
1660 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
1661 ; AVX1-LABEL: shuffle_v8i32_0uu354uu:
1663 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
1666 ; AVX2-LABEL: shuffle_v8i32_0uu354uu:
1668 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
1669 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1671 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
1672 ret <8 x i32> %shuffle
; Mostly-undef in-lane shuffle: only result lanes 3, 6 and 7 are defined
; (elements 3, 6 and 6 of %a). The undef entries are expected to stay undef in
; the vpermilps mask (AVX run) and in the vpermd index constant (AVX2 run).
1675 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
1676 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
1678 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
1681 ; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
1683 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
1684 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1686 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
1687 ret <8 x i32> %shuffle
; Irregular two-input, lane-crossing shuffle: a6,b4,b2,b2,b0,a7,b6,a5.
; The AVX run expects each input to be permuted separately (vperm2f128/vshufps
; for %b, vextractf128/vpermilpd/vinsertf128 for %a) and merged by vblendps;
; the AVX2 run expects vpermd on %b, vpermq on %a and a vpblendd.
1690 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
1691 ; AVX1-LABEL: shuffle_v8i32_6caa87e5:
1693 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
1694 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
1695 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1696 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1697 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1698 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1701 ; AVX2-LABEL: shuffle_v8i32_6caa87e5:
1703 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u>
1704 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1705 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
1706 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1708 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
1709 ret <8 x i32> %shuffle
; Both result halves are the low four elements of %a in reverse order.
; The AVX run expects a 128-bit vpermilps duplicated with vinsertf128; the AVX2
; run expects an index constant feeding vpermd.
1712 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
1713 ; AVX1-LABEL: shuffle_v8i32_32103210:
1715 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1716 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1719 ; AVX2-LABEL: shuffle_v8i32_32103210:
1721 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
1722 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1724 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
1725 ret <8 x i32> %shuffle
; Both result halves are the high four elements of %a in reverse order.
; The AVX run expects vextractf128 of the upper half, a 128-bit vpermilps and
; vinsertf128; the AVX2 run expects an index constant feeding vpermd.
1728 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
1729 ; AVX1-LABEL: shuffle_v8i32_76547654:
1731 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1732 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1733 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1736 ; AVX2-LABEL: shuffle_v8i32_76547654:
1738 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
1739 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1741 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
1742 ret <8 x i32> %shuffle
; Full reversal of the eight elements of %a. The AVX run expects a vperm2f128
; half swap followed by an in-lane reversing vpermilps; the AVX2 run expects an
; index constant feeding vpermd.
1745 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
1746 ; AVX1-LABEL: shuffle_v8i32_76543210:
1748 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1749 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1752 ; AVX2-LABEL: shuffle_v8i32_76543210:
1754 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
1755 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1757 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1758 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle with mask <3,2,1,0,11,10,9,8>: the result's first four
; elements are %a[3..0] and its last four are %b[3..0] (indices 8-11 select
; from %b). Both configurations are expected to place xmm1 in the upper half of
; ymm0 and then reverse each group of four (vpermilps on AVX1, vpshufd on AVX2).
1761 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
1762 ; AVX1-LABEL: shuffle_v8i32_3210ba98:
1764 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1765 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1768 ; AVX2-LABEL: shuffle_v8i32_3210ba98:
1770 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1771 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1773 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
1774 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle with mask <3,2,1,0,15,14,13,12>: the result's first
; four elements are %a[3..0] and its last four are %b[7..4]. The checks expect a
; blend that keeps the low half of ymm0 and the high half of ymm1, followed by a
; reversal of each group of four (vpermilps on AVX1, vpshufd on AVX2).
1777 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
1778 ; AVX1-LABEL: shuffle_v8i32_3210fedc:
1780 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1781 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1784 ; AVX2-LABEL: shuffle_v8i32_3210fedc:
1786 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1787 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1789 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
1790 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle with mask <7,6,5,4,15,14,13,12>: the result's first
; four elements are %a[7..4] and its last four are %b[7..4]. The checks expect a
; vperm2f128/vperm2i128 that gathers the high halves of ymm0 and ymm1, followed
; by a reversal of each group of four.
1793 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
1794 ; AVX1-LABEL: shuffle_v8i32_7654fedc:
1796 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1797 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1800 ; AVX2-LABEL: shuffle_v8i32_7654fedc:
1802 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1803 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1805 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
1806 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle with mask <15,14,13,12,7,6,5,4>: the result's first
; four elements are %b[7..4] and its last four are %a[7..4]. This is the
; operand-swapped counterpart of the 7654fedc test: the checks expect the high
; halves gathered with ymm1 first, then a reversal of each group of four.
1809 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
1810 ; AVX1-LABEL: shuffle_v8i32_fedc7654:
1812 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1813 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1816 ; AVX2-LABEL: shuffle_v8i32_fedc7654:
1818 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1819 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1821 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
1822 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle with mask <11,10,9,8,7,6,5,4>: the result's first four
; elements are %b[3..0] and its last four are %a[7..4]. The checks expect a
; blend that keeps the low half of ymm1 and the high half of ymm0, followed by a
; reversal of each group of four (vpermilps on AVX1, vpshufd on AVX2).
1825 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
1826 ; AVX1-LABEL: shuffle_v8i32_ba987654:
1828 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
1829 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1832 ; AVX2-LABEL: shuffle_v8i32_ba987654:
1834 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1835 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1837 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
1838 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle with mask <11,10,9,8,3,2,1,0>: the result's first four
; elements are %b[3..0] and its last four are %a[3..0]. This is the
; operand-swapped counterpart of the 3210ba98 test, so the expected code mirrors
; it: xmm0 is inserted into the upper half of ymm1 and each group of four is
; then reversed (vpermilps on AVX1, vpshufd on AVX2).
; NOTE(review): the mask previously duplicated the ba987654 test
; (<11,10,9,8,7,6,5,4>), so the pattern named here was never exercised. The
; expected instructions below were derived by symmetry with 3210ba98; confirm
; them against actual llc output when landing this change.
1841 define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
1842 ; AVX1-LABEL: shuffle_v8i32_ba983210:
1844 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1845 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1848 ; AVX2-LABEL: shuffle_v8i32_ba983210:
1850 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1851 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1853 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
1854 ret <8 x i32> %shuffle
; Shuffle of a zero vector (first operand) with %a (second operand) using mask
; <0,undef,undef,8,0,undef,undef,12>: positions 0 and 4 are zero, positions 3
; and 7 are %a[0] and %a[4], the rest are undef. The AVX2 checks expect this to
; become a single vpslldq; the AVX1 checks expect a zeroed register plus vshufps.
1857 define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
1858 ; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
1860 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1861 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
1864 ; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
1866 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
1868 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
1869 ret <8 x i32> %shuffle
; Shuffle of a zero vector (first operand) with %a (second operand) using mask
; <9,undef,11,0,13,14,15,0>: the result is %a[1], undef, %a[3], zero, %a[5],
; %a[6], %a[7], zero. The AVX2 checks expect this to become a single vpsrldq;
; the AVX1 checks expect a zeroed register plus two vshufps.
1872 define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
1873 ; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
1875 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1876 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
1877 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
1880 ; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
1882 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
1884 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
1885 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle with mask <8,0,undef,1,12,4,undef,undef>: the defined
; positions interleave %b[0],%a[0],(undef),%a[1] and %b[4],%a[4]. Both
; configurations are expected to emit a single low-unpack with ymm1 as the first
; source (vunpcklps on AVX1, vpunpckldq on AVX2).
; NOTE(review): the fifth mask index is 12, although the function name spells
; that position as 'b'; the expected output (ymm1[4]) agrees with the mask.
1888 define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
1889 ; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
1891 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1894 ; AVX2-LABEL: shuffle_v8i32_80u1b4uu:
1896 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1898 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
1899 ret <8 x i32> %shuffle
; Splat of a float loaded from memory: the value at %p is inserted into element
; 0 of a <4 x float> and then broadcast to eight elements with an all-zero
; shuffle mask. Both llc configurations share one expectation (the common check
; prefix): a single vbroadcastss with a memory operand.
1902 define <8 x float> @splat_mem_v8f32_2(float* %p) {
1903 ; ALL-LABEL: splat_mem_v8f32_2:
1905 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0
1907 %1 = load float, float* %p
1908 %2 = insertelement <4 x float> undef, float %1, i32 0
1909 %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
; Splat of element 0 of a <4 x float> register argument to eight elements (an
; all-zero shuffle mask that also widens the vector). The AVX1 checks expect a
; 128-bit vpermilps followed by vinsertf128; the AVX2 checks expect a single
; register-to-register vbroadcastss.
1913 define <8 x float> @splat_v8f32(<4 x float> %r) {
1914 ; AVX1-LABEL: splat_v8f32:
1916 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
1917 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1920 ; AVX2-LABEL: splat_v8f32:
1922 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
1924 %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
1929 ; Shuffle to logical bit shifts
; Shuffle of %a with a zero vector using mask <8,0,undef,2,8,undef,8,6>: the
; result is zero, %a[0], undef, %a[2], zero, undef, zero, %a[6] (index 8 selects
; element 0 of the zero operand). The AVX2 checks expect this to become a single
; vpsllq by 32 bits; the AVX1 checks expect a zeroed register plus two vshufps.
1932 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
1933 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
1935 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1936 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
1937 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
1940 ; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
1942 ; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
1944 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
1945 ret <8 x i32> %shuffle
; Shuffle of %a with a zero vector using mask <1,undef,3,8,5,8,undef,undef>: the
; result is %a[1], undef, %a[3], zero, %a[5], zero, undef, undef (index 8
; selects element 0 of the zero operand). The AVX2 checks expect this to become
; a single vpsrlq by 32 bits; the AVX1 checks expect a zeroed register plus two
; vshufps.
1948 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
1949 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
1951 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1952 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
1953 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1956 ; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
1958 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
1960 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
1961 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle with mask <11,0,1,2,15,4,5,6>: within each group of
; four, the last element of %b is followed by the first three elements of %a.
; The AVX2 checks expect a single vpalignr taking bytes 12-15 of ymm1 then bytes
; 0-11 of ymm0 per 128-bit half; the AVX1 checks expect two vshufps.
1964 define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
1965 ; AVX1-LABEL: shuffle_v8i32_B012F456:
1967 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
1968 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
1971 ; AVX2-LABEL: shuffle_v8i32_B012F456:
1973 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
1975 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
1976 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle with mask <1,2,3,8,5,6,7,12>: within each group of
; four, the last three elements of %a are followed by the first element of %b.
; The AVX2 checks expect a single vpalignr taking bytes 4-15 of ymm0 then bytes
; 0-3 of ymm1 per 128-bit half; the AVX1 checks expect two vshufps.
1979 define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
1980 ; AVX1-LABEL: shuffle_v8i32_1238567C:
1982 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
1983 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
1986 ; AVX2-LABEL: shuffle_v8i32_1238567C:
1988 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
1990 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
1991 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle with mask <9,10,11,0,13,14,15,4>: within each group of
; four, the last three elements of %b are followed by the first element of %a.
; This is the operand-swapped counterpart of the 1238567C test: the AVX2 checks
; expect a single vpalignr with ymm1 first; the AVX1 checks expect two vshufps.
1994 define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
1995 ; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
1997 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
1998 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
2001 ; AVX2-LABEL: shuffle_v8i32_9AB0DEF4:
2003 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
2005 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
2006 ret <8 x i32> %shuffle
; Two-input v8i32 shuffle with mask <3,8,9,10,7,12,13,14>: within each group of
; four, the last element of %a is followed by the first three elements of %b.
; This is the operand-swapped counterpart of the B012F456 test: the AVX2 checks
; expect a single vpalignr with ymm0 first; the AVX1 checks expect two vshufps.
2009 define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
2010 ; AVX1-LABEL: shuffle_v8i32_389A7CDE:
2012 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
2013 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
2016 ; AVX2-LABEL: shuffle_v8i32_389A7CDE:
2018 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
2020 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
2021 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle with mask <3,0,1,2,7,4,5,6>: each group of four
; elements of %a is rotated so its last element comes first; %b is not selected
; by the mask. The checks expect one in-place permute (vpermilps on AVX1,
; vpshufd on AVX2).
2024 define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
2025 ; AVX1-LABEL: shuffle_v8i32_30127456:
2027 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2030 ; AVX2-LABEL: shuffle_v8i32_30127456:
2032 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2034 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
2035 ret <8 x i32> %shuffle
; Single-input v8i32 shuffle with mask <1,2,3,0,5,6,7,4>: each group of four
; elements of %a is rotated so its first element goes last; %b is not selected
; by the mask. The checks expect one in-place permute (vpermilps on AVX1,
; vpshufd on AVX2).
2038 define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
2039 ; AVX1-LABEL: shuffle_v8i32_12305674:
2041 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2044 ; AVX2-LABEL: shuffle_v8i32_12305674:
2046 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2048 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
2049 ret <8 x i32> %shuffle
; Concatenation of two <2 x float> loads into the low four elements of an
; <8 x float>. Each loaded value is first widened to eight elements (upper six
; undef), then the two widened vectors are combined with mask <0,1,8,9,undef...>
; so the result is tmp72[0], tmp72[1], tmp74[0], tmp74[1] followed by undef.
; Both llc configurations share one expectation (the common check prefix): a
; vmovq load followed by vmovhpd from (%rsi).
2052 define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2053 ; ALL-LABEL: concat_v2f32_1:
2054 ; ALL: # BB#0: # %entry
2055 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2056 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
2059 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2060 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2061 %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2062 %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2063 %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
2064 ret <8 x float> %tmp76
; Same concatenation as concat_v2f32_1, expressed as a single shufflevector: the
; two <2 x float> loads are combined directly into an <8 x float> with mask
; <0,1,2,3,undef...>, giving tmp72[0], tmp72[1], tmp74[0], tmp74[1] followed by
; undef. The expected code is the same vmovq + vmovhpd pair for both llc
; configurations.
2067 define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2068 ; ALL-LABEL: concat_v2f32_2:
2069 ; ALL: # BB#0: # %entry
2070 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2071 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
2074 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2075 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2076 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2077 ret <8 x float> %tmp76
; Same concatenation as concat_v2f32_1, expressed in two steps: the two
; <2 x float> loads are first joined into a <4 x float> (mask <0,1,2,3>), and
; that vector is then widened to <8 x float> with the upper four elements undef.
; The expected code is the same vmovq + vmovhpd pair for both llc
; configurations.
2080 define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
2081 ; ALL-LABEL: concat_v2f32_3:
2082 ; ALL: # BB#0: # %entry
2083 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2084 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
2087 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
2088 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
2089 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2090 %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
2091 ret <8 x float> %res
; Loads an i32 from %ptr, inserts it at element 0 of an <8 x i32>, and shuffles
; with a zero vector using mask <0,9,10,11,12,13,14,15> so that elements 1-7 of
; the result come from the zero operand. Both the AVX1 and AVX2 checks expect a
; single vmovd load that yields mem[0] followed by zeros.
2094 define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
2095 ; AVX1-LABEL: insert_mem_and_zero_v8i32:
2097 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2100 ; AVX2-LABEL: insert_mem_and_zero_v8i32:
2102 ; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2104 %a = load i32, i32* %ptr
2105 %v = insertelement <8 x i32> undef, i32 %a, i32 0
2106 %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2107 ret <8 x i32> %shuffle