1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
5 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
6 target triple = "x86_64-unknown-unknown"
; Single-input v4i32 shuffle with mask <0,0,0,1>. Every index is below 4, so
; only %a is read and %b is ignored. Expected result lanes: <a0, a0, a0, a1>.
; The check uses the shared ALL prefix, so every RUN configuration must emit
; one pshufd whose shuffle-decode comment reads xmm0[0,0,0,1].
8 define <4 x i32> @shuffle_v4i32_0001(<4 x i32> %a, <4 x i32> %b) {
9 ; ALL-LABEL: @shuffle_v4i32_0001
10 ; ALL: pshufd {{.*}} # xmm0 = xmm0[0,0,0,1]
12 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
13 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <0,0,2,0>; %b is never selected.
; Expected result lanes: <a0, a0, a2, a0>. Checked for every RUN configuration
; (shared ALL prefix): one pshufd decoded as xmm0[0,0,2,0].
15 define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) {
16 ; ALL-LABEL: @shuffle_v4i32_0020
17 ; ALL: pshufd {{.*}} # xmm0 = xmm0[0,0,2,0]
19 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
20 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <0,1,1,2>; %b is never selected.
; Expected result lanes: <a0, a1, a1, a2>. Checked for every RUN configuration
; (shared ALL prefix): one pshufd decoded as xmm0[0,1,1,2].
22 define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) {
23 ; ALL-LABEL: @shuffle_v4i32_0112
24 ; ALL: pshufd {{.*}} # xmm0 = xmm0[0,1,1,2]
26 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
27 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <0,3,0,0>; %b is never selected.
; Expected result lanes: <a0, a3, a0, a0>. Checked for every RUN configuration
; (shared ALL prefix): one pshufd decoded as xmm0[0,3,0,0].
29 define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) {
30 ; ALL-LABEL: @shuffle_v4i32_0300
31 ; ALL: pshufd {{.*}} # xmm0 = xmm0[0,3,0,0]
33 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
34 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <1,0,0,0>; %b is never selected.
; Expected result lanes: <a1, a0, a0, a0>. Checked for every RUN configuration
; (shared ALL prefix): one pshufd decoded as xmm0[1,0,0,0].
36 define <4 x i32> @shuffle_v4i32_1000(<4 x i32> %a, <4 x i32> %b) {
37 ; ALL-LABEL: @shuffle_v4i32_1000
38 ; ALL: pshufd {{.*}} # xmm0 = xmm0[1,0,0,0]
40 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
41 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <2,2,0,0>; %b is never selected.
; Expected result lanes: <a2, a2, a0, a0>. Checked for every RUN configuration
; (shared ALL prefix): one pshufd decoded as xmm0[2,2,0,0].
43 define <4 x i32> @shuffle_v4i32_2200(<4 x i32> %a, <4 x i32> %b) {
44 ; ALL-LABEL: @shuffle_v4i32_2200
45 ; ALL: pshufd {{.*}} # xmm0 = xmm0[2,2,0,0]
47 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
48 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <3,3,3,0>; %b is never selected.
; Expected result lanes: <a3, a3, a3, a0>. Checked for every RUN configuration
; (shared ALL prefix): one pshufd decoded as xmm0[3,3,3,0].
50 define <4 x i32> @shuffle_v4i32_3330(<4 x i32> %a, <4 x i32> %b) {
51 ; ALL-LABEL: @shuffle_v4i32_3330
52 ; ALL: pshufd {{.*}} # xmm0 = xmm0[3,3,3,0]
54 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
55 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <3,2,1,0>, i.e. a full lane reversal of
; %a; %b is never selected. Expected result lanes: <a3, a2, a1, a0>. Checked
; for every RUN configuration (shared ALL prefix): one pshufd decoded as
; xmm0[3,2,1,0].
57 define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) {
58 ; ALL-LABEL: @shuffle_v4i32_3210
59 ; ALL: pshufd {{.*}} # xmm0 = xmm0[3,2,1,0]
61 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
62 ret <4 x i32> %shuffle
; Single-input v4i32 shuffle with mask <2,1,2,1>; %b is never selected.
; Expected result lanes: <a2, a1, a2, a1>. Checked for every RUN configuration
; (shared ALL prefix): one pshufd decoded as xmm0[2,1,2,1].
65 define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) {
66 ; ALL-LABEL: @shuffle_v4i32_2121
67 ; ALL: pshufd {{.*}} # xmm0 = xmm0[2,1,2,1]
69 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
70 ret <4 x i32> %shuffle
; Single-input v4f32 shuffle with mask <0,0,0,1>; %b is never selected.
; Expected result lanes: <a0, a0, a0, a1>. Checked for every RUN configuration
; (shared ALL prefix): one shufps with a single-source decode xmm0[0,0,0,1].
; The pattern is a substring match, so a "vshufps" mnemonic also satisfies it.
73 define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
74 ; ALL-LABEL: @shuffle_v4f32_0001
75 ; ALL: shufps {{.*}} # xmm0 = xmm0[0,0,0,1]
77 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
78 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <0,0,2,0>; %b is never selected.
; Expected result lanes: <a0, a0, a2, a0>. Checked for every RUN configuration
; (shared ALL prefix): one shufps with a single-source decode xmm0[0,0,2,0].
80 define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
81 ; ALL-LABEL: @shuffle_v4f32_0020
82 ; ALL: shufps {{.*}} # xmm0 = xmm0[0,0,2,0]
84 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
85 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <0,3,0,0>; %b is never selected.
; Expected result lanes: <a0, a3, a0, a0>. Checked for every RUN configuration
; (shared ALL prefix): one shufps with a single-source decode xmm0[0,3,0,0].
87 define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
88 ; ALL-LABEL: @shuffle_v4f32_0300
89 ; ALL: shufps {{.*}} # xmm0 = xmm0[0,3,0,0]
91 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
92 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <1,0,0,0>; %b is never selected.
; Expected result lanes: <a1, a0, a0, a0>. Checked for every RUN configuration
; (shared ALL prefix): one shufps with a single-source decode xmm0[1,0,0,0].
94 define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
95 ; ALL-LABEL: @shuffle_v4f32_1000
96 ; ALL: shufps {{.*}} # xmm0 = xmm0[1,0,0,0]
98 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
99 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <2,2,0,0>; %b is never selected.
; Expected result lanes: <a2, a2, a0, a0>. Checked for every RUN configuration
; (shared ALL prefix): one shufps with a single-source decode xmm0[2,2,0,0].
101 define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
102 ; ALL-LABEL: @shuffle_v4f32_2200
103 ; ALL: shufps {{.*}} # xmm0 = xmm0[2,2,0,0]
105 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
106 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <3,3,3,0>; %b is never selected.
; Expected result lanes: <a3, a3, a3, a0>. Checked for every RUN configuration
; (shared ALL prefix): one shufps with a single-source decode xmm0[3,3,3,0].
108 define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
109 ; ALL-LABEL: @shuffle_v4f32_3330
110 ; ALL: shufps {{.*}} # xmm0 = xmm0[3,3,3,0]
112 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
113 ret <4 x float> %shuffle
; Single-input v4f32 shuffle with mask <3,2,1,0>, i.e. a full lane reversal of
; %a; %b is never selected. Expected result lanes: <a3, a2, a1, a0>. Checked
; for every RUN configuration (shared ALL prefix): one shufps with a
; single-source decode xmm0[3,2,1,0].
115 define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
116 ; ALL-LABEL: @shuffle_v4f32_3210
117 ; ALL: shufps {{.*}} # xmm0 = xmm0[3,2,1,0]
119 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
120 ret <4 x float> %shuffle
; Two-input v4i32 shuffle with mask <0,1,2,4>: lanes 0-2 come from %a and
; lane 3 is lane 0 of %b (index 4). Expected result lanes: <a0, a1, a2, b0>.
; Expectations differ per configuration:
;   - SSE2 prefix: two shufps. The first builds xmm1 = <b0, b0, a2, a0>, the
;     second combines xmm0[0,1] with xmm1[2,0] to form the result.
;   - SSE4.1 and AVX1 prefixes: a single insertps / vinsertps that drops
;     xmm1[0] into lane 3.
123 define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) {
124 ; SSE2-LABEL: @shuffle_v4i32_0124
125 ; SSE2: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
126 ; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[2,0]
129 ; SSE41-LABEL: @shuffle_v4i32_0124
130 ; SSE41: insertps {{.*}} # xmm0 = xmm0[0,1,2],xmm1[0]
133 ; AVX1-LABEL: @shuffle_v4i32_0124
134 ; AVX1: vinsertps {{.*}} # xmm0 = xmm0[0,1,2],xmm1[0]
136 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
137 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <0,1,4,2>: lane 2 is lane 0 of %b (index
; 4), the rest come from %a. Expected result lanes: <a0, a1, b0, a2>.
; Checked for every RUN configuration (shared ALL prefix) as two consecutive
; shufps: the first builds xmm1 = <b0, b0, a2, a0>, the second combines
; xmm0[0,1] with xmm1[0,2].
139 define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
140 ; ALL-LABEL: @shuffle_v4i32_0142
141 ; ALL: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
142 ; ALL-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,2]
144 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
145 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <0,4,1,2>: lane 1 is lane 0 of %b (index
; 4), the rest come from %a. Expected result lanes: <a0, b0, a1, a2>.
;   - SSE2 prefix: two shufps building the result in xmm1 (first
;     <b0, b0, a0, a0>, then xmm1[2,0],xmm0[1,2]), followed by a movaps that
;     copies xmm1 into the return register xmm0.
;   - AVX1 prefix: the same two shuffles as vshufps; the second one writes
;     xmm0 directly, so no extra move is expected.
; NOTE(review): no checks under the SSE4.1 prefix are visible for this
; function - confirm whether that gap is intentional.
147 define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) {
148 ; SSE2-LABEL: @shuffle_v4i32_0412
149 ; SSE2: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0]
150 ; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[2,0],xmm0[1,2]
151 ; SSE2-NEXT: movaps %xmm1, %xmm0
154 ; AVX1-LABEL: @shuffle_v4i32_0412
155 ; AVX1: vshufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0]
156 ; AVX1-NEXT: vshufps {{.*}} # xmm0 = xmm1[2,0],xmm0[1,2]
158 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
159 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <4,0,1,2>: lane 0 is lane 0 of %b (index
; 4), followed by the first three lanes of %a. Expected result lanes:
; <b0, a0, a1, a2>.
;   - SSE2 prefix: two shufps building the result in xmm1 (first
;     <b0, b0, a0, a0>, then xmm1[0,2],xmm0[1,2]), followed by a movaps that
;     copies xmm1 into the return register xmm0.
;   - AVX1 prefix: the same two shuffles as vshufps; the second one writes
;     xmm0 directly, so no extra move is expected.
; NOTE(review): no checks under the SSE4.1 prefix are visible for this
; function - confirm whether that gap is intentional.
161 define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) {
162 ; SSE2-LABEL: @shuffle_v4i32_4012
163 ; SSE2: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0]
164 ; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2]
165 ; SSE2-NEXT: movaps %xmm1, %xmm0
168 ; AVX1-LABEL: @shuffle_v4i32_4012
169 ; AVX1: vshufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0]
170 ; AVX1-NEXT: vshufps {{.*}} # xmm0 = xmm1[0,2],xmm0[1,2]
172 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
173 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <0,1,4,5>: the low 64-bit half of %a
; followed by the low 64-bit half of %b. Expected result lanes:
; <a0, a1, b0, b1>. Checked for every RUN configuration (shared ALL prefix) as
; a single punpcklqdq; its decode comment is in 64-bit element units, hence
; xmm0[0],xmm1[0].
175 define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
176 ; ALL-LABEL: @shuffle_v4i32_0145
177 ; ALL: punpcklqdq {{.*}} # xmm0 = xmm0[0],xmm1[0]
179 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
180 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <0,4,5,1>. Expected result lanes:
; <a0, b0, b1, a1>. Checked for every RUN configuration (shared ALL prefix) as
; two consecutive shufps: the first gathers <a0, a1, b0, b1> into xmm0, the
; second permutes that in place with xmm0[0,2,3,1].
182 define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
183 ; ALL-LABEL: @shuffle_v4i32_0451
184 ; ALL: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,1]
185 ; ALL-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,3,1]
187 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
188 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <4,5,0,1>: the low 64-bit half of %b
; followed by the low 64-bit half of %a. Expected result lanes:
; <b0, b1, a0, a1>.
;   - SSE2 prefix: punpcklqdq accumulating into xmm1, then a movdqa copying
;     xmm1 into the return register xmm0.
;   - AVX1 prefix: one punpcklqdq-family instruction writing xmm0 directly.
;     The pattern is a substring match, so "vpunpcklqdq" satisfies it.
; NOTE(review): no checks under the SSE4.1 prefix are visible for this
; function - confirm whether that gap is intentional.
190 define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
191 ; SSE2-LABEL: @shuffle_v4i32_4501
192 ; SSE2: punpcklqdq {{.*}} # xmm1 = xmm1[0],xmm0[0]
193 ; SSE2-NEXT: movdqa %xmm1, %xmm0
196 ; AVX1-LABEL: @shuffle_v4i32_4501
197 ; AVX1: punpcklqdq {{.*}} # xmm0 = xmm1[0],xmm0[0]
199 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
200 ret <4 x i32> %shuffle
; Two-input v4i32 shuffle with mask <4,0,1,5>. Expected result lanes:
; <b0, a0, a1, b1>. Checked for every RUN configuration (shared ALL prefix) as
; two consecutive shufps: the first gathers <a0, a1, b0, b1> into xmm0, the
; second permutes that in place with xmm0[2,0,1,3].
202 define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) {
203 ; ALL-LABEL: @shuffle_v4i32_4015
204 ; ALL: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,1]
205 ; ALL-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0,1,3]
207 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
208 ret <4 x i32> %shuffle
; Insert lane 0 of %a into lane 0 of an otherwise-zero v4f32. The first
; shuffle operand is zeroinitializer, so mask indices 0-3 select zero lanes and
; index 4 selects a0. Expected result lanes: <a0, 0, 0, 0>.
;   - SSE2 prefix: xorps zeroes a scratch register (captured as FileCheck
;     variable X), then two shufps merge it with xmm0.
;   - SSE4.1 and AVX1 prefixes: a single insertps / vinsertps that keeps
;     xmm0[0] and zeroes the remaining lanes.
211 define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
212 ; SSE2-LABEL: @shuffle_v4f32_4zzz
213 ; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]]
214 ; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][1,0]
215 ; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],[[X]][2,3]
218 ; SSE41-LABEL: @shuffle_v4f32_4zzz
219 ; SSE41: insertps {{.*}} # xmm0 = xmm0[0],zero,zero,zero
222 ; AVX1-LABEL: @shuffle_v4f32_4zzz
223 ; AVX1: vinsertps {{.*}} # xmm0 = xmm0[0],zero,zero,zero
225 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
226 ret <4 x float> %shuffle
; Insert lane 0 of %a into lane 1 of an otherwise-zero v4f32. The first
; shuffle operand is zeroinitializer, so mask indices 2, 3 and 0 all select
; zero and index 4 selects a0. Expected result lanes: <0, a0, 0, 0>.
;   - SSE2 prefix: xorps zeroes a scratch register (FileCheck variable X),
;     then two shufps merge it with xmm0.
;   - SSE4.1 and AVX1 prefixes: a single insertps / vinsertps placing xmm0[0]
;     in lane 1 and zeroing the remaining lanes.
229 define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
230 ; SSE2-LABEL: @shuffle_v4f32_z4zz
231 ; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]]
232 ; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][2,0]
233 ; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][3,0]
236 ; SSE41-LABEL: @shuffle_v4f32_z4zz
237 ; SSE41: insertps {{.*}} # xmm0 = zero,xmm0[0],zero,zero
240 ; AVX1-LABEL: @shuffle_v4f32_z4zz
241 ; AVX1: vinsertps {{.*}} # xmm0 = zero,xmm0[0],zero,zero
243 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
244 ret <4 x float> %shuffle
; Insert lane 0 of %a into lane 2 of an otherwise-zero v4f32. The first
; shuffle operand is zeroinitializer, so mask index 0 selects zero and index 4
; selects a0. Expected result lanes: <0, 0, a0, 0>.
;   - SSE2 prefix: xorps zeroes a scratch register (FileCheck variable X),
;     two shufps build the result in X, and a movaps copies X into xmm0.
;   - SSE4.1 and AVX1 prefixes: a single insertps / vinsertps placing xmm0[0]
;     in lane 2 and zeroing the remaining lanes.
247 define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
248 ; SSE2-LABEL: @shuffle_v4f32_zz4z
249 ; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]]
250 ; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][0,0]
251 ; SSE2-NEXT: shufps {{.*}} # [[X]] = [[X]][0,0],xmm0[0,2]
252 ; SSE2-NEXT: movaps %[[X]], %xmm0
255 ; SSE41-LABEL: @shuffle_v4f32_zz4z
256 ; SSE41: insertps {{.*}} # xmm0 = zero,zero,xmm0[0],zero
259 ; AVX1-LABEL: @shuffle_v4f32_zz4z
260 ; AVX1: vinsertps {{.*}} # xmm0 = zero,zero,xmm0[0],zero
262 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
263 ret <4 x float> %shuffle
; Zero in lane 0, undef in lanes 1 and 2, and lane 0 of %a (mask index 4) in
; lane 3. Required result lanes: <0, undef, undef, a0>. Because the middle
; lanes are undef, any value is acceptable there.
;   - SSE2 prefix: xorps zeroes a scratch register (FileCheck variable X), a
;     single shufps fills its upper half from xmm0[2,0], and a movaps copies X
;     into xmm0.
;   - SSE4.1 and AVX1 prefixes: a single insertps / vinsertps placing xmm0[0]
;     in lane 3; the expected decode shows the other lanes as zero.
266 define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) {
267 ; SSE2-LABEL: @shuffle_v4f32_zuu4
268 ; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]]
269 ; SSE2-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0]
270 ; SSE2-NEXT: movaps %[[X]], %xmm0
273 ; SSE41-LABEL: @shuffle_v4f32_zuu4
274 ; SSE41: insertps {{.*}} # xmm0 = zero,zero,zero,xmm0[0]
277 ; AVX1-LABEL: @shuffle_v4f32_zuu4
278 ; AVX1: vinsertps {{.*}} # xmm0 = zero,zero,zero,xmm0[0]
280 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
281 ret <4 x float> %shuffle
; Keep lane 3 of %a in place (mask index 7) and zero the other three lanes.
; The first shuffle operand is zeroinitializer, so indices 0-2 select zero.
; Expected result lanes: <0, 0, 0, a3>.
;   - SSE2 prefix: xorps zeroes a scratch register (FileCheck variable X),
;     the first shufps moves a3 next to zeros in xmm0, the second builds the
;     result in X, and a movaps copies X into xmm0.
;   - SSE4.1 and AVX1 prefixes: a single insertps / vinsertps whose decode
;     keeps xmm0[3] in lane 3 and zeroes the remaining lanes.
284 define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
285 ; SSE2-LABEL: @shuffle_v4f32_zzz7
286 ; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]]
287 ; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],[[X]][2,0]
288 ; SSE2-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0]
289 ; SSE2-NEXT: movaps %[[X]], %xmm0
292 ; SSE41-LABEL: @shuffle_v4f32_zzz7
293 ; SSE41: insertps {{.*}} # xmm0 = zero,zero,zero,xmm0[3]
296 ; AVX1-LABEL: @shuffle_v4f32_zzz7
297 ; AVX1: vinsertps {{.*}} # xmm0 = zero,zero,zero,xmm0[3]
299 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
300 ret <4 x float> %shuffle
; Move lane 2 of %a (mask index 6) into lane 1 of an otherwise-zero v4f32.
; The first shuffle operand is zeroinitializer, so indices 0, 2 and 3 select
; zero. Expected result lanes: <0, a2, 0, 0>.
;   - SSE2 prefix: xorps zeroes a scratch register (FileCheck variable X),
;     then two shufps merge it with xmm0.
;   - SSE4.1 and AVX1 prefixes: a single insertps / vinsertps placing xmm0[2]
;     in lane 1 and zeroing the remaining lanes.
303 define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
304 ; SSE2-LABEL: @shuffle_v4f32_z6zz
305 ; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]]
306 ; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][0,0]
307 ; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][2,3]
310 ; SSE41-LABEL: @shuffle_v4f32_z6zz
311 ; SSE41: insertps {{.*}} # xmm0 = zero,xmm0[2],zero,zero
314 ; AVX1-LABEL: @shuffle_v4f32_z6zz
315 ; AVX1: vinsertps {{.*}} # xmm0 = zero,xmm0[2],zero,zero
317 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
318 ret <4 x float> %shuffle
; Scalar-to-vector with zeroed upper lanes. %i is inserted into lane 0 of an
; undef vector, and the shuffle takes that lane (mask index 4) plus lanes 1-3
; of zeroinitializer. Expected result lanes: <i, 0, 0, 0>. Checked for every
; RUN configuration (shared ALL prefix): the whole sequence is expected to
; collapse to one movd into xmm0.
321 define <4 x i32> @shuffle_v4i32_4zzz(i32 %i) {
322 ; ALL-LABEL: @shuffle_v4i32_4zzz
323 ; ALL: movd {{.*}}, %xmm0
325 %a = insertelement <4 x i32> undef, i32 %i, i32 0
326 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
327 ret <4 x i32> %shuffle
; Place scalar %i in lane 1 of an otherwise-zero v4i32. %i is inserted into
; lane 0 of an undef vector; mask index 4 selects it, all other indices select
; zero lanes. Expected result lanes: <0, i, 0, 0>. Checked for every RUN
; configuration (shared ALL prefix): movd into xmm0, then a pshufd
; xmm0[1,0,1,1] that moves lane 0 to lane 1 and fills the rest from lane 1.
330 define <4 x i32> @shuffle_v4i32_z4zz(i32 %i) {
331 ; ALL-LABEL: @shuffle_v4i32_z4zz
332 ; ALL: movd {{.*}}, %xmm0
333 ; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,0,1,1]
335 %a = insertelement <4 x i32> undef, i32 %i, i32 0
336 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
337 ret <4 x i32> %shuffle
; Place scalar %i in lane 2 of an otherwise-zero v4i32. %i is inserted into
; lane 0 of an undef vector; mask index 4 selects it, index 0 selects a zero
; lane. Expected result lanes: <0, 0, i, 0>. Checked for every RUN
; configuration (shared ALL prefix): movd into xmm0, then a pshufd
; xmm0[1,1,0,1] that moves lane 0 to lane 2 and fills the rest from lane 1.
340 define <4 x i32> @shuffle_v4i32_zz4z(i32 %i) {
341 ; ALL-LABEL: @shuffle_v4i32_zz4z
342 ; ALL: movd {{.*}}, %xmm0
343 ; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,1,0,1]
345 %a = insertelement <4 x i32> undef, i32 %i, i32 0
346 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
347 ret <4 x i32> %shuffle
; Zero in lane 0, undef in lanes 1 and 2, scalar %i in lane 3. %i is inserted
; into lane 0 of an undef vector and selected by mask index 4. Required result
; lanes: <0, undef, undef, i>. Checked for every RUN configuration (shared ALL
; prefix): movd into xmm0, then a pshufd xmm0[1,1,1,0] that moves lane 0 to
; lane 3 and fills the rest from lane 1.
350 define <4 x i32> @shuffle_v4i32_zuu4(i32 %i) {
351 ; ALL-LABEL: @shuffle_v4i32_zuu4
352 ; ALL: movd {{.*}}, %xmm0
353 ; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,1,1,0]
355 %a = insertelement <4 x i32> undef, i32 %i, i32 0
356 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
357 ret <4 x i32> %shuffle
; Place scalar %i in lane 1 of an otherwise-zero v4i32, starting from a
; different source lane than shuffle_v4i32_z4zz. Here %i is inserted into lane
; 2 of an undef vector and the mask selects it with index 6 (lane 2 of the
; second operand). Expected result lanes: <0, i, 0, 0>. Checked for every RUN
; configuration (shared ALL prefix): the expected code is still movd into lane
; 0 of xmm0 followed by pshufd xmm0[1,0,1,1]; the insert position in the
; undef vector does not show up in the expected output.
360 define <4 x i32> @shuffle_v4i32_z6zz(i32 %i) {
361 ; ALL-LABEL: @shuffle_v4i32_z6zz
362 ; ALL: movd {{.*}}, %xmm0
363 ; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,0,1,1]
365 %a = insertelement <4 x i32> undef, i32 %i, i32 2
366 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
367 ret <4 x i32> %shuffle