1 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
3 define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
4 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
5 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
13 define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
14 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
15 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
23 define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
24 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
25 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
33 define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
34 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
35 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
38 ; FIXME: this should be lowered as a single movhlps. However, the backend
39 ; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we
40 ; end up with the sub-optimal sequence 'shufps, palignr'.
47 define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
48 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
49 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
58 define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
59 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
60 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
68 define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
69 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
70 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
78 define <4 x i32> @test8(<4 x i32> %a, <4 x i32> %b) {
79 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
80 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
88 define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
89 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
90 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
93 ; FIXME: this should be lowered as a single movhlps. However, the backend
94 ; wrongly thinks that shuffle mask [6,7,2,3] is not legal.
101 define <4 x i32> @test10(<4 x i32> %a, <4 x i32> %b) {
102 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
103 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
106 ; CHECK-LABEL: test10
111 define <4 x float> @test11(<4 x float> %a, <4 x float> %b) {
112 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
113 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
116 ; CHECK-LABEL: test11
122 define <4 x float> @test12(<4 x float> %a, <4 x float> %b) {
123 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
124 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
127 ; CHECK-LABEL: test12
132 define <4 x float> @test13(<4 x float> %a, <4 x float> %b) {
133 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
134 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
137 ; CHECK-LABEL: test13
142 define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
143 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
144 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
147 ; FIXME: this should be lowered as a single movhlps. However, the backend
148 ; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we
149 ; end up with the sub-optimal sequence 'pshufd, blendps'.
150 ; CHECK-LABEL: test14
156 define <4 x float> @test15(<4 x float> %a, <4 x float> %b) {
157 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
158 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
161 ; CHECK-LABEL: test15
166 define <4 x i32> @test16(<4 x i32> %a, <4 x i32> %b) {
167 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
168 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
171 ; CHECK-LABEL: test16
177 define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
178 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
179 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
182 ; CHECK-LABEL: test17
187 define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
188 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
189 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
192 ; CHECK-LABEL: test18
197 define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
198 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
199 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
202 ; FIXME: this should be lowered as a single movhlps. However, the backend
203 ; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we
204 ; end up with the sub-optimal sequence 'shufps, palignr'.
205 ; CHECK-LABEL: test19
211 define <4 x i32> @test20(<4 x i32> %a, <4 x i32> %b) {
212 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
213 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
216 ; CHECK-LABEL: test20
221 ; Check some negative cases.
222 define <4 x float> @test1b(<4 x float> %a, <4 x float> %b) {
223 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
224 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 0>
227 ; CHECK-LABEL: test1b
232 define <4 x float> @test2b(<4 x float> %a, <4 x float> %b) {
233 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
234 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 0, i32 5>
237 ; CHECK-LABEL: test2b
242 define <4 x float> @test3b(<4 x float> %a, <4 x float> %b) {
243 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 3>
244 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 7>
247 ; CHECK-LABEL: test3b
252 define <4 x float> @test4b(<4 x float> %a, <4 x float> %b) {
253 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
254 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 5, i32 5, i32 2, i32 7>
257 ; CHECK-LABEL: test4b
263 ; Verify that we correctly fold shuffles even when we use illegal vector types.
264 define <4 x i8> @test1c(<4 x i8>* %a, <4 x i8>* %b) {
265 %A = load <4 x i8>* %a
266 %B = load <4 x i8>* %b
267 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
268 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
271 ; CHECK-LABEL: test1c
276 define <4 x i8> @test2c(<4 x i8>* %a, <4 x i8>* %b) {
277 %A = load <4 x i8>* %a
278 %B = load <4 x i8>* %b
279 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 1, i32 5>
280 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
283 ; CHECK-LABEL: test2c
288 define <4 x i8> @test3c(<4 x i8>* %a, <4 x i8>* %b) {
289 %A = load <4 x i8>* %a
290 %B = load <4 x i8>* %b
291 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
292 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
295 ; FIXME: this should be lowered as a single movhlps. However, the backend
296 ; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we
297 ; end up with the sub-optimal sequence 'shufps, palignr'.
299 ; CHECK-LABEL: test3c
305 define <4 x i8> @test4c(<4 x i8>* %a, <4 x i8>* %b) {
306 %A = load <4 x i8>* %a
307 %B = load <4 x i8>* %b
308 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
309 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
312 ; CHECK-LABEL: test4c
317 ; The following test cases are generated from this C++ code:
319 ;__m128 blend_01(__m128 a, __m128 b)
322 ; s = _mm_blend_ps( s, b, 1<<0 );
323 ; s = _mm_blend_ps( s, b, 1<<1 );
327 ;__m128 blend_02(__m128 a, __m128 b)
330 ; s = _mm_blend_ps( s, b, 1<<0 );
331 ; s = _mm_blend_ps( s, b, 1<<2 );
335 ;__m128 blend_123(__m128 a, __m128 b)
338 ; s = _mm_blend_ps( s, b, 1<<1 );
339 ; s = _mm_blend_ps( s, b, 1<<2 );
340 ; s = _mm_blend_ps( s, b, 1<<3 );
344 ; Ideally, we should collapse the following shuffles into a single one.
346 define <4 x float> @blend_01(<4 x float> %a, <4 x float> %b) {
347 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
348 %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
349 ret <4 x float> %shuffle6
351 ; CHECK-LABEL: blend_01
355 define <4 x float> @blend_02(<4 x float> %a, <4 x float> %b) {
356 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 undef, i32 3>
357 %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
358 ret <4 x float> %shuffle6
360 ; CHECK-LABEL: blend_02
364 define <4 x float> @blend_123(<4 x float> %a, <4 x float> %b) {
365 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
366 %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
367 %shuffle12 = shufflevector <4 x float> %shuffle6, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
368 ret <4 x float> %shuffle12
370 ; CHECK-LABEL: blend_123