1 ; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
2 ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
; Variable-amount case: ashr of <2 x i16> %a by the per-element amounts in %b.
; The cost-model check expects cost 20; the codegen check expects `sarq %cl` in the llc output.
4 %shifttype = type <2 x i16>
5 define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
8 ; SSE2: cost of 20 {{.*}} ashr
9 ; SSE2-CODEGEN: shift2i16
10 ; SSE2-CODEGEN: sarq %cl
12 %0 = ashr %shifttype %a , %b
; Variable-amount case: ashr of <4 x i16> %a by the per-element amounts in %b.
; The cost-model check expects cost 40; the codegen check expects `sarl %cl` in the llc output.
16 %shifttype4i16 = type <4 x i16>
17 define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
20 ; SSE2: cost of 40 {{.*}} ashr
21 ; SSE2-CODEGEN: shift4i16
22 ; SSE2-CODEGEN: sarl %cl
24 %0 = ashr %shifttype4i16 %a , %b
; Variable-amount case: ashr of <8 x i16> %a by the per-element amounts in %b.
; The cost-model check expects cost 80; the codegen check expects `sarw %cl` in the llc output.
28 %shifttype8i16 = type <8 x i16>
29 define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
32 ; SSE2: cost of 80 {{.*}} ashr
33 ; SSE2-CODEGEN: shift8i16
34 ; SSE2-CODEGEN: sarw %cl
36 %0 = ashr %shifttype8i16 %a , %b
; Variable-amount case: ashr of <16 x i16> %a by the per-element amounts in %b.
; The cost-model check expects cost 160; the codegen check expects `sarw %cl` in the llc output.
40 %shifttype16i16 = type <16 x i16>
41 define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
44 ; SSE2: cost of 160 {{.*}} ashr
45 ; SSE2-CODEGEN: shift16i16
46 ; SSE2-CODEGEN: sarw %cl
48 %0 = ashr %shifttype16i16 %a , %b
49 ret %shifttype16i16 %0
; Variable-amount case: ashr of <32 x i16> %a by the per-element amounts in %b.
; The cost-model check expects cost 320; the codegen check expects `sarw %cl` in the llc output.
52 %shifttype32i16 = type <32 x i16>
53 define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
56 ; SSE2: cost of 320 {{.*}} ashr
57 ; SSE2-CODEGEN: shift32i16
58 ; SSE2-CODEGEN: sarw %cl
60 %0 = ashr %shifttype32i16 %a , %b
61 ret %shifttype32i16 %0
; Variable-amount case: ashr of <2 x i32> %a by the per-element amounts in %b.
; The cost-model check expects cost 20; the codegen check expects `sarq %cl` in the llc output.
64 %shifttype2i32 = type <2 x i32>
65 define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
68 ; SSE2: cost of 20 {{.*}} ashr
69 ; SSE2-CODEGEN: shift2i32
70 ; SSE2-CODEGEN: sarq %cl
72 %0 = ashr %shifttype2i32 %a , %b
; Variable-amount case: ashr of <4 x i32> %a by the per-element amounts in %b.
; The cost-model check expects cost 40; the codegen check expects `sarl %cl` in the llc output.
76 %shifttype4i32 = type <4 x i32>
77 define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
80 ; SSE2: cost of 40 {{.*}} ashr
81 ; SSE2-CODEGEN: shift4i32
82 ; SSE2-CODEGEN: sarl %cl
84 %0 = ashr %shifttype4i32 %a , %b
; Variable-amount case: ashr of <8 x i32> %a by the per-element amounts in %b.
; The cost-model check expects cost 80; the codegen check expects `sarl %cl` in the llc output.
88 %shifttype8i32 = type <8 x i32>
89 define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
92 ; SSE2: cost of 80 {{.*}} ashr
93 ; SSE2-CODEGEN: shift8i32
94 ; SSE2-CODEGEN: sarl %cl
96 %0 = ashr %shifttype8i32 %a , %b
; Variable-amount case: ashr of <16 x i32> %a by the per-element amounts in %b.
; The cost-model check expects cost 160; the codegen check expects `sarl %cl` in the llc output.
100 %shifttype16i32 = type <16 x i32>
101 define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
104 ; SSE2: cost of 160 {{.*}} ashr
105 ; SSE2-CODEGEN: shift16i32
106 ; SSE2-CODEGEN: sarl %cl
108 %0 = ashr %shifttype16i32 %a , %b
109 ret %shifttype16i32 %0
; Variable-amount case: ashr of <32 x i32> %a by the per-element amounts in %b.
; The cost-model check expects cost 256; the codegen check expects `sarl %cl` in the llc output.
; NOTE(review): 256 does not follow the 10-per-element progression of the narrower
; i32 vectors in this group (20/40/80/160) - confirm this is the intended cost-model output.
112 %shifttype32i32 = type <32 x i32>
113 define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
116 ; SSE2: cost of 256 {{.*}} ashr
117 ; SSE2-CODEGEN: shift32i32
118 ; SSE2-CODEGEN: sarl %cl
120 %0 = ashr %shifttype32i32 %a , %b
121 ret %shifttype32i32 %0
; Variable-amount case: ashr of <2 x i64> %a by the per-element amounts in %b.
; The cost-model check expects cost 20; the codegen check expects `sarq %cl` in the llc output.
124 %shifttype2i64 = type <2 x i64>
125 define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
128 ; SSE2: cost of 20 {{.*}} ashr
129 ; SSE2-CODEGEN: shift2i64
130 ; SSE2-CODEGEN: sarq %cl
132 %0 = ashr %shifttype2i64 %a , %b
133 ret %shifttype2i64 %0
; Variable-amount case: ashr of <4 x i64> %a by the per-element amounts in %b.
; The cost-model check expects cost 40; the codegen check expects `sarq %cl` in the llc output.
136 %shifttype4i64 = type <4 x i64>
137 define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
140 ; SSE2: cost of 40 {{.*}} ashr
141 ; SSE2-CODEGEN: shift4i64
142 ; SSE2-CODEGEN: sarq %cl
144 %0 = ashr %shifttype4i64 %a , %b
145 ret %shifttype4i64 %0
; Variable-amount case: ashr of <8 x i64> %a by the per-element amounts in %b.
; The cost-model check expects cost 80; the codegen check expects `sarq %cl` in the llc output.
148 %shifttype8i64 = type <8 x i64>
149 define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
152 ; SSE2: cost of 80 {{.*}} ashr
153 ; SSE2-CODEGEN: shift8i64
154 ; SSE2-CODEGEN: sarq %cl
156 %0 = ashr %shifttype8i64 %a , %b
157 ret %shifttype8i64 %0
; Variable-amount case: ashr of <16 x i64> %a by the per-element amounts in %b.
; The cost-model check expects cost 160; the codegen check expects `sarq %cl` in the llc output.
160 %shifttype16i64 = type <16 x i64>
161 define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
164 ; SSE2: cost of 160 {{.*}} ashr
165 ; SSE2-CODEGEN: shift16i64
166 ; SSE2-CODEGEN: sarq %cl
168 %0 = ashr %shifttype16i64 %a , %b
169 ret %shifttype16i64 %0
; Variable-amount case: ashr of <32 x i64> %a by the per-element amounts in %b.
; The cost-model check expects cost 256; the codegen check expects `sarq %cl` in the llc output.
; NOTE(review): 256 does not follow the 10-per-element progression of the narrower
; i64 vectors in this group (20/40/80/160) - confirm this is the intended cost-model output.
172 %shifttype32i64 = type <32 x i64>
173 define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
176 ; SSE2: cost of 256 {{.*}} ashr
177 ; SSE2-CODEGEN: shift32i64
178 ; SSE2-CODEGEN: sarq %cl
180 %0 = ashr %shifttype32i64 %a , %b
181 ret %shifttype32i64 %0
; Variable-amount case: ashr of <2 x i8> %a by the per-element amounts in %b.
; The cost-model check expects cost 20; the codegen check expects `sarq %cl` in the llc output.
184 %shifttype2i8 = type <2 x i8>
185 define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
188 ; SSE2: cost of 20 {{.*}} ashr
189 ; SSE2-CODEGEN: shift2i8
190 ; SSE2-CODEGEN: sarq %cl
192 %0 = ashr %shifttype2i8 %a , %b
; Variable-amount case: ashr of <4 x i8> %a by the per-element amounts in %b.
; The cost-model check expects cost 40; the codegen check expects `sarl %cl` in the llc output.
196 %shifttype4i8 = type <4 x i8>
197 define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
200 ; SSE2: cost of 40 {{.*}} ashr
201 ; SSE2-CODEGEN: shift4i8
202 ; SSE2-CODEGEN: sarl %cl
204 %0 = ashr %shifttype4i8 %a , %b
; Variable-amount case: ashr of <8 x i8> %a by the per-element amounts in %b.
; The cost-model check expects cost 80; the codegen check expects `sarw %cl` in the llc output.
208 %shifttype8i8 = type <8 x i8>
209 define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
212 ; SSE2: cost of 80 {{.*}} ashr
213 ; SSE2-CODEGEN: shift8i8
214 ; SSE2-CODEGEN: sarw %cl
216 %0 = ashr %shifttype8i8 %a , %b
; Variable-amount case: ashr of <16 x i8> %a by the per-element amounts in %b.
; The cost-model check expects cost 160; the codegen check expects `sarb %cl` in the llc output.
220 %shifttype16i8 = type <16 x i8>
221 define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
224 ; SSE2: cost of 160 {{.*}} ashr
225 ; SSE2-CODEGEN: shift16i8
226 ; SSE2-CODEGEN: sarb %cl
228 %0 = ashr %shifttype16i8 %a , %b
229 ret %shifttype16i8 %0
; Variable-amount case: ashr of <32 x i8> %a by the per-element amounts in %b.
; The cost-model check expects cost 320; the codegen check expects `sarb %cl` in the llc output.
232 %shifttype32i8 = type <32 x i8>
233 define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
236 ; SSE2: cost of 320 {{.*}} ashr
237 ; SSE2-CODEGEN: shift32i8
238 ; SSE2-CODEGEN: sarb %cl
240 %0 = ashr %shifttype32i8 %a , %b
241 ret %shifttype32i8 %0
244 ; Test shift by a constant value.
; Constant-amount case: ashr of <2 x i16> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 20; the codegen check expects a `sarq` with an immediate operand.
246 %shifttypec = type <2 x i16>
247 define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
249 ; SSE2: shift2i16const
250 ; SSE2: cost of 20 {{.*}} ashr
251 ; SSE2-CODEGEN: shift2i16const
252 ; SSE2-CODEGEN: sarq $
254 %0 = ashr %shifttypec %a , <i16 3, i16 3>
; Constant-amount case: ashr of <4 x i16> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 1; the codegen check expects `psrad $3` in the llc output.
258 %shifttypec4i16 = type <4 x i16>
259 define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
261 ; SSE2: shift4i16const
262 ; SSE2: cost of 1 {{.*}} ashr
263 ; SSE2-CODEGEN: shift4i16const
264 ; SSE2-CODEGEN: psrad $3
266 %0 = ashr %shifttypec4i16 %a , <i16 3, i16 3, i16 3, i16 3>
267 ret %shifttypec4i16 %0
; Constant-amount case: ashr of <8 x i16> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 1; the codegen check expects `psraw $3` in the llc output.
270 %shifttypec8i16 = type <8 x i16>
271 define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
273 ; SSE2: shift8i16const
274 ; SSE2: cost of 1 {{.*}} ashr
275 ; SSE2-CODEGEN: shift8i16const
276 ; SSE2-CODEGEN: psraw $3
278 %0 = ashr %shifttypec8i16 %a , <i16 3, i16 3, i16 3, i16 3,
279 i16 3, i16 3, i16 3, i16 3>
280 ret %shifttypec8i16 %0
; Constant-amount case: ashr of <16 x i16> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 2; the codegen check expects `psraw $3` in the llc output.
283 %shifttypec16i16 = type <16 x i16>
284 define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
285 %shifttypec16i16 %b) {
287 ; SSE2: shift16i16const
288 ; SSE2: cost of 2 {{.*}} ashr
289 ; SSE2-CODEGEN: shift16i16const
290 ; SSE2-CODEGEN: psraw $3
292 %0 = ashr %shifttypec16i16 %a , <i16 3, i16 3, i16 3, i16 3,
293 i16 3, i16 3, i16 3, i16 3,
294 i16 3, i16 3, i16 3, i16 3,
295 i16 3, i16 3, i16 3, i16 3>
296 ret %shifttypec16i16 %0
; Constant-amount case: ashr of <32 x i16> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 4; the codegen check expects `psraw $3` in the llc output.
299 %shifttypec32i16 = type <32 x i16>
300 define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
301 %shifttypec32i16 %b) {
303 ; SSE2: shift32i16const
304 ; SSE2: cost of 4 {{.*}} ashr
305 ; SSE2-CODEGEN: shift32i16const
306 ; SSE2-CODEGEN: psraw $3
308 %0 = ashr %shifttypec32i16 %a , <i16 3, i16 3, i16 3, i16 3,
309 i16 3, i16 3, i16 3, i16 3,
310 i16 3, i16 3, i16 3, i16 3,
311 i16 3, i16 3, i16 3, i16 3,
312 i16 3, i16 3, i16 3, i16 3,
313 i16 3, i16 3, i16 3, i16 3,
314 i16 3, i16 3, i16 3, i16 3,
315 i16 3, i16 3, i16 3, i16 3>
316 ret %shifttypec32i16 %0
; Constant-amount case: ashr of <2 x i32> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 20; the codegen check expects `sarq $3` in the llc output.
319 %shifttypec2i32 = type <2 x i32>
320 define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
323 ; SSE2: cost of 20 {{.*}} ashr
324 ; SSE2-CODEGEN: shift2i32c
325 ; SSE2-CODEGEN: sarq $3
327 %0 = ashr %shifttypec2i32 %a , <i32 3, i32 3>
328 ret %shifttypec2i32 %0
; Constant-amount case: ashr of <4 x i32> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 1; the codegen check expects `psrad $3` in the llc output.
331 %shifttypec4i32 = type <4 x i32>
332 define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
335 ; SSE2: cost of 1 {{.*}} ashr
336 ; SSE2-CODEGEN: shift4i32c
337 ; SSE2-CODEGEN: psrad $3
339 %0 = ashr %shifttypec4i32 %a , <i32 3, i32 3, i32 3, i32 3>
340 ret %shifttypec4i32 %0
; Constant-amount case: ashr of <8 x i32> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 2; the codegen check expects `psrad $3` in the llc output.
343 %shifttypec8i32 = type <8 x i32>
344 define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
347 ; SSE2: cost of 2 {{.*}} ashr
348 ; SSE2-CODEGEN: shift8i32c
349 ; SSE2-CODEGEN: psrad $3
351 %0 = ashr %shifttypec8i32 %a , <i32 3, i32 3, i32 3, i32 3,
352 i32 3, i32 3, i32 3, i32 3>
353 ret %shifttypec8i32 %0
; Constant-amount case: ashr of <16 x i32> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 4; the codegen check expects `psrad $3` in the llc output.
356 %shifttypec16i32 = type <16 x i32>
357 define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
360 ; SSE2: cost of 4 {{.*}} ashr
361 ; SSE2-CODEGEN: shift16i32c
362 ; SSE2-CODEGEN: psrad $3
364 %0 = ashr %shifttypec16i32 %a , <i32 3, i32 3, i32 3, i32 3,
365 i32 3, i32 3, i32 3, i32 3,
366 i32 3, i32 3, i32 3, i32 3,
367 i32 3, i32 3, i32 3, i32 3>
368 ret %shifttypec16i32 %0
; Constant-amount case: ashr of <32 x i32> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 256 (see the getTypeConversion remark in the body),
; while the codegen check still expects `psrad $3` in the llc output.
371 %shifttypec32i32 = type <32 x i32>
372 define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
375 ; getTypeConversion fails here and promotes this to a i64.
376 ; SSE2: cost of 256 {{.*}} ashr
377 ; SSE2-CODEGEN: shift32i32c
378 ; SSE2-CODEGEN: psrad $3
379 %0 = ashr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
380 i32 3, i32 3, i32 3, i32 3,
381 i32 3, i32 3, i32 3, i32 3,
382 i32 3, i32 3, i32 3, i32 3,
383 i32 3, i32 3, i32 3, i32 3,
384 i32 3, i32 3, i32 3, i32 3,
385 i32 3, i32 3, i32 3, i32 3,
386 i32 3, i32 3, i32 3, i32 3>
387 ret %shifttypec32i32 %0
; Constant-amount case: ashr of <2 x i64> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 20; the codegen check expects `sarq $3` in the llc output.
390 %shifttypec2i64 = type <2 x i64>
391 define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
394 ; SSE2: cost of 20 {{.*}} ashr
395 ; SSE2-CODEGEN: shift2i64c
396 ; SSE2-CODEGEN: sarq $3
398 %0 = ashr %shifttypec2i64 %a , <i64 3, i64 3>
399 ret %shifttypec2i64 %0
; Constant-amount case: ashr of <4 x i64> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 40; the codegen check expects `sarq $3` in the llc output.
402 %shifttypec4i64 = type <4 x i64>
403 define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
406 ; SSE2: cost of 40 {{.*}} ashr
407 ; SSE2-CODEGEN: shift4i64c
408 ; SSE2-CODEGEN: sarq $3
410 %0 = ashr %shifttypec4i64 %a , <i64 3, i64 3, i64 3, i64 3>
411 ret %shifttypec4i64 %0
; Constant-amount case: ashr of <8 x i64> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 80; the codegen check expects `sarq $3` in the llc output.
414 %shifttypec8i64 = type <8 x i64>
415 define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
418 ; SSE2: cost of 80 {{.*}} ashr
419 ; SSE2-CODEGEN: shift8i64c
420 ; SSE2-CODEGEN: sarq $3
422 %0 = ashr %shifttypec8i64 %a , <i64 3, i64 3, i64 3, i64 3,
423 i64 3, i64 3, i64 3, i64 3>
424 ret %shifttypec8i64 %0
; Constant-amount case: ashr of <16 x i64> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 160; the codegen check expects `sarq $3` in the llc output.
427 %shifttypec16i64 = type <16 x i64>
428 define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
431 ; SSE2: cost of 160 {{.*}} ashr
432 ; SSE2-CODEGEN: shift16i64c
433 ; SSE2-CODEGEN: sarq $3
435 %0 = ashr %shifttypec16i64 %a , <i64 3, i64 3, i64 3, i64 3,
436 i64 3, i64 3, i64 3, i64 3,
437 i64 3, i64 3, i64 3, i64 3,
438 i64 3, i64 3, i64 3, i64 3>
439 ret %shifttypec16i64 %0
; Constant-amount case: ashr of <32 x i64> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 256; the codegen check expects `sarq $3` in the llc output.
; NOTE(review): 256 does not follow the 10-per-element progression of the narrower
; constant-shift i64 vectors (20/40/80/160) - confirm this is the intended cost-model output.
442 %shifttypec32i64 = type <32 x i64>
443 define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
446 ; SSE2: cost of 256 {{.*}} ashr
447 ; SSE2-CODEGEN: shift32i64c
448 ; SSE2-CODEGEN: sarq $3
450 %0 = ashr %shifttypec32i64 %a ,<i64 3, i64 3, i64 3, i64 3,
451 i64 3, i64 3, i64 3, i64 3,
452 i64 3, i64 3, i64 3, i64 3,
453 i64 3, i64 3, i64 3, i64 3,
454 i64 3, i64 3, i64 3, i64 3,
455 i64 3, i64 3, i64 3, i64 3,
456 i64 3, i64 3, i64 3, i64 3,
457 i64 3, i64 3, i64 3, i64 3>
458 ret %shifttypec32i64 %0
; Constant-amount case: ashr of <2 x i8> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 20; the codegen check expects `sarq $3` in the llc output.
461 %shifttypec2i8 = type <2 x i8>
462 define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
465 ; SSE2: cost of 20 {{.*}} ashr
466 ; SSE2-CODEGEN: shift2i8c
467 ; SSE2-CODEGEN: sarq $3
469 %0 = ashr %shifttypec2i8 %a , <i8 3, i8 3>
470 ret %shifttypec2i8 %0
; Constant-amount case: ashr of <4 x i8> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 1; the codegen check expects `psrad $3` in the llc output.
473 %shifttypec4i8 = type <4 x i8>
474 define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
477 ; SSE2: cost of 1 {{.*}} ashr
478 ; SSE2-CODEGEN: shift4i8c
479 ; SSE2-CODEGEN: psrad $3
481 %0 = ashr %shifttypec4i8 %a , <i8 3, i8 3, i8 3, i8 3>
482 ret %shifttypec4i8 %0
; Constant-amount case: ashr of <8 x i8> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 1; the codegen check expects `psraw $3` in the llc output.
485 %shifttypec8i8 = type <8 x i8>
486 define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
489 ; SSE2: cost of 1 {{.*}} ashr
490 ; SSE2-CODEGEN: shift8i8c
491 ; SSE2-CODEGEN: psraw $3
493 %0 = ashr %shifttypec8i8 %a , <i8 3, i8 3, i8 3, i8 3,
494 i8 3, i8 3, i8 3, i8 3>
495 ret %shifttypec8i8 %0
; Constant-amount case: ashr of <16 x i8> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 4; the codegen check expects `psrlw $3` in the llc output.
; Note that the expected mnemonic is the logical word shift (psrlw), not an arithmetic one,
; even though the IR operation is ashr.
498 %shifttypec16i8 = type <16 x i8>
499 define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
502 ; SSE2: cost of 4 {{.*}} ashr
503 ; SSE2-CODEGEN: shift16i8c
504 ; SSE2-CODEGEN: psrlw $3
506 %0 = ashr %shifttypec16i8 %a , <i8 3, i8 3, i8 3, i8 3,
507 i8 3, i8 3, i8 3, i8 3,
508 i8 3, i8 3, i8 3, i8 3,
509 i8 3, i8 3, i8 3, i8 3>
510 ret %shifttypec16i8 %0
; Constant-amount case: ashr of <32 x i8> %a by a constant vector of all 3s
; (%b is a parameter but is not referenced by the shift).
; The cost-model check expects cost 8; the codegen check expects `psrlw $3` in the llc output.
; Note that the expected mnemonic is the logical word shift (psrlw), not an arithmetic one,
; even though the IR operation is ashr.
513 %shifttypec32i8 = type <32 x i8>
514 define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
517 ; SSE2: cost of 8 {{.*}} ashr
518 ; SSE2-CODEGEN: shift32i8c
519 ; SSE2-CODEGEN: psrlw $3
521 %0 = ashr %shifttypec32i8 %a , <i8 3, i8 3, i8 3, i8 3,
522 i8 3, i8 3, i8 3, i8 3,
523 i8 3, i8 3, i8 3, i8 3,
524 i8 3, i8 3, i8 3, i8 3,
525 i8 3, i8 3, i8 3, i8 3,
526 i8 3, i8 3, i8 3, i8 3,
527 i8 3, i8 3, i8 3, i8 3,
528 i8 3, i8 3, i8 3, i8 3>
529 ret %shifttypec32i8 %0