1 ; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
2 ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
; Test case: element-wise logical shift right (lshr) of the <2 x i16> vector %a
; by the per-element amounts held in %b.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 20.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrq %cl`, i.e. a 64-bit shift whose count is in %cl.
4 %shifttype = type <2 x i16>
5 define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
8 ; SSE2: cost of 20 {{.*}} lshr
9 ; SSE2-CODEGEN: shift2i16
10 ; SSE2-CODEGEN: shrq %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
12 %0 = lshr %shifttype %a , %b
; Test case: element-wise logical shift right (lshr) of the <4 x i16> vector %a
; by the per-element amounts held in %b.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 40.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrl %cl`, i.e. a 32-bit shift whose count is in %cl.
16 %shifttype4i16 = type <4 x i16>
17 define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
20 ; SSE2: cost of 40 {{.*}} lshr
21 ; SSE2-CODEGEN: shift4i16
22 ; SSE2-CODEGEN: shrl %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
24 %0 = lshr %shifttype4i16 %a , %b
; Test case: element-wise logical shift right (lshr) of the <8 x i16> vector %a
; by the per-element amounts held in %b.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 80.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrl %cl`, i.e. a 32-bit shift whose count is in %cl.
28 %shifttype8i16 = type <8 x i16>
29 define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
32 ; SSE2: cost of 80 {{.*}} lshr
33 ; SSE2-CODEGEN: shift8i16
34 ; SSE2-CODEGEN: shrl %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
36 %0 = lshr %shifttype8i16 %a , %b
; Test case: element-wise logical shift right (lshr) of the <16 x i16> vector
; %a by the per-element amounts held in %b; the shifted vector is returned.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 160.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrl %cl`, i.e. a 32-bit shift whose count is in %cl.
40 %shifttype16i16 = type <16 x i16>
41 define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
44 ; SSE2: cost of 160 {{.*}} lshr
45 ; SSE2-CODEGEN: shift16i16
46 ; SSE2-CODEGEN: shrl %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
48 %0 = lshr %shifttype16i16 %a , %b
49 ret %shifttype16i16 %0
; Test case: element-wise logical shift right (lshr) of the <32 x i16> vector
; %a by the per-element amounts held in %b; the shifted vector is returned.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 320.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrl %cl`, i.e. a 32-bit shift whose count is in %cl.
52 %shifttype32i16 = type <32 x i16>
53 define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
56 ; SSE2: cost of 320 {{.*}} lshr
57 ; SSE2-CODEGEN: shift32i16
58 ; SSE2-CODEGEN: shrl %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
60 %0 = lshr %shifttype32i16 %a , %b
61 ret %shifttype32i16 %0
; Test case: element-wise logical shift right (lshr) of the <2 x i32> vector %a
; by the per-element amounts held in %b.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 20.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrq %cl`, i.e. a 64-bit shift whose count is in %cl.
64 %shifttype2i32 = type <2 x i32>
65 define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
68 ; SSE2: cost of 20 {{.*}} lshr
69 ; SSE2-CODEGEN: shift2i32
70 ; SSE2-CODEGEN: shrq %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
72 %0 = lshr %shifttype2i32 %a , %b
; Test case: element-wise logical shift right (lshr) of the <4 x i32> vector %a
; by the per-element amounts held in %b.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 40.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrl %cl`, i.e. a 32-bit shift whose count is in %cl.
76 %shifttype4i32 = type <4 x i32>
77 define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
80 ; SSE2: cost of 40 {{.*}} lshr
81 ; SSE2-CODEGEN: shift4i32
82 ; SSE2-CODEGEN: shrl %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
84 %0 = lshr %shifttype4i32 %a , %b
; Test case: element-wise logical shift right (lshr) of the <8 x i32> vector %a
; by the per-element amounts held in %b.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 80.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrl %cl`, i.e. a 32-bit shift whose count is in %cl.
88 %shifttype8i32 = type <8 x i32>
89 define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
92 ; SSE2: cost of 80 {{.*}} lshr
93 ; SSE2-CODEGEN: shift8i32
94 ; SSE2-CODEGEN: shrl %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
96 %0 = lshr %shifttype8i32 %a , %b
; Test case: element-wise logical shift right (lshr) of the <16 x i32> vector
; %a by the per-element amounts held in %b; the shifted vector is returned.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 160.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrl %cl`, i.e. a 32-bit shift whose count is in %cl.
100 %shifttype16i32 = type <16 x i32>
101 define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
104 ; SSE2: cost of 160 {{.*}} lshr
105 ; SSE2-CODEGEN: shift16i32
106 ; SSE2-CODEGEN: shrl %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
108 %0 = lshr %shifttype16i32 %a , %b
109 ret %shifttype16i32 %0
; Test case: element-wise logical shift right (lshr) of the <32 x i32> vector
; %a by the per-element amounts held in %b; the shifted vector is returned.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 256.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrl %cl`, i.e. a 32-bit shift whose count is in %cl.
; NOTE(review): the 2/4/8/16-element i32 cases in this file expect 10 per
; element (20/40/80/160), which would give 320 here rather than 256.
; Presumably 256 is what the cost model actually reports for this width -
; TODO confirm against the opt output before changing it.
112 %shifttype32i32 = type <32 x i32>
113 define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
116 ; SSE2: cost of 256 {{.*}} lshr
117 ; SSE2-CODEGEN: shift32i32
118 ; SSE2-CODEGEN: shrl %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
120 %0 = lshr %shifttype32i32 %a , %b
121 ret %shifttype32i32 %0
; Test case: element-wise logical shift right (lshr) of the <2 x i64> vector %a
; by the per-element amounts held in %b; the shifted vector is returned.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 20.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrq %cl`, i.e. a 64-bit shift whose count is in %cl.
124 %shifttype2i64 = type <2 x i64>
125 define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
128 ; SSE2: cost of 20 {{.*}} lshr
129 ; SSE2-CODEGEN: shift2i64
130 ; SSE2-CODEGEN: shrq %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
132 %0 = lshr %shifttype2i64 %a , %b
133 ret %shifttype2i64 %0
; Test case: element-wise logical shift right (lshr) of the <4 x i64> vector %a
; by the per-element amounts held in %b; the shifted vector is returned.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 40.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrq %cl`, i.e. a 64-bit shift whose count is in %cl.
136 %shifttype4i64 = type <4 x i64>
137 define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
140 ; SSE2: cost of 40 {{.*}} lshr
141 ; SSE2-CODEGEN: shift4i64
142 ; SSE2-CODEGEN: shrq %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
144 %0 = lshr %shifttype4i64 %a , %b
145 ret %shifttype4i64 %0
; Test case: element-wise logical shift right (lshr) of the <8 x i64> vector %a
; by the per-element amounts held in %b; the shifted vector is returned.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 80.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrq %cl`, i.e. a 64-bit shift whose count is in %cl.
148 %shifttype8i64 = type <8 x i64>
149 define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
152 ; SSE2: cost of 80 {{.*}} lshr
153 ; SSE2-CODEGEN: shift8i64
154 ; SSE2-CODEGEN: shrq %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
156 %0 = lshr %shifttype8i64 %a , %b
157 ret %shifttype8i64 %0
; Test case: element-wise logical shift right (lshr) of the <16 x i64> vector
; %a by the per-element amounts held in %b; the shifted vector is returned.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 160.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrq %cl`, i.e. a 64-bit shift whose count is in %cl.
160 %shifttype16i64 = type <16 x i64>
161 define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
164 ; SSE2: cost of 160 {{.*}} lshr
165 ; SSE2-CODEGEN: shift16i64
166 ; SSE2-CODEGEN: shrq %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
168 %0 = lshr %shifttype16i64 %a , %b
169 ret %shifttype16i64 %0
; Test case: element-wise logical shift right (lshr) of the <32 x i64> vector
; %a by the per-element amounts held in %b; the shifted vector is returned.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 256.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrq %cl`, i.e. a 64-bit shift whose count is in %cl.
; NOTE(review): the 2/4/8/16-element i64 cases in this file expect 10 per
; element (20/40/80/160), which would give 320 here rather than 256.
; Presumably 256 is what the cost model actually reports for this width -
; TODO confirm against the opt output before changing it.
172 %shifttype32i64 = type <32 x i64>
173 define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
176 ; SSE2: cost of 256 {{.*}} lshr
177 ; SSE2-CODEGEN: shift32i64
178 ; SSE2-CODEGEN: shrq %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
180 %0 = lshr %shifttype32i64 %a , %b
181 ret %shifttype32i64 %0
; Test case: element-wise logical shift right (lshr) of the <2 x i8> vector %a
; by the per-element amounts held in %b.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 20.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrq %cl`, i.e. a 64-bit shift whose count is in %cl.
184 %shifttype2i8 = type <2 x i8>
185 define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
188 ; SSE2: cost of 20 {{.*}} lshr
189 ; SSE2-CODEGEN: shift2i8
190 ; SSE2-CODEGEN: shrq %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
192 %0 = lshr %shifttype2i8 %a , %b
; Test case: element-wise logical shift right (lshr) of the <4 x i8> vector %a
; by the per-element amounts held in %b.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 40.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrl %cl`, i.e. a 32-bit shift whose count is in %cl.
196 %shifttype4i8 = type <4 x i8>
197 define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
200 ; SSE2: cost of 40 {{.*}} lshr
201 ; SSE2-CODEGEN: shift4i8
202 ; SSE2-CODEGEN: shrl %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
204 %0 = lshr %shifttype4i8 %a , %b
; Test case: element-wise logical shift right (lshr) of the <8 x i8> vector %a
; by the per-element amounts held in %b.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 80.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrl %cl`, i.e. a 32-bit shift whose count is in %cl.
208 %shifttype8i8 = type <8 x i8>
209 define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
212 ; SSE2: cost of 80 {{.*}} lshr
213 ; SSE2-CODEGEN: shift8i8
214 ; SSE2-CODEGEN: shrl %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
216 %0 = lshr %shifttype8i8 %a , %b
; Test case: element-wise logical shift right (lshr) of the <16 x i8> vector %a
; by the per-element amounts held in %b; the shifted vector is returned.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 160.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrb %cl`, i.e. an 8-bit shift whose count is in %cl.
220 %shifttype16i8 = type <16 x i8>
221 define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
224 ; SSE2: cost of 160 {{.*}} lshr
225 ; SSE2-CODEGEN: shift16i8
226 ; SSE2-CODEGEN: shrb %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
228 %0 = lshr %shifttype16i8 %a , %b
229 ret %shifttype16i8 %0
; Test case: element-wise logical shift right (lshr) of the <32 x i8> vector %a
; by the per-element amounts held in %b; the shifted vector is returned.
; Expectations checked below:
;   - cost-model run (SSE2 prefix): the lshr is reported with cost 320.
;   - llc run (SSE2-CODEGEN prefix): after the function name, the output
;     contains `shrb %cl`, i.e. an 8-bit shift whose count is in %cl.
232 %shifttype32i8 = type <32 x i8>
233 define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
236 ; SSE2: cost of 320 {{.*}} lshr
237 ; SSE2-CODEGEN: shift32i8
238 ; SSE2-CODEGEN: shrb %cl
; Instruction under test: both operands are vectors, so each lane has its own
; shift amount.
240 %0 = lshr %shifttype32i8 %a , %b
241 ret %shifttype32i8 %0