1 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512
2 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
3 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX
5 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
6 target triple = "x86_64-apple-macosx10.8.0"
; Cost-model expectations for i1 <-> i32 casts (zext, sext, trunc) on
; <4 x ...> vectors, <8 x ...> vectors and scalars. Every expectation in this
; function uses the prefix shared by all three opt invocations at the top of
; the file, so the same costs are expected for every -mcpu value.
; NOTE(review): despite its name, this function contains no add instruction.
8 define i32 @add(i32 %arg) {
9 ; CHECK-LABEL: for function 'add'
10 ; -- Same size registers --
11 ;CHECK: cost of 1 {{.*}} zext
12 %A = zext <4 x i1> undef to <4 x i32>
13 ;CHECK: cost of 2 {{.*}} sext
14 %B = sext <4 x i1> undef to <4 x i32>
15 ;CHECK: cost of 0 {{.*}} trunc
16 %C = trunc <4 x i32> undef to <4 x i1>
; The 8-element forms below carry only negative expectations: the listed
; cost/opcode pair must not be reported; no exact cost is pinned for them.
18 ; -- Different size registers --
19 ;CHECK-NOT: cost of 1 {{.*}} zext
20 %D = zext <8 x i1> undef to <8 x i32>
21 ;CHECK-NOT: cost of 2 {{.*}} sext
22 %E = sext <8 x i1> undef to <8 x i32>
23 ;CHECK-NOT: cost of 2 {{.*}} trunc
24 %F = trunc <8 x i32> undef to <8 x i1>
; Scalar i1 <-> i32 casts.
28 ;CHECK: cost of 1 {{.*}} zext
29 %G = zext i1 undef to i32
30 ;CHECK: cost of 0 {{.*}} trunc
31 %H = trunc i32 undef to i1
33 ;CHECK: cost of 0 {{.*}} ret
; Cost-model expectations for vector integer zext, sext and trunc.
; Costs are pinned per subtarget prefix: the AVX2 and AVX prefixes cover the
; 128/256-bit cases, and the AVX512 prefix covers the wide cases (%D1..%D5,
; %G, %G1). The AVX512 patterns name the result value explicitly so that they
; can only match the intended instruction.
37 define i32 @zext_sext(<8 x i1> %in) {
38 ; CHECK-AVX2-LABEL: for function 'zext_sext'
39 ; CHECK-AVX-LABEL: for function 'zext_sext'
; Extension of an <8 x i1> argument to <8 x i32>.
40 ;CHECK-AVX2: cost of 3 {{.*}} zext
41 ;CHECK-AVX: cost of 4 {{.*}} zext
42 %Z = zext <8 x i1> %in to <8 x i32>
43 ;CHECK-AVX2: cost of 3 {{.*}} sext
44 ;CHECK-AVX: cost of 7 {{.*}} sext
45 %S = sext <8 x i1> %in to <8 x i32>
; Extensions that double the element width.
47 ;CHECK-AVX2: cost of 1 {{.*}} zext
48 ;CHECK-AVX: cost of 4 {{.*}} zext
49 %A1 = zext <16 x i8> undef to <16 x i16>
50 ;CHECK-AVX2: cost of 1 {{.*}} sext
51 ;CHECK-AVX: cost of 4 {{.*}} sext
52 %A2 = sext <16 x i8> undef to <16 x i16>
53 ;CHECK-AVX2: cost of 1 {{.*}} sext
54 ;CHECK-AVX: cost of 4 {{.*}} sext
55 %A = sext <8 x i16> undef to <8 x i32>
56 ;CHECK-AVX2: cost of 1 {{.*}} zext
57 ;CHECK-AVX: cost of 4 {{.*}} zext
58 %B = zext <8 x i16> undef to <8 x i32>
59 ;CHECK-AVX2: cost of 1 {{.*}} sext
60 ;CHECK-AVX: cost of 4 {{.*}} sext
61 %C = sext <4 x i32> undef to <4 x i64>
; Extensions that widen each element by a factor of four or eight.
63 ;CHECK-AVX2: cost of 3 {{.*}} zext
64 ;CHECK-AVX: cost of 4 {{.*}} zext
65 %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
66 ;CHECK-AVX2: cost of 3 {{.*}} sext
67 ;CHECK-AVX: cost of 7 {{.*}} sext
68 %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
69 ;CHECK-AVX2: cost of 3 {{.*}} zext
70 ;CHECK-AVX: cost of 3 {{.*}} zext
71 %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
72 ;CHECK-AVX2: cost of 3 {{.*}} sext
73 ;CHECK-AVX: cost of 6 {{.*}} sext
74 %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
76 ;CHECK-AVX2: cost of 3 {{.*}} zext
77 ;CHECK-AVX: cost of 4 {{.*}} zext
78 %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
79 ;CHECK-AVX2: cost of 3 {{.*}} sext
80 ;CHECK-AVX: cost of 6 {{.*}} sext
81 %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
83 ;CHECK-AVX2: cost of 1 {{.*}} zext
84 ;CHECK-AVX: cost of 4 {{.*}} zext
85 %D = zext <4 x i32> undef to <4 x i64>
; 16-element extensions: only the AVX512 prefix pins a cost for these.
87 ;CHECK-AVX512: cost of 3 {{.*}} %D1 = zext
88 %D1 = zext <16 x i32> undef to <16 x i64>
90 ;CHECK-AVX512: cost of 3 {{.*}} %D2 = sext
91 %D2 = sext <16 x i32> undef to <16 x i64>
93 ;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
94 %D3 = zext <16 x i16> undef to <16 x i32>
95 ;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext
96 %D4 = zext <16 x i8> undef to <16 x i32>
97 ;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext
98 %D5 = zext <16 x i1> undef to <16 x i32>
; Truncations.
100 ;CHECK-AVX2: cost of 2 {{.*}} trunc
101 ;CHECK-AVX: cost of 4 {{.*}} trunc
102 %E = trunc <4 x i64> undef to <4 x i32>
103 ;CHECK-AVX2: cost of 2 {{.*}} trunc
104 ;CHECK-AVX: cost of 5 {{.*}} trunc
105 %F = trunc <8 x i32> undef to <8 x i16>
106 ;CHECK-AVX2: cost of 4 {{.*}} trunc
107 ;CHECK-AVX: cost of 4 {{.*}} trunc
108 %F1 = trunc <16 x i16> undef to <16 x i8>
109 ;CHECK-AVX2: cost of 2 {{.*}} trunc
110 ;CHECK-AVX: cost of 4 {{.*}} trunc
111 %F2 = trunc <8 x i32> undef to <8 x i8>
112 ;CHECK-AVX2: cost of 2 {{.*}} trunc
113 ;CHECK-AVX: cost of 4 {{.*}} trunc
114 %F3 = trunc <4 x i64> undef to <4 x i8>
; <8 x i64> source: each of the three subtarget prefixes pins its own cost.
116 ;CHECK-AVX2: cost of 4 {{.*}} trunc
117 ;CHECK-AVX: cost of 9 {{.*}} trunc
118 ;CHECK-AVX512: cost of 1 {{.*}} %G = trunc
119 %G = trunc <8 x i64> undef to <8 x i32>
121 ;CHECK-AVX512: cost of 4 {{.*}} %G1 = trunc
122 %G1 = trunc <16 x i64> undef to <16 x i32>
; Cost-model expectations for extending an <8 x i1> argument to <8 x i32>.
; Costs are pinned for the AVX2 and AVX prefixes only.
127 define i32 @masks8(<8 x i1> %in) {
128 ; CHECK-AVX2-LABEL: for function 'masks8'
129 ; CHECK-AVX-LABEL: for function 'masks8'
131 ;CHECK-AVX2: cost of 3 {{.*}} zext
132 ;CHECK-AVX: cost of 4 {{.*}} zext
133 %Z = zext <8 x i1> %in to <8 x i32>
134 ;CHECK-AVX2: cost of 3 {{.*}} sext
135 ;CHECK-AVX: cost of 7 {{.*}} sext
136 %S = sext <8 x i1> %in to <8 x i32>
; Cost-model expectations for extending a <4 x i1> argument to <4 x i64>.
; Costs are pinned for the AVX2 and AVX prefixes only.
140 define i32 @masks4(<4 x i1> %in) {
141 ; CHECK-AVX2-LABEL: for function 'masks4'
142 ; CHECK-AVX-LABEL: for function 'masks4'
144 ;CHECK-AVX2: cost of 3 {{.*}} zext
145 ;CHECK-AVX: cost of 4 {{.*}} zext
146 %Z = zext <4 x i1> %in to <4 x i64>
147 ;CHECK-AVX2: cost of 3 {{.*}} sext
148 ;CHECK-AVX: cost of 6 {{.*}} sext
149 %S = sext <4 x i1> %in to <4 x i64>
; Cost-model expectations for sitofp from 4-element integer vectors
; (i1, i8, i16, i32 elements) to <4 x float> and <4 x double>.
; All expectations use the prefix shared by every opt invocation in this file,
; so the same cost is expected for each -mcpu value.
153 define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
154 ; CHECK-LABEL: for function 'sitofp4'
155 ; CHECK: cost of 3 {{.*}} sitofp
156 %A1 = sitofp <4 x i1> %a to <4 x float>
157 ; CHECK: cost of 3 {{.*}} sitofp
158 %A2 = sitofp <4 x i1> %a to <4 x double>
160 ; CHECK: cost of 3 {{.*}} sitofp
161 %B1 = sitofp <4 x i8> %b to <4 x float>
162 ; CHECK: cost of 3 {{.*}} sitofp
163 %B2 = sitofp <4 x i8> %b to <4 x double>
165 ; CHECK: cost of 3 {{.*}} sitofp
166 %C1 = sitofp <4 x i16> %c to <4 x float>
167 ; CHECK: cost of 3 {{.*}} sitofp
168 %C2 = sitofp <4 x i16> %c to <4 x double>
170 ; CHECK: cost of 1 {{.*}} sitofp
171 %D1 = sitofp <4 x i32> %d to <4 x float>
172 ; CHECK: cost of 1 {{.*}} sitofp
173 %D2 = sitofp <4 x i32> %d to <4 x double>
; Cost-model expectations for sitofp from 8-element integer vectors
; (i1, i8, i16, i32 elements) to <8 x float>.
; All expectations use the prefix shared by every opt invocation in this file.
177 define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
178 ; CHECK-LABEL: for function 'sitofp8'
179 ; CHECK: cost of 8 {{.*}} sitofp
180 %A1 = sitofp <8 x i1> %a to <8 x float>
182 ; CHECK: cost of 8 {{.*}} sitofp
183 %B1 = sitofp <8 x i8> %b to <8 x float>
185 ; CHECK: cost of 5 {{.*}} sitofp
186 %C1 = sitofp <8 x i16> %c to <8 x float>
188 ; CHECK: cost of 1 {{.*}} sitofp
189 %D1 = sitofp <8 x i32> %d to <8 x float>
; Cost-model expectations for uitofp from 4-element integer vectors
; (i1, i8, i16, i32 elements) to <4 x float> and <4 x double>.
; All expectations use the prefix shared by every opt invocation in this file.
193 define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
194 ; CHECK-LABEL: for function 'uitofp4'
195 ; CHECK: cost of 7 {{.*}} uitofp
196 %A1 = uitofp <4 x i1> %a to <4 x float>
197 ; CHECK: cost of 7 {{.*}} uitofp
198 %A2 = uitofp <4 x i1> %a to <4 x double>
200 ; CHECK: cost of 2 {{.*}} uitofp
201 %B1 = uitofp <4 x i8> %b to <4 x float>
202 ; CHECK: cost of 2 {{.*}} uitofp
203 %B2 = uitofp <4 x i8> %b to <4 x double>
205 ; CHECK: cost of 2 {{.*}} uitofp
206 %C1 = uitofp <4 x i16> %c to <4 x float>
207 ; CHECK: cost of 2 {{.*}} uitofp
208 %C2 = uitofp <4 x i16> %c to <4 x double>
210 ; CHECK: cost of 6 {{.*}} uitofp
211 %D1 = uitofp <4 x i32> %d to <4 x float>
212 ; CHECK: cost of 6 {{.*}} uitofp
213 %D2 = uitofp <4 x i32> %d to <4 x double>
; Cost-model expectations for uitofp from 8-element integer vectors
; (i1, i8, i16, i32 elements) to <8 x float>.
; The first three cases use the prefix shared by every opt invocation; the
; <8 x i32> case pins a separate cost for each subtarget prefix.
217 define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
218 ; CHECK-LABEL: for function 'uitofp8'
219 ; CHECK: cost of 6 {{.*}} uitofp
220 %A1 = uitofp <8 x i1> %a to <8 x float>
222 ; CHECK: cost of 5 {{.*}} uitofp
223 %B1 = uitofp <8 x i8> %b to <8 x float>
225 ; CHECK: cost of 5 {{.*}} uitofp
226 %C1 = uitofp <8 x i16> %c to <8 x float>
228 ; CHECK-AVX2: cost of 8 {{.*}} uitofp
229 ; CHECK-AVX512: cost of 8 {{.*}} uitofp
230 ; CHECK-AVX: cost of 9 {{.*}} uitofp
231 %D1 = uitofp <8 x i32> %d to <8 x float>
; Cost-model expectations for fpext (float -> double) and fptrunc
; (double -> float) on 8- and 16-element vectors.
; Costs are pinned for the AVX512 prefix throughout and for the AVX2 prefix on
; %A3 and %A4; no expectation in this function uses the AVX prefix.
; NOTE(review): %A1 and %A3 are the same conversion; only the %A3 patterns
; name the result value.
235 define void @fp_conv(<8 x float> %a, <16 x float>%b) {
236 ;CHECK-LABEL: for function 'fp_conv'
237 ; CHECK-AVX512: cost of 1 {{.*}} fpext
238 %A1 = fpext <8 x float> %a to <8 x double>
240 ; CHECK-AVX512: cost of 3 {{.*}} fpext
241 %A2 = fpext <16 x float> %b to <16 x double>
243 ; CHECK-AVX2: cost of 3 {{.*}} %A3 = fpext
244 ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
245 %A3 = fpext <8 x float> %a to <8 x double>
247 ; CHECK-AVX2: cost of 3 {{.*}} %A4 = fptrunc
248 ; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
249 %A4 = fptrunc <8 x double> undef to <8 x float>
251 ; CHECK-AVX512: cost of 3 {{.*}} %A5 = fptrunc
252 %A5 = fptrunc <16 x double> undef to <16 x float>