AMDGPU: Add pass to detect used kernel features
[oota-llvm.git] / test / CodeGen / AMDGPU / llvm.memcpy.ll
1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3
4 declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind ; addrspace(3) copy with an i32 length; called by the *_lds_to_lds_* tests
5 declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind ; addrspace(1) copy with an i64 length; called by the *_global_to_global_* tests
6
7
8 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
9 ; SI: ds_read_u8
10 ; SI: ds_read_u8
11 ; SI: ds_read_u8
12 ; SI: ds_read_u8
13 ; SI: ds_read_u8
14 ; SI: ds_read_u8
15 ; SI: ds_read_u8
16 ; SI: ds_read_u8
17
18 ; SI: ds_read_u8
19 ; SI: ds_read_u8
20 ; SI: ds_read_u8
21 ; SI: ds_read_u8
22 ; SI: ds_read_u8
23 ; SI: ds_read_u8
24 ; SI: ds_read_u8
25 ; SI: ds_read_u8
26
27 ; SI: ds_read_u8
28 ; SI: ds_read_u8
29 ; SI: ds_read_u8
30 ; SI: ds_read_u8
31 ; SI: ds_read_u8
32 ; SI: ds_read_u8
33 ; SI: ds_read_u8
34 ; SI: ds_read_u8
35
36 ; SI: ds_read_u8
37 ; SI: ds_read_u8
38 ; SI: ds_read_u8
39 ; SI: ds_read_u8
40 ; SI: ds_read_u8
41 ; SI: ds_read_u8
42 ; SI: ds_read_u8
43 ; SI: ds_read_u8
44
45 ; SI: ds_write_b8
46 ; SI: ds_write_b8
47 ; SI: ds_write_b8
48 ; SI: ds_write_b8
49 ; SI: ds_write_b8
50 ; SI: ds_write_b8
51 ; SI: ds_write_b8
52 ; SI: ds_write_b8
53
54 ; SI: ds_write_b8
55 ; SI: ds_write_b8
56 ; SI: ds_write_b8
57 ; SI: ds_write_b8
58 ; SI: ds_write_b8
59 ; SI: ds_write_b8
60 ; SI: ds_write_b8
61 ; SI: ds_write_b8
62
63 ; SI: ds_write_b8
64 ; SI: ds_write_b8
65 ; SI: ds_write_b8
66 ; SI: ds_write_b8
67 ; SI: ds_write_b8
68 ; SI: ds_write_b8
69 ; SI: ds_write_b8
70 ; SI: ds_write_b8
71
72 ; SI: ds_write_b8
73 ; SI: ds_write_b8
74 ; SI: ds_write_b8
75 ; SI: ds_write_b8
76 ; SI: ds_write_b8
77 ; SI: ds_write_b8
78 ; SI: ds_write_b8
79 ; SI: ds_write_b8
80
81 ; SI: s_endpgm
82 define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { ; copy 32 bytes between addrspace(3) pointers, alignment argument 1
83   %dst = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* ; the intrinsic is declared on i8 pointers
84   %src = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
85   call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 32, i32 1, i1 false) nounwind
86   ret void
87 }
88
89 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
90 ; SI: ds_read_u16
91 ; SI: ds_read_u16
92 ; SI: ds_read_u16
93 ; SI: ds_read_u16
94 ; SI: ds_read_u16
95 ; SI: ds_read_u16
96 ; SI: ds_read_u16
97 ; SI: ds_read_u16
98
99 ; SI: ds_read_u16
100 ; SI: ds_read_u16
101 ; SI: ds_read_u16
102 ; SI: ds_read_u16
103 ; SI: ds_read_u16
104 ; SI: ds_read_u16
105 ; SI: ds_read_u16
106 ; SI: ds_read_u16
107
108 ; SI: ds_write_b16
109 ; SI: ds_write_b16
110 ; SI: ds_write_b16
111 ; SI: ds_write_b16
112 ; SI: ds_write_b16
113 ; SI: ds_write_b16
114 ; SI: ds_write_b16
115 ; SI: ds_write_b16
116
117 ; SI: ds_write_b16
118 ; SI: ds_write_b16
119 ; SI: ds_write_b16
120 ; SI: ds_write_b16
121 ; SI: ds_write_b16
122 ; SI: ds_write_b16
123 ; SI: ds_write_b16
124 ; SI: ds_write_b16
125
126 ; SI: s_endpgm
127 define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { ; copy 32 bytes between addrspace(3) pointers, alignment argument 2
128   %dst = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* ; the intrinsic is declared on i8 pointers
129   %src = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
130   call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 32, i32 2, i1 false) nounwind
131   ret void
132 }
133
134 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
135 ; SI-DAG: ds_read_b32
136 ; SI-DAG: ds_write_b32
137
138 ; SI-DAG: ds_read_b32
139 ; SI-DAG: ds_write_b32
140
141 ; SI-DAG: ds_read_b32
142 ; SI-DAG: ds_write_b32
143
144 ; SI-DAG: ds_read_b32
145 ; SI-DAG: ds_write_b32
146
147 ; SI-DAG: ds_read_b32
148 ; SI-DAG: ds_write_b32
149
150 ; SI-DAG: ds_read_b32
151 ; SI-DAG: ds_write_b32
152
153 ; SI-DAG: ds_read_b32
154 ; SI-DAG: ds_write_b32
155
156 ; SI-DAG: ds_read_b32
157 ; SI-DAG: ds_write_b32
158
159 ; A 32-byte copy is eight dword read/write pairs, so exactly eight pairs are
160 ; checked above; a ninth pair could only pass by re-matching an earlier one.
161
162 ; SI: s_endpgm
163 define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { ; copy 32 bytes between addrspace(3) pointers, alignment argument 4
164   %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* ; the intrinsic is declared on i8 pointers
165   %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
166   call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
167   ret void
168 }
169
170 ; FIXME: Use 64-bit ops
171 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:
172
173 ; SI-DAG: ds_read_b32
174 ; SI-DAG: ds_write_b32
175
176 ; SI-DAG: ds_read_b32
177 ; SI-DAG: ds_write_b32
178
179 ; SI-DAG: ds_read_b32
180 ; SI-DAG: ds_write_b32
181
182 ; SI-DAG: ds_read_b32
183 ; SI-DAG: ds_write_b32
184
185 ; SI-DAG: ds_read_b32
186 ; SI-DAG: ds_write_b32
187
188 ; SI-DAG: ds_read_b32
189 ; SI-DAG: ds_write_b32
190
191 ; SI-DAG: ds_read_b32
192 ; SI-DAG: ds_write_b32
193
194 ; SI-DAG: ds_read_b32
195 ; SI-DAG: ds_write_b32
196
197 ; A 32-byte copy is eight dword read/write pairs, so exactly eight pairs are
198 ; checked above; a ninth pair could only pass by re-matching an earlier one.
199
200 ; SI: s_endpgm
201 define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { ; copy 32 bytes between addrspace(3) pointers, alignment argument 8
202   %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* ; the intrinsic is declared on i8 pointers
203   %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
204   call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
205   ret void
206 }
207
208 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
209 ; SI-DAG: buffer_load_ubyte
210 ; SI-DAG: buffer_store_byte
211 ; SI-DAG: buffer_load_ubyte
212 ; SI-DAG: buffer_store_byte
213 ; SI-DAG: buffer_load_ubyte
214 ; SI-DAG: buffer_store_byte
215 ; SI-DAG: buffer_load_ubyte
216 ; SI-DAG: buffer_store_byte
217 ; SI-DAG: buffer_load_ubyte
218 ; SI-DAG: buffer_store_byte
219 ; SI-DAG: buffer_load_ubyte
220 ; SI-DAG: buffer_store_byte
221 ; SI-DAG: buffer_load_ubyte
222 ; SI-DAG: buffer_store_byte
223 ; SI-DAG: buffer_load_ubyte
224 ; SI-DAG: buffer_store_byte
225
226 ; SI-DAG: buffer_load_ubyte
227 ; SI-DAG: buffer_store_byte
228 ; SI-DAG: buffer_load_ubyte
229 ; SI-DAG: buffer_store_byte
230 ; SI-DAG: buffer_load_ubyte
231 ; SI-DAG: buffer_store_byte
232 ; SI-DAG: buffer_load_ubyte
233 ; SI-DAG: buffer_store_byte
234 ; SI-DAG: buffer_load_ubyte
235 ; SI-DAG: buffer_store_byte
236 ; SI-DAG: buffer_load_ubyte
237 ; SI-DAG: buffer_store_byte
238 ; SI-DAG: buffer_load_ubyte
239 ; SI-DAG: buffer_store_byte
240 ; SI-DAG: buffer_load_ubyte
241 ; SI-DAG: buffer_store_byte
242
243 ; SI-DAG: buffer_load_ubyte
244 ; SI-DAG: buffer_store_byte
245 ; SI-DAG: buffer_load_ubyte
246 ; SI-DAG: buffer_store_byte
247 ; SI-DAG: buffer_load_ubyte
248 ; SI-DAG: buffer_store_byte
249 ; SI-DAG: buffer_load_ubyte
250 ; SI-DAG: buffer_store_byte
251 ; SI-DAG: buffer_load_ubyte
252 ; SI-DAG: buffer_store_byte
253 ; SI-DAG: buffer_load_ubyte
254 ; SI-DAG: buffer_store_byte
255 ; SI-DAG: buffer_load_ubyte
256 ; SI-DAG: buffer_store_byte
257 ; SI-DAG: buffer_load_ubyte
258 ; SI-DAG: buffer_store_byte
259
260 ; SI-DAG: buffer_load_ubyte
261 ; SI-DAG: buffer_store_byte
262 ; SI-DAG: buffer_load_ubyte
263 ; SI-DAG: buffer_store_byte
264 ; SI-DAG: buffer_load_ubyte
265 ; SI-DAG: buffer_store_byte
266 ; SI-DAG: buffer_load_ubyte
267 ; SI-DAG: buffer_store_byte
268 ; SI-DAG: buffer_load_ubyte
269 ; SI-DAG: buffer_store_byte
270 ; SI-DAG: buffer_load_ubyte
271 ; SI-DAG: buffer_store_byte
272 ; SI-DAG: buffer_load_ubyte
273 ; SI-DAG: buffer_store_byte
274 ; SI-DAG: buffer_load_ubyte
275 ; SI-DAG: buffer_store_byte
276
277 ; SI: s_endpgm
278 define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { ; copy 32 bytes between addrspace(1) pointers, alignment argument 1
279   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* ; the intrinsic is declared on i8 pointers
280   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
281   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 1, i1 false) nounwind
282   ret void
283 }
284
285 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
286 ; SI-DAG: buffer_load_ushort
287 ; SI-DAG: buffer_load_ushort
288 ; SI-DAG: buffer_load_ushort
289 ; SI-DAG: buffer_load_ushort
290 ; SI-DAG: buffer_load_ushort
291 ; SI-DAG: buffer_load_ushort
292 ; SI-DAG: buffer_load_ushort
293 ; SI-DAG: buffer_load_ushort
294 ; SI-DAG: buffer_load_ushort
295 ; SI-DAG: buffer_load_ushort
296 ; SI-DAG: buffer_load_ushort
297 ; SI-DAG: buffer_load_ushort
298 ; SI-DAG: buffer_load_ushort
299 ; SI-DAG: buffer_load_ushort
300 ; SI-DAG: buffer_load_ushort
301 ; SI-DAG: buffer_load_ushort
302
303 ; SI-DAG: buffer_store_short
304 ; SI-DAG: buffer_store_short
305 ; SI-DAG: buffer_store_short
306 ; SI-DAG: buffer_store_short
307 ; SI-DAG: buffer_store_short
308 ; SI-DAG: buffer_store_short
309 ; SI-DAG: buffer_store_short
310 ; SI-DAG: buffer_store_short
311 ; SI-DAG: buffer_store_short
312 ; SI-DAG: buffer_store_short
313 ; SI-DAG: buffer_store_short
314 ; SI-DAG: buffer_store_short
315 ; SI-DAG: buffer_store_short
316 ; SI-DAG: buffer_store_short
317 ; SI-DAG: buffer_store_short
318 ; SI-DAG: buffer_store_short
319
320 ; SI: s_endpgm
321 define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { ; copy 32 bytes between addrspace(1) pointers, alignment argument 2
322   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* ; the intrinsic is declared on i8 pointers
323   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
324   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 2, i1 false) nounwind
325   ret void
326 }
327
328 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
329 ; SI: buffer_load_dwordx4
330 ; SI: buffer_load_dwordx4
331 ; SI: buffer_store_dwordx4
332 ; SI: buffer_store_dwordx4
333 ; SI: s_endpgm
334 define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { ; copy 32 bytes between addrspace(1) pointers, alignment argument 4
335   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* ; the intrinsic is declared on i8 pointers
336   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
337   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 4, i1 false) nounwind
338   ret void
339 }
340
341 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
342 ; SI: buffer_load_dwordx4
343 ; SI: buffer_load_dwordx4
344 ; SI: buffer_store_dwordx4
345 ; SI: buffer_store_dwordx4
346 ; SI: s_endpgm
347 define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { ; copy 32 bytes between addrspace(1) pointers, alignment argument 8
348   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* ; the intrinsic is declared on i8 pointers
349   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
350   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 8, i1 false) nounwind
351   ret void
352 }
353
354 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
355 ; SI: buffer_load_dwordx4
356 ; SI: buffer_load_dwordx4
357 ; SI: buffer_store_dwordx4
358 ; SI: buffer_store_dwordx4
359 ; SI: s_endpgm
360 define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { ; copy 32 bytes between addrspace(1) pointers, alignment argument 16
361   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* ; the intrinsic is declared on i8 pointers
362   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
363   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 16, i1 false) nounwind
364   ret void
365 }