1 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; memcpy intrinsic overloads called by the tests in this file. Operand order in
; both declarations: destination pointer, source pointer, byte count, an i32
; (the alignment operand in this form of the intrinsic) and an i1 (the volatile
; flag in this form of the intrinsic).
; Overload on addrspace(3) pointers with an i32 byte count:
3 declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
; Overload on addrspace(1) pointers with an i64 byte count:
4 declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
; 32-byte memcpy between two noalias addrspace(3) pointers (the "lds" of the
; test name) with the alignment operand set to 1.
7 ; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align1
81 define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
82 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
83 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
; Operands: dest, src, 32 bytes, alignment 1, volatile flag false.
84 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
; 32-byte memcpy between two noalias addrspace(3) pointers (the "lds" of the
; test name) with the alignment operand set to 2.
88 ; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align2
126 define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
127 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
128 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
; Operands: dest, src, 32 bytes, alignment 2, volatile flag false.
129 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
; 32-byte memcpy between two noalias addrspace(3) pointers (the "lds" of the
; test name) with the alignment operand set to 4. The checks below expect the
; copy to be emitted as DS_READ_B32 / DS_WRITE_B32 instructions; the DAG form
; of the directive does not impose an order between them.
; NOTE(review): nine read/write pairs are listed, while 32 bytes split into
; 4-byte pieces is eight -- confirm whether the extra pair is intentional.
133 ; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align4
134 ; SI-DAG: DS_READ_B32
135 ; SI-DAG: DS_WRITE_B32
137 ; SI-DAG: DS_READ_B32
138 ; SI-DAG: DS_WRITE_B32
140 ; SI-DAG: DS_READ_B32
141 ; SI-DAG: DS_WRITE_B32
143 ; SI-DAG: DS_READ_B32
144 ; SI-DAG: DS_WRITE_B32
146 ; SI-DAG: DS_READ_B32
147 ; SI-DAG: DS_WRITE_B32
149 ; SI-DAG: DS_READ_B32
150 ; SI-DAG: DS_WRITE_B32
152 ; SI-DAG: DS_READ_B32
153 ; SI-DAG: DS_WRITE_B32
155 ; SI-DAG: DS_READ_B32
156 ; SI-DAG: DS_WRITE_B32
158 ; SI-DAG: DS_READ_B32
159 ; SI-DAG: DS_WRITE_B32
162 define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
163 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
164 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
; Operands: dest, src, 32 bytes, alignment 4, volatile flag false.
165 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
; 32-byte memcpy between two noalias addrspace(3) pointers (the "lds" of the
; test name) with the alignment operand set to 8. The checks below still expect
; 32-bit DS_READ_B32 / DS_WRITE_B32 instructions, which is what the FIXME on
; the next line refers to; the DAG form does not impose an order between them.
; NOTE(review): nine read/write pairs are listed, while 32 bytes split into
; 4-byte pieces is eight -- confirm whether the extra pair is intentional.
169 ; FIXME: Use 64-bit ops
170 ; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align8
172 ; SI-DAG: DS_READ_B32
173 ; SI-DAG: DS_WRITE_B32
175 ; SI-DAG: DS_READ_B32
176 ; SI-DAG: DS_WRITE_B32
178 ; SI-DAG: DS_READ_B32
179 ; SI-DAG: DS_WRITE_B32
181 ; SI-DAG: DS_READ_B32
182 ; SI-DAG: DS_WRITE_B32
184 ; SI-DAG: DS_READ_B32
185 ; SI-DAG: DS_WRITE_B32
187 ; SI-DAG: DS_READ_B32
188 ; SI-DAG: DS_WRITE_B32
190 ; SI-DAG: DS_READ_B32
191 ; SI-DAG: DS_WRITE_B32
193 ; SI-DAG: DS_READ_B32
194 ; SI-DAG: DS_WRITE_B32
196 ; SI-DAG: DS_READ_B32
197 ; SI-DAG: DS_WRITE_B32
200 define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
201 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
202 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
; Operands: dest, src, 32 bytes, alignment 8, volatile flag false.
203 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
; 32-byte memcpy between two noalias addrspace(1) pointers (the "global" of the
; test name) with the alignment operand set to 1. The checks below list 32
; BUFFER_LOAD_UBYTE / BUFFER_STORE_BYTE pairs, i.e. one byte load and one byte
; store per copied byte; the DAG form does not impose an order between them.
207 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align1
208 ; SI-DAG: BUFFER_LOAD_UBYTE
209 ; SI-DAG: BUFFER_STORE_BYTE
210 ; SI-DAG: BUFFER_LOAD_UBYTE
211 ; SI-DAG: BUFFER_STORE_BYTE
212 ; SI-DAG: BUFFER_LOAD_UBYTE
213 ; SI-DAG: BUFFER_STORE_BYTE
214 ; SI-DAG: BUFFER_LOAD_UBYTE
215 ; SI-DAG: BUFFER_STORE_BYTE
216 ; SI-DAG: BUFFER_LOAD_UBYTE
217 ; SI-DAG: BUFFER_STORE_BYTE
218 ; SI-DAG: BUFFER_LOAD_UBYTE
219 ; SI-DAG: BUFFER_STORE_BYTE
220 ; SI-DAG: BUFFER_LOAD_UBYTE
221 ; SI-DAG: BUFFER_STORE_BYTE
222 ; SI-DAG: BUFFER_LOAD_UBYTE
223 ; SI-DAG: BUFFER_STORE_BYTE
225 ; SI-DAG: BUFFER_LOAD_UBYTE
226 ; SI-DAG: BUFFER_STORE_BYTE
227 ; SI-DAG: BUFFER_LOAD_UBYTE
228 ; SI-DAG: BUFFER_STORE_BYTE
229 ; SI-DAG: BUFFER_LOAD_UBYTE
230 ; SI-DAG: BUFFER_STORE_BYTE
231 ; SI-DAG: BUFFER_LOAD_UBYTE
232 ; SI-DAG: BUFFER_STORE_BYTE
233 ; SI-DAG: BUFFER_LOAD_UBYTE
234 ; SI-DAG: BUFFER_STORE_BYTE
235 ; SI-DAG: BUFFER_LOAD_UBYTE
236 ; SI-DAG: BUFFER_STORE_BYTE
237 ; SI-DAG: BUFFER_LOAD_UBYTE
238 ; SI-DAG: BUFFER_STORE_BYTE
239 ; SI-DAG: BUFFER_LOAD_UBYTE
240 ; SI-DAG: BUFFER_STORE_BYTE
242 ; SI-DAG: BUFFER_LOAD_UBYTE
243 ; SI-DAG: BUFFER_STORE_BYTE
244 ; SI-DAG: BUFFER_LOAD_UBYTE
245 ; SI-DAG: BUFFER_STORE_BYTE
246 ; SI-DAG: BUFFER_LOAD_UBYTE
247 ; SI-DAG: BUFFER_STORE_BYTE
248 ; SI-DAG: BUFFER_LOAD_UBYTE
249 ; SI-DAG: BUFFER_STORE_BYTE
250 ; SI-DAG: BUFFER_LOAD_UBYTE
251 ; SI-DAG: BUFFER_STORE_BYTE
252 ; SI-DAG: BUFFER_LOAD_UBYTE
253 ; SI-DAG: BUFFER_STORE_BYTE
254 ; SI-DAG: BUFFER_LOAD_UBYTE
255 ; SI-DAG: BUFFER_STORE_BYTE
256 ; SI-DAG: BUFFER_LOAD_UBYTE
257 ; SI-DAG: BUFFER_STORE_BYTE
259 ; SI-DAG: BUFFER_LOAD_UBYTE
260 ; SI-DAG: BUFFER_STORE_BYTE
261 ; SI-DAG: BUFFER_LOAD_UBYTE
262 ; SI-DAG: BUFFER_STORE_BYTE
263 ; SI-DAG: BUFFER_LOAD_UBYTE
264 ; SI-DAG: BUFFER_STORE_BYTE
265 ; SI-DAG: BUFFER_LOAD_UBYTE
266 ; SI-DAG: BUFFER_STORE_BYTE
267 ; SI-DAG: BUFFER_LOAD_UBYTE
268 ; SI-DAG: BUFFER_STORE_BYTE
269 ; SI-DAG: BUFFER_LOAD_UBYTE
270 ; SI-DAG: BUFFER_STORE_BYTE
271 ; SI-DAG: BUFFER_LOAD_UBYTE
272 ; SI-DAG: BUFFER_STORE_BYTE
273 ; SI-DAG: BUFFER_LOAD_UBYTE
274 ; SI-DAG: BUFFER_STORE_BYTE
277 define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
278 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
279 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; Operands: dest, src, 32 bytes (i64 count), alignment 1, volatile flag false.
280 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
; 32-byte memcpy between two noalias addrspace(1) pointers (the "global" of the
; test name) with the alignment operand set to 2. The checks below list 16
; BUFFER_LOAD_USHORT and 16 BUFFER_STORE_SHORT instructions, i.e. the copy done
; in 2-byte pieces; the DAG form does not impose an order between them.
284 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align2
285 ; SI-DAG: BUFFER_LOAD_USHORT
286 ; SI-DAG: BUFFER_LOAD_USHORT
287 ; SI-DAG: BUFFER_LOAD_USHORT
288 ; SI-DAG: BUFFER_LOAD_USHORT
289 ; SI-DAG: BUFFER_LOAD_USHORT
290 ; SI-DAG: BUFFER_LOAD_USHORT
291 ; SI-DAG: BUFFER_LOAD_USHORT
292 ; SI-DAG: BUFFER_LOAD_USHORT
293 ; SI-DAG: BUFFER_LOAD_USHORT
294 ; SI-DAG: BUFFER_LOAD_USHORT
295 ; SI-DAG: BUFFER_LOAD_USHORT
296 ; SI-DAG: BUFFER_LOAD_USHORT
297 ; SI-DAG: BUFFER_LOAD_USHORT
298 ; SI-DAG: BUFFER_LOAD_USHORT
299 ; SI-DAG: BUFFER_LOAD_USHORT
300 ; SI-DAG: BUFFER_LOAD_USHORT
302 ; SI-DAG: BUFFER_STORE_SHORT
303 ; SI-DAG: BUFFER_STORE_SHORT
304 ; SI-DAG: BUFFER_STORE_SHORT
305 ; SI-DAG: BUFFER_STORE_SHORT
306 ; SI-DAG: BUFFER_STORE_SHORT
307 ; SI-DAG: BUFFER_STORE_SHORT
308 ; SI-DAG: BUFFER_STORE_SHORT
309 ; SI-DAG: BUFFER_STORE_SHORT
310 ; SI-DAG: BUFFER_STORE_SHORT
311 ; SI-DAG: BUFFER_STORE_SHORT
312 ; SI-DAG: BUFFER_STORE_SHORT
313 ; SI-DAG: BUFFER_STORE_SHORT
314 ; SI-DAG: BUFFER_STORE_SHORT
315 ; SI-DAG: BUFFER_STORE_SHORT
316 ; SI-DAG: BUFFER_STORE_SHORT
317 ; SI-DAG: BUFFER_STORE_SHORT
320 define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
321 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
322 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; Operands: dest, src, 32 bytes (i64 count), alignment 2, volatile flag false.
323 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
; 32-byte memcpy between two noalias addrspace(1) pointers (the "global" of the
; test name) with the alignment operand set to 4. The checks below expect two
; BUFFER_LOAD_DWORDX4 followed by two BUFFER_STORE_DWORDX4; these are plain
; (ordered) checks, so both loads must appear before both stores.
327 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align4
328 ; SI: BUFFER_LOAD_DWORDX4
329 ; SI: BUFFER_LOAD_DWORDX4
330 ; SI: BUFFER_STORE_DWORDX4
331 ; SI: BUFFER_STORE_DWORDX4
333 define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
334 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
335 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; Operands: dest, src, 32 bytes (i64 count), alignment 4, volatile flag false.
336 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
; 32-byte memcpy between two noalias addrspace(1) pointers (the "global" of the
; test name) with the alignment operand set to 8. The checks below expect two
; BUFFER_LOAD_DWORDX4 followed by two BUFFER_STORE_DWORDX4; these are plain
; (ordered) checks, so both loads must appear before both stores.
340 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align8
341 ; SI: BUFFER_LOAD_DWORDX4
342 ; SI: BUFFER_LOAD_DWORDX4
343 ; SI: BUFFER_STORE_DWORDX4
344 ; SI: BUFFER_STORE_DWORDX4
346 define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
347 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
348 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; Operands: dest, src, 32 bytes (i64 count), alignment 8, volatile flag false.
349 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
; 32-byte memcpy between two noalias addrspace(1) pointers (the "global" of the
; test name) with the alignment operand set to 16. The checks below expect two
; BUFFER_LOAD_DWORDX4 followed by two BUFFER_STORE_DWORDX4; these are plain
; (ordered) checks, so both loads must appear before both stores.
353 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align16
354 ; SI: BUFFER_LOAD_DWORDX4
355 ; SI: BUFFER_LOAD_DWORDX4
356 ; SI: BUFFER_STORE_DWORDX4
357 ; SI: BUFFER_STORE_DWORDX4
359 define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
360 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
361 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; Operands: dest, src, 32 bytes (i64 count), alignment 16, volatile flag false.
362 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind