1 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; Declarations of the two llvm.memcpy overloads exercised by the tests in this
; file: an addrspace(3) -> addrspace(3) copy taking an i32 length, and an
; addrspace(1) -> addrspace(1) copy taking an i64 length. Both pointer
; arguments are marked nocapture and both declarations are nounwind.
3 declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
4 declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
7 ; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align1
; Test input: 32-byte memcpy between two noalias addrspace(3) pointers
; (LDS, per the test name) with an alignment argument of 1.
; Both i64 pointers are first bitcast to i8 pointers in the same address
; space because the intrinsic is declared on i8 pointers.
77 define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
78 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
79 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
; Arguments: %bcout, %bcin, length i32 32, alignment i32 1, i1 false.
80 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
84 ; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align2
; Test input: 32-byte memcpy between two noalias addrspace(3) pointers
; (LDS, per the test name) with an alignment argument of 2.
; Identical to the align1 variant except for the alignment operand.
120 define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
121 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
122 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
; Arguments: %bcout, %bcin, length i32 32, alignment i32 2, i1 false.
123 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
127 ; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align4
128 ; SI-DAG: DS_READ_B32
129 ; SI-DAG: DS_WRITE_B32
131 ; SI-DAG: DS_READ_B32
132 ; SI-DAG: DS_WRITE_B32
134 ; SI-DAG: DS_READ_B32
135 ; SI-DAG: DS_WRITE_B32
137 ; SI-DAG: DS_READ_B32
138 ; SI-DAG: DS_WRITE_B32
140 ; SI-DAG: DS_READ_B32
141 ; SI-DAG: DS_WRITE_B32
143 ; SI-DAG: DS_READ_B32
144 ; SI-DAG: DS_WRITE_B32
146 ; SI-DAG: DS_READ_B32
147 ; SI-DAG: DS_WRITE_B32
149 ; SI-DAG: DS_READ_B32
150 ; SI-DAG: DS_WRITE_B32
152 ; SI-DAG: DS_READ_B32
153 ; SI-DAG: DS_WRITE_B32
; Test input: 32-byte memcpy between two noalias addrspace(3) pointers
; (LDS, per the test name) with an alignment argument of 4. The check lines
; preceding this function expect DS_READ_B32 / DS_WRITE_B32 in any order.
; NOTE(review): assuming each B32 op moves 4 bytes, a 32-byte copy needs 8
; read/write pairs, but 9 pairs are listed in the visible checks - confirm
; whether the extra pair is intentional.
156 define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
157 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
158 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
; Arguments: %bcout, %bcin, length i32 32, alignment i32 4, i1 false.
159 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
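163 ; FIXME: Use 64-bit ops for this 8-byte-aligned copy instead of the 32-bit DS_READ_B32 / DS_WRITE_B32 currently checked below.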
164 ; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align8
166 ; SI-DAG: DS_READ_B32
167 ; SI-DAG: DS_WRITE_B32
169 ; SI-DAG: DS_READ_B32
170 ; SI-DAG: DS_WRITE_B32
172 ; SI-DAG: DS_READ_B32
173 ; SI-DAG: DS_WRITE_B32
175 ; SI-DAG: DS_READ_B32
176 ; SI-DAG: DS_WRITE_B32
178 ; SI-DAG: DS_READ_B32
179 ; SI-DAG: DS_WRITE_B32
181 ; SI-DAG: DS_READ_B32
182 ; SI-DAG: DS_WRITE_B32
184 ; SI-DAG: DS_READ_B32
185 ; SI-DAG: DS_WRITE_B32
187 ; SI-DAG: DS_READ_B32
188 ; SI-DAG: DS_WRITE_B32
190 ; SI-DAG: DS_READ_B32
191 ; SI-DAG: DS_WRITE_B32
; Test input: 32-byte memcpy between two noalias addrspace(3) pointers
; (LDS, per the test name) with an alignment argument of 8. The check lines
; preceding this function still expect 32-bit DS_READ_B32 / DS_WRITE_B32,
; i.e. the same expectations as the align4 variant.
; NOTE(review): assuming each B32 op moves 4 bytes, a 32-byte copy needs 8
; read/write pairs, but 9 pairs are listed in the visible checks - confirm
; whether the extra pair is intentional.
194 define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
195 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
196 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
; Arguments: %bcout, %bcin, length i32 32, alignment i32 8, i1 false.
197 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
201 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align1
202 ; SI-DAG: BUFFER_LOAD_UBYTE
203 ; SI-DAG: BUFFER_STORE_BYTE
204 ; SI-DAG: BUFFER_LOAD_UBYTE
205 ; SI-DAG: BUFFER_STORE_BYTE
206 ; SI-DAG: BUFFER_LOAD_UBYTE
207 ; SI-DAG: BUFFER_STORE_BYTE
208 ; SI-DAG: BUFFER_LOAD_UBYTE
209 ; SI-DAG: BUFFER_STORE_BYTE
210 ; SI-DAG: BUFFER_LOAD_UBYTE
211 ; SI-DAG: BUFFER_STORE_BYTE
212 ; SI-DAG: BUFFER_LOAD_UBYTE
213 ; SI-DAG: BUFFER_STORE_BYTE
214 ; SI-DAG: BUFFER_LOAD_UBYTE
215 ; SI-DAG: BUFFER_STORE_BYTE
216 ; SI-DAG: BUFFER_LOAD_UBYTE
217 ; SI-DAG: BUFFER_STORE_BYTE
219 ; SI-DAG: BUFFER_LOAD_UBYTE
220 ; SI-DAG: BUFFER_STORE_BYTE
221 ; SI-DAG: BUFFER_LOAD_UBYTE
222 ; SI-DAG: BUFFER_STORE_BYTE
223 ; SI-DAG: BUFFER_LOAD_UBYTE
224 ; SI-DAG: BUFFER_STORE_BYTE
225 ; SI-DAG: BUFFER_LOAD_UBYTE
226 ; SI-DAG: BUFFER_STORE_BYTE
227 ; SI-DAG: BUFFER_LOAD_UBYTE
228 ; SI-DAG: BUFFER_STORE_BYTE
229 ; SI-DAG: BUFFER_LOAD_UBYTE
230 ; SI-DAG: BUFFER_STORE_BYTE
231 ; SI-DAG: BUFFER_LOAD_UBYTE
232 ; SI-DAG: BUFFER_STORE_BYTE
233 ; SI-DAG: BUFFER_LOAD_UBYTE
234 ; SI-DAG: BUFFER_STORE_BYTE
236 ; SI-DAG: BUFFER_LOAD_UBYTE
237 ; SI-DAG: BUFFER_STORE_BYTE
238 ; SI-DAG: BUFFER_LOAD_UBYTE
239 ; SI-DAG: BUFFER_STORE_BYTE
240 ; SI-DAG: BUFFER_LOAD_UBYTE
241 ; SI-DAG: BUFFER_STORE_BYTE
242 ; SI-DAG: BUFFER_LOAD_UBYTE
243 ; SI-DAG: BUFFER_STORE_BYTE
244 ; SI-DAG: BUFFER_LOAD_UBYTE
245 ; SI-DAG: BUFFER_STORE_BYTE
246 ; SI-DAG: BUFFER_LOAD_UBYTE
247 ; SI-DAG: BUFFER_STORE_BYTE
248 ; SI-DAG: BUFFER_LOAD_UBYTE
249 ; SI-DAG: BUFFER_STORE_BYTE
250 ; SI-DAG: BUFFER_LOAD_UBYTE
251 ; SI-DAG: BUFFER_STORE_BYTE
253 ; SI-DAG: BUFFER_LOAD_UBYTE
254 ; SI-DAG: BUFFER_STORE_BYTE
255 ; SI-DAG: BUFFER_LOAD_UBYTE
256 ; SI-DAG: BUFFER_STORE_BYTE
257 ; SI-DAG: BUFFER_LOAD_UBYTE
258 ; SI-DAG: BUFFER_STORE_BYTE
259 ; SI-DAG: BUFFER_LOAD_UBYTE
260 ; SI-DAG: BUFFER_STORE_BYTE
261 ; SI-DAG: BUFFER_LOAD_UBYTE
262 ; SI-DAG: BUFFER_STORE_BYTE
263 ; SI-DAG: BUFFER_LOAD_UBYTE
264 ; SI-DAG: BUFFER_STORE_BYTE
265 ; SI-DAG: BUFFER_LOAD_UBYTE
266 ; SI-DAG: BUFFER_STORE_BYTE
267 ; SI-DAG: BUFFER_LOAD_UBYTE
268 ; SI-DAG: BUFFER_STORE_BYTE
; Test input: 32-byte memcpy between two noalias addrspace(1) pointers
; (global memory, per the test name) with an alignment argument of 1.
; The check lines preceding this function list 32 BUFFER_LOAD_UBYTE /
; BUFFER_STORE_BYTE pairs, one pair per byte copied.
271 define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
272 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
273 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; Arguments: %bcout, %bcin, length i64 32, alignment i32 1, i1 false.
274 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
278 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align2
279 ; SI-DAG: BUFFER_LOAD_USHORT
280 ; SI-DAG: BUFFER_STORE_SHORT
281 ; SI-DAG: BUFFER_LOAD_USHORT
282 ; SI-DAG: BUFFER_STORE_SHORT
283 ; SI-DAG: BUFFER_LOAD_USHORT
284 ; SI-DAG: BUFFER_STORE_SHORT
285 ; SI-DAG: BUFFER_LOAD_USHORT
286 ; SI-DAG: BUFFER_STORE_SHORT
287 ; SI-DAG: BUFFER_LOAD_USHORT
288 ; SI-DAG: BUFFER_STORE_SHORT
289 ; SI-DAG: BUFFER_LOAD_USHORT
290 ; SI-DAG: BUFFER_STORE_SHORT
291 ; SI-DAG: BUFFER_LOAD_USHORT
292 ; SI-DAG: BUFFER_STORE_SHORT
293 ; SI-DAG: BUFFER_LOAD_USHORT
294 ; SI-DAG: BUFFER_STORE_SHORT
296 ; SI-DAG: BUFFER_LOAD_USHORT
297 ; SI-DAG: BUFFER_STORE_SHORT
298 ; SI-DAG: BUFFER_LOAD_USHORT
299 ; SI-DAG: BUFFER_STORE_SHORT
300 ; SI-DAG: BUFFER_LOAD_USHORT
301 ; SI-DAG: BUFFER_STORE_SHORT
302 ; SI-DAG: BUFFER_LOAD_USHORT
303 ; SI-DAG: BUFFER_STORE_SHORT
304 ; SI-DAG: BUFFER_LOAD_USHORT
305 ; SI-DAG: BUFFER_STORE_SHORT
306 ; SI-DAG: BUFFER_LOAD_USHORT
307 ; SI-DAG: BUFFER_STORE_SHORT
308 ; SI-DAG: BUFFER_LOAD_USHORT
309 ; SI-DAG: BUFFER_STORE_SHORT
310 ; SI-DAG: BUFFER_LOAD_USHORT
311 ; SI-DAG: BUFFER_STORE_SHORT
; Test input: 32-byte memcpy between two noalias addrspace(1) pointers
; (global memory, per the test name) with an alignment argument of 2.
; The check lines preceding this function list 16 BUFFER_LOAD_USHORT /
; BUFFER_STORE_SHORT pairs, one pair per 2 bytes copied.
314 define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
315 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
316 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; Arguments: %bcout, %bcin, length i64 32, alignment i32 2, i1 false.
317 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
321 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align4
322 ; SI: BUFFER_LOAD_DWORDX4
323 ; SI: BUFFER_STORE_DWORDX4
324 ; SI: BUFFER_LOAD_DWORDX4
325 ; SI: BUFFER_STORE_DWORDX4
; Test input: 32-byte memcpy between two noalias addrspace(1) pointers
; (global memory, per the test name) with an alignment argument of 4.
; The check lines preceding this function expect two ordered
; BUFFER_LOAD_DWORDX4 / BUFFER_STORE_DWORDX4 pairs.
327 define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
328 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
329 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; Arguments: %bcout, %bcin, length i64 32, alignment i32 4, i1 false.
330 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
334 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align8
335 ; SI: BUFFER_LOAD_DWORDX4
336 ; SI: BUFFER_STORE_DWORDX4
337 ; SI: BUFFER_LOAD_DWORDX4
338 ; SI: BUFFER_STORE_DWORDX4
; Test input: 32-byte memcpy between two noalias addrspace(1) pointers
; (global memory, per the test name) with an alignment argument of 8.
; The check lines preceding this function expect two ordered
; BUFFER_LOAD_DWORDX4 / BUFFER_STORE_DWORDX4 pairs, the same expectations
; as the align4 variant.
340 define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
341 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
342 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; Arguments: %bcout, %bcin, length i64 32, alignment i32 8, i1 false.
343 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
347 ; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align16
348 ; SI: BUFFER_LOAD_DWORDX4
349 ; SI: BUFFER_STORE_DWORDX4
350 ; SI: BUFFER_LOAD_DWORDX4
351 ; SI: BUFFER_STORE_DWORDX4
353 define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
354 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
355 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
356 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind