1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; Declarations of the two llvm.memcpy overloads exercised by this test:
;   - p3i8/p3i8/i32: addrspace(3) source and destination, i32 length.
;   - p1i8/p1i8/i64: addrspace(1) source and destination, i64 length.
; Operand order in this form of the intrinsic: dest, src, length in bytes,
; alignment, isvolatile.
4 declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
5 declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
8 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
; Copies 32 bytes from %in to %out through llvm.memcpy. Both arguments are
; noalias addrspace(3) pointers (LDS, going by the test name), bitcast to
; i8 pointers for the intrinsic. The alignment operand is 1 and the copy is
; not volatile.
82 define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
83 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
84 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
; memcpy(dest = %bcout, src = %bcin, len = 32, align = 1, isvolatile = false)
85 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
89 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
; Same 32-byte addrspace(3) to addrspace(3) copy as the align1 variant, but
; the alignment operand passed to llvm.memcpy is 2.
127 define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
128 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
129 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
; memcpy(dest = %bcout, src = %bcin, len = 32, align = 2, isvolatile = false)
130 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
134 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
; Same 32-byte addrspace(3) to addrspace(3) copy as the align1 variant, but
; the alignment operand passed to llvm.memcpy is 4.
146 define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
147 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
148 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
; memcpy(dest = %bcout, src = %bcin, len = 32, align = 4, isvolatile = false)
149 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
153 ; FIXME: Use 64-bit ops for this 8-byte-aligned LDS copy.
154 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:
; Same 32-byte addrspace(3) to addrspace(3) copy as the align1 variant, but
; the alignment operand passed to llvm.memcpy is 8.
167 define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
168 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
169 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
; memcpy(dest = %bcout, src = %bcin, len = 32, align = 8, isvolatile = false)
170 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
174 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
175 ; SI-DAG: buffer_load_ubyte
176 ; SI-DAG: buffer_store_byte
177 ; SI-DAG: buffer_load_ubyte
178 ; SI-DAG: buffer_store_byte
179 ; SI-DAG: buffer_load_ubyte
180 ; SI-DAG: buffer_store_byte
181 ; SI-DAG: buffer_load_ubyte
182 ; SI-DAG: buffer_store_byte
183 ; SI-DAG: buffer_load_ubyte
184 ; SI-DAG: buffer_store_byte
185 ; SI-DAG: buffer_load_ubyte
186 ; SI-DAG: buffer_store_byte
187 ; SI-DAG: buffer_load_ubyte
188 ; SI-DAG: buffer_store_byte
189 ; SI-DAG: buffer_load_ubyte
190 ; SI-DAG: buffer_store_byte
192 ; SI-DAG: buffer_load_ubyte
193 ; SI-DAG: buffer_store_byte
194 ; SI-DAG: buffer_load_ubyte
195 ; SI-DAG: buffer_store_byte
196 ; SI-DAG: buffer_load_ubyte
197 ; SI-DAG: buffer_store_byte
198 ; SI-DAG: buffer_load_ubyte
199 ; SI-DAG: buffer_store_byte
200 ; SI-DAG: buffer_load_ubyte
201 ; SI-DAG: buffer_store_byte
202 ; SI-DAG: buffer_load_ubyte
203 ; SI-DAG: buffer_store_byte
204 ; SI-DAG: buffer_load_ubyte
205 ; SI-DAG: buffer_store_byte
206 ; SI-DAG: buffer_load_ubyte
207 ; SI-DAG: buffer_store_byte
209 ; SI-DAG: buffer_load_ubyte
210 ; SI-DAG: buffer_store_byte
211 ; SI-DAG: buffer_load_ubyte
212 ; SI-DAG: buffer_store_byte
213 ; SI-DAG: buffer_load_ubyte
214 ; SI-DAG: buffer_store_byte
215 ; SI-DAG: buffer_load_ubyte
216 ; SI-DAG: buffer_store_byte
217 ; SI-DAG: buffer_load_ubyte
218 ; SI-DAG: buffer_store_byte
219 ; SI-DAG: buffer_load_ubyte
220 ; SI-DAG: buffer_store_byte
221 ; SI-DAG: buffer_load_ubyte
222 ; SI-DAG: buffer_store_byte
223 ; SI-DAG: buffer_load_ubyte
224 ; SI-DAG: buffer_store_byte
226 ; SI-DAG: buffer_load_ubyte
227 ; SI-DAG: buffer_store_byte
228 ; SI-DAG: buffer_load_ubyte
229 ; SI-DAG: buffer_store_byte
230 ; SI-DAG: buffer_load_ubyte
231 ; SI-DAG: buffer_store_byte
232 ; SI-DAG: buffer_load_ubyte
233 ; SI-DAG: buffer_store_byte
234 ; SI-DAG: buffer_load_ubyte
235 ; SI-DAG: buffer_store_byte
236 ; SI-DAG: buffer_load_ubyte
237 ; SI-DAG: buffer_store_byte
238 ; SI-DAG: buffer_load_ubyte
239 ; SI-DAG: buffer_store_byte
240 ; SI-DAG: buffer_load_ubyte
241 ; SI-DAG: buffer_store_byte
; Copies 32 bytes from %in to %out through llvm.memcpy. Both arguments are
; noalias addrspace(1) pointers (global memory, going by the test name),
; bitcast to i8 pointers for the intrinsic. The alignment operand is 1 and
; the copy is not volatile. The check lines above expect 32 byte-sized
; buffer loads and 32 byte-sized buffer stores, in any order.
244 define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
245 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
246 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; memcpy(dest = %bcout, src = %bcin, len = 32, align = 1, isvolatile = false)
247 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
251 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
252 ; SI-DAG: buffer_load_ushort
253 ; SI-DAG: buffer_load_ushort
254 ; SI-DAG: buffer_load_ushort
255 ; SI-DAG: buffer_load_ushort
256 ; SI-DAG: buffer_load_ushort
257 ; SI-DAG: buffer_load_ushort
258 ; SI-DAG: buffer_load_ushort
259 ; SI-DAG: buffer_load_ushort
260 ; SI-DAG: buffer_load_ushort
261 ; SI-DAG: buffer_load_ushort
262 ; SI-DAG: buffer_load_ushort
263 ; SI-DAG: buffer_load_ushort
264 ; SI-DAG: buffer_load_ushort
265 ; SI-DAG: buffer_load_ushort
266 ; SI-DAG: buffer_load_ushort
267 ; SI-DAG: buffer_load_ushort
269 ; SI-DAG: buffer_store_short
270 ; SI-DAG: buffer_store_short
271 ; SI-DAG: buffer_store_short
272 ; SI-DAG: buffer_store_short
273 ; SI-DAG: buffer_store_short
274 ; SI-DAG: buffer_store_short
275 ; SI-DAG: buffer_store_short
276 ; SI-DAG: buffer_store_short
277 ; SI-DAG: buffer_store_short
278 ; SI-DAG: buffer_store_short
279 ; SI-DAG: buffer_store_short
280 ; SI-DAG: buffer_store_short
281 ; SI-DAG: buffer_store_short
282 ; SI-DAG: buffer_store_short
283 ; SI-DAG: buffer_store_short
284 ; SI-DAG: buffer_store_short
; Same 32-byte addrspace(1) to addrspace(1) copy as the align1 variant, but
; the alignment operand passed to llvm.memcpy is 2. The check lines above
; expect 16 ushort-sized buffer loads and 16 short-sized buffer stores, in
; any order.
287 define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
288 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
289 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; memcpy(dest = %bcout, src = %bcin, len = 32, align = 2, isvolatile = false)
290 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
294 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
295 ; SI: buffer_load_dwordx4
296 ; SI: buffer_load_dwordx4
297 ; SI: buffer_store_dwordx4
298 ; SI: buffer_store_dwordx4
; Same 32-byte addrspace(1) to addrspace(1) copy as the align1 variant, but
; the alignment operand passed to llvm.memcpy is 4. The check lines above
; expect two dwordx4 buffer loads followed by two dwordx4 buffer stores.
300 define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
301 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
302 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; memcpy(dest = %bcout, src = %bcin, len = 32, align = 4, isvolatile = false)
303 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
307 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
308 ; SI: buffer_load_dwordx4
309 ; SI: buffer_load_dwordx4
310 ; SI: buffer_store_dwordx4
311 ; SI: buffer_store_dwordx4
; Same 32-byte addrspace(1) to addrspace(1) copy as the align1 variant, but
; the alignment operand passed to llvm.memcpy is 8. The check lines above
; expect two dwordx4 buffer loads followed by two dwordx4 buffer stores.
313 define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
314 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
315 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; memcpy(dest = %bcout, src = %bcin, len = 32, align = 8, isvolatile = false)
316 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
320 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
321 ; SI: buffer_load_dwordx4
322 ; SI: buffer_load_dwordx4
323 ; SI: buffer_store_dwordx4
324 ; SI: buffer_store_dwordx4
; Same 32-byte addrspace(1) to addrspace(1) copy as the align1 variant, but
; the alignment operand passed to llvm.memcpy is 16. The check lines above
; expect two dwordx4 buffer loads followed by two dwordx4 buffer stores.
326 define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
327 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
328 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; memcpy(dest = %bcout, src = %bcin, len = 32, align = 16, isvolatile = false)
329 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind