; Web-view page header (repository HEAD commit title): R600/SI - Update concat_vectors.ll to check for scratch usage
; File path: oota-llvm.git / test / CodeGen / R600 / llvm.memcpy.ll
; Compile this file for the R600 backend with -mcpu=SI, run the machine
; verifier, and match the output against the SI and FUNC check prefixes.
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; memcpy intrinsic declarations shared by every test below. For this
; five-operand form the LLVM language reference gives the operands as
; (dest, src, length, alignment, isvolatile).
;   p3i8.p3i8.i32 - addrspace(3) pointers with an i32 length.
;   p1i8.p1i8.i64 - addrspace(1) pointers with an i64 length.
declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind


; 32-byte non-volatile memcpy between two noalias addrspace(3) pointers with
; alignment 1. The checks expect the copy to be expanded into byte accesses:
; 32 DS_READ_U8 and 32 DS_WRITE_B8 in total. These are ordered (non-DAG)
; checks, so the interleaving below must match the emitted instruction order;
; do not reorder them.
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align1
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8

; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8

; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_WRITE_B8
; SI: DS_READ_U8
; SI: DS_READ_U8


; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8

; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8
; SI: DS_READ_U8

; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8

; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8
; SI: DS_WRITE_B8

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so view both i64 pointers as i8*.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; length = 32 bytes, alignment = 1, isvolatile = false
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte non-volatile memcpy between two noalias addrspace(3) pointers with
; alignment 2. The checks expect 16 DS_READ_U16 followed by 16 DS_WRITE_B16
; (16 x 2 bytes = 32 bytes), matched in order, before S_ENDPGM.
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align2
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16

; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16
; SI: DS_READ_U16

; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16

; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16
; SI: DS_WRITE_B16

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so view both i64 pointers as i8*.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; length = 32 bytes, alignment = 2, isvolatile = false
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte non-volatile memcpy between two noalias addrspace(3) pointers with
; alignment 4. A 32-byte copy in dword pieces needs 8 DS_READ_B32 and
; 8 DS_WRITE_B32, so exactly eight DAG pairs are listed; a ninth pair can only
; be satisfied when FileCheck lets DAG matches overlap. The DAG form leaves the
; relative order of reads and writes unconstrained.
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align4
; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so view both i64 pointers as i8*.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; length = 32 bytes, alignment = 4, isvolatile = false
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte non-volatile memcpy between two noalias addrspace(3) pointers with
; alignment 8. The checks currently expect 32-bit accesses: 8 DS_READ_B32 and
; 8 DS_WRITE_B32 (8 x 4 bytes = 32 bytes), in any relative order. The final
; S_ENDPGM check is an ordered check so that it must follow the copy, as in
; the other tests in this file.
; FIXME: Use 64-bit ops
; FUNC-LABEL: @test_small_memcpy_i64_lds_to_lds_align8

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI-DAG: DS_READ_B32
; SI-DAG: DS_WRITE_B32

; SI: S_ENDPGM
define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so view both i64 pointers as i8*.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; length = 32 bytes, alignment = 8, isvolatile = false
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte non-volatile memcpy between two noalias addrspace(1) pointers with
; alignment 1. The checks expect byte accesses: 32 BUFFER_LOAD_UBYTE and
; 32 BUFFER_STORE_BYTE, in any relative order (DAG checks), before S_ENDPGM.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align1
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_LOAD_UBYTE
; SI-DAG: BUFFER_STORE_BYTE

; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so view both i64 pointers as i8*.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; length = 32 bytes (i64), alignment = 1, isvolatile = false
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte non-volatile memcpy between two noalias addrspace(1) pointers with
; alignment 2. The checks expect 16 BUFFER_LOAD_USHORT and
; 16 BUFFER_STORE_SHORT (16 x 2 bytes = 32 bytes), in any relative order
; (DAG checks), before S_ENDPGM.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align2
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT
; SI-DAG: BUFFER_LOAD_USHORT

; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT
; SI-DAG: BUFFER_STORE_SHORT

; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so view both i64 pointers as i8*.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; length = 32 bytes (i64), alignment = 2, isvolatile = false
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte non-volatile memcpy between two noalias addrspace(1) pointers with
; alignment 4. The checks expect two BUFFER_LOAD_DWORDX4 followed by two
; BUFFER_STORE_DWORDX4 (2 x 16 bytes = 32 bytes), matched in order.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so view both i64 pointers as i8*.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; length = 32 bytes (i64), alignment = 4, isvolatile = false
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte non-volatile memcpy between two noalias addrspace(1) pointers with
; alignment 8. The checks expect two BUFFER_LOAD_DWORDX4 followed by two
; BUFFER_STORE_DWORDX4 (2 x 16 bytes = 32 bytes), matched in order.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align8
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so view both i64 pointers as i8*.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; length = 32 bytes (i64), alignment = 8, isvolatile = false
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte non-volatile memcpy between two noalias addrspace(1) pointers with
; alignment 16. The checks expect two BUFFER_LOAD_DWORDX4 followed by two
; BUFFER_STORE_DWORDX4 (2 x 16 bytes = 32 bytes), matched in order.
; FUNC-LABEL: @test_small_memcpy_i64_global_to_global_align16
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_LOAD_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; The intrinsic takes i8 pointers, so view both i64 pointers as i8*.
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  ; length = 32 bytes (i64), alignment = 16, isvolatile = false
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind
  ret void
}