1 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC
2 ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
; Intrinsic declaration; @work_item_info calls it to obtain an i32 value.
4 declare i32 @llvm.r600.read.tidig.x() nounwind readnone
6 ; FUNC-LABEL: @mova_same_clause
8 ; R600-CHECK: LDS_WRITE
9 ; R600-CHECK: LDS_WRITE
10 ; R600-CHECK: LDS_READ
11 ; R600-CHECK: LDS_READ
13 ; SI-CHECK: DS_WRITE_B32
14 ; SI-CHECK: DS_WRITE_B32
15 ; SI-CHECK: DS_READ_B32
16 ; SI-CHECK: DS_READ_B32
; Stores the constants 4 and 5 into a stack array at two indices loaded from
; %in, then copies array elements 0 and 1 to %out[0] and %out[1].
17 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
; Five-element i32 scratch array created with alloca.
19 %stack = alloca [5 x i32], align 4
; First dynamic index: the value read from %in[0]; element at that index gets 4.
20 %0 = load i32 addrspace(1)* %in, align 4
21 %arrayidx1 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %0
22 store i32 4, i32* %arrayidx1, align 4
; Second dynamic index: the value read from %in[1]; element at that index gets 5.
23 %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
24 %1 = load i32 addrspace(1)* %arrayidx2, align 4
25 %arrayidx3 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %1
26 store i32 5, i32* %arrayidx3, align 4
; Read back the constant-indexed element 0 and write it to %out[0].
27 %arrayidx10 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 0
28 %2 = load i32* %arrayidx10, align 4
29 store i32 %2, i32 addrspace(1)* %out, align 4
; Read back the constant-indexed element 1 and write it to %out[1].
30 %arrayidx12 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 1
31 %3 = load i32* %arrayidx12
32 %arrayidx13 = getelementptr inbounds i32 addrspace(1)* %out, i32 1
33 store i32 %3, i32 addrspace(1)* %arrayidx13
37 ; This test checks that the stack offset is calculated correctly for structs.
38 ; All register loads/stores should be optimized away, so there shouldn't be
39 ; any MOVA instructions.
41 ; XXX: This generated code has unnecessary MOVs; we should be able to optimize them away.
44 ; FUNC-LABEL: @multiple_structs
45 ; R600-CHECK-NOT: MOVA_INT
46 ; SI-CHECK-NOT: V_MOVREL
; Two-field i32 struct used by @multiple_structs.
47 %struct.point = type { i32, i32 }
; Allocates two %struct.point objects, fills all four fields with the constants
; 0..3, then writes the sum of both first fields (a.x + b.x) to %out.
49 define void @multiple_structs(i32 addrspace(1)* %out) {
51 %a = alloca %struct.point
52 %b = alloca %struct.point
; Field pointers: index 0 is the first field (x), index 1 the second (y).
53 %a.x.ptr = getelementptr %struct.point* %a, i32 0, i32 0
54 %a.y.ptr = getelementptr %struct.point* %a, i32 0, i32 1
55 %b.x.ptr = getelementptr %struct.point* %b, i32 0, i32 0
56 %b.y.ptr = getelementptr %struct.point* %b, i32 0, i32 1
57 store i32 0, i32* %a.x.ptr
58 store i32 1, i32* %a.y.ptr
59 store i32 2, i32* %b.x.ptr
60 store i32 3, i32* %b.y.ptr
; These pointers address the same fields as %a.x.ptr and %b.x.ptr above; every
; index used in this function is a constant.
61 %a.indirect.ptr = getelementptr %struct.point* %a, i32 0, i32 0
62 %b.indirect.ptr = getelementptr %struct.point* %b, i32 0, i32 0
63 %a.indirect = load i32* %a.indirect.ptr
64 %b.indirect = load i32* %b.indirect.ptr
65 %0 = add i32 %a.indirect, %b.indirect
66 store i32 %0, i32 addrspace(1)* %out
70 ; Test direct access of a private array inside a loop. The private array
71 ; loads and stores should be lowered to copies, so there shouldn't be any MOVA instructions.
74 ; FUNC-LABEL: @direct_loop
75 ; R600-CHECK-NOT: MOVA_INT
76 ; SI-CHECK-NOT: V_MOVREL
; Copies %in[0] and %in[1] into one stack array, runs a counted loop that stores
; into element 0 of a second stack array, then writes that element to %out.
; Every array index used here is a constant.
78 define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
80 %prv_array_const = alloca [2 x i32]
81 %prv_array = alloca [2 x i32]
; Load the two input values from %in[0] and %in[1].
82 %a = load i32 addrspace(1)* %in
83 %b_src_ptr = getelementptr i32 addrspace(1)* %in, i32 1
84 %b = load i32 addrspace(1)* %b_src_ptr
; Store them into elements 0 and 1 of %prv_array_const.
85 %a_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
86 store i32 %a, i32* %a_dst_ptr
87 %b_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 1
88 store i32 %b, i32* %b_dst_ptr
; Loop counter: 0 when arriving from %entry, otherwise %count from %for.body.
92 %inc = phi i32 [0, %entry], [%count, %for.body]
; Pointers to element 0 of the source array and element 0 of the destination array.
93 %x_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
95 %y_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
; Store %xy into element 0 of %prv_array.
98 store i32 %xy, i32* %y_ptr
; Advance the counter; leave for %for.end once it equals 4095, else repeat %for.body.
99 %count = add i32 %inc, 1
100 %done = icmp eq i32 %count, 4095
101 br i1 %done, label %for.end, label %for.body
; Write element 0 of %prv_array to %out.
104 %value_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
105 %value = load i32* %value_ptr
106 store i32 %value, i32 addrspace(1)* %out
110 ; FUNC-LABEL: @short_array
112 ; R600-CHECK: MOVA_INT
114 ; SI-CHECK: V_MOVRELS_B32_e32
; Indexes a two-element i16 stack array with the run-time value %index and
; writes a sign-extended i32 result to %out.
115 define void @short_array(i32 addrspace(1)* %out, i32 %index) {
117 %0 = alloca [2 x i16]
; Constant-index pointers to elements 0 and 1.
118 %1 = getelementptr [2 x i16]* %0, i32 0, i32 0
119 %2 = getelementptr [2 x i16]* %0, i32 0, i32 1
; Dynamically indexed pointer: the element is selected by the %index argument.
122 %3 = getelementptr [2 x i16]* %0, i32 0, i32 %index
; Widen the i16 value %4 to i32 with sign extension before the global store.
124 %5 = sext i16 %4 to i32
125 store i32 %5, i32 addrspace(1)* %out
129 ; FUNC-LABEL: @char_array
131 ; R600-CHECK: MOVA_INT
133 ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 0x100
134 ; SI-CHECK: V_MOVRELS_B32_e32
; Indexes a two-element i8 array (%0) with the run-time value %index and writes
; a sign-extended i32 result to %out.
135 define void @char_array(i32 addrspace(1)* %out, i32 %index) {
; Constant-index pointers to elements 0 and 1.
138 %1 = getelementptr [2 x i8]* %0, i32 0, i32 0
139 %2 = getelementptr [2 x i8]* %0, i32 0, i32 1
; Dynamically indexed pointer: the element is selected by the %index argument.
142 %3 = getelementptr [2 x i8]* %0, i32 0, i32 %index
; Widen the i8 value %4 to i32 with sign extension before the global store.
144 %5 = sext i8 %4 to i32
145 store i32 %5, i32 addrspace(1)* %out
150 ; Make sure we don't overwrite workitem information with private memory
152 ; FUNC-LABEL: @work_item_info
153 ; R600-CHECK-NOT: MOV T0.X
154 ; Additional check in case the move ends up in the last slot
155 ; R600-CHECK-NOT: MOV * T0.X
157 ; SI-CHECK-NOT: V_MOV_B32_e{{(32|64)}} v0
; Uses a two-element i32 stack array indexed by %in in the same function that
; calls the @llvm.r600.read.tidig.x intrinsic, then stores %6 to %out.
158 define void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
160 %0 = alloca [2 x i32]
; Constant-index pointers to elements 0 and 1.
161 %1 = getelementptr [2 x i32]* %0, i32 0, i32 0
162 %2 = getelementptr [2 x i32]* %0, i32 0, i32 1
; Dynamically indexed pointer: the element is selected by the %in argument.
165 %3 = getelementptr [2 x i32]* %0, i32 0, i32 %in
; Intrinsic call; per the comment above this test, presumably the work-item
; information that private memory must not overwrite.
167 %5 = call i32 @llvm.r600.read.tidig.x()
169 store i32 %6, i32 addrspace(1)* %out
173 ; Test that two stack objects are not stored in the same register
174 ; The second stack object should be in T3.X
175 ; FUNC-LABEL: @no_overlap
177 ; R600_CHECK: [[CHAN:[XYZW]]]+
178 ; R600-CHECK-NOT: [[CHAN]]+
179 ; SI-CHECK: V_MOV_B32_e32 v3
; Allocates two separate i8 stack arrays (3 and 2 elements), forms both constant
; and %in-indexed pointers into each, and writes a sign-extended i32 result to
; %out.
180 define void @no_overlap(i32 addrspace(1)* %out, i32 %in) {
; Two distinct stack objects, both byte-aligned.
182 %0 = alloca [3 x i8], align 1
183 %1 = alloca [2 x i8], align 1
; Constant-index pointers to every element of the first array ...
184 %2 = getelementptr [3 x i8]* %0, i32 0, i32 0
185 %3 = getelementptr [3 x i8]* %0, i32 0, i32 1
186 %4 = getelementptr [3 x i8]* %0, i32 0, i32 2
; ... and of the second array.
187 %5 = getelementptr [2 x i8]* %1, i32 0, i32 0
188 %6 = getelementptr [2 x i8]* %1, i32 0, i32 1
; The same run-time index %in is applied to both arrays.
194 %7 = getelementptr [3 x i8]* %0, i32 0, i32 %in
195 %8 = getelementptr [2 x i8]* %1, i32 0, i32 %in
; Widen the i8 value %11 to i32 with sign extension before the global store.
199 %12 = sext i8 %11 to i32
200 store i32 %12, i32 addrspace(1)* %out
; Nested i8 array: writes 0 and 1 into row 0, loads row 0 at the run-time column
; %index, and stores the sign-extended value to %out.
204 define void @char_array_array(i32 addrspace(1)* %out, i32 %index) {
206 %alloca = alloca [2 x [2 x i8]]
; Pointers to [0][0] and [0][1].
207 %gep0 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
208 %gep1 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
209 store i8 0, i8* %gep0
210 store i8 1, i8* %gep1
; Only the innermost index is dynamic; the row index stays 0.
211 %gep2 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
212 %load = load i8* %gep2
213 %sext = sext i8 %load to i32
214 store i32 %sext, i32 addrspace(1)* %out
; Nested i32 array: writes 0 and 1 into row 0, loads row 0 at the run-time
; column %index, and stores the loaded value to %out.
218 define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) {
220 %alloca = alloca [2 x [2 x i32]]
; Pointers to [0][0] and [0][1].
221 %gep0 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
222 %gep1 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
223 store i32 0, i32* %gep0
224 store i32 1, i32* %gep1
; Only the innermost index is dynamic; the row index stays 0.
225 %gep2 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
226 %load = load i32* %gep2
227 store i32 %load, i32 addrspace(1)* %out
; Nested i64 array: writes 0 and 1 into row 0, loads row 0 at the run-time
; column %index, and stores the loaded 64-bit value to %out.
231 define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) {
233 %alloca = alloca [2 x [2 x i64]]
; Pointers to [0][0] and [0][1].
234 %gep0 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
235 %gep1 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
236 store i64 0, i64* %gep0
237 store i64 1, i64* %gep1
; Only the innermost index is dynamic; the row index stays 0.
238 %gep2 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
239 %load = load i64* %gep2
240 store i64 %load, i64 addrspace(1)* %out
; Two-field i32 struct used by @struct_array_array and @struct_pair32_array.
244 %struct.pair32 = type { i32, i32 }
; Nested array of %struct.pair32: writes field 1 of [0][0] and [0][1], then
; loads field 0 of [0][%index] and stores it to %out.
246 define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) {
248 %alloca = alloca [2 x [2 x %struct.pair32]]
; Pointers to field 1 of [0][0] and of [0][1].
249 %gep0 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
250 %gep1 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
251 store i32 0, i32* %gep0
252 store i32 1, i32* %gep1
; Dynamic element index with a constant field index of 0.
; NOTE(review): the stores above write field 1 while this load reads field 0 -
; confirm that reading a field not written here is intended.
253 %gep2 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
254 %load = load i32* %gep2
255 store i32 %load, i32 addrspace(1)* %out
; Array of %struct.pair32: writes field 1 of element 0 and field 0 of element 1,
; then loads field 0 of element %index and stores it to %out.
259 define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) {
261 %alloca = alloca [2 x %struct.pair32]
; Pointers to [0].field1 and [1].field0.
262 %gep0 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
263 %gep1 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
264 store i32 0, i32* %gep0
265 store i32 1, i32* %gep1
; Dynamic element index with a constant field index of 0.
; NOTE(review): of the two stores above, only [1].field0 is a location this load
; can read; [0].field0 is not written here - confirm that is intended.
266 %gep2 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
267 %load = load i32* %gep2
268 store i32 %load, i32 addrspace(1)* %out
; Chooses between pointers to two stack array elements with a select, loads
; through the chosen pointer, and stores the loaded value to %out.
272 define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
274 %tmp = alloca [2 x i32]
; Pointers to elements 0 and 1, initialised to 0 and 1 respectively.
275 %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
276 %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
277 store i32 0, i32* %tmp1
278 store i32 1, i32* %tmp2
; Element 0 is chosen when %in == 0, element 1 otherwise.
279 %cmp = icmp eq i32 %in, 0
280 %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
281 %load = load i32* %sel
282 store i32 %load, i32 addrspace(1)* %out