1 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
2 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
3 ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
4 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
6 ;===------------------------------------------------------------------------===;
8 ;===------------------------------------------------------------------------===;
10 ; Load an i8 value from the global address space.
; The byte is zero-extended to i32 and stored to %out. The R600 checks expect
; a VTX_READ_8 with .X destination and source channels; the SI checks expect
; buffer_load_ubyte into a v register.
11 ; FUNC-LABEL: {{^}}load_i8:
12 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
14 ; SI-CHECK: buffer_load_ubyte v{{[0-9]+}},
15 define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
16 %1 = load i8 addrspace(1)* %in
17 %2 = zext i8 %1 to i32
18 store i32 %2, i32 addrspace(1)* %out
; Load an i8 value from the global address space and sign-extend it to i32.
; The R600 checks expect VTX_READ_8 followed by an LSHL/ASHR pair in which the
; ASHR reads the LSHL result through PV; the SI checks expect
; buffer_load_sbyte.
22 ; FUNC-LABEL: {{^}}load_i8_sext:
23 ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
24 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
26 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
28 ; SI-CHECK: buffer_load_sbyte
29 define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
31 %0 = load i8 addrspace(1)* %in
32 %1 = sext i8 %0 to i32
33 store i32 %1, i32 addrspace(1)* %out
; Load a <2 x i8> vector from the global address space and zero-extend each
; element to i32. Both targets are expected to emit one byte load per element
; (two VTX_READ_8 on R600, two buffer_load_ubyte on SI).
37 ; FUNC-LABEL: {{^}}load_v2i8:
38 ; R600-CHECK: VTX_READ_8
39 ; R600-CHECK: VTX_READ_8
40 ; SI-CHECK: buffer_load_ubyte
41 ; SI-CHECK: buffer_load_ubyte
42 define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
44 %0 = load <2 x i8> addrspace(1)* %in
45 %1 = zext <2 x i8> %0 to <2 x i32>
46 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Load a <2 x i8> vector from the global address space and sign-extend each
; element to i32. The R600 checks are order-independent (DAG form): for each
; of the two elements they expect a VTX_READ_8, an LSHL of the loaded register
; and an ASHR that reads the LSHL result through PV. The SI checks expect two
; buffer_load_sbyte instructions.
50 ; FUNC-LABEL: {{^}}load_v2i8_sext:
51 ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
52 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
53 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
55 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
57 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
59 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
61 ; SI-CHECK: buffer_load_sbyte
62 ; SI-CHECK: buffer_load_sbyte
63 define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
65 %0 = load <2 x i8> addrspace(1)* %in
66 %1 = sext <2 x i8> %0 to <2 x i32>
67 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Load a <4 x i8> vector from the global address space and zero-extend each
; element to i32. Both targets are expected to emit one byte load per element
; (four VTX_READ_8 on R600, four buffer_load_ubyte on SI).
71 ; FUNC-LABEL: {{^}}load_v4i8:
72 ; R600-CHECK: VTX_READ_8
73 ; R600-CHECK: VTX_READ_8
74 ; R600-CHECK: VTX_READ_8
75 ; R600-CHECK: VTX_READ_8
76 ; SI-CHECK: buffer_load_ubyte
77 ; SI-CHECK: buffer_load_ubyte
78 ; SI-CHECK: buffer_load_ubyte
79 ; SI-CHECK: buffer_load_ubyte
80 define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
82 %0 = load <4 x i8> addrspace(1)* %in
83 %1 = zext <4 x i8> %0 to <4 x i32>
84 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
; Load a <4 x i8> vector from the global address space and sign-extend each
; element to i32. The R600 checks are order-independent (DAG form): for each
; of the four elements they expect a VTX_READ_8, an LSHL of the loaded
; register and an ASHR that reads the LSHL result through PV. The SI checks
; expect four buffer_load_sbyte instructions.
88 ; FUNC-LABEL: {{^}}load_v4i8_sext:
89 ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
90 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
91 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
92 ; R600-CHECK-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
93 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
95 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
97 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
99 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
101 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
103 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
105 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
107 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
109 ; SI-CHECK: buffer_load_sbyte
110 ; SI-CHECK: buffer_load_sbyte
111 ; SI-CHECK: buffer_load_sbyte
112 ; SI-CHECK: buffer_load_sbyte
113 define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
115 %0 = load <4 x i8> addrspace(1)* %in
116 %1 = sext <4 x i8> %0 to <4 x i32>
117 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
121 ; Load an i16 value from the global address space.
; The halfword is zero-extended to i32 and stored to %out. The R600 checks
; expect a VTX_READ_16 with .X destination and source channels; the SI checks
; expect buffer_load_ushort.
122 ; FUNC-LABEL: {{^}}load_i16:
123 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
124 ; SI-CHECK: buffer_load_ushort
125 define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
127 %0 = load i16 addrspace(1)* %in
128 %1 = zext i16 %0 to i32
129 store i32 %1, i32 addrspace(1)* %out
; Load an i16 value from the global address space and sign-extend it to i32.
; The R600 checks expect VTX_READ_16 followed by an LSHL/ASHR pair in which
; the ASHR reads the LSHL result through PV; the SI checks expect
; buffer_load_sshort.
133 ; FUNC-LABEL: {{^}}load_i16_sext:
134 ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
135 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
137 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
139 ; SI-CHECK: buffer_load_sshort
140 define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
142 %0 = load i16 addrspace(1)* %in
143 %1 = sext i16 %0 to i32
144 store i32 %1, i32 addrspace(1)* %out
; Load a <2 x i16> vector from the global address space and zero-extend each
; element to i32. Both targets are expected to emit one halfword load per
; element (two VTX_READ_16 on R600, two buffer_load_ushort on SI).
148 ; FUNC-LABEL: {{^}}load_v2i16:
149 ; R600-CHECK: VTX_READ_16
150 ; R600-CHECK: VTX_READ_16
151 ; SI-CHECK: buffer_load_ushort
152 ; SI-CHECK: buffer_load_ushort
153 define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
155 %0 = load <2 x i16> addrspace(1)* %in
156 %1 = zext <2 x i16> %0 to <2 x i32>
157 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Load a <2 x i16> vector from the global address space and sign-extend each
; element to i32. The R600 checks are order-independent (DAG form): for each
; of the two elements they expect a VTX_READ_16, an LSHL of the loaded
; register and an ASHR that reads the LSHL result through PV. The SI checks
; expect two buffer_load_sshort instructions.
161 ; FUNC-LABEL: {{^}}load_v2i16_sext:
162 ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
163 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
164 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
166 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
168 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
170 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
172 ; SI-CHECK: buffer_load_sshort
173 ; SI-CHECK: buffer_load_sshort
174 define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
176 %0 = load <2 x i16> addrspace(1)* %in
177 %1 = sext <2 x i16> %0 to <2 x i32>
178 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Load a <4 x i16> vector from the global address space and zero-extend each
; element to i32. Both targets are expected to emit one halfword load per
; element (four VTX_READ_16 on R600, four buffer_load_ushort on SI).
182 ; FUNC-LABEL: {{^}}load_v4i16:
183 ; R600-CHECK: VTX_READ_16
184 ; R600-CHECK: VTX_READ_16
185 ; R600-CHECK: VTX_READ_16
186 ; R600-CHECK: VTX_READ_16
187 ; SI-CHECK: buffer_load_ushort
188 ; SI-CHECK: buffer_load_ushort
189 ; SI-CHECK: buffer_load_ushort
190 ; SI-CHECK: buffer_load_ushort
191 define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
193 %0 = load <4 x i16> addrspace(1)* %in
194 %1 = zext <4 x i16> %0 to <4 x i32>
195 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
; Load a <4 x i16> vector from the global address space and sign-extend each
; element to i32. The R600 checks are order-independent (DAG form): for each
; of the four elements they expect a VTX_READ_16, an LSHL of the loaded
; register and an ASHR that reads the LSHL result through PV. The SI checks
; expect four buffer_load_sshort instructions.
199 ; FUNC-LABEL: {{^}}load_v4i16_sext:
200 ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
201 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
202 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
203 ; R600-CHECK-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
204 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
206 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
208 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
210 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
212 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
214 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
216 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
218 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
220 ; SI-CHECK: buffer_load_sshort
221 ; SI-CHECK: buffer_load_sshort
222 ; SI-CHECK: buffer_load_sshort
223 ; SI-CHECK: buffer_load_sshort
224 define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
226 %0 = load <4 x i16> addrspace(1)* %in
227 %1 = sext <4 x i16> %0 to <4 x i32>
228 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
232 ; Load an i32 value from the global address space.
; The value is stored to %out unchanged. The R600 checks expect VTX_READ_32
; with .X destination and source channels and a trailing 0 operand; the SI
; checks expect buffer_load_dword into a v register.
233 ; FUNC-LABEL: {{^}}load_i32:
234 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
236 ; SI-CHECK: buffer_load_dword v{{[0-9]+}}
237 define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
239 %0 = load i32 addrspace(1)* %in
240 store i32 %0, i32 addrspace(1)* %out
244 ; Load an f32 value from the global address space.
; The value is stored to %out unchanged. The expected instructions are the
; same as for the i32 case: VTX_READ_32 on R600 and buffer_load_dword on SI.
245 ; FUNC-LABEL: {{^}}load_f32:
246 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
248 ; SI-CHECK: buffer_load_dword v{{[0-9]+}}
249 define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
251 %0 = load float addrspace(1)* %in
252 store float %0, float addrspace(1)* %out
256 ; Load a v2f32 value from the global address space.
; The vector is stored to %out unchanged. The R600 checks expect MEM_RAT
; followed by a single VTX_READ_64; the SI checks expect a single
; buffer_load_dwordx2.
257 ; FUNC-LABEL: {{^}}load_v2f32:
258 ; R600-CHECK: MEM_RAT
259 ; R600-CHECK: VTX_READ_64
260 ; SI-CHECK: buffer_load_dwordx2
261 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
263 %0 = load <2 x float> addrspace(1)* %in
264 store <2 x float> %0, <2 x float> addrspace(1)* %out
; Load an i64 value from the global address space and store it to %out
; unchanged. The R600 checks expect VTX_READ_64; the SI checks expect
; buffer_load_dwordx2.
268 ; FUNC-LABEL: {{^}}load_i64:
269 ; R600-CHECK: VTX_READ_64
270 ; SI-CHECK: buffer_load_dwordx2
271 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
273 %0 = load i64 addrspace(1)* %in
274 store i64 %0, i64 addrspace(1)* %out
; Load an i32 value from the global address space, sign-extend it to i64 and
; store the result. The R600 checks expect two MEM_RAT matches followed by an
; ASHR that takes literal.x as its last operand; the SI checks expect
; buffer_load_dword.
278 ; FUNC-LABEL: {{^}}load_i64_sext:
279 ; R600-CHECK: MEM_RAT
280 ; R600-CHECK: MEM_RAT
281 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
283 ; SI-CHECK: buffer_load_dword
285 define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
287 %0 = load i32 addrspace(1)* %in
288 %1 = sext i32 %0 to i64
289 store i64 %1, i64 addrspace(1)* %out
; Load an i32 value from the global address space, zero-extend it to i64 and
; store the result. Only R600 expectations are visible here: two MEM_RAT
; matches.
293 ; FUNC-LABEL: {{^}}load_i64_zext:
294 ; R600-CHECK: MEM_RAT
295 ; R600-CHECK: MEM_RAT
296 define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
298 %0 = load i32 addrspace(1)* %in
299 %1 = zext i32 %0 to i64
300 store i64 %1, i64 addrspace(1)* %out
; Load a <8 x i32> vector from the global address space and store it to %out
; unchanged. The R600 checks expect two VTX_READ_128; the SI checks expect
; eight buffer_load_dword matches (see the XXX note below).
304 ; FUNC-LABEL: {{^}}load_v8i32:
305 ; R600-CHECK: VTX_READ_128
306 ; R600-CHECK: VTX_READ_128
307 ; XXX: We should be using DWORDX4 instructions on SI.
308 ; SI-CHECK: buffer_load_dword
309 ; SI-CHECK: buffer_load_dword
310 ; SI-CHECK: buffer_load_dword
311 ; SI-CHECK: buffer_load_dword
312 ; SI-CHECK: buffer_load_dword
313 ; SI-CHECK: buffer_load_dword
314 ; SI-CHECK: buffer_load_dword
315 ; SI-CHECK: buffer_load_dword
316 define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
318 %0 = load <8 x i32> addrspace(1)* %in
319 store <8 x i32> %0, <8 x i32> addrspace(1)* %out
; Load a <16 x i32> vector from the global address space and store it to %out
; unchanged. The R600 checks expect four VTX_READ_128; the SI checks expect
; sixteen buffer_load_dword matches (see the XXX note below).
323 ; FUNC-LABEL: {{^}}load_v16i32:
324 ; R600-CHECK: VTX_READ_128
325 ; R600-CHECK: VTX_READ_128
326 ; R600-CHECK: VTX_READ_128
327 ; R600-CHECK: VTX_READ_128
328 ; XXX: We should be using DWORDX4 instructions on SI.
329 ; SI-CHECK: buffer_load_dword
330 ; SI-CHECK: buffer_load_dword
331 ; SI-CHECK: buffer_load_dword
332 ; SI-CHECK: buffer_load_dword
333 ; SI-CHECK: buffer_load_dword
334 ; SI-CHECK: buffer_load_dword
335 ; SI-CHECK: buffer_load_dword
336 ; SI-CHECK: buffer_load_dword
337 ; SI-CHECK: buffer_load_dword
338 ; SI-CHECK: buffer_load_dword
339 ; SI-CHECK: buffer_load_dword
340 ; SI-CHECK: buffer_load_dword
341 ; SI-CHECK: buffer_load_dword
342 ; SI-CHECK: buffer_load_dword
343 ; SI-CHECK: buffer_load_dword
344 ; SI-CHECK: buffer_load_dword
345 define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
347 %0 = load <16 x i32> addrspace(1)* %in
348 store <16 x i32> %0, <16 x i32> addrspace(1)* %out
352 ;===------------------------------------------------------------------------===;
353 ; CONSTANT ADDRESS SPACE
354 ;===------------------------------------------------------------------------===;
356 ; Load a sign-extended i8 value
; The byte is read from addrspace(2), sign-extended to i32 and stored to %out
; in addrspace(1). The R600 checks expect VTX_READ_8 followed by an LSHL/ASHR
; pair in which the ASHR reads the LSHL result through PV; the SI checks
; expect buffer_load_sbyte into a v register.
357 ; FUNC-LABEL: {{^}}load_const_i8_sext:
358 ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
359 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
361 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
363 ; SI-CHECK: buffer_load_sbyte v{{[0-9]+}},
364 define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
366 %0 = load i8 addrspace(2)* %in
367 %1 = sext i8 %0 to i32
368 store i32 %1, i32 addrspace(1)* %out
372 ; Load an aligned i8 value
; The byte is read directly through %in (addrspace(2)), zero-extended to i32
; and stored to %out. The R600 checks expect VTX_READ_8 with .X channels; the
; SI checks expect buffer_load_ubyte into a v register.
373 ; FUNC-LABEL: {{^}}load_const_i8_aligned:
374 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
375 ; SI-CHECK: buffer_load_ubyte v{{[0-9]+}},
376 define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
378 %0 = load i8 addrspace(2)* %in
379 %1 = zext i8 %0 to i32
380 store i32 %1, i32 addrspace(1)* %out
384 ; Load an un-aligned i8 value
; The load address is %in (addrspace(2)) advanced by one i8 element with
; getelementptr; the byte is zero-extended to i32 and stored to %out. The
; expected instructions match the aligned case: VTX_READ_8 on R600 and
; buffer_load_ubyte on SI.
385 ; FUNC-LABEL: {{^}}load_const_i8_unaligned:
386 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
387 ; SI-CHECK: buffer_load_ubyte v{{[0-9]+}},
388 define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
390 %0 = getelementptr i8 addrspace(2)* %in, i32 1
391 %1 = load i8 addrspace(2)* %0
392 %2 = zext i8 %1 to i32
393 store i32 %2, i32 addrspace(1)* %out
397 ; Load a sign-extended i16 value
; The halfword is read from addrspace(2), sign-extended to i32 and stored to
; %out in addrspace(1). The R600 checks expect VTX_READ_16 followed by an
; LSHL/ASHR pair in which the ASHR reads the LSHL result through PV; the SI
; checks expect buffer_load_sshort.
398 ; FUNC-LABEL: {{^}}load_const_i16_sext:
399 ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
400 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
402 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
404 ; SI-CHECK: buffer_load_sshort
405 define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
407 %0 = load i16 addrspace(2)* %in
408 %1 = sext i16 %0 to i32
409 store i32 %1, i32 addrspace(1)* %out
413 ; Load an aligned i16 value
; The halfword is read directly through %in (addrspace(2)), zero-extended to
; i32 and stored to %out. The R600 checks expect VTX_READ_16 with .X channels;
; the SI checks expect buffer_load_ushort.
414 ; FUNC-LABEL: {{^}}load_const_i16_aligned:
415 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
416 ; SI-CHECK: buffer_load_ushort
417 define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
419 %0 = load i16 addrspace(2)* %in
420 %1 = zext i16 %0 to i32
421 store i32 %1, i32 addrspace(1)* %out
425 ; Load an un-aligned i16 value
; The load address is %in (addrspace(2)) advanced by one i16 element with
; getelementptr; the halfword is zero-extended to i32 and stored to %out. The
; expected instructions match the aligned case: VTX_READ_16 on R600 and
; buffer_load_ushort on SI.
426 ; FUNC-LABEL: {{^}}load_const_i16_unaligned:
427 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
428 ; SI-CHECK: buffer_load_ushort
429 define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
431 %0 = getelementptr i16 addrspace(2)* %in, i32 1
432 %1 = load i16 addrspace(2)* %0
433 %2 = zext i16 %1 to i32
434 store i32 %2, i32 addrspace(1)* %out
438 ; Load an i32 value from the constant address space.
; The value is read from addrspace(2) and stored unchanged to %out in
; addrspace(1). The R600 checks expect VTX_READ_32 with .X channels and a
; trailing 0 operand; the SI checks expect s_load_dword into an s register.
439 ; FUNC-LABEL: {{^}}load_const_addrspace_i32:
440 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
442 ; SI-CHECK: s_load_dword s{{[0-9]+}}
443 define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
445 %0 = load i32 addrspace(2)* %in
446 store i32 %0, i32 addrspace(1)* %out
450 ; Load an f32 value from the constant address space.
; The value is read from addrspace(2) and stored unchanged to %out in
; addrspace(1). The expected instructions are the same as for the i32 case:
; VTX_READ_32 on R600 and s_load_dword into an s register on SI.
451 ; FUNC-LABEL: {{^}}load_const_addrspace_f32:
452 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
454 ; SI-CHECK: s_load_dword s{{[0-9]+}}
455 define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
456 %1 = load float addrspace(2)* %in
457 store float %1, float addrspace(1)* %out
461 ;===------------------------------------------------------------------------===;
462 ; LOCAL ADDRESS SPACE
463 ;===------------------------------------------------------------------------===;
465 ; Load an i8 value from the local address space.
; The byte is read from addrspace(3), zero-extended to i32 and stored to %out
; in addrspace(1). The R600 checks expect LDS_UBYTE_READ_RET. The SI checks
; forbid s_wqm_b64 ahead of the s_mov_b32 m0 and then expect ds_read_u8.
466 ; FUNC-LABEL: {{^}}load_i8_local:
467 ; R600-CHECK: LDS_UBYTE_READ_RET
468 ; SI-CHECK-NOT: s_wqm_b64
469 ; SI-CHECK: s_mov_b32 m0
470 ; SI-CHECK: ds_read_u8
471 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
472 %1 = load i8 addrspace(3)* %in
473 %2 = zext i8 %1 to i32
474 store i32 %2, i32 addrspace(1)* %out
; Load an i8 value from the local address space (addrspace(3)) and
; sign-extend it to i32. The R600 checks expect LDS_UBYTE_READ_RET. The SI
; checks forbid s_wqm_b64 ahead of the s_mov_b32 m0 and then expect
; ds_read_i8.
478 ; FUNC-LABEL: {{^}}load_i8_sext_local:
479 ; R600-CHECK: LDS_UBYTE_READ_RET
481 ; SI-CHECK-NOT: s_wqm_b64
482 ; SI-CHECK: s_mov_b32 m0
483 ; SI-CHECK: ds_read_i8
484 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
486 %0 = load i8 addrspace(3)* %in
487 %1 = sext i8 %0 to i32
488 store i32 %1, i32 addrspace(1)* %out
; Load a <2 x i8> vector from the local address space (addrspace(3)) and
; zero-extend each element to i32. The R600 checks expect two
; LDS_UBYTE_READ_RET. The SI checks forbid s_wqm_b64 ahead of the
; s_mov_b32 m0 and then expect two ds_read_u8.
492 ; FUNC-LABEL: {{^}}load_v2i8_local:
493 ; R600-CHECK: LDS_UBYTE_READ_RET
494 ; R600-CHECK: LDS_UBYTE_READ_RET
495 ; SI-CHECK-NOT: s_wqm_b64
496 ; SI-CHECK: s_mov_b32 m0
497 ; SI-CHECK: ds_read_u8
498 ; SI-CHECK: ds_read_u8
499 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
501 %0 = load <2 x i8> addrspace(3)* %in
502 %1 = zext <2 x i8> %0 to <2 x i32>
503 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Load a <2 x i8> vector from the local address space (addrspace(3)) and
; sign-extend each element to i32. The R600 checks are order-independent (DAG
; form) and expect two LDS_UBYTE_READ_RET and two ASHR. The SI checks forbid
; s_wqm_b64 ahead of the s_mov_b32 m0 and then expect two ds_read_i8.
507 ; FUNC-LABEL: {{^}}load_v2i8_sext_local:
508 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
509 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
510 ; R600-CHECK-DAG: ASHR
511 ; R600-CHECK-DAG: ASHR
512 ; SI-CHECK-NOT: s_wqm_b64
513 ; SI-CHECK: s_mov_b32 m0
514 ; SI-CHECK: ds_read_i8
515 ; SI-CHECK: ds_read_i8
516 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
518 %0 = load <2 x i8> addrspace(3)* %in
519 %1 = sext <2 x i8> %0 to <2 x i32>
520 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Load a <4 x i8> vector from the local address space (addrspace(3)) and
; zero-extend each element to i32. The R600 checks expect four
; LDS_UBYTE_READ_RET. The SI checks forbid s_wqm_b64 ahead of the
; s_mov_b32 m0 and then expect four ds_read_u8.
524 ; FUNC-LABEL: {{^}}load_v4i8_local:
525 ; R600-CHECK: LDS_UBYTE_READ_RET
526 ; R600-CHECK: LDS_UBYTE_READ_RET
527 ; R600-CHECK: LDS_UBYTE_READ_RET
528 ; R600-CHECK: LDS_UBYTE_READ_RET
529 ; SI-CHECK-NOT: s_wqm_b64
530 ; SI-CHECK: s_mov_b32 m0
531 ; SI-CHECK: ds_read_u8
532 ; SI-CHECK: ds_read_u8
533 ; SI-CHECK: ds_read_u8
534 ; SI-CHECK: ds_read_u8
535 define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
537 %0 = load <4 x i8> addrspace(3)* %in
538 %1 = zext <4 x i8> %0 to <4 x i32>
539 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
; Load a <4 x i8> vector from the local address space (addrspace(3)) and
; sign-extend each element to i32. The R600 checks are order-independent (DAG
; form) and expect four LDS_UBYTE_READ_RET and four ASHR. The SI checks forbid
; s_wqm_b64 ahead of the s_mov_b32 m0 and then expect four ds_read_i8.
543 ; FUNC-LABEL: {{^}}load_v4i8_sext_local:
544 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
545 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
546 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
547 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
548 ; R600-CHECK-DAG: ASHR
549 ; R600-CHECK-DAG: ASHR
550 ; R600-CHECK-DAG: ASHR
551 ; R600-CHECK-DAG: ASHR
552 ; SI-CHECK-NOT: s_wqm_b64
553 ; SI-CHECK: s_mov_b32 m0
554 ; SI-CHECK: ds_read_i8
555 ; SI-CHECK: ds_read_i8
556 ; SI-CHECK: ds_read_i8
557 ; SI-CHECK: ds_read_i8
558 define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
560 %0 = load <4 x i8> addrspace(3)* %in
561 %1 = sext <4 x i8> %0 to <4 x i32>
562 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
566 ; Load an i16 value from the local address space.
; The halfword is read from addrspace(3), zero-extended to i32 and stored to
; %out in addrspace(1). The R600 checks expect LDS_USHORT_READ_RET. The SI
; checks forbid s_wqm_b64 ahead of the s_mov_b32 m0 and then expect
; ds_read_u16.
567 ; FUNC-LABEL: {{^}}load_i16_local:
568 ; R600-CHECK: LDS_USHORT_READ_RET
569 ; SI-CHECK-NOT: s_wqm_b64
570 ; SI-CHECK: s_mov_b32 m0
571 ; SI-CHECK: ds_read_u16
572 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
574 %0 = load i16 addrspace(3)* %in
575 %1 = zext i16 %0 to i32
576 store i32 %1, i32 addrspace(1)* %out
; Load an i16 value from the local address space (addrspace(3)) and
; sign-extend it to i32. The R600 checks expect LDS_USHORT_READ_RET. The SI
; checks forbid s_wqm_b64 ahead of the s_mov_b32 m0 and then expect
; ds_read_i16.
580 ; FUNC-LABEL: {{^}}load_i16_sext_local:
581 ; R600-CHECK: LDS_USHORT_READ_RET
583 ; SI-CHECK-NOT: s_wqm_b64
584 ; SI-CHECK: s_mov_b32 m0
585 ; SI-CHECK: ds_read_i16
586 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
588 %0 = load i16 addrspace(3)* %in
589 %1 = sext i16 %0 to i32
590 store i32 %1, i32 addrspace(1)* %out
; Load a <2 x i16> vector from the local address space (addrspace(3)) and
; zero-extend each element to i32. The R600 checks expect two
; LDS_USHORT_READ_RET. The SI checks forbid s_wqm_b64 ahead of the
; s_mov_b32 m0 and then expect two ds_read_u16.
594 ; FUNC-LABEL: {{^}}load_v2i16_local:
595 ; R600-CHECK: LDS_USHORT_READ_RET
596 ; R600-CHECK: LDS_USHORT_READ_RET
597 ; SI-CHECK-NOT: s_wqm_b64
598 ; SI-CHECK: s_mov_b32 m0
599 ; SI-CHECK: ds_read_u16
600 ; SI-CHECK: ds_read_u16
601 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
603 %0 = load <2 x i16> addrspace(3)* %in
604 %1 = zext <2 x i16> %0 to <2 x i32>
605 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Load a <2 x i16> vector from the local address space (addrspace(3)) and
; sign-extend each element to i32. The R600 checks are order-independent (DAG
; form) and expect two LDS_USHORT_READ_RET and two ASHR. The SI checks forbid
; s_wqm_b64 ahead of the s_mov_b32 m0 and then expect two ds_read_i16.
609 ; FUNC-LABEL: {{^}}load_v2i16_sext_local:
610 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
611 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
612 ; R600-CHECK-DAG: ASHR
613 ; R600-CHECK-DAG: ASHR
614 ; SI-CHECK-NOT: s_wqm_b64
615 ; SI-CHECK: s_mov_b32 m0
616 ; SI-CHECK: ds_read_i16
617 ; SI-CHECK: ds_read_i16
618 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
620 %0 = load <2 x i16> addrspace(3)* %in
621 %1 = sext <2 x i16> %0 to <2 x i32>
622 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
; Load a <4 x i16> vector from the local address space (addrspace(3)) and
; zero-extend each element to i32. The R600 checks expect four
; LDS_USHORT_READ_RET. The SI checks forbid s_wqm_b64 ahead of the
; s_mov_b32 m0 and then expect four ds_read_u16.
626 ; FUNC-LABEL: {{^}}load_v4i16_local:
627 ; R600-CHECK: LDS_USHORT_READ_RET
628 ; R600-CHECK: LDS_USHORT_READ_RET
629 ; R600-CHECK: LDS_USHORT_READ_RET
630 ; R600-CHECK: LDS_USHORT_READ_RET
631 ; SI-CHECK-NOT: s_wqm_b64
632 ; SI-CHECK: s_mov_b32 m0
633 ; SI-CHECK: ds_read_u16
634 ; SI-CHECK: ds_read_u16
635 ; SI-CHECK: ds_read_u16
636 ; SI-CHECK: ds_read_u16
637 define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
639 %0 = load <4 x i16> addrspace(3)* %in
640 %1 = zext <4 x i16> %0 to <4 x i32>
641 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
; Load a <4 x i16> vector from the local address space (addrspace(3)) and
; sign-extend each element to i32. The R600 checks are order-independent (DAG
; form) and expect four LDS_USHORT_READ_RET and four ASHR. The SI checks
; forbid s_wqm_b64 ahead of the s_mov_b32 m0 and then expect four ds_read_i16.
645 ; FUNC-LABEL: {{^}}load_v4i16_sext_local:
646 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
647 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
648 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
649 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
650 ; R600-CHECK-DAG: ASHR
651 ; R600-CHECK-DAG: ASHR
652 ; R600-CHECK-DAG: ASHR
653 ; R600-CHECK-DAG: ASHR
654 ; SI-CHECK-NOT: s_wqm_b64
655 ; SI-CHECK: s_mov_b32 m0
656 ; SI-CHECK: ds_read_i16
657 ; SI-CHECK: ds_read_i16
658 ; SI-CHECK: ds_read_i16
659 ; SI-CHECK: ds_read_i16
660 define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
662 %0 = load <4 x i16> addrspace(3)* %in
663 %1 = sext <4 x i16> %0 to <4 x i32>
664 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
668 ; Load an i32 value from the local address space.
; The value is read from addrspace(3) and stored unchanged to %out in
; addrspace(1). The R600 checks expect LDS_READ_RET. The SI checks forbid
; s_wqm_b64 ahead of the s_mov_b32 m0 and then expect ds_read_b32.
669 ; FUNC-LABEL: {{^}}load_i32_local:
670 ; R600-CHECK: LDS_READ_RET
671 ; SI-CHECK-NOT: s_wqm_b64
672 ; SI-CHECK: s_mov_b32 m0
673 ; SI-CHECK: ds_read_b32
674 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
676 %0 = load i32 addrspace(3)* %in
677 store i32 %0, i32 addrspace(1)* %out
681 ; Load an f32 value from the local address space.
; The value is read from addrspace(3) and stored unchanged to %out in
; addrspace(1). The R600 checks expect LDS_READ_RET; the SI checks expect
; s_mov_b32 m0 followed by ds_read_b32.
682 ; FUNC-LABEL: {{^}}load_f32_local:
683 ; R600-CHECK: LDS_READ_RET
684 ; SI-CHECK: s_mov_b32 m0
685 ; SI-CHECK: ds_read_b32
686 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
688 %0 = load float addrspace(3)* %in
689 store float %0, float addrspace(1)* %out
693 ; Load a v2f32 value from the local address space.
; The vector is read from addrspace(3) and stored unchanged to %out in
; addrspace(1). The R600 checks expect two LDS_READ_RET; the SI checks expect
; s_mov_b32 m0 followed by a single ds_read_b64.
694 ; FUNC-LABEL: {{^}}load_v2f32_local:
695 ; R600-CHECK: LDS_READ_RET
696 ; R600-CHECK: LDS_READ_RET
697 ; SI-CHECK: s_mov_b32 m0
698 ; SI-CHECK: ds_read_b64
699 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
701 %0 = load <2 x float> addrspace(3)* %in
702 store <2 x float> %0, <2 x float> addrspace(1)* %out
706 ; Test loading an i32 and a v2i32 value from the same base pointer.
; The scalar is loaded straight from %in. The vector is loaded (align 4) from
; %in reinterpreted as a <2 x i32> pointer and advanced by two vector
; elements. The scalar is inserted into lane 0 of a zero vector, added to the
; loaded vector, and the sum is stored to %out. The R600 checks expect three
; LDS_READ_RET; the SI checks expect a ds_read_b32 and a ds_read2_b32 in
; either order.
707 ; FUNC-LABEL: {{^}}load_i32_v2i32_local:
708 ; R600-CHECK: LDS_READ_RET
709 ; R600-CHECK: LDS_READ_RET
710 ; R600-CHECK: LDS_READ_RET
711 ; SI-CHECK-DAG: ds_read_b32
712 ; SI-CHECK-DAG: ds_read2_b32
713 define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
714 %scalar = load i32 addrspace(3)* %in
715 %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
716 %vec_ptr = getelementptr <2 x i32> addrspace(3)* %tmp0, i32 2
717 %vec0 = load <2 x i32> addrspace(3)* %vec_ptr, align 4
718 %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
719 %vec = add <2 x i32> %vec0, %vec1
720 store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
; 512-element i32 array in the local address space (addrspace(3)), left undef;
; it is the load source for the test below.
725 @lds = addrspace(3) global [512 x i32] undef, align 4
727 ; On SI we need to make sure that the base offset is a register and not
; an immediate: the SI checks expect a register that was set to 0 to be used
; as the ds_read_b32 address operand, with the constant displacement carried
; in offset:4.
; The function loads element 1 of @lds and stores it to element 1 of %out;
; the %in argument is not used.
729 ; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
730 ; SI-CHECK: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
731 ; SI-CHECK: ds_read_b32 v0, v[[ZERO]] offset:4
732 ; R600-CHECK: LDS_READ_RET
733 define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
735 %tmp0 = getelementptr [512 x i32] addrspace(3)* @lds, i32 0, i32 1
736 %tmp1 = load i32 addrspace(3)* %tmp0
737 %tmp2 = getelementptr i32 addrspace(1)* %out, i32 1
738 store i32 %tmp1, i32 addrspace(1)* %tmp2