; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-NOHSA --check-prefix=FUNC %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefix=FUNC --check-prefix=CI-HSA --check-prefix=SI %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI-NOHSA --check-prefix=FUNC %s
;===------------------------------------------------------------------------===;
; GLOBAL ADDRESS SPACE
;===------------------------------------------------------------------------===;
; Load an i8 value from the global address space and zero-extend it to i32.
; FUNC-LABEL: {{^}}load_i8:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}},
; CI-HSA: flat_load_ubyte
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %1 = load i8, i8 addrspace(1)* %in
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}
; Load an i8 value from the global address space and sign-extend it to i32.
; FUNC-LABEL: {{^}}load_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; SI-NOHSA: buffer_load_sbyte
; CI-HSA: flat_load_sbyte
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %0 = load i8, i8 addrspace(1)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}
; Load a <2 x i8> vector from the global address space and zero-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v2i8:
; SI-NOHSA: buffer_load_ubyte
; SI-NOHSA: buffer_load_ubyte
; CI-HSA: flat_load_ubyte
; CI-HSA: flat_load_ubyte
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %1 = zext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}
; Load a <2 x i8> vector from the global address space and sign-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v2i8_sext:
; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
; SI-NOHSA: buffer_load_sbyte
; SI-NOHSA: buffer_load_sbyte
; CI-HSA: flat_load_sbyte
; CI-HSA: flat_load_sbyte
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %1 = sext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}
; Load a <4 x i8> vector from the global address space and zero-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v4i8:
; SI-NOHSA: buffer_load_ubyte
; SI-NOHSA: buffer_load_ubyte
; SI-NOHSA: buffer_load_ubyte
; SI-NOHSA: buffer_load_ubyte
; CI-HSA: flat_load_ubyte
; CI-HSA: flat_load_ubyte
; CI-HSA: flat_load_ubyte
; CI-HSA: flat_load_ubyte
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}
; Load a <4 x i8> vector from the global address space and sign-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v4i8_sext:
; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
; SI-NOHSA: buffer_load_sbyte
; SI-NOHSA: buffer_load_sbyte
; SI-NOHSA: buffer_load_sbyte
; SI-NOHSA: buffer_load_sbyte
; CI-HSA: flat_load_sbyte
; CI-HSA: flat_load_sbyte
; CI-HSA: flat_load_sbyte
; CI-HSA: flat_load_sbyte
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}
; Load an i16 value from the global address space and zero-extend it to i32.
; FUNC-LABEL: {{^}}load_i16:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-NOHSA: buffer_load_ushort
; CI-HSA: flat_load_ushort
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %0 = load i16, i16 addrspace(1)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}
; Load an i16 value from the global address space and sign-extend it to i32.
; FUNC-LABEL: {{^}}load_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; SI-NOHSA: buffer_load_sshort
; CI-HSA: flat_load_sshort
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %0 = load i16, i16 addrspace(1)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}
; Load a <2 x i16> vector from the global address space and zero-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v2i16:
; SI-NOHSA: buffer_load_ushort
; SI-NOHSA: buffer_load_ushort
; CI-HSA: flat_load_ushort
; CI-HSA: flat_load_ushort
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %1 = zext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}
; Load a <2 x i16> vector from the global address space and sign-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v2i16_sext:
; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
; SI-NOHSA: buffer_load_sshort
; SI-NOHSA: buffer_load_sshort
; CI-HSA: flat_load_sshort
; CI-HSA: flat_load_sshort
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %1 = sext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}
; Load a <4 x i16> vector from the global address space and zero-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v4i16:
; SI-NOHSA: buffer_load_ushort
; SI-NOHSA: buffer_load_ushort
; SI-NOHSA: buffer_load_ushort
; SI-NOHSA: buffer_load_ushort
; CI-HSA: flat_load_ushort
; CI-HSA: flat_load_ushort
; CI-HSA: flat_load_ushort
; CI-HSA: flat_load_ushort
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}
; Load a <4 x i16> vector from the global address space and sign-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v4i16_sext:
; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
; SI-NOHSA: buffer_load_sshort
; SI-NOHSA: buffer_load_sshort
; SI-NOHSA: buffer_load_sshort
; SI-NOHSA: buffer_load_sshort
; CI-HSA: flat_load_sshort
; CI-HSA: flat_load_sshort
; CI-HSA: flat_load_sshort
; CI-HSA: flat_load_sshort
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}
; Load an i32 value from the global address space.
; FUNC-LABEL: {{^}}load_i32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
; SI-NOHSA: buffer_load_dword v{{[0-9]+}}
; CI-HSA: flat_load_dword
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32, i32 addrspace(1)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Load an f32 value from the global address space.
; FUNC-LABEL: {{^}}load_f32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
; SI-NOHSA: buffer_load_dword v{{[0-9]+}}
; CI-HSA: flat_load_dword
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
  %0 = load float, float addrspace(1)* %in
  store float %0, float addrspace(1)* %out
  ret void
}
; Load a v2f32 value from the global address space.
; FUNC-LABEL: {{^}}load_v2f32:
; SI-NOHSA: buffer_load_dwordx2
; CI-HSA: flat_load_dwordx2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
  %0 = load <2 x float>, <2 x float> addrspace(1)* %in
  store <2 x float> %0, <2 x float> addrspace(1)* %out
  ret void
}
; Load an i64 value from the global address space.
; FUNC-LABEL: {{^}}load_i64:
; SI-NOHSA: buffer_load_dwordx2
; CI-HSA: flat_load_dwordx2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
  %0 = load i64, i64 addrspace(1)* %in
  store i64 %0, i64 addrspace(1)* %out
  ret void
}
; Load an i32 value from the global address space and sign-extend it to i64.
; FUNC-LABEL: {{^}}load_i64_sext:
; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
; SI-NOHSA: buffer_load_dword
; CI-HSA: flat_load_dword
define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32, i32 addrspace(1)* %in
  %1 = sext i32 %0 to i64
  store i64 %1, i64 addrspace(1)* %out
  ret void
}
; Load an i32 value from the global address space and zero-extend it to i64.
; FUNC-LABEL: {{^}}load_i64_zext:
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32, i32 addrspace(1)* %in
  %1 = zext i32 %0 to i64
  store i64 %1, i64 addrspace(1)* %out
  ret void
}
; Load a <8 x i32> vector from the global address space.
; FUNC-LABEL: {{^}}load_v8i32:
; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; CI-HSA: flat_load_dwordx4
; CI-HSA: flat_load_dwordx4
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
entry:
  %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
  ret void
}
; Load a <16 x i32> vector from the global address space.
; FUNC-LABEL: {{^}}load_v16i32:
; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; CI-HSA: flat_load_dwordx4
; CI-HSA: flat_load_dwordx4
; CI-HSA: flat_load_dwordx4
; CI-HSA: flat_load_dwordx4
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
entry:
  %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
  ret void
}
;===------------------------------------------------------------------------===;
; CONSTANT ADDRESS SPACE
;===------------------------------------------------------------------------===;
; Load a sign-extended i8 value from the constant address space.
; FUNC-LABEL: {{^}}load_const_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; SI-NOHSA: buffer_load_sbyte v{{[0-9]+}},
; CI-HSA: flat_load_sbyte v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = load i8, i8 addrspace(2)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}
; Load an aligned i8 value from the constant address space (read directly
; through %in) and zero-extend it to i32.
; FUNC-LABEL: {{^}}load_const_i8_aligned:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}},
; CI-HSA: flat_load_ubyte v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = load i8, i8 addrspace(2)* %in
  %1 = zext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}
; Load an un-aligned i8 value from the constant address space: the load
; address is one i8 element past %in.
; FUNC-LABEL: {{^}}load_const_i8_unaligned:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}},
; CI-HSA: flat_load_ubyte v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1
  %1 = load i8, i8 addrspace(2)* %0
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}
; Load a sign-extended i16 value from the constant address space.
; FUNC-LABEL: {{^}}load_const_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; SI-NOHSA: buffer_load_sshort
; CI-HSA: flat_load_sshort
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = load i16, i16 addrspace(2)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}
; Load an aligned i16 value from the constant address space (read directly
; through %in) and zero-extend it to i32.
; FUNC-LABEL: {{^}}load_const_i16_aligned:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-NOHSA: buffer_load_ushort
; CI-HSA: flat_load_ushort
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = load i16, i16 addrspace(2)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}
; Load an un-aligned i16 value from the constant address space: the load
; address is one i16 element past %in.
; FUNC-LABEL: {{^}}load_const_i16_unaligned:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-NOHSA: buffer_load_ushort
; CI-HSA: flat_load_ushort
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
  %1 = load i16, i16 addrspace(2)* %0
  %2 = zext i16 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}
; Load an i32 value from the constant address space.
; FUNC-LABEL: {{^}}load_const_addrspace_i32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %0 = load i32, i32 addrspace(2)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Load an f32 value from the constant address space.
; FUNC-LABEL: {{^}}load_const_addrspace_f32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
  %1 = load float, float addrspace(2)* %in
  store float %1, float addrspace(1)* %out
  ret void
}
;===------------------------------------------------------------------------===;
; LOCAL ADDRESS SPACE
;===------------------------------------------------------------------------===;
; Load an i8 value from the local address space and zero-extend it to i32.
; FUNC-LABEL: {{^}}load_i8_local:
; R600: LDS_UBYTE_READ_RET
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
  %1 = load i8, i8 addrspace(3)* %in
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}
; Load an i8 value from the local address space and sign-extend it to i32.
; FUNC-LABEL: {{^}}load_i8_sext_local:
; R600: LDS_UBYTE_READ_RET
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
  %0 = load i8, i8 addrspace(3)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}
; Load a <2 x i8> vector from the local address space and zero-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v2i8_local:
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %1 = zext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}
; Load a <2 x i8> vector from the local address space and sign-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v2i8_sext_local:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %1 = sext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}
; Load a <4 x i8> vector from the local address space and zero-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v4i8_local:
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}
; Load a <4 x i8> vector from the local address space and sign-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v4i8_sext_local:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}
; Load an i16 value from the local address space and zero-extend it to i32.
; FUNC-LABEL: {{^}}load_i16_local:
; R600: LDS_USHORT_READ_RET
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %0 = load i16, i16 addrspace(3)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}
; Load an i16 value from the local address space and sign-extend it to i32.
; FUNC-LABEL: {{^}}load_i16_sext_local:
; R600: LDS_USHORT_READ_RET
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %0 = load i16, i16 addrspace(3)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}
; Load a <2 x i16> vector from the local address space and zero-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v2i16_local:
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %1 = zext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}
; Load a <2 x i16> vector from the local address space and sign-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v2i16_sext_local:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %1 = sext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}
; Load a <4 x i16> vector from the local address space and zero-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v4i16_local:
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}
; Load a <4 x i16> vector from the local address space and sign-extend each
; element to i32.
; FUNC-LABEL: {{^}}load_v4i16_sext_local:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}
; Load an i32 value from the local address space.
; FUNC-LABEL: {{^}}load_i32_local:
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  %0 = load i32, i32 addrspace(3)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
; Load an f32 value from the local address space.
; FUNC-LABEL: {{^}}load_f32_local:
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
  %0 = load float, float addrspace(3)* %in
  store float %0, float addrspace(1)* %out
  ret void
}
; Load a v2f32 value from the local address space.
; FUNC-LABEL: {{^}}load_v2f32_local:
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
  %0 = load <2 x float>, <2 x float> addrspace(3)* %in
  store <2 x float> %0, <2 x float> addrspace(1)* %out
  ret void
}
; Test loading an i32 and a v2i32 value from the same base pointer.
; The scalar is read at %in; the vector is read two <2 x i32> elements past
; %in with only 4-byte alignment. The scalar is inserted into lane 0 of a zero
; vector and added to the loaded vector before the result is stored.
; FUNC-LABEL: {{^}}load_i32_v2i32_local:
; SI-DAG: ds_read_b32
; SI-DAG: ds_read2_b32
define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
  %scalar = load i32, i32 addrspace(3)* %in
  %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
  %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
  %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
  %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
  %vec = add <2 x i32> %vec0, %vec1
  store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
  ret void
}
@lds = addrspace(3) global [512 x i32] undef, align 4

; On SI we need to make sure that the base offset is a register and not
; an immediate.
; The load reads element 1 of @lds and the result is stored to element 1 of
; %out.
; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
; SI: ds_read_b32 v0, v[[ZERO]] offset:4
define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
  %tmp1 = load i32, i32 addrspace(3)* %tmp0
  %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  store i32 %tmp1, i32 addrspace(1)* %tmp2
  ret void
}