1 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
2 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
3 ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
5 ;===------------------------------------------------------------------------===;
7 ;===------------------------------------------------------------------------===;
9 ; Load an i8 value from the global address space.
10 ; FUNC-LABEL: {{^}}load_i8:
11 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
13 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
; Kernel under test: reads one i8 through %in (addrspace(1), the global
; address space), zero-extends it to i32, and writes the i32 to %out.
14 define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
15 %1 = load i8 addrspace(1)* %in
16 %2 = zext i8 %1 to i32
17 store i32 %2, i32 addrspace(1)* %out
21 ; FUNC-LABEL: {{^}}load_i8_sext:
22 ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
23 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
25 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
27 ; SI-CHECK: BUFFER_LOAD_SBYTE
; Kernel under test: reads one i8 from global memory (addrspace(1)),
; sign-extends it to i32 with sext, and stores the i32 to %out.
28 define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
30 %0 = load i8 addrspace(1)* %in
31 %1 = sext i8 %0 to i32
32 store i32 %1, i32 addrspace(1)* %out
36 ; FUNC-LABEL: {{^}}load_v2i8:
37 ; R600-CHECK: VTX_READ_8
38 ; R600-CHECK: VTX_READ_8
39 ; SI-CHECK: BUFFER_LOAD_UBYTE
40 ; SI-CHECK: BUFFER_LOAD_UBYTE
; Kernel under test: loads a <2 x i8> vector from global memory
; (addrspace(1)), zero-extends each element to i32, and stores the
; resulting <2 x i32> to %out.
41 define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
43 %0 = load <2 x i8> addrspace(1)* %in
44 %1 = zext <2 x i8> %0 to <2 x i32>
45 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
49 ; FUNC-LABEL: {{^}}load_v2i8_sext:
50 ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
51 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
52 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
54 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
56 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
58 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
60 ; SI-CHECK: BUFFER_LOAD_SBYTE
61 ; SI-CHECK: BUFFER_LOAD_SBYTE
; Kernel under test: loads a <2 x i8> vector from global memory
; (addrspace(1)), sign-extends each element to i32, and stores the
; resulting <2 x i32> to %out.
62 define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
64 %0 = load <2 x i8> addrspace(1)* %in
65 %1 = sext <2 x i8> %0 to <2 x i32>
66 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
70 ; FUNC-LABEL: {{^}}load_v4i8:
71 ; R600-CHECK: VTX_READ_8
72 ; R600-CHECK: VTX_READ_8
73 ; R600-CHECK: VTX_READ_8
74 ; R600-CHECK: VTX_READ_8
75 ; SI-CHECK: BUFFER_LOAD_UBYTE
76 ; SI-CHECK: BUFFER_LOAD_UBYTE
77 ; SI-CHECK: BUFFER_LOAD_UBYTE
78 ; SI-CHECK: BUFFER_LOAD_UBYTE
; Kernel under test: loads a <4 x i8> vector from global memory
; (addrspace(1)), zero-extends each element to i32, and stores the
; resulting <4 x i32> to %out.
79 define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
81 %0 = load <4 x i8> addrspace(1)* %in
82 %1 = zext <4 x i8> %0 to <4 x i32>
83 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
87 ; FUNC-LABEL: {{^}}load_v4i8_sext:
88 ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
89 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
90 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
91 ; R600-CHECK-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
92 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
94 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
96 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
98 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
100 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
102 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
104 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
106 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
108 ; SI-CHECK: BUFFER_LOAD_SBYTE
109 ; SI-CHECK: BUFFER_LOAD_SBYTE
110 ; SI-CHECK: BUFFER_LOAD_SBYTE
111 ; SI-CHECK: BUFFER_LOAD_SBYTE
; Kernel under test: loads a <4 x i8> vector from global memory
; (addrspace(1)), sign-extends each element to i32, and stores the
; resulting <4 x i32> to %out.
112 define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
114 %0 = load <4 x i8> addrspace(1)* %in
115 %1 = sext <4 x i8> %0 to <4 x i32>
116 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
120 ; Load an i16 value from the global address space.
121 ; FUNC-LABEL: {{^}}load_i16:
122 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
123 ; SI-CHECK: BUFFER_LOAD_USHORT
; Kernel under test: reads one i16 from global memory (addrspace(1)),
; zero-extends it to i32, and stores the i32 to %out.
124 define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
126 %0 = load i16 addrspace(1)* %in
127 %1 = zext i16 %0 to i32
128 store i32 %1, i32 addrspace(1)* %out
132 ; FUNC-LABEL: {{^}}load_i16_sext:
133 ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
134 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
136 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
138 ; SI-CHECK: BUFFER_LOAD_SSHORT
; Kernel under test: reads one i16 from global memory (addrspace(1)),
; sign-extends it to i32, and stores the i32 to %out.
139 define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
141 %0 = load i16 addrspace(1)* %in
142 %1 = sext i16 %0 to i32
143 store i32 %1, i32 addrspace(1)* %out
147 ; FUNC-LABEL: {{^}}load_v2i16:
148 ; R600-CHECK: VTX_READ_16
149 ; R600-CHECK: VTX_READ_16
150 ; SI-CHECK: BUFFER_LOAD_USHORT
151 ; SI-CHECK: BUFFER_LOAD_USHORT
; Kernel under test: loads a <2 x i16> vector from global memory
; (addrspace(1)), zero-extends each element to i32, and stores the
; resulting <2 x i32> to %out.
152 define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
154 %0 = load <2 x i16> addrspace(1)* %in
155 %1 = zext <2 x i16> %0 to <2 x i32>
156 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
160 ; FUNC-LABEL: {{^}}load_v2i16_sext:
161 ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
162 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
163 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
165 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
167 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
169 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
171 ; SI-CHECK: BUFFER_LOAD_SSHORT
172 ; SI-CHECK: BUFFER_LOAD_SSHORT
; Kernel under test: loads a <2 x i16> vector from global memory
; (addrspace(1)), sign-extends each element to i32, and stores the
; resulting <2 x i32> to %out.
173 define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
175 %0 = load <2 x i16> addrspace(1)* %in
176 %1 = sext <2 x i16> %0 to <2 x i32>
177 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
181 ; FUNC-LABEL: {{^}}load_v4i16:
182 ; R600-CHECK: VTX_READ_16
183 ; R600-CHECK: VTX_READ_16
184 ; R600-CHECK: VTX_READ_16
185 ; R600-CHECK: VTX_READ_16
186 ; SI-CHECK: BUFFER_LOAD_USHORT
187 ; SI-CHECK: BUFFER_LOAD_USHORT
188 ; SI-CHECK: BUFFER_LOAD_USHORT
189 ; SI-CHECK: BUFFER_LOAD_USHORT
; Kernel under test: loads a <4 x i16> vector from global memory
; (addrspace(1)), zero-extends each element to i32, and stores the
; resulting <4 x i32> to %out.
190 define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
192 %0 = load <4 x i16> addrspace(1)* %in
193 %1 = zext <4 x i16> %0 to <4 x i32>
194 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
198 ; FUNC-LABEL: {{^}}load_v4i16_sext:
199 ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
200 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
201 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
202 ; R600-CHECK-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
203 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
205 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
207 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
209 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
211 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
213 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
215 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
217 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
219 ; SI-CHECK: BUFFER_LOAD_SSHORT
220 ; SI-CHECK: BUFFER_LOAD_SSHORT
221 ; SI-CHECK: BUFFER_LOAD_SSHORT
222 ; SI-CHECK: BUFFER_LOAD_SSHORT
; Kernel under test: loads a <4 x i16> vector from global memory
; (addrspace(1)), sign-extends each element to i32, and stores the
; resulting <4 x i32> to %out.
223 define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
225 %0 = load <4 x i16> addrspace(1)* %in
226 %1 = sext <4 x i16> %0 to <4 x i32>
227 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
231 ; Load an i32 value from the global address space.
232 ; FUNC-LABEL: {{^}}load_i32:
233 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
235 ; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
; Kernel under test: copies a single i32 from %in to %out; both pointers
; are in addrspace(1) (global) and no extension is involved.
236 define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
238 %0 = load i32 addrspace(1)* %in
239 store i32 %0, i32 addrspace(1)* %out
243 ; Load an f32 value from the global address space.
244 ; FUNC-LABEL: {{^}}load_f32:
245 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
247 ; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
; Kernel under test: copies a single float from %in to %out; both pointers
; are in addrspace(1) (global).
248 define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
250 %0 = load float addrspace(1)* %in
251 store float %0, float addrspace(1)* %out
255 ; Load a v2f32 value from the global address space.
256 ; FUNC-LABEL: {{^}}load_v2f32:
257 ; R600-CHECK: MEM_RAT
258 ; R600-CHECK: VTX_READ_64
259 ; SI-CHECK: BUFFER_LOAD_DWORDX2
; Kernel under test: copies a <2 x float> vector (64 bits of data) from
; %in to %out; both pointers are in addrspace(1) (global).
260 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
262 %0 = load <2 x float> addrspace(1)* %in
263 store <2 x float> %0, <2 x float> addrspace(1)* %out
267 ; FUNC-LABEL: {{^}}load_i64:
268 ; R600-CHECK: VTX_READ_64
269 ; SI-CHECK: BUFFER_LOAD_DWORDX2
; Kernel under test: copies a single i64 from %in to %out; both pointers
; are in addrspace(1) (global).
270 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
272 %0 = load i64 addrspace(1)* %in
273 store i64 %0, i64 addrspace(1)* %out
277 ; FUNC-LABEL: {{^}}load_i64_sext:
278 ; R600-CHECK: MEM_RAT
279 ; R600-CHECK: MEM_RAT
280 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
282 ; SI-CHECK: BUFFER_LOAD_DWORD
; Kernel under test: despite the name, the load itself is 32 bits wide.
; An i32 is read from global memory (addrspace(1)), sign-extended to i64,
; and the i64 is stored to %out.
284 define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
286 %0 = load i32 addrspace(1)* %in
287 %1 = sext i32 %0 to i64
288 store i64 %1, i64 addrspace(1)* %out
292 ; FUNC-LABEL: {{^}}load_i64_zext:
293 ; R600-CHECK: MEM_RAT
294 ; R600-CHECK: MEM_RAT
; Kernel under test: despite the name, the load itself is 32 bits wide.
; An i32 is read from global memory (addrspace(1)), zero-extended to i64,
; and the i64 is stored to %out.
295 define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
297 %0 = load i32 addrspace(1)* %in
298 %1 = zext i32 %0 to i64
299 store i64 %1, i64 addrspace(1)* %out
303 ; FUNC-LABEL: {{^}}load_v8i32:
304 ; R600-CHECK: VTX_READ_128
305 ; R600-CHECK: VTX_READ_128
306 ; XXX: We should be using DWORDX4 instructions on SI.
307 ; SI-CHECK: BUFFER_LOAD_DWORD
308 ; SI-CHECK: BUFFER_LOAD_DWORD
309 ; SI-CHECK: BUFFER_LOAD_DWORD
310 ; SI-CHECK: BUFFER_LOAD_DWORD
311 ; SI-CHECK: BUFFER_LOAD_DWORD
312 ; SI-CHECK: BUFFER_LOAD_DWORD
313 ; SI-CHECK: BUFFER_LOAD_DWORD
314 ; SI-CHECK: BUFFER_LOAD_DWORD
; Kernel under test: copies a <8 x i32> vector (eight dwords) from %in to
; %out; both pointers are in addrspace(1) (global).
315 define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
317 %0 = load <8 x i32> addrspace(1)* %in
318 store <8 x i32> %0, <8 x i32> addrspace(1)* %out
322 ; FUNC-LABEL: {{^}}load_v16i32:
323 ; R600-CHECK: VTX_READ_128
324 ; R600-CHECK: VTX_READ_128
325 ; R600-CHECK: VTX_READ_128
326 ; R600-CHECK: VTX_READ_128
327 ; XXX: We should be using DWORDX4 instructions on SI.
328 ; SI-CHECK: BUFFER_LOAD_DWORD
329 ; SI-CHECK: BUFFER_LOAD_DWORD
330 ; SI-CHECK: BUFFER_LOAD_DWORD
331 ; SI-CHECK: BUFFER_LOAD_DWORD
332 ; SI-CHECK: BUFFER_LOAD_DWORD
333 ; SI-CHECK: BUFFER_LOAD_DWORD
334 ; SI-CHECK: BUFFER_LOAD_DWORD
335 ; SI-CHECK: BUFFER_LOAD_DWORD
336 ; SI-CHECK: BUFFER_LOAD_DWORD
337 ; SI-CHECK: BUFFER_LOAD_DWORD
338 ; SI-CHECK: BUFFER_LOAD_DWORD
339 ; SI-CHECK: BUFFER_LOAD_DWORD
340 ; SI-CHECK: BUFFER_LOAD_DWORD
341 ; SI-CHECK: BUFFER_LOAD_DWORD
342 ; SI-CHECK: BUFFER_LOAD_DWORD
343 ; SI-CHECK: BUFFER_LOAD_DWORD
; Kernel under test: copies a <16 x i32> vector (sixteen dwords) from %in
; to %out; both pointers are in addrspace(1) (global).
344 define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
346 %0 = load <16 x i32> addrspace(1)* %in
347 store <16 x i32> %0, <16 x i32> addrspace(1)* %out
351 ;===------------------------------------------------------------------------===;
352 ; CONSTANT ADDRESS SPACE
353 ;===------------------------------------------------------------------------===;
355 ; Load a sign-extended i8 value
356 ; FUNC-LABEL: {{^}}load_const_i8_sext:
357 ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
358 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
360 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
362 ; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}},
; Kernel under test: reads one i8 from the constant address space
; (addrspace(2)), sign-extends it to i32, and stores the i32 to the
; global pointer %out (addrspace(1)).
363 define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
365 %0 = load i8 addrspace(2)* %in
366 %1 = sext i8 %0 to i32
367 store i32 %1, i32 addrspace(1)* %out
371 ; Load an aligned i8 value
372 ; FUNC-LABEL: {{^}}load_const_i8_aligned:
373 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
374 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
; Kernel under test: reads the i8 at %in itself (no pointer offset) from
; the constant address space (addrspace(2)), zero-extends it to i32, and
; stores the i32 to the global pointer %out.
375 define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
377 %0 = load i8 addrspace(2)* %in
378 %1 = zext i8 %0 to i32
379 store i32 %1, i32 addrspace(1)* %out
383 ; Load an un-aligned i8 value
384 ; FUNC-LABEL: {{^}}load_const_i8_unaligned:
385 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
386 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
; Kernel under test: advances %in by one i8 element (one byte) before
; loading from the constant address space (addrspace(2)), then
; zero-extends the byte to i32 and stores it to the global pointer %out.
; NOTE(review): "unaligned" presumably means the address is no longer
; dword-aligned after the +1 offset -- confirm against the kernel ABI.
387 define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
389 %0 = getelementptr i8 addrspace(2)* %in, i32 1
390 %1 = load i8 addrspace(2)* %0
391 %2 = zext i8 %1 to i32
392 store i32 %2, i32 addrspace(1)* %out
396 ; Load a sign-extended i16 value
397 ; FUNC-LABEL: {{^}}load_const_i16_sext:
398 ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
399 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
401 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
403 ; SI-CHECK: BUFFER_LOAD_SSHORT
; Kernel under test: reads one i16 from the constant address space
; (addrspace(2)), sign-extends it to i32, and stores the i32 to the
; global pointer %out (addrspace(1)).
404 define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
406 %0 = load i16 addrspace(2)* %in
407 %1 = sext i16 %0 to i32
408 store i32 %1, i32 addrspace(1)* %out
412 ; Load an aligned i16 value
413 ; FUNC-LABEL: {{^}}load_const_i16_aligned:
414 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
415 ; SI-CHECK: BUFFER_LOAD_USHORT
; Kernel under test: reads the i16 at %in itself (no pointer offset) from
; the constant address space (addrspace(2)), zero-extends it to i32, and
; stores the i32 to the global pointer %out.
416 define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
418 %0 = load i16 addrspace(2)* %in
419 %1 = zext i16 %0 to i32
420 store i32 %1, i32 addrspace(1)* %out
424 ; Load an un-aligned i16 value
425 ; FUNC-LABEL: {{^}}load_const_i16_unaligned:
426 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
427 ; SI-CHECK: BUFFER_LOAD_USHORT
; Kernel under test: advances %in by one i16 element (two bytes) before
; loading from the constant address space (addrspace(2)), then
; zero-extends the value to i32 and stores it to the global pointer %out.
; NOTE(review): "unaligned" presumably means the address is no longer
; dword-aligned after the offset -- confirm against the kernel ABI.
428 define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
430 %0 = getelementptr i16 addrspace(2)* %in, i32 1
431 %1 = load i16 addrspace(2)* %0
432 %2 = zext i16 %1 to i32
433 store i32 %2, i32 addrspace(1)* %out
437 ; Load an i32 value from the constant address space.
438 ; FUNC-LABEL: {{^}}load_const_addrspace_i32:
439 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
441 ; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
; Kernel under test: reads one i32 from the constant address space
; (addrspace(2)) and stores it unchanged to the global pointer %out.
442 define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
444 %0 = load i32 addrspace(2)* %in
445 store i32 %0, i32 addrspace(1)* %out
449 ; Load an f32 value from the constant address space.
450 ; FUNC-LABEL: {{^}}load_const_addrspace_f32:
451 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
453 ; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
; Kernel under test: reads one float from the constant address space
; (addrspace(2)) and stores it unchanged to the global pointer %out.
454 define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
455 %1 = load float addrspace(2)* %in
456 store float %1, float addrspace(1)* %out
460 ;===------------------------------------------------------------------------===;
461 ; LOCAL ADDRESS SPACE
462 ;===------------------------------------------------------------------------===;
464 ; Load an i8 value from the local address space.
465 ; FUNC-LABEL: {{^}}load_i8_local:
466 ; R600-CHECK: LDS_UBYTE_READ_RET
467 ; SI-CHECK-NOT: S_WQM_B64
468 ; SI-CHECK: S_MOV_B32 m0
469 ; SI-CHECK: DS_READ_U8
; Kernel under test: reads one i8 from the local address space
; (addrspace(3)), zero-extends it to i32, and stores the i32 to the
; global pointer %out (addrspace(1)).
470 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
471 %1 = load i8 addrspace(3)* %in
472 %2 = zext i8 %1 to i32
473 store i32 %2, i32 addrspace(1)* %out
477 ; FUNC-LABEL: {{^}}load_i8_sext_local:
478 ; R600-CHECK: LDS_UBYTE_READ_RET
480 ; SI-CHECK-NOT: S_WQM_B64
481 ; SI-CHECK: S_MOV_B32 m0
482 ; SI-CHECK: DS_READ_I8
; Kernel under test: reads one i8 from the local address space
; (addrspace(3)), sign-extends it to i32, and stores the i32 to the
; global pointer %out (addrspace(1)).
483 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
485 %0 = load i8 addrspace(3)* %in
486 %1 = sext i8 %0 to i32
487 store i32 %1, i32 addrspace(1)* %out
491 ; FUNC-LABEL: {{^}}load_v2i8_local:
492 ; R600-CHECK: LDS_UBYTE_READ_RET
493 ; R600-CHECK: LDS_UBYTE_READ_RET
494 ; SI-CHECK-NOT: S_WQM_B64
495 ; SI-CHECK: S_MOV_B32 m0
496 ; SI-CHECK: DS_READ_U8
497 ; SI-CHECK: DS_READ_U8
; Kernel under test: loads a <2 x i8> vector from the local address space
; (addrspace(3)), zero-extends each element to i32, and stores the
; resulting <2 x i32> to the global pointer %out.
498 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
500 %0 = load <2 x i8> addrspace(3)* %in
501 %1 = zext <2 x i8> %0 to <2 x i32>
502 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
506 ; FUNC-LABEL: {{^}}load_v2i8_sext_local:
507 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
508 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
509 ; R600-CHECK-DAG: ASHR
510 ; R600-CHECK-DAG: ASHR
511 ; SI-CHECK-NOT: S_WQM_B64
512 ; SI-CHECK: S_MOV_B32 m0
513 ; SI-CHECK: DS_READ_I8
514 ; SI-CHECK: DS_READ_I8
; Kernel under test: loads a <2 x i8> vector from the local address space
; (addrspace(3)), sign-extends each element to i32, and stores the
; resulting <2 x i32> to the global pointer %out.
515 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
517 %0 = load <2 x i8> addrspace(3)* %in
518 %1 = sext <2 x i8> %0 to <2 x i32>
519 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
523 ; FUNC-LABEL: {{^}}load_v4i8_local:
524 ; R600-CHECK: LDS_UBYTE_READ_RET
525 ; R600-CHECK: LDS_UBYTE_READ_RET
526 ; R600-CHECK: LDS_UBYTE_READ_RET
527 ; R600-CHECK: LDS_UBYTE_READ_RET
528 ; SI-CHECK-NOT: S_WQM_B64
529 ; SI-CHECK: S_MOV_B32 m0
530 ; SI-CHECK: DS_READ_U8
531 ; SI-CHECK: DS_READ_U8
532 ; SI-CHECK: DS_READ_U8
533 ; SI-CHECK: DS_READ_U8
; Kernel under test: loads a <4 x i8> vector from the local address space
; (addrspace(3)), zero-extends each element to i32, and stores the
; resulting <4 x i32> to the global pointer %out.
534 define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
536 %0 = load <4 x i8> addrspace(3)* %in
537 %1 = zext <4 x i8> %0 to <4 x i32>
538 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
542 ; FUNC-LABEL: {{^}}load_v4i8_sext_local:
543 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
544 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
545 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
546 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
547 ; R600-CHECK-DAG: ASHR
548 ; R600-CHECK-DAG: ASHR
549 ; R600-CHECK-DAG: ASHR
550 ; R600-CHECK-DAG: ASHR
551 ; SI-CHECK-NOT: S_WQM_B64
552 ; SI-CHECK: S_MOV_B32 m0
553 ; SI-CHECK: DS_READ_I8
554 ; SI-CHECK: DS_READ_I8
555 ; SI-CHECK: DS_READ_I8
556 ; SI-CHECK: DS_READ_I8
; Kernel under test: loads a <4 x i8> vector from the local address space
; (addrspace(3)), sign-extends each element to i32, and stores the
; resulting <4 x i32> to the global pointer %out.
557 define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
559 %0 = load <4 x i8> addrspace(3)* %in
560 %1 = sext <4 x i8> %0 to <4 x i32>
561 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
565 ; Load an i16 value from the local address space.
566 ; FUNC-LABEL: {{^}}load_i16_local:
567 ; R600-CHECK: LDS_USHORT_READ_RET
568 ; SI-CHECK-NOT: S_WQM_B64
569 ; SI-CHECK: S_MOV_B32 m0
570 ; SI-CHECK: DS_READ_U16
; Kernel under test: reads one i16 from the local address space
; (addrspace(3)), zero-extends it to i32, and stores the i32 to the
; global pointer %out (addrspace(1)).
571 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
573 %0 = load i16 addrspace(3)* %in
574 %1 = zext i16 %0 to i32
575 store i32 %1, i32 addrspace(1)* %out
579 ; FUNC-LABEL: {{^}}load_i16_sext_local:
580 ; R600-CHECK: LDS_USHORT_READ_RET
582 ; SI-CHECK-NOT: S_WQM_B64
583 ; SI-CHECK: S_MOV_B32 m0
584 ; SI-CHECK: DS_READ_I16
; Kernel under test: reads one i16 from the local address space
; (addrspace(3)), sign-extends it to i32, and stores the i32 to the
; global pointer %out (addrspace(1)).
585 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
587 %0 = load i16 addrspace(3)* %in
588 %1 = sext i16 %0 to i32
589 store i32 %1, i32 addrspace(1)* %out
593 ; FUNC-LABEL: {{^}}load_v2i16_local:
594 ; R600-CHECK: LDS_USHORT_READ_RET
595 ; R600-CHECK: LDS_USHORT_READ_RET
596 ; SI-CHECK-NOT: S_WQM_B64
597 ; SI-CHECK: S_MOV_B32 m0
598 ; SI-CHECK: DS_READ_U16
599 ; SI-CHECK: DS_READ_U16
; Kernel under test: loads a <2 x i16> vector from the local address space
; (addrspace(3)), zero-extends each element to i32, and stores the
; resulting <2 x i32> to the global pointer %out.
600 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
602 %0 = load <2 x i16> addrspace(3)* %in
603 %1 = zext <2 x i16> %0 to <2 x i32>
604 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
608 ; FUNC-LABEL: {{^}}load_v2i16_sext_local:
609 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
610 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
611 ; R600-CHECK-DAG: ASHR
612 ; R600-CHECK-DAG: ASHR
613 ; SI-CHECK-NOT: S_WQM_B64
614 ; SI-CHECK: S_MOV_B32 m0
615 ; SI-CHECK: DS_READ_I16
616 ; SI-CHECK: DS_READ_I16
; Kernel under test: loads a <2 x i16> vector from the local address space
; (addrspace(3)), sign-extends each element to i32, and stores the
; resulting <2 x i32> to the global pointer %out.
617 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
619 %0 = load <2 x i16> addrspace(3)* %in
620 %1 = sext <2 x i16> %0 to <2 x i32>
621 store <2 x i32> %1, <2 x i32> addrspace(1)* %out
625 ; FUNC-LABEL: {{^}}load_v4i16_local:
626 ; R600-CHECK: LDS_USHORT_READ_RET
627 ; R600-CHECK: LDS_USHORT_READ_RET
628 ; R600-CHECK: LDS_USHORT_READ_RET
629 ; R600-CHECK: LDS_USHORT_READ_RET
630 ; SI-CHECK-NOT: S_WQM_B64
631 ; SI-CHECK: S_MOV_B32 m0
632 ; SI-CHECK: DS_READ_U16
633 ; SI-CHECK: DS_READ_U16
634 ; SI-CHECK: DS_READ_U16
635 ; SI-CHECK: DS_READ_U16
; Kernel under test: loads a <4 x i16> vector from the local address space
; (addrspace(3)), zero-extends each element to i32, and stores the
; resulting <4 x i32> to the global pointer %out.
636 define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
638 %0 = load <4 x i16> addrspace(3)* %in
639 %1 = zext <4 x i16> %0 to <4 x i32>
640 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
644 ; FUNC-LABEL: {{^}}load_v4i16_sext_local:
645 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
646 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
647 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
648 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
649 ; R600-CHECK-DAG: ASHR
650 ; R600-CHECK-DAG: ASHR
651 ; R600-CHECK-DAG: ASHR
652 ; R600-CHECK-DAG: ASHR
653 ; SI-CHECK-NOT: S_WQM_B64
654 ; SI-CHECK: S_MOV_B32 m0
655 ; SI-CHECK: DS_READ_I16
656 ; SI-CHECK: DS_READ_I16
657 ; SI-CHECK: DS_READ_I16
658 ; SI-CHECK: DS_READ_I16
; Kernel under test: loads a <4 x i16> vector from the local address space
; (addrspace(3)), sign-extends each element to i32, and stores the
; resulting <4 x i32> to the global pointer %out.
659 define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
661 %0 = load <4 x i16> addrspace(3)* %in
662 %1 = sext <4 x i16> %0 to <4 x i32>
663 store <4 x i32> %1, <4 x i32> addrspace(1)* %out
667 ; Load an i32 value from the local address space.
668 ; FUNC-LABEL: {{^}}load_i32_local:
669 ; R600-CHECK: LDS_READ_RET
670 ; SI-CHECK-NOT: S_WQM_B64
671 ; SI-CHECK: S_MOV_B32 m0
672 ; SI-CHECK: DS_READ_B32
; Kernel under test: reads one i32 from the local address space
; (addrspace(3)) and stores it unchanged to the global pointer %out.
673 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
675 %0 = load i32 addrspace(3)* %in
676 store i32 %0, i32 addrspace(1)* %out
680 ; Load an f32 value from the local address space.
681 ; FUNC-LABEL: {{^}}load_f32_local:
682 ; R600-CHECK: LDS_READ_RET
683 ; SI-CHECK: S_MOV_B32 m0
684 ; SI-CHECK: DS_READ_B32
; Kernel under test: reads one float from the local address space
; (addrspace(3)) and stores it unchanged to the global pointer %out.
685 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
687 %0 = load float addrspace(3)* %in
688 store float %0, float addrspace(1)* %out
692 ; Load a v2f32 value from the local address space.
693 ; FUNC-LABEL: {{^}}load_v2f32_local:
694 ; R600-CHECK: LDS_READ_RET
695 ; R600-CHECK: LDS_READ_RET
696 ; SI-CHECK: S_MOV_B32 m0
697 ; SI-CHECK: DS_READ_B64
; Kernel under test: reads a <2 x float> vector (64 bits of data) from the
; local address space (addrspace(3)) and stores it unchanged to the global
; pointer %out.
698 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
700 %0 = load <2 x float> addrspace(3)* %in
701 store <2 x float> %0, <2 x float> addrspace(1)* %out
705 ; Test loading an i32 and a v2i32 value from the same base pointer.
706 ; FUNC-LABEL: {{^}}load_i32_v2i32_local:
707 ; R600-CHECK: LDS_READ_RET
708 ; R600-CHECK: LDS_READ_RET
709 ; R600-CHECK: LDS_READ_RET
710 ; SI-CHECK-DAG: DS_READ_B32
711 ; SI-CHECK-DAG: DS_READ2_B32
712 define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
713 %scalar = load i32 addrspace(3)* %in
714 %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
715 %vec_ptr = getelementptr <2 x i32> addrspace(3)* %tmp0, i32 2
716 %vec0 = load <2 x i32> addrspace(3)* %vec_ptr, align 4
717 %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
718 %vec = add <2 x i32> %vec0, %vec1
719 store <2 x i32> %vec, <2 x i32> addrspace(1)* %out