1 ; RUN: llc -march=r600 -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3 ; FUNC-LABEL: {{^}}test_copy_v4i8:
4 ; SI: buffer_load_dword [[REG:v[0-9]+]]
5 ; SI: buffer_store_dword [[REG]]
7 define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
8 %val = load <4 x i8> addrspace(1)* %in, align 4
9 store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
13 ; FUNC-LABEL: {{^}}test_copy_v4i8_x2:
14 ; SI: buffer_load_dword [[REG:v[0-9]+]]
15 ; SI: buffer_store_dword [[REG]]
16 ; SI: buffer_store_dword [[REG]]
18 define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
19 %val = load <4 x i8> addrspace(1)* %in, align 4
20 store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
21 store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
25 ; FUNC-LABEL: {{^}}test_copy_v4i8_x3:
26 ; SI: buffer_load_dword [[REG:v[0-9]+]]
27 ; SI: buffer_store_dword [[REG]]
28 ; SI: buffer_store_dword [[REG]]
29 ; SI: buffer_store_dword [[REG]]
31 define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
32 %val = load <4 x i8> addrspace(1)* %in, align 4
33 store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
34 store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
35 store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
39 ; FUNC-LABEL: {{^}}test_copy_v4i8_x4:
40 ; SI: buffer_load_dword [[REG:v[0-9]+]]
41 ; SI: buffer_store_dword [[REG]]
42 ; SI: buffer_store_dword [[REG]]
43 ; SI: buffer_store_dword [[REG]]
44 ; SI: buffer_store_dword [[REG]]
46 define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
47 %val = load <4 x i8> addrspace(1)* %in, align 4
48 store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
49 store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
50 store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
51 store <4 x i8> %val, <4 x i8> addrspace(1)* %out3, align 4
55 ; FUNC-LABEL: {{^}}test_copy_v4i8_extra_use:
56 ; SI: buffer_load_ubyte
57 ; SI: buffer_load_ubyte
58 ; SI: buffer_load_ubyte
59 ; SI: buffer_load_ubyte
64 ; SI-DAG: buffer_store_byte
65 ; SI-DAG: buffer_store_byte
66 ; SI-DAG: buffer_store_byte
67 ; SI-DAG: buffer_store_byte
68 ; SI-DAG: buffer_store_byte
69 ; SI-DAG: buffer_store_byte
70 ; SI-DAG: buffer_store_byte
71 ; SI-DAG: buffer_store_byte
73 ; Expected output once scalarizing of v4i8 loads is fixed (XSI checks are disabled until then).
74 ; XSI: buffer_load_dword
79 ; XSI: buffer_store_dword
80 ; XSI: buffer_store_dword
83 define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
84 %val = load <4 x i8> addrspace(1)* %in, align 4
85 %add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
86 store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
87 store <4 x i8> %add, <4 x i8> addrspace(1)* %out1, align 4
91 ; FUNC-LABEL: {{^}}test_copy_v4i8_x2_extra_use:
92 ; SI: buffer_load_ubyte
93 ; SI: buffer_load_ubyte
94 ; SI: buffer_load_ubyte
95 ; SI: buffer_load_ubyte
100 ; SI-DAG: buffer_store_byte
101 ; SI-DAG: buffer_store_byte
102 ; SI-DAG: buffer_store_byte
103 ; SI-DAG: buffer_store_byte
104 ; SI-DAG: buffer_store_byte
105 ; SI-DAG: buffer_store_byte
106 ; SI-DAG: buffer_store_byte
107 ; SI-DAG: buffer_store_byte
108 ; SI-DAG: buffer_store_byte
109 ; SI-DAG: buffer_store_byte
110 ; SI-DAG: buffer_store_byte
111 ; SI-DAG: buffer_store_byte
113 ; XSI: buffer_load_dword
115 ; XSI: buffer_store_dword
117 ; XSI: buffer_store_dword
118 ; XSI-NEXT: buffer_store_dword
121 define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
122 %val = load <4 x i8> addrspace(1)* %in, align 4
123 %add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
124 store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
125 store <4 x i8> %add, <4 x i8> addrspace(1)* %out1, align 4
126 store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
130 ; FUNC-LABEL: {{^}}test_copy_v3i8:
134 define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
135 %val = load <3 x i8> addrspace(1)* %in, align 4
136 store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 4
140 ; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
141 ; SI: buffer_load_ubyte
142 ; SI: buffer_load_ubyte
143 ; SI: buffer_load_ubyte
144 ; SI: buffer_load_ubyte
146 define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
147 %val = load volatile <4 x i8> addrspace(1)* %in, align 4
148 store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
152 ; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_store:
153 ; SI: buffer_load_ubyte
154 ; SI: buffer_load_ubyte
155 ; SI: buffer_load_ubyte
156 ; SI: buffer_load_ubyte
157 ; SI: buffer_store_byte
158 ; SI: buffer_store_byte
159 ; SI: buffer_store_byte
160 ; SI: buffer_store_byte
162 define void @test_copy_v4i8_volatile_store(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
163 %val = load <4 x i8> addrspace(1)* %in, align 4
164 store volatile <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4