1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; Scalar case: an i32 loaded from addrspace(1) is zero-extended to i64 and
; stored to %out. The checks capture the register of the loaded dword as LO,
; expect a separate register HI to be set to the literal 0, and expect the
; 64-bit store to use the LO..HI register pair.
5 ; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
6 ; SI: buffer_load_dword v[[LO:[0-9]+]],
7 ; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
8 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
9 define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
10 %a = load i32, i32 addrspace(1)* %in
11 %ext = zext i32 %a to i64
12 store i64 %ext, i64 addrspace(1)* %out
; Scalar case: an i32 loaded from addrspace(1) is sign-extended to i64 and
; stored to %out. The checks capture the loaded register as LOAD and expect a
; v_ashrrev_i32 by 31 that reads it (presumably forming the high half of the
; result), followed by a 64-bit store.
16 ; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
17 ; SI: buffer_load_dword [[LOAD:v[0-9]+]],
18 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
19 ; SI: buffer_store_dwordx2
20 define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
21 %a = load i32, i32 addrspace(1)* %in
22 %ext = sext i32 %a to i64
23 store i64 %ext, i64 addrspace(1)* %out
; Single-element vector variant of the zero-extending load: <1 x i32> read
; from addrspace(1), widened to <1 x i64>, and stored to %out. The checks
; expect a dword load followed by a dwordx2 store.
27 ; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64:
28 ; SI: buffer_load_dword
29 ; SI: buffer_store_dwordx2
31 define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
32 %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
33 %ext = zext <1 x i32> %load to <1 x i64>
34 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
; Single-element vector variant of the sign-extending load: <1 x i32> read
; from addrspace(1), widened to <1 x i64>, and stored to %out. The checks
; expect a dword load followed by a dwordx2 store.
38 ; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64:
39 ; SI: buffer_load_dword
41 ; SI: buffer_store_dwordx2
43 define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
44 %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
45 %ext = sext <1 x i32> %load to <1 x i64>
46 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
; Zero-extending load of <2 x i32> from addrspace(1) to <2 x i64>. The checks
; expect one dwordx2 load and then two dwordx2 stores, matched in order.
50 ; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
51 ; SI: buffer_load_dwordx2
52 ; SI: buffer_store_dwordx2
53 ; SI: buffer_store_dwordx2
55 define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
56 %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
57 %ext = zext <2 x i32> %load to <2 x i64>
58 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
; Sign-extending load of <2 x i32> from addrspace(1) to <2 x i64>. The checks
; expect one dwordx2 load; the two v_ashrrev_i32 shifts and the two dwordx2
; stores are -DAG checks, so their relative order is not constrained.
62 ; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64:
63 ; SI: buffer_load_dwordx2
64 ; SI-DAG: v_ashrrev_i32
65 ; SI-DAG: v_ashrrev_i32
66 ; SI-DAG: buffer_store_dwordx2
67 ; SI-DAG: buffer_store_dwordx2
69 define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
70 %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
71 %ext = sext <2 x i32> %load to <2 x i64>
72 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
; Zero-extending load of <4 x i32> from addrspace(1) to <4 x i64>. The checks
; expect one dwordx4 load and then four dwordx2 stores, matched in order.
76 ; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
77 ; SI: buffer_load_dwordx4
78 ; SI: buffer_store_dwordx2
79 ; SI: buffer_store_dwordx2
80 ; SI: buffer_store_dwordx2
81 ; SI: buffer_store_dwordx2
83 define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
84 %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
85 %ext = zext <4 x i32> %load to <4 x i64>
86 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
; Sign-extending load of <4 x i32> from addrspace(1) to <4 x i64>. The checks
; expect one dwordx4 load; the four v_ashrrev_i32 shifts and the four dwordx2
; stores are -DAG checks, so their relative order is not constrained.
90 ; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64:
91 ; SI: buffer_load_dwordx4
92 ; SI-DAG: v_ashrrev_i32
93 ; SI-DAG: v_ashrrev_i32
94 ; SI-DAG: v_ashrrev_i32
95 ; SI-DAG: v_ashrrev_i32
96 ; SI-DAG: buffer_store_dwordx2
97 ; SI-DAG: buffer_store_dwordx2
98 ; SI-DAG: buffer_store_dwordx2
99 ; SI-DAG: buffer_store_dwordx2
101 define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
102 %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
103 %ext = sext <4 x i32> %load to <4 x i64>
104 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
; Zero-extending load of <8 x i32> from addrspace(1) to <8 x i64>. The checks
; expect two dwordx4 loads in order, then eight dwordx2 stores as -DAG checks
; (any relative order).
108 ; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
109 ; SI: buffer_load_dwordx4
110 ; SI: buffer_load_dwordx4
111 ; SI-DAG: buffer_store_dwordx2
112 ; SI-DAG: buffer_store_dwordx2
113 ; SI-DAG: buffer_store_dwordx2
114 ; SI-DAG: buffer_store_dwordx2
115 ; SI-DAG: buffer_store_dwordx2
116 ; SI-DAG: buffer_store_dwordx2
117 ; SI-DAG: buffer_store_dwordx2
118 ; SI-DAG: buffer_store_dwordx2
120 define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
121 %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
122 %ext = zext <8 x i32> %load to <8 x i64>
123 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
; Sign-extending load of <8 x i32> from addrspace(1) to <8 x i64>. The checks
; expect two dwordx4 loads in order; the eight v_ashrrev_i32 shifts and the
; eight dwordx2 stores are -DAG checks, so their relative order is not
; constrained.
127 ; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64:
128 ; SI: buffer_load_dwordx4
129 ; SI: buffer_load_dwordx4
131 ; SI-DAG: v_ashrrev_i32
132 ; SI-DAG: v_ashrrev_i32
133 ; SI-DAG: v_ashrrev_i32
134 ; SI-DAG: v_ashrrev_i32
135 ; SI-DAG: v_ashrrev_i32
136 ; SI-DAG: v_ashrrev_i32
137 ; SI-DAG: v_ashrrev_i32
138 ; SI-DAG: v_ashrrev_i32
139 ; SI-DAG: buffer_store_dwordx2
140 ; SI-DAG: buffer_store_dwordx2
141 ; SI-DAG: buffer_store_dwordx2
142 ; SI-DAG: buffer_store_dwordx2
143 ; SI-DAG: buffer_store_dwordx2
144 ; SI-DAG: buffer_store_dwordx2
145 ; SI-DAG: buffer_store_dwordx2
146 ; SI-DAG: buffer_store_dwordx2
149 define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
150 %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
151 %ext = sext <8 x i32> %load to <8 x i64>
152 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
; Sign-extending load of <16 x i32> from addrspace(1) to <16 x i64>. The
; checks expect four dwordx4 loads in order. After that, sixteen
; v_ashrrev_i32 shifts and eight dwordx2 stores are matched as -DAG checks
; (written as four groups of four shifts plus two stores; the grouping does
; not impose an order).
156 ; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64:
157 ; SI: buffer_load_dwordx4
158 ; SI: buffer_load_dwordx4
159 ; SI: buffer_load_dwordx4
160 ; SI: buffer_load_dwordx4
162 ; SI-DAG: v_ashrrev_i32
163 ; SI-DAG: v_ashrrev_i32
164 ; SI-DAG: v_ashrrev_i32
165 ; SI-DAG: v_ashrrev_i32
166 ; SI-DAG: buffer_store_dwordx2
167 ; SI-DAG: buffer_store_dwordx2
169 ; SI-DAG: v_ashrrev_i32
170 ; SI-DAG: v_ashrrev_i32
171 ; SI-DAG: v_ashrrev_i32
172 ; SI-DAG: v_ashrrev_i32
173 ; SI-DAG: buffer_store_dwordx2
174 ; SI-DAG: buffer_store_dwordx2
176 ; SI-DAG: v_ashrrev_i32
177 ; SI-DAG: v_ashrrev_i32
178 ; SI-DAG: v_ashrrev_i32
179 ; SI-DAG: v_ashrrev_i32
180 ; SI-DAG: buffer_store_dwordx2
181 ; SI-DAG: buffer_store_dwordx2
183 ; SI-DAG: v_ashrrev_i32
184 ; SI-DAG: v_ashrrev_i32
185 ; SI-DAG: v_ashrrev_i32
186 ; SI-DAG: v_ashrrev_i32
187 ; SI-DAG: buffer_store_dwordx2
188 ; SI-DAG: buffer_store_dwordx2
190 define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
191 %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
192 %ext = sext <16 x i32> %load to <16 x i64>
193 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
; Zero-extending load of <16 x i32> from addrspace(1) to <16 x i64>. The
; checks expect four dwordx4 loads and then sixteen dwordx2 stores, all
; matched in order. The label pattern ends with a colon, like every other
; label in this file, so it anchors on this exact symbol rather than on any
; symbol that merely starts with the same name.
197 ; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64:
198 ; SI: buffer_load_dwordx4
199 ; SI: buffer_load_dwordx4
200 ; SI: buffer_load_dwordx4
201 ; SI: buffer_load_dwordx4
203 ; SI: buffer_store_dwordx2
204 ; SI: buffer_store_dwordx2
205 ; SI: buffer_store_dwordx2
206 ; SI: buffer_store_dwordx2
207 ; SI: buffer_store_dwordx2
208 ; SI: buffer_store_dwordx2
209 ; SI: buffer_store_dwordx2
210 ; SI: buffer_store_dwordx2
211 ; SI: buffer_store_dwordx2
212 ; SI: buffer_store_dwordx2
213 ; SI: buffer_store_dwordx2
214 ; SI: buffer_store_dwordx2
215 ; SI: buffer_store_dwordx2
216 ; SI: buffer_store_dwordx2
217 ; SI: buffer_store_dwordx2
218 ; SI: buffer_store_dwordx2
221 define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
222 %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
223 %ext = zext <16 x i32> %load to <16 x i64>
224 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
; Sign-extending load of <32 x i32> from addrspace(1) to <32 x i64>. The
; checks expect eight dwordx4 loads in order; the thirty-two v_ashrrev_i32
; shifts and the thirty-two dwordx2 stores are -DAG checks, so their relative
; order is not constrained.
228 ; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64:
229 ; SI: buffer_load_dwordx4
230 ; SI: buffer_load_dwordx4
231 ; SI: buffer_load_dwordx4
232 ; SI: buffer_load_dwordx4
233 ; SI: buffer_load_dwordx4
234 ; SI: buffer_load_dwordx4
235 ; SI: buffer_load_dwordx4
236 ; SI: buffer_load_dwordx4
239 ; SI-DAG: v_ashrrev_i32
240 ; SI-DAG: v_ashrrev_i32
241 ; SI-DAG: v_ashrrev_i32
242 ; SI-DAG: v_ashrrev_i32
243 ; SI-DAG: v_ashrrev_i32
244 ; SI-DAG: v_ashrrev_i32
245 ; SI-DAG: v_ashrrev_i32
246 ; SI-DAG: v_ashrrev_i32
247 ; SI-DAG: v_ashrrev_i32
248 ; SI-DAG: v_ashrrev_i32
249 ; SI-DAG: v_ashrrev_i32
250 ; SI-DAG: v_ashrrev_i32
251 ; SI-DAG: v_ashrrev_i32
252 ; SI-DAG: v_ashrrev_i32
253 ; SI-DAG: v_ashrrev_i32
254 ; SI-DAG: v_ashrrev_i32
255 ; SI-DAG: v_ashrrev_i32
256 ; SI-DAG: v_ashrrev_i32
257 ; SI-DAG: v_ashrrev_i32
258 ; SI-DAG: v_ashrrev_i32
259 ; SI-DAG: v_ashrrev_i32
260 ; SI-DAG: v_ashrrev_i32
261 ; SI-DAG: v_ashrrev_i32
262 ; SI-DAG: v_ashrrev_i32
263 ; SI-DAG: v_ashrrev_i32
264 ; SI-DAG: v_ashrrev_i32
265 ; SI-DAG: v_ashrrev_i32
266 ; SI-DAG: v_ashrrev_i32
267 ; SI-DAG: v_ashrrev_i32
268 ; SI-DAG: v_ashrrev_i32
269 ; SI-DAG: v_ashrrev_i32
270 ; SI-DAG: v_ashrrev_i32
272 ; SI-DAG: buffer_store_dwordx2
273 ; SI-DAG: buffer_store_dwordx2
274 ; SI-DAG: buffer_store_dwordx2
275 ; SI-DAG: buffer_store_dwordx2
276 ; SI-DAG: buffer_store_dwordx2
277 ; SI-DAG: buffer_store_dwordx2
278 ; SI-DAG: buffer_store_dwordx2
279 ; SI-DAG: buffer_store_dwordx2
281 ; SI-DAG: buffer_store_dwordx2
282 ; SI-DAG: buffer_store_dwordx2
283 ; SI-DAG: buffer_store_dwordx2
284 ; SI-DAG: buffer_store_dwordx2
285 ; SI-DAG: buffer_store_dwordx2
286 ; SI-DAG: buffer_store_dwordx2
287 ; SI-DAG: buffer_store_dwordx2
288 ; SI-DAG: buffer_store_dwordx2
290 ; SI-DAG: buffer_store_dwordx2
291 ; SI-DAG: buffer_store_dwordx2
292 ; SI-DAG: buffer_store_dwordx2
293 ; SI-DAG: buffer_store_dwordx2
294 ; SI-DAG: buffer_store_dwordx2
295 ; SI-DAG: buffer_store_dwordx2
296 ; SI-DAG: buffer_store_dwordx2
297 ; SI-DAG: buffer_store_dwordx2
299 ; SI-DAG: buffer_store_dwordx2
300 ; SI-DAG: buffer_store_dwordx2
301 ; SI-DAG: buffer_store_dwordx2
302 ; SI-DAG: buffer_store_dwordx2
303 ; SI-DAG: buffer_store_dwordx2
304 ; SI-DAG: buffer_store_dwordx2
305 ; SI-DAG: buffer_store_dwordx2
306 ; SI-DAG: buffer_store_dwordx2
309 define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
310 %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
311 %ext = sext <32 x i32> %load to <32 x i64>
312 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; Zero-extending load of <32 x i32> from addrspace(1) to <32 x i64>. The
; checks expect eight dwordx4 loads in order, then thirty-two dwordx2 stores
; as -DAG checks (any relative order).
316 ; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64:
317 ; SI: buffer_load_dwordx4
318 ; SI: buffer_load_dwordx4
319 ; SI: buffer_load_dwordx4
320 ; SI: buffer_load_dwordx4
321 ; SI: buffer_load_dwordx4
322 ; SI: buffer_load_dwordx4
323 ; SI: buffer_load_dwordx4
324 ; SI: buffer_load_dwordx4
326 ; SI-DAG: buffer_store_dwordx2
327 ; SI-DAG: buffer_store_dwordx2
328 ; SI-DAG: buffer_store_dwordx2
329 ; SI-DAG: buffer_store_dwordx2
330 ; SI-DAG: buffer_store_dwordx2
331 ; SI-DAG: buffer_store_dwordx2
332 ; SI-DAG: buffer_store_dwordx2
333 ; SI-DAG: buffer_store_dwordx2
335 ; SI-DAG: buffer_store_dwordx2
336 ; SI-DAG: buffer_store_dwordx2
337 ; SI-DAG: buffer_store_dwordx2
338 ; SI-DAG: buffer_store_dwordx2
339 ; SI-DAG: buffer_store_dwordx2
340 ; SI-DAG: buffer_store_dwordx2
341 ; SI-DAG: buffer_store_dwordx2
342 ; SI-DAG: buffer_store_dwordx2
344 ; SI-DAG: buffer_store_dwordx2
345 ; SI-DAG: buffer_store_dwordx2
346 ; SI-DAG: buffer_store_dwordx2
347 ; SI-DAG: buffer_store_dwordx2
348 ; SI-DAG: buffer_store_dwordx2
349 ; SI-DAG: buffer_store_dwordx2
350 ; SI-DAG: buffer_store_dwordx2
351 ; SI-DAG: buffer_store_dwordx2
353 ; SI-DAG: buffer_store_dwordx2
354 ; SI-DAG: buffer_store_dwordx2
355 ; SI-DAG: buffer_store_dwordx2
356 ; SI-DAG: buffer_store_dwordx2
357 ; SI-DAG: buffer_store_dwordx2
358 ; SI-DAG: buffer_store_dwordx2
359 ; SI-DAG: buffer_store_dwordx2
360 ; SI-DAG: buffer_store_dwordx2
363 define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
364 %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
365 %ext = zext <32 x i32> %load to <32 x i64>
366 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out