; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; half args should be promoted to float
; Stores a scalar half argument to %out. The checks expect the argument to be
; read with a scalar dword load, converted by v_cvt_f16_f32 and written with a
; 16-bit store.
; GCN-LABEL: {{^}}load_f16_arg:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
; GCN: buffer_store_short [[CVT]]
define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
  store half %arg, half addrspace(1)* %out
  ret void
}
; Stores a <2 x half> argument to %out. The checks expect each element to be
; fetched with its own 16-bit load (offsets 44 and 46) and written back with
; its own 16-bit store (offsets 0 and 2); the four checks may match in any
; order.
; GCN-LABEL: {{^}}load_v2f16_arg:
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN-DAG: buffer_store_short [[V0]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_short [[V1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
  store <2 x half> %arg, <2 x half> addrspace(1)* %out
  ret void
}
; Stores a <3 x half> argument to %out. The checks expect exactly three 16-bit
; loads, followed by one dword store and one short store (in either order) and
; no further buffer stores.
; GCN-LABEL: {{^}}load_v3f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_short
; GCN-NOT: buffer_store
define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
  store <3 x half> %arg, <3 x half> addrspace(1)* %out
  ret void
}
; Stores a <4 x half> argument to %out. The checks expect four 16-bit loads
; followed by four 16-bit stores.
; GCN-LABEL: {{^}}load_v4f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
  store <4 x half> %arg, <4 x half> addrspace(1)* %out
  ret void
}
; Stores an <8 x half> argument to %out. Only the function label is checked
; here, so this verifies that the function compiles and passes the machine
; verifier enabled on the RUN lines.
; GCN-LABEL: {{^}}load_v8f16_arg:
define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
  store <8 x half> %arg, <8 x half> addrspace(1)* %out
  ret void
}
; Extends a <2 x half> argument to <2 x float> and stores it to %out. Only the
; function label is checked.
; GCN-LABEL: {{^}}extload_v2f16_arg:
define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
  %fpext = fpext <2 x half> %in to <2 x float>
  store <2 x float> %fpext, <2 x float> addrspace(1)* %out
  ret void
}
; Extends a scalar half argument to float and stores it to %out. Only the
; function label is checked.
; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to float
  store float %ext, float addrspace(1)* %out
  ret void
}
; Extends a <2 x half> argument to <2 x float> and stores it to %out. Only the
; function label is checked.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x float>
  store <2 x float> %ext, <2 x float> addrspace(1)* %out
  ret void
}
; Extends a <3 x half> argument to <3 x float> and stores it to %out. The
; checks expect exactly three 16-bit loads and exactly three f16-to-f32
; conversions, then one dword store and one dwordx2 store in either order.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_dwordx2
define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x float>
  store <3 x float> %ext, <3 x float> addrspace(1)* %out
  ret void
}
; Extends a <4 x half> argument to <4 x float> and stores it to %out. Only the
; function label is checked.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x float>
  store <4 x float> %ext, <4 x float> addrspace(1)* %out
  ret void
}
; Extends an <8 x half> argument to <8 x float> and stores it to %out. Only
; the function label is checked.
; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x float>
  store <8 x float> %ext, <8 x float> addrspace(1)* %out
  ret void
}
; Extends a scalar half argument to double and stores it to %out. Only the
; function label is checked.
; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to double
  store double %ext, double addrspace(1)* %out
  ret void
}
; Extends a <2 x half> argument to <2 x double> and stores it to %out. Only
; the function label is checked.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x double>
  store <2 x double> %ext, <2 x double> addrspace(1)* %out
  ret void
}
; Extends a <3 x half> argument to <3 x double> and stores it to %out. Only
; the function label is checked.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x double>
  store <3 x double> %ext, <3 x double> addrspace(1)* %out
  ret void
}
; Extends a <4 x half> argument to <4 x double> and stores it to %out. Only
; the function label is checked.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x double>
  store <4 x double> %ext, <4 x double> addrspace(1)* %out
  ret void
}
; Extends an <8 x half> argument to <8 x double> and stores it to %out. Only
; the function label is checked.
; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x double>
  store <8 x double> %ext, <8 x double> addrspace(1)* %out
  ret void
}
; Copies one half from %in to %out. The checks expect a 16-bit load whose
; result register is stored directly with a 16-bit store (no conversion).
; GCN-LABEL: {{^}}global_load_store_f16:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  store half %val, half addrspace(1)* %out
  ret void
}
; Copies one <2 x half> from %in to %out. The checks expect a single dword
; load whose result register is stored directly with a dword store.
; GCN-LABEL: {{^}}global_load_store_v2f16:
; GCN: buffer_load_dword [[TMP:v[0-9]+]]
; GCN: buffer_store_dword [[TMP]]
define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  store <2 x half> %val, <2 x half> addrspace(1)* %out
  ret void
}
; Copies one <4 x half> from %in to %out. The checks expect a single dwordx2
; load whose result registers are stored directly with a dwordx2 store.
; Note that %in is the first parameter here, unlike the sibling tests where
; %out comes first.
; GCN-LABEL: {{^}}global_load_store_v4f16:
; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[TMP]]
define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  store <4 x half> %val, <4 x half> addrspace(1)* %out
  ret void
}
; Copies one <8 x half> from %in to %out. The checks expect a single dwordx4
; load whose result registers are stored directly with a dwordx4 store. The
; store check reuses the captured TMP registers (as the v2/v4 variants do)
; rather than redefining the variable, so it actually ties the stored value to
; the loaded one.
; GCN-LABEL: {{^}}global_load_store_v8f16:
; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[TMP]]
define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  store <8 x half> %val, <8 x half> addrspace(1)* %out
  ret void
}
; Loads a half from %in, extends it to float and stores it to %out. The checks
; expect a 16-bit load feeding v_cvt_f32_f16, whose result is stored as a
; dword.
; GCN-LABEL: {{^}}global_extload_f16_to_f32:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_dword [[CVT]]
define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to float
  store float %cvt, float addrspace(1)* %out
  ret void
}
; Loads a <2 x half> from %in, extends it to <2 x float> and stores it to
; %out. Only the function label is checked.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x float>
  store <2 x float> %cvt, <2 x float> addrspace(1)* %out
  ret void
}
; Loads a <3 x half> from %in, extends it to <3 x float> and stores it to
; %out. Only the function label is checked.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x float>
  store <3 x float> %cvt, <3 x float> addrspace(1)* %out
  ret void
}
; Loads a <4 x half> from %in, extends it to <4 x float> and stores it to
; %out. Only the function label is checked.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out
  ret void
}
; Loads an <8 x half> from %in, extends it to <8 x float> and stores it to
; %out. Only the function label is checked.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x float>
  store <8 x float> %cvt, <8 x float> addrspace(1)* %out
  ret void
}
; Loads a <16 x half> from %in, extends it to <16 x float> and stores it to
; %out. Only the function label is checked.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x float>
  store <16 x float> %cvt, <16 x float> addrspace(1)* %out
  ret void
}
; Loads a half from %in, extends it to double and stores it to %out. The
; checks expect the extension to be done in two steps (f16 to f32, then f32 to
; f64) with the final register pair stored as a dwordx2.
; GCN-LABEL: {{^}}global_extload_f16_to_f64:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
; GCN: buffer_store_dwordx2 [[CVT1]]
define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to double
  store double %cvt, double addrspace(1)* %out
  ret void
}
; Loads a <2 x half> from %in, extends it to <2 x double> and stores it to
; %out. Only the function label is checked.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x double>
  store <2 x double> %cvt, <2 x double> addrspace(1)* %out
  ret void
}
; Loads a <3 x half> from %in, extends it to <3 x double> and stores it to
; %out. Only the function label is checked.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x double>
  store <3 x double> %cvt, <3 x double> addrspace(1)* %out
  ret void
}
; Loads a <4 x half> from %in, extends it to <4 x double> and stores it to
; %out. Only the function label is checked.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x double>
  store <4 x double> %cvt, <4 x double> addrspace(1)* %out
  ret void
}
; Loads an <8 x half> from %in, extends it to <8 x double> and stores it to
; %out. Only the function label is checked.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x double>
  store <8 x double> %cvt, <8 x double> addrspace(1)* %out
  ret void
}
; Loads a <16 x half> from %in, extends it to <16 x double> and stores it to
; %out. Only the function label is checked.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x double>
  store <16 x double> %cvt, <16 x double> addrspace(1)* %out
  ret void
}
; Loads a float from %in, truncates it to half and stores it to %out. The
; checks expect a dword load feeding v_cvt_f16_f32, whose result is stored
; with a 16-bit store.
; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_short [[CVT]]
define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %val = load float, float addrspace(1)* %in
  %cvt = fptrunc float %val to half
  store half %cvt, half addrspace(1)* %out
  ret void
}
; Loads a <2 x float> from %in, truncates it to <2 x half> and stores it to
; %out. The checks capture the low and high registers of the dwordx2 load and
; expect each to be converted and stored with its own 16-bit store; the
; conversions and stores may appear in any order.
; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: buffer_store_short [[CVT0]]
; GCN-DAG: buffer_store_short [[CVT1]]
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %val = load <2 x float>, <2 x float> addrspace(1)* %in
  %cvt = fptrunc <2 x float> %val to <2 x half>
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out
  ret void
}
; Loads a <3 x float> from %in, truncates it to <3 x half> and stores it to
; %out. The checks record the current output, which performs four conversions
; for a three-element vector (see the FIXME below), then one short store and
; one dword store.
; FIXME: Shouldn't do 4th conversion
; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %val = load <3 x float>, <3 x float> addrspace(1)* %in
  %cvt = fptrunc <3 x float> %val to <3 x half>
  store <3 x half> %cvt, <3 x half> addrspace(1)* %out
  ret void
}
; Loads a <4 x float> from %in, truncates it to <4 x half> and stores it to
; %out. The checks expect one dwordx4 load, four conversions and four 16-bit
; stores.
; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %val = load <4 x float>, <4 x float> addrspace(1)* %in
  %cvt = fptrunc <4 x float> %val to <4 x half>
  store <4 x half> %cvt, <4 x half> addrspace(1)* %out
  ret void
}
; Loads an <8 x float> from %in, truncates it to <8 x half> and stores it to
; %out. The checks expect eight dword loads, eight conversions and eight
; 16-bit stores, in that order.
; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
  %val = load <8 x float>, <8 x float> addrspace(1)* %in
  %cvt = fptrunc <8 x float> %val to <8 x half>
  store <8 x half> %cvt, <8 x half> addrspace(1)* %out
  ret void
}
; Loads a <16 x float> from %in, truncates it to <16 x half> and stores it to
; %out. The checks expect sixteen dword loads, sixteen conversions and sixteen
; 16-bit stores, in that order.
; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
  %val = load <16 x float>, <16 x float> addrspace(1)* %in
  %cvt = fptrunc <16 x float> %val to <16 x half>
  store <16 x half> %cvt, <16 x half> addrspace(1)* %out
  ret void
}
; Adds two scalar half arguments and stores the result to %out. The SI-only
; checks expect four f16-to-f32 conversions in any order; the FIXME below
; records that these are not yet folded away.
; FIXME: Unsafe math should fold conversions away
; GCN-LABEL: {{^}}fadd_f16:
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
  %add = fadd half %a, %b
  store half %add, half addrspace(1)* %out, align 4
  ret void
}
; Adds two <2 x half> arguments and stores the result to %out. Only the
; function label is checked.
; GCN-LABEL: {{^}}fadd_v2f16:
define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
  %add = fadd <2 x half> %a, %b
  store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
  ret void
}
; Loads two adjacent <4 x half> vectors from %in (elements 0 and 1), adds them
; and stores the result to %out. Only the function label is checked.
; GCN-LABEL: {{^}}fadd_v4f16:
define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  ; %b_ptr addresses the vector immediately following the one at %in.
  %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
  %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
  %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
  %result = fadd <4 x half> %a, %b
  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
  ret void
}
; Adds two <8 x half> arguments and stores the result to %out. Only the
; function label is checked.
; GCN-LABEL: {{^}}fadd_v8f16:
define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
  %add = fadd <8 x half> %a, %b
  store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
  ret void
}
; Loads two adjacent halves from %in (elements 0 and 1), subtracts the second
; from the first and stores the result to %out. The check expects the
; subtraction to be emitted as a 32-bit float v_subrev_f32.
; GCN-LABEL: {{^}}fsub_f16:
; GCN: v_subrev_f32_e32
define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  ; %b_ptr addresses the half immediately following the one at %in.
  %b_ptr = getelementptr half, half addrspace(1)* %in, i32 1
  %a = load half, half addrspace(1)* %in
  %b = load half, half addrspace(1)* %b_ptr
  %sub = fsub half %a, %b
  store half %sub, half addrspace(1)* %out
  ret void
}
; Loads a half from %in, bitcasts it to i16 and stores it to %out. The checks
; expect the loaded register to be stored unchanged (no conversion
; instruction). Note that %in is the first parameter here.
; GCN-LABEL: {{^}}test_bitcast_from_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
  %val = load half, half addrspace(1)* %in
  %val_int = bitcast half %val to i16
  store i16 %val_int, i16 addrspace(1)* %out
  ret void
}
; Loads an i16 from %in, bitcasts it to half and stores it to %out. The checks
; expect the loaded register to be stored unchanged (no conversion
; instruction).
; GCN-LABEL: {{^}}test_bitcast_to_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %val = load i16, i16 addrspace(1)* %in
  %val_fp = bitcast i16 %val to half
  store half %val_fp, half addrspace(1)* %out
  ret void
}
; Attribute group referenced as #0 by every function in this file.
attributes #0 = { nounwind }