1 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4 ; http://llvm.org/bugs/show_bug.cgi?id=20982
7 declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
; Signed bitfield extract where all three intrinsic operands come from distinct
; kernel arguments. Operand order appears to be (source, offset, width), going
; by the *_0_width_*_offset tests in this file.
9 ; FUNC-LABEL: @bfe_i32_arg_arg_arg
12 ; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
13 define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
14 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
15 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; First two intrinsic operands come from kernel arguments; the third is the
; immediate 123.
19 ; FUNC-LABEL: @bfe_i32_arg_arg_imm
22 define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
23 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
24 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; First and third intrinsic operands come from kernel arguments; the second is
; the immediate 123.
28 ; FUNC-LABEL: @bfe_i32_arg_imm_arg
31 define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
32 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
33 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; The first intrinsic operand is the immediate 123; the second and third come
; from kernel arguments.
37 ; FUNC-LABEL: @bfe_i32_imm_arg_arg
40 define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
41 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
42 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; The source is loaded from memory and the other two operands are the
; immediates 2 and 8; the check below expects both immediates to be printed
; as the trailing operands of V_BFE_I32.
46 ; FUNC-LABEL: @v_bfe_print_arg
47 ; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
48 define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
49 %load = load i32 addrspace(1)* %src0, align 4
50 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
51 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Zero-width extract: the width operand is the constant 0 while the offset
; comes from a kernel argument.
55 ; FUNC-LABEL: @bfe_i32_arg_0_width_reg_offset
59 define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
60 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
61 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Zero-width extract: the width operand is the constant 0 and the offset is
; the immediate 8. The %src1 parameter is not used by the body.
65 ; FUNC-LABEL: @bfe_i32_arg_0_width_imm_offset
69 define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
70 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
71 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Extracts a 31-bit signed field at offset 1 from %shl. The checks below expect
; a left shift by 31 followed by an arithmetic right shift by 1.
75 ; FUNC-LABEL: @bfe_i32_test_6
76 ; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
77 ; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
79 define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
80 %x = load i32 addrspace(1)* %in, align 4
82 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31)
83 store i32 %bfe, i32 addrspace(1)* %out, align 4
; Extracts a 31-bit signed field at offset 0 from %shl. The checks below expect
; the whole computation to fold away so that the constant 0 is stored.
87 ; FUNC-LABEL: @bfe_i32_test_7
90 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
91 ; SI: BUFFER_STORE_DWORD [[VREG]],
93 define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
94 %x = load i32 addrspace(1)* %in, align 4
96 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31)
97 store i32 %bfe, i32 addrspace(1)* %out, align 4
; Shifts the loaded value left by 31, then extracts the 1-bit signed field at
; offset 31, i.e. the original bit 0. The checks below expect one V_BFE_I32
; with immediates 0 and 1 applied after the load.
; NOTE(review): the visible checks already expect a single BFE, so the FIXME
; below may be stale - confirm before removing it.
101 ; FIXME: The shifts should be 1 BFE
102 ; FUNC-LABEL: @bfe_i32_test_8
103 ; SI: BUFFER_LOAD_DWORD
104 ; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
106 define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
107 %x = load i32 addrspace(1)* %in, align 4
108 %shl = shl i32 %x, 31
109 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
110 store i32 %bfe, i32 addrspace(1)* %out, align 4
; 1-bit signed field at offset 31 (offset + width = 32). The check below
; expects it to be emitted as an arithmetic shift right by 31.
114 ; FUNC-LABEL: @bfe_i32_test_9
116 ; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
119 define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
120 %x = load i32 addrspace(1)* %in, align 4
121 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
122 store i32 %bfe, i32 addrspace(1)* %out, align 4
; 31-bit signed field at offset 1 (offset + width = 32). The check below
; expects it to be emitted as an arithmetic shift right by 1.
126 ; FUNC-LABEL: @bfe_i32_test_10
128 ; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
131 define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
132 %x = load i32 addrspace(1)* %in, align 4
133 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
134 store i32 %bfe, i32 addrspace(1)* %out, align 4
; 24-bit signed field at offset 8 (offset + width = 32). The check below
; expects it to be emitted as an arithmetic shift right by 8.
138 ; FUNC-LABEL: @bfe_i32_test_11
140 ; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
143 define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
144 %x = load i32 addrspace(1)* %in, align 4
145 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
146 store i32 %bfe, i32 addrspace(1)* %out, align 4
; 8-bit signed field at offset 24 (offset + width = 32). The check below
; expects it to be emitted as an arithmetic shift right by 24.
150 ; FUNC-LABEL: @bfe_i32_test_12
152 ; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
155 define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
156 %x = load i32 addrspace(1)* %in, align 4
157 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
158 store i32 %bfe, i32 addrspace(1)* %out, align 4
; Applies a 1-bit signed extract at offset 31 to a value that was already
; arithmetically shifted right by 31. The check below expects an arithmetic
; shift right by 31 in the output.
162 ; FUNC-LABEL: @bfe_i32_test_13
163 ; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
166 define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
167 %x = load i32 addrspace(1)* %in, align 4
168 %ashr = ashr i32 %x, 31
169 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %ashr, i32 31, i32 1)
170 store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
; Applies a 1-bit signed extract at offset 31 to a value that was logically
; shifted right by 31. After that shift only bit 0 can be set, so bit 31 of
; the extract's source is always clear.
173 ; FUNC-LABEL: @bfe_i32_test_14
177 define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
178 %x = load i32 addrspace(1)* %in, align 4
179 %lshr = lshr i32 %x, 31
180 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %lshr, i32 31, i32 1)
181 store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
; All-constant operands (0, 0, 0): the checks expect the call to fold to the
; constant 0, which is then stored.
184 ; FUNC-LABEL: @bfe_i32_constant_fold_test_0
186 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
187 ; SI: BUFFER_STORE_DWORD [[VREG]],
190 define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
191 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
192 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Zero-width extract of the nonzero constant 12334: the checks expect the call
; to fold to the constant 0.
196 ; FUNC-LABEL: @bfe_i32_constant_fold_test_1
198 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
199 ; SI: BUFFER_STORE_DWORD [[VREG]],
202 define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
203 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
204 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 1-bit field at offset 0 of the constant 0: the checks expect the call to
; fold to the constant 0.
208 ; FUNC-LABEL: @bfe_i32_constant_fold_test_2
210 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
211 ; SI: BUFFER_STORE_DWORD [[VREG]],
214 define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
215 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
216 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 1-bit field at offset 0 of the constant 1: the single set bit is the sign
; bit of the field, so the checks expect the folded constant -1.
220 ; FUNC-LABEL: @bfe_i32_constant_fold_test_3
222 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
223 ; SI: BUFFER_STORE_DWORD [[VREG]],
226 define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
227 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
228 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 1-bit field at offset 0 of 4294967295 (all 32 bits set): the checks expect
; the folded constant -1.
232 ; FUNC-LABEL: @bfe_i32_constant_fold_test_4
234 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
235 ; SI: BUFFER_STORE_DWORD [[VREG]],
238 define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
239 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
240 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 1-bit field at offset 7 of 128 (only bit 7 set): the extracted bit is 1 and
; sign-extends, so the checks expect the folded constant -1.
244 ; FUNC-LABEL: @bfe_i32_constant_fold_test_5
246 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
247 ; SI: BUFFER_STORE_DWORD [[VREG]],
250 define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
251 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
252 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 8-bit field at offset 0 of 128 (0x80): the top bit of the field is set, so
; the checks expect the sign-extended constant 0xffffff80.
256 ; FUNC-LABEL: @bfe_i32_constant_fold_test_6
258 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80
259 ; SI: BUFFER_STORE_DWORD [[VREG]],
262 define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
263 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
264 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 8-bit field at offset 0 of 127 (0x7f): the top bit of the field is clear, so
; the checks expect the unchanged constant 0x7f.
268 ; FUNC-LABEL: @bfe_i32_constant_fold_test_7
270 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
271 ; SI: BUFFER_STORE_DWORD [[VREG]],
274 define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
275 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
276 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 8-bit field at offset 6 of 127: 127 >> 6 = 1, and the field's top bit is
; clear, so the checks expect the folded constant 1.
280 ; FUNC-LABEL: @bfe_i32_constant_fold_test_8
282 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
283 ; SI: BUFFER_STORE_DWORD [[VREG]],
286 define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
287 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
288 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 8-bit field at offset 16 of 65536 (only bit 16 set): the checks expect the
; folded constant 1.
292 ; FUNC-LABEL: @bfe_i32_constant_fold_test_9
294 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
295 ; SI: BUFFER_STORE_DWORD [[VREG]],
298 define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
299 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
300 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 16-bit field at offset 16 of 65535 (only the low 16 bits set): the upper
; half is all zero, so the checks expect the folded constant 0.
304 ; FUNC-LABEL: @bfe_i32_constant_fold_test_10
306 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
307 ; SI: BUFFER_STORE_DWORD [[VREG]],
310 define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
311 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
312 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 4-bit field at offset 4 of 160 (0xa0): the field is 0b1010, which as a 4-bit
; signed value is -6, matching the constant the checks expect.
316 ; FUNC-LABEL: @bfe_i32_constant_fold_test_11
318 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6
319 ; SI: BUFFER_STORE_DWORD [[VREG]],
322 define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
323 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
324 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 1-bit field at offset 31 of 160: bit 31 is clear, so the checks expect the
; folded constant 0.
328 ; FUNC-LABEL: @bfe_i32_constant_fold_test_12
330 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
331 ; SI: BUFFER_STORE_DWORD [[VREG]],
334 define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
335 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
336 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 16-bit field at offset 16 of 131070 (0x1fffe): the upper half is 1, so the
; checks expect the folded constant 1.
340 ; FUNC-LABEL: @bfe_i32_constant_fold_test_13
342 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
343 ; SI: BUFFER_STORE_DWORD [[VREG]],
346 define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
347 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
348 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 30-bit field at offset 2 of 160 (offset + width = 32): 160 >> 2 = 40, the
; constant the checks expect.
352 ; FUNC-LABEL: @bfe_i32_constant_fold_test_14
354 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
355 ; SI: BUFFER_STORE_DWORD [[VREG]],
358 define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
359 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
360 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 28-bit field at offset 4 of 160 (offset + width = 32): 160 >> 4 = 10, the
; constant the checks expect.
364 ; FUNC-LABEL: @bfe_i32_constant_fold_test_15
366 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
367 ; SI: BUFFER_STORE_DWORD [[VREG]],
370 define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
371 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
372 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 7-bit field at offset 1 of 4294967295 (all bits set): every bit of the field
; is 1, so the checks expect the sign-extended constant -1.
376 ; FUNC-LABEL: @bfe_i32_constant_fold_test_16
378 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
379 ; SI: BUFFER_STORE_DWORD [[VREG]],
382 define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
383 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
384 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 31-bit field at offset 1 of 255 (offset + width = 32): 255 >> 1 = 127, so
; the checks expect the constant 0x7f.
388 ; FUNC-LABEL: @bfe_i32_constant_fold_test_17
390 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
391 ; SI: BUFFER_STORE_DWORD [[VREG]],
394 define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
395 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
396 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; 1-bit field at offset 31 of 255: bit 31 is clear, so the checks expect the
; folded constant 0.
400 ; FUNC-LABEL: @bfe_i32_constant_fold_test_18
402 ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
403 ; SI: BUFFER_STORE_DWORD [[VREG]],
406 define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
407 %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
408 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
; Extracts a 24-bit signed field at offset 0, then re-applies a 24-bit sign
; extension by shifting left 8 and arithmetically right 8. The active checks
; expect the two shifts to remain in the output; the XSI lines are inactive
; and record the desired single-BFE form.
; NOTE(review): the inactive XSI line expects a width of 8, but the intrinsic
; call below uses a width of 24 - confirm the intended operand before
; enabling those lines.
412 ; XXX - This should really be a single BFE, but the sext_inreg of the
413 ; extended type i24 is never custom lowered.
414 ; FUNC-LABEL: @bfe_sext_in_reg_i24
415 ; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
416 ; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
417 ; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
418 ; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
421 ; XSI: BUFFER_STORE_DWORD [[BFE]],
422 define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
423 %x = load i32 addrspace(1)* %in, align 4
424 %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
425 %shl = shl i32 %bfe, 8
426 %ashr = ashr i32 %shl, 8
427 store i32 %ashr, i32 addrspace(1)* %out, align 4