; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Intrinsic under test: unsigned bit-field extract (src, offset, width).
declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone
; Basic operand-kind coverage: all-register, register/immediate mixes,
; and zero-width extracts (which must fold to 0).

; FUNC-LABEL: {{^}}bfe_u32_arg_arg_arg:
define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
  ; NOTE(review): %src2 is unused; the width operand reuses %src1 — confirm intent.
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_arg_imm:
define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_imm_arg:
define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_imm_arg_arg:
define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
; A bfe of the full zero-extended range is redundant with the extension.

; FUNC-LABEL: {{^}}bfe_u32_zextload_i8:
; SI: buffer_load_ubyte
define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
  %load = load i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
; SI: buffer_load_dword
; SI-NEXT: v_and_b32_e32
define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
; SI: buffer_load_dword
; SI-NEXT: v_and_b32_e32
define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; zext-in-reg followed by an extract at a non-zero offset: the demanded
; mask narrows to the bits the extract can observe.

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
; SI: buffer_load_dword
define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
; SI: buffer_load_dword
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
; SI: buffer_load_dword
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
; SI: buffer_load_dword
define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; Combines of bfe with surrounding shifts.

; FUNC-LABEL: {{^}}bfe_u32_test_1:
; SI: buffer_load_dword
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; (x << 31 >> 31) has bit 31 clear, so extracting bit 31 folds to 0.
; FUNC-LABEL: {{^}}bfe_u32_test_4:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; FUNC-LABEL: {{^}}bfe_u32_test_5:
; SI: buffer_load_dword
; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_6:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_7:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; bfe patterns that should lower to a single shift.

; FUNC-LABEL: {{^}}bfe_u32_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; FUNC-LABEL: {{^}}bfe_u32_test_13:
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  ; NOTE(review): %shl is an ashr, not a shl — misleading name kept from upstream.
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_14:
; SI-NOT: {{[^@]}}bfe
define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; Constant folding of bfe.u32 — the result must be computed at compile
; time; no bfe instruction should survive.

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_0:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_1:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_2:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_3:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_4:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_5:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_6:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_7:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_13:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_14:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_15:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_16:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_17:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_18:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
; SI: buffer_load_dword [[ARG:v[0-9]+]]
; SI: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; SI: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; SI-DAG: buffer_store_dword [[AND]]
; SI-DAG: buffer_store_dword [[BFE]]
define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
                                            i32 addrspace(1)* %out1,
                                            i32 addrspace(1)* %in) nounwind {
  %src = load i32 addrspace(1)* %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
  store i32 %and, i32 addrspace(1)* %out1, align 4
  ret void
}