From: Matt Arsenault Date: Thu, 16 Oct 2014 20:07:40 +0000 (+0000) Subject: R600: Fix nonsensical implementation of computeKnownBits for BFE X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=0134a9bed3f148b90b8433737cc07946ae6b26b9 R600: Fix nonsensical implementation of computeKnownBits for BFE This was resulting in invalid simplifications of sdiv. The old code reported the high bits of a BFE_I32 result as known ones, but a signed bitfield extract sign-extends the extracted field, so those bits may be either all zeros or all ones; only BFE_U32 has known-zero high bits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219953 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index bcac5408f37..6eebccdf9e2 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -2377,11 +2377,7 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( unsigned BitWidth = 32; uint32_t Width = CWidth->getZExtValue() & 0x1f; - // FIXME: This could do a lot more. If offset is 0, should be the same as - // sign_extend_inreg implementation, but that involves duplicating it. 
- if (Opc == AMDGPUISD::BFE_I32) - KnownOne = APInt::getHighBitsSet(BitWidth, BitWidth - Width); - else + if (Opc == AMDGPUISD::BFE_U32) KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width); break; diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll index 7c9d3e8bb20..c230a2e2afb 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll @@ -424,3 +424,18 @@ define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) store i32 %ashr, i32 addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: @simplify_demanded_bfe_sdiv +; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]] +; SI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16 +; SI: V_LSHRREV_B32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]] +; SI: V_ADD_I32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]] +; SI: V_ASHRREV_I32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]] +; SI: BUFFER_STORE_DWORD [[TMP2]] +define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %src = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone + %div = sdiv i32 %bfe, 2 + store i32 %div, i32 addrspace(1)* %out, align 4 + ret void +}