From 0c6d96cf160f2a6c63f59b5ab7e7a6bbe903ede3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 27 Mar 2014 17:23:31 +0000
Subject: [PATCH] R600: Implement isZExtFree.

This allows 64-bit operations that are truncated to be reduced to 32-bit ones.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204946 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDGPUISelLowering.cpp | 16 ++++++++++++++++
 lib/Target/R600/AMDGPUISelLowering.h   |  4 ++++
 test/CodeGen/R600/add_i64.ll           | 16 +++++++++++++++-
 test/CodeGen/R600/mul.ll               | 12 ++++++++++++
 test/CodeGen/R600/or.ll                | 13 +++++++++++++
 test/CodeGen/R600/sext-in-reg.ll       |  8 ++++----
 test/CodeGen/R600/trunc.ll             |  3 ++-
 7 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index ba7ce13491d..797063d0e10 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -275,6 +275,22 @@ bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
          (Dest->getPrimitiveSizeInBits() % 32 == 0);
 }
 
+bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
+  const DataLayout *DL = getDataLayout();
+  unsigned SrcSize = DL->getTypeSizeInBits(Src->getScalarType());
+  unsigned DestSize = DL->getTypeSizeInBits(Dest->getScalarType());
+
+  return SrcSize == 32 && DestSize == 64;
+}
+
+bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
+  // Any register load of a 64-bit value really requires 2 32-bit moves. For all
+  // practical purposes, the extra mov 0 to load a 64-bit value is free. As used,
+  // this will enable reducing 64-bit operations to 32-bit, which is always
+  // good.
+  return Src == MVT::i32 && Dest == MVT::i64;
+}
+
 bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
   // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
   // limited number of native 64-bit operations. Shrinking an operation to fit
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index 2d40e264264..48298f20979 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -87,6 +87,10 @@ public:
   virtual bool isFNegFree(EVT VT) const override;
   virtual bool isTruncateFree(EVT Src, EVT Dest) const override;
   virtual bool isTruncateFree(Type *Src, Type *Dest) const override;
+
+  virtual bool isZExtFree(Type *Src, Type *Dest) const override;
+  virtual bool isZExtFree(EVT Src, EVT Dest) const override;
+
   virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
 
   virtual MVT getVectorIdxTy() const override;
diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll
index 82cd8a9170d..7081b077d0c 100644
--- a/test/CodeGen/R600/add_i64.ll
+++ b/test/CodeGen/R600/add_i64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
 
 declare i32 @llvm.r600.read.tidig.x() readnone
 
@@ -68,3 +68,17 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
 }
+
+; SI-LABEL: @trunc_i64_add_to_i32
+; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]],
+; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]],
+; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]]
+; SI-NOT: ADDC
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: BUFFER_STORE_DWORD [[VRESULT]],
+define void @trunc_i64_add_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+  %add = add i64 %b, %a
+  %trunc = trunc i64 %add to i32
+  store i32 %trunc, i32 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll
index 8c27e28df16..e1761485835 100644
--- a/test/CodeGen/R600/mul.ll
+++ b/test/CodeGen/R600/mul.ll
@@ -40,3 +40,15 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
+
+; SI-CHECK-LABEL: @trunc_i64_mul_to_i32
+; SI-CHECK: S_LOAD_DWORD
+; SI-CHECK: S_LOAD_DWORD
+; SI-CHECK: V_MUL_LO_I32
+; SI-CHECK: BUFFER_STORE_DWORD
+define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+  %mul = mul i64 %b, %a
+  %trunc = trunc i64 %mul to i32
+  store i32 %trunc, i32 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll
index be984b27122..2cc991ecc29 100644
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -114,3 +114,16 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64
   store i64 %or, i64 addrspace(1)* %out
   ret void
 }
+
+; SI-LABEL: @trunc_i64_or_to_i32
+; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]],
+; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]],
+; SI: S_OR_B32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]]
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: BUFFER_STORE_DWORD [[VRESULT]],
+define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+  %add = or i64 %b, %a
+  %trunc = trunc i64 %add to i32
+  store i32 %trunc, i32 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll
index ee9f499662c..eef3f07441c 100644
--- a/test/CodeGen/R600/sext-in-reg.ll
+++ b/test/CodeGen/R600/sext-in-reg.ll
@@ -90,10 +90,10 @@ define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
 }
 
 ; FUNC-LABEL: @sext_in_reg_i32_to_i64
-; SI: S_LOAD_DWORDX2
-; SI: S_ADD_I32
-; SI-NEXT: S_ADDC_U32
-; SI-NEXT: S_ASHR_I32 s{{[0-9]+}}, s{{[0-9]+}}, 31
+; SI: S_LOAD_DWORD
+; SI: S_LOAD_DWORD
+; SI: S_ADD_I32 [[ADD:s[0-9]+]],
+; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
 ; SI: BUFFER_STORE_DWORDX2
 define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %c = add i64 %a, %b
diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll
index e775fe7c989..8a759dc21c0 100644
--- a/test/CodeGen/R600/trunc.ll
+++ b/test/CodeGen/R600/trunc.ll
@@ -34,11 +34,12 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
 ; SI: V_ADD_I32_e32 v[[LO_ADD:[0-9]+]], s[[LO_SREG]],
 ; SI: V_LSHL_B64 v{{\[}}[[LO_VREG:[0-9]+]]:{{[0-9]+\]}}, v{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2
 ; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
-define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) {
+define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
   %aa = add i64 %a, 234 ; Prevent shrinking store.
   %b = shl i64 %aa, 2
   %result = trunc i64 %b to i32
   store i32 %result, i32 addrspace(1)* %out, align 4
+  store i64 %b, i64 addrspace(1)* %out2, align 8 ; Prevent reducing ops to 32-bits
   ret void
 }
 
-- 
2.34.1
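
A rough sketch of the reduction this enables, written as LLVM IR purely for
illustration: the combine itself runs on the SelectionDAG, and the function
names below are made up. The machine-code side of the same pattern is what the
trunc_i64_add_to_i32 checks above verify (a single S_ADD_I32 and no ADDC).

  ; A 64-bit add whose result is only ever truncated to 32 bits.
  define i32 @narrow_example(i64 %a, i64 %b) {
    %add = add i64 %b, %a
    %trunc = trunc i64 %add to i32
    ret i32 %trunc
  }

  ; With isZExtFree(i32, i64) returning true, the backend may in effect treat
  ; it as the 32-bit form below, so only one 32-bit add is emitted.
  define i32 @narrow_example_reduced(i64 %a, i64 %b) {
    %a.lo = trunc i64 %a to i32
    %b.lo = trunc i64 %b to i32
    %add = add i32 %b.lo, %a.lo
    ret i32 %add
  }

The or and mul tests in this patch check the same pattern for S_OR_B32 and
V_MUL_LO_I32.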