R600: Implement isZExtFree.

author Matt Arsenault <Matthew.Arsenault@amd.com>

Thu, 27 Mar 2014 17:23:31 +0000 (17:23 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Thu, 27 Mar 2014 17:23:31 +0000 (17:23 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 27 Mar 2014 17:23:31 +0000 (17:23 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 27 Mar 2014 17:23:31 +0000 (17:23 +0000)
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp

index ba7ce13491d46aa580c3ecf4bec29870745ff4da..797063d0e102a9eff27357bf05224e34087426cc 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -275,6 +275,22 @@ bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
           (Dest->getPrimitiveSizeInBits() % 32 == 0);
  }
  
+bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
+  const DataLayout *DL = getDataLayout();
+  unsigned SrcSize = DL->getTypeSizeInBits(Src->getScalarType());
+  unsigned DestSize = DL->getTypeSizeInBits(Dest->getScalarType());
+  // Only a 32-bit to 64-bit (scalar or element) zero-extension is free.
+  return SrcSize == 32 && DestSize == 64;
+}
+
+bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
+  // Any register load of a 64-bit value really requires two 32-bit moves. For
+  // all practical purposes, the extra mov of 0 needed to form the 64-bit value
+  // is free. As used, this enables reducing 64-bit operations to 32-bit ones,
+  // which is always good.
+  return Src == MVT::i32 && Dest == MVT::i64;
+}
+
  bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
    // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
    // limited number of native 64-bit operations. Shrinking an operation to fit
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h

index 2d40e2642642effd98175d17f8dbb000dcab16ee..48298f209799ad2af6ab1642e55398858de282c3 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -87,6 +87,10 @@ public:
    virtual bool isFNegFree(EVT VT) const override;
    virtual bool isTruncateFree(EVT Src, EVT Dest) const override;
    virtual bool isTruncateFree(Type *Src, Type *Dest) const override;
+
+  virtual bool isZExtFree(Type *Src, Type *Dest) const override;
+  virtual bool isZExtFree(EVT Src, EVT Dest) const override;
+
    virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
  
    virtual MVT getVectorIdxTy() const override;
diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll

index 82cd8a9170d25acaebad37441f6ef68c465b43fd..7081b077d0ce7d14774a1b1cc0c5ed596c654ddd 100644 (file)
--- a/test/CodeGen/R600/add_i64.ll
+++ b/test/CodeGen/R600/add_i64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
  
  
  declare i32 @llvm.r600.read.tidig.x() readnone
@@ -68,3 +68,17 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add
    store <2 x i64> %result, <2 x i64> addrspace(1)* %out
    ret void
  }
+
+; SI-LABEL: @trunc_i64_add_to_i32
+; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]],
+; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]],
+; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]]
+; SI-NOT: ADDC
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: BUFFER_STORE_DWORD [[VRESULT]],
+define void @trunc_i64_add_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+  %add = add i64 %b, %a
+  %trunc = trunc i64 %add to i32
+  store i32 %trunc, i32 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll

index 8c27e28df16499f2c6c1e6fbdb15b24404470708..e1761485835ff4c92dffbd4299686a36fb8935ed 100644 (file)
--- a/test/CodeGen/R600/mul.ll
+++ b/test/CodeGen/R600/mul.ll
@@ -40,3 +40,15 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
    store <4 x i32> %result, <4 x i32> addrspace(1)* %out
    ret void
  }
+
+; SI-CHECK-LABEL: @trunc_i64_mul_to_i32
+; SI-CHECK: S_LOAD_DWORD
+; SI-CHECK: S_LOAD_DWORD
+; SI-CHECK: V_MUL_LO_I32
+; SI-CHECK: BUFFER_STORE_DWORD
+define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+  %mul = mul i64 %b, %a
+  %trunc = trunc i64 %mul to i32
+  store i32 %trunc, i32 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll

index be984b2712246fd22ae08f1f4bca5897db982bd3..2cc991ecc296296cffc549c81c2ab927f6f418ef 100644 (file)
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -114,3 +114,16 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64
    store i64 %or, i64 addrspace(1)* %out
    ret void
  }
+
+; SI-LABEL: @trunc_i64_or_to_i32
+; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]],
+; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]],
+; SI: S_OR_B32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]]
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: BUFFER_STORE_DWORD [[VRESULT]],
+define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+  %add = or i64 %b, %a
+  %trunc = trunc i64 %add to i32
+  store i32 %trunc, i32 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll

index ee9f499662c008412ada2c88a1cbdaea17b1ccc1..eef3f07441c49730d23e6fbc7d887a409db92f09 100644 (file)
--- a/test/CodeGen/R600/sext-in-reg.ll
+++ b/test/CodeGen/R600/sext-in-reg.ll
@@ -90,10 +90,10 @@ define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
  }
  
  ; FUNC-LABEL: @sext_in_reg_i32_to_i64
-; SI: S_LOAD_DWORDX2
-; SI: S_ADD_I32
-; SI-NEXT: S_ADDC_U32
-; SI-NEXT: S_ASHR_I32 s{{[0-9]+}}, s{{[0-9]+}}, 31
+; SI: S_LOAD_DWORD
+; SI: S_LOAD_DWORD
+; SI: S_ADD_I32 [[ADD:s[0-9]+]],
+; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
  ; SI: BUFFER_STORE_DWORDX2
  define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
    %c = add i64 %a, %b
diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll

index e775fe7c989c0aad33a2e27e7e1b305aeb1e769a..8a759dc21c0ef7787a13ec3d89afb21c9dfea287 100644 (file)
--- a/test/CodeGen/R600/trunc.ll
+++ b/test/CodeGen/R600/trunc.ll
@@ -34,11 +34,12 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
  ; SI: V_ADD_I32_e32 v[[LO_ADD:[0-9]+]], s[[LO_SREG]],
  ; SI: V_LSHL_B64 v{{\[}}[[LO_VREG:[0-9]+]]:{{[0-9]+\]}}, v{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2
  ; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
-define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) {
+define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
    %aa = add i64 %a, 234 ; Prevent shrinking store.
    %b = shl i64 %aa, 2
    %result = trunc i64 %b to i32
    store i32 %result, i32 addrspace(1)* %out, align 4
+  store i64 %b, i64 addrspace(1)* %out2, align 8 ; Prevent reducing ops to 32-bits
    ret void
  }
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 27 Mar 2014 17:23:31 +0000 (17:23 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 27 Mar 2014 17:23:31 +0000 (17:23 +0000)
lib/Target/R600/AMDGPUISelLowering.cpp		patch \| blob \| history
lib/Target/R600/AMDGPUISelLowering.h		patch \| blob \| history
test/CodeGen/R600/add_i64.ll		patch \| blob \| history
test/CodeGen/R600/mul.ll		patch \| blob \| history
test/CodeGen/R600/or.ll		patch \| blob \| history
test/CodeGen/R600/sext-in-reg.ll		patch \| blob \| history
test/CodeGen/R600/trunc.ll		patch \| blob \| history