R600/SI: Prefer selecting more e64 instruction forms.

author Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 15 Sep 2014 17:15:02 +0000 (17:15 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 15 Sep 2014 17:15:02 +0000 (17:15 +0000)
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td

index 8082d3254ef2620c99108abdd34a277f618152de..e4d7dc79be170acc64a53551e75c6f55a838c1c9 100644 (file)
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1976,9 +1976,9 @@ class BinOp64Pat <SDNode node, Instruction inst> : Pat <
                    (EXTRACT_SUBREG i64:$src1, sub1)), sub1)
  >;
  
-def : BinOp64Pat <and, V_AND_B32_e32>;
-def : BinOp64Pat <or, V_OR_B32_e32>;
-def : BinOp64Pat <xor, V_XOR_B32_e32>;
+def : BinOp64Pat <and, V_AND_B32_e64>;
+def : BinOp64Pat <or, V_OR_B32_e64>;
+def : BinOp64Pat <xor, V_XOR_B32_e64>;
  
  class SextInReg <ValueType vt, int ShiftAmt> : Pat <
    (sext_inreg i32:$src0, vt),
@@ -1990,7 +1990,7 @@ def : SextInReg <i16, 16>;
  
  def : Pat <
    (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
-  (V_BCNT_U32_B32_e32 $popcnt, $val)
+  (V_BCNT_U32_B32_e64 $popcnt, $val)
  >;
  
  def : Pat <
@@ -2010,7 +2010,7 @@ def : Pat <
  
  def : Pat <
    (addc i32:$src0, i32:$src1),
-  (V_ADD_I32_e32 $src0, $src1)
+  (V_ADD_I32_e64 $src0, $src1)
  >;
  
  /********** ======================= **********/
@@ -3070,13 +3070,13 @@ def : Pat <
  
  def : Pat <
    (i1 (trunc i32:$a)),
-  (V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1)
+  (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
  >;
  
  //============================================================================//
  // Miscellaneous Optimization Patterns
  //============================================================================//
  
-def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e32>;
+def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
  
  } // End isSI predicate
diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/R600/and.ll

index 7bbbec2d9691b1dae2819387be312ddd35e4f9a0..ccb97e38f3b3061ce2400ccf27df41cb5cccd9cd 100644 (file)
--- a/test/CodeGen/R600/and.ll
+++ b/test/CodeGen/R600/and.ll
@@ -129,11 +129,30 @@ endif:
  }
  
  ; FUNC-LABEL: @v_and_constant_i64
-; SI: V_AND_B32
-; SI: V_AND_B32
+; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
  define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
    %a = load i64 addrspace(1)* %aptr, align 8
    %and = and i64 %a, 1234567
    store i64 %and, i64 addrspace(1)* %out, align 8
    ret void
  }
+
+; FIXME: Replace and 0 with mov 0
+; FUNC-LABEL: @v_and_inline_imm_i64
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
+define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+  %a = load i64 addrspace(1)* %aptr, align 8
+  %and = and i64 %a, 64
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @s_and_inline_imm_i64
+; SI: S_AND_B64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
+define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %and = and i64 %a, 64
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll

index d18702a1de98784eb9ce47fe888b664090a4df6b..107158de54d82a358f2cb605ce3838d21b1b10e4 100644 (file)
--- a/test/CodeGen/R600/bfi_int.ll
+++ b/test/CodeGen/R600/bfi_int.ll
@@ -38,8 +38,8 @@ entry:
  ; R600-CHECK: @bfi_sha256_ma
  ; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
  ; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
-; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{[sv][0-9]+, v[0-9]+}}
-; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{[sv][0-9]+, [sv][0-9]+}}
+; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
+; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
  
  define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
  entry:
diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll

index c7c406a57e67cfdd3b36aff8ad65f3f183638051..fd128672a174030adc250d0df9c37ffce08e0fc3 100644 (file)
--- a/test/CodeGen/R600/ctpop.ll
+++ b/test/CodeGen/R600/ctpop.ll
@@ -42,8 +42,7 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
  ; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
  ; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
  ; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
-; SI-NOT: ADD
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
+; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
  ; SI: BUFFER_STORE_DWORD [[RESULT]],
  ; SI: S_ENDPGM
  
@@ -59,6 +58,20 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
    ret void
  }
  
+; FUNC-LABEL: @v_ctpop_add_sgpr_i32
+; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
+; SI-NEXT: S_WAITCNT
+; SI-NEXT: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
+; SI-NEXT: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
+  %val0 = load i32 addrspace(1)* %in0, align 4
+  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
+  %add = add i32 %ctpop0, %sval
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
  ; FUNC-LABEL: @v_ctpop_v2i32:
  ; SI: V_BCNT_U32_B32_e32
  ; SI: V_BCNT_U32_B32_e32
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.ll

index 27cf6b28fd6699051643c0c2890cbb43d3192cf3..449b3afc381ef3de7a5ad2317513830d3e7f3871 100644 (file)
--- a/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
@@ -4,10 +4,29 @@
  declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone
  
  ; FUNC-LABEL: @rsq_f32
-; SI: V_RSQ_F32_e32
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}
  ; EG: RECIPSQRT_IEEE
  define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind {
    %rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone
    store float %rsq, float addrspace(1)* %out, align 4
    ret void
  }
+
+; TODO: Really these should be constant folded
+; FUNC-LABEL: @rsq_f32_constant_4.0
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, 4.0
+; EG: RECIPSQRT_IEEE
+define void @rsq_f32_constant_4.0(float addrspace(1)* %out) nounwind {
+  %rsq = call float @llvm.AMDGPU.rsq.f32(float 4.0) nounwind readnone
+  store float %rsq, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @rsq_f32_constant_100.0
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, 0x42c80000
+; EG: RECIPSQRT_IEEE
+define void @rsq_f32_constant_100.0(float addrspace(1)* %out) nounwind {
+  %rsq = call float @llvm.AMDGPU.rsq.f32(float 100.0) nounwind readnone
+  store float %rsq, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/rsq.ll b/test/CodeGen/R600/rsq.ll

index 3069f62724b7a670a61b7759b2ad43bfb1929f32..a9f3013d3e44f10acb430ddda093b35aa3d67253 100644 (file)
--- a/test/CodeGen/R600/rsq.ll
+++ b/test/CodeGen/R600/rsq.ll
@@ -26,3 +26,13 @@ define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noa
    store double %div, double addrspace(1)* %out, align 4
    ret void
  }
+
+; SI-LABEL: @rsq_f32_sgpr
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+; SI: S_ENDPGM
+define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
+  %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
+  %div = fdiv float 1.0, %sqrt
+  store float %div, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll

index 2f4b48236d416d508eb18bfbf9fd119810790264..d3b191db282384160b359c4ae705836694d1b145 100644 (file)
--- a/test/CodeGen/R600/trunc.ll
+++ b/test/CodeGen/R600/trunc.ll
@@ -46,9 +46,20 @@ define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64
  }
  
  ; SI-LABEL: @trunc_i32_to_i1:
-; SI: V_AND_B32
+; SI: V_AND_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
  ; SI: V_CMP_EQ_I32
-define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
+define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
+  %a = load i32 addrspace(1)* %ptr, align 4
+  %trunc = trunc i32 %a to i1
+  %result = select i1 %trunc, i32 1, i32 0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @sgpr_trunc_i32_to_i1:
+; SI: V_AND_B32_e64 v{{[0-9]+}}, 1, s{{[0-9]+}}
+; SI: V_CMP_EQ_I32
+define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
    %trunc = trunc i32 %a to i1
    %result = select i1 %trunc, i32 1, i32 0
    store i32 %result, i32 addrspace(1)* %out, align 4
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Mon, 15 Sep 2014 17:15:02 +0000 (17:15 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Mon, 15 Sep 2014 17:15:02 +0000 (17:15 +0000)
lib/Target/R600/SIInstructions.td		patch | blob | history
test/CodeGen/R600/and.ll		patch | blob | history
test/CodeGen/R600/bfi_int.ll		patch | blob | history
test/CodeGen/R600/ctpop.ll		patch | blob | history
test/CodeGen/R600/llvm.AMDGPU.rsq.ll		patch | blob | history
test/CodeGen/R600/rsq.ll		patch | blob | history
test/CodeGen/R600/trunc.ll		patch | blob | history