Implement computeKnownBits for min/max nodes

author Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 9 Jun 2015 00:52:41 +0000 (00:52 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 9 Jun 2015 00:52:41 +0000 (00:52 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 9 Jun 2015 00:52:41 +0000 (00:52 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 9 Jun 2015 00:52:41 +0000 (00:52 +0000)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index 804c0805c346ea834dae3588d5192861f29977a4..cf51e756d847d0b993a878152b2bc393ffe1fb8d 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2432,6 +2432,19 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
      KnownOne = KnownOne.trunc(BitWidth);
      break;
    }
+  case ISD::SMIN:
+  case ISD::SMAX:
+  case ISD::UMIN:
+  case ISD::UMAX: {
+    APInt Op0Zero, Op0One;
+    APInt Op1Zero, Op1One;
+    computeKnownBits(Op.getOperand(0), Op0Zero, Op0One, Depth + 1);
+    computeKnownBits(Op.getOperand(1), Op1Zero, Op1One, Depth + 1);
+    // Keep only the bits that are known identically for both operands.
+    KnownZero = Op0Zero & Op1Zero;
+    KnownOne = Op0One & Op1One;
+    break;
+  }
    case ISD::FrameIndex:
    case ISD::TargetFrameIndex:
      if (unsigned Align = InferPtrAlignment(Op)) {
@@ -2535,7 +2548,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
      if (Tmp == 1) return 1;  // Early out.
      Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
      return std::min(Tmp, Tmp2);
-
+  case ISD::SMIN:
+  case ISD::SMAX:
+  case ISD::UMIN:
+  case ISD::UMAX:
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+    if (Tmp == 1)
+      return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+    return std::min(Tmp, Tmp2);
    case ISD::SADDO:
    case ISD::UADDO:
    case ISD::SSUBO:
diff --git a/test/CodeGen/R600/max.ll b/test/CodeGen/R600/max.ll

index 1aa9e68830117a9c217b0fe3c4c6f2fc10bd84dc..fef3e2f0a21ca7917970a9304295078db4666339 100644 (file)
--- a/test/CodeGen/R600/max.ll
+++ b/test/CodeGen/R600/max.ll
@@ -115,3 +115,54 @@ define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
    store i32 %val, i32 addrspace(1)* %out, align 4
    ret void
  }
+
+; Make sure the redundant 'and' is removed.
+; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umax_ugt_i16:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_max_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
+; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
+; SI-NEXT: buffer_store_dword [[VMIN]]
+define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
+  %a.ext = zext i16 %a to i32
+  %b.ext = zext i16 %b to i32
+  %cmp = icmp ugt i32 %a.ext, %b.ext
+  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
+  %mask = and i32 %val, 65535
+  store i32 %mask, i32 addrspace(1)* %out
+  ret void
+}
+
+; Make sure the redundant sign_extend_inreg is removed.
+
+; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_max_i32 [[MIN:s[0-9]+]], [[A]], [[B]]
+; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
+; SI-NEXT: buffer_store_dword [[VMIN]]
+define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
+  %a.ext = sext i16 %a to i32
+  %b.ext = sext i16 %b to i32
+  %cmp = icmp sgt i32 %a.ext, %b.ext
+  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
+  %shl = shl i32 %val, 16
+  %sextinreg = ashr i32 %shl, 16
+  store i32 %sextinreg, i32 addrspace(1)* %out
+  ret void
+}
+
+; FIXME: Should be able to match min/max through extends inserted by
+; legalization.
+
+; FUNC-LABEL: {{^}}s_test_imin_sge_i16:
+; SI: s_sext_i32_i16
+; SI: s_sext_i32_i16
+; SI: v_cmp_ge_i32_e32
+; SI: v_cndmask_b32
+define void @s_test_imin_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
+  %cmp = icmp sge i16 %a, %b
+  %val = select i1 %cmp, i16 %a, i16 %b
+  store i16 %val, i16 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/min.ll b/test/CodeGen/R600/min.ll

index 275e9a7d899bf3e077a0766cd46845abb63956b3..0332d1a8e407cfa3865529ac22cf3f316e47e887 100644 (file)
--- a/test/CodeGen/R600/min.ll
+++ b/test/CodeGen/R600/min.ll
@@ -136,3 +136,54 @@ define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace
    store i1 %cmp, i1 addrspace(1)* %outgep1
    ret void
  }
+
+; Make sure the redundant 'and' is removed.
+; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
+; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
+; SI-NEXT: buffer_store_dword [[VMIN]]
+define void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
+  %a.ext = zext i16 %a to i32
+  %b.ext = zext i16 %b to i32
+  %cmp = icmp ult i32 %a.ext, %b.ext
+  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
+  %mask = and i32 %val, 65535
+  store i32 %mask, i32 addrspace(1)* %out
+  ret void
+}
+
+; Make sure the redundant sign_extend_inreg is removed.
+
+; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_min_i32 [[MIN:s[0-9]+]], [[A]], [[B]]
+; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
+; SI-NEXT: buffer_store_dword [[VMIN]]
+define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
+  %a.ext = sext i16 %a to i32
+  %b.ext = sext i16 %b to i32
+  %cmp = icmp slt i32 %a.ext, %b.ext
+  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
+  %shl = shl i32 %val, 16
+  %sextinreg = ashr i32 %shl, 16
+  store i32 %sextinreg, i32 addrspace(1)* %out
+  ret void
+}
+
+; FIXME: Should be able to match min/max through extends inserted by
+; legalization.
+
+; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
+; SI: s_sext_i32_i16
+; SI: s_sext_i32_i16
+; SI: v_cmp_le_i32_e32
+; SI: v_cndmask_b32
+define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
+  %cmp = icmp sle i16 %a, %b
+  %val = select i1 %cmp, i16 %a, i16 %b
+  store i16 %val, i16 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll

index d9ad4935968d2db73da4a149051907dd31365ced..5aedda2ce1a9cd3fbed4a1e2d3927de1fef89dae 100644 (file)
--- a/test/CodeGen/R600/sext-in-reg.ll
+++ b/test/CodeGen/R600/sext-in-reg.ll
@@ -450,13 +450,10 @@ define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x
    ret void
  }
  
-; FIXME: The BFE should really be eliminated. I think it should happen
-; when computeKnownBitsForTargetNode is implemented for imax.
-
  ; FUNC-LABEL: {{^}}sext_in_reg_to_illegal_type:
  ; SI: buffer_load_sbyte
  ; SI: v_max_i32
-; SI: v_bfe_i32
+; SI-NOT: bfe
  ; SI: buffer_store_short
  define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
    %tmp5 = load i8, i8 addrspace(1)* %src, align 1
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 9 Jun 2015 00:52:41 +0000 (00:52 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 9 Jun 2015 00:52:41 +0000 (00:52 +0000)
lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch \| blob \| history
test/CodeGen/R600/max.ll		patch \| blob \| history
test/CodeGen/R600/min.ll		patch \| blob \| history
test/CodeGen/R600/sext-in-reg.ll		patch \| blob \| history