From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Sat, 19 Dec 2015 01:39:48 +0000 (+0000)
Subject: Fix broken type legalization of min/max
X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=4b9d868cc774eba681a93a61f056105b7dfd9c8f

Fix broken type legalization of min/max

Type promotion of smin/smax/umin/umax was any-extending the operands,
but sext (signed) or zext (unsigned) is required.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256074 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 63c9cc52871..3131ca10145 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -76,9 +76,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::SELECT_CC:   Res = PromoteIntRes_SELECT_CC(N); break;
   case ISD::SETCC:       Res = PromoteIntRes_SETCC(N); break;
   case ISD::SMIN:
-  case ISD::SMAX:
+  case ISD::SMAX:        Res = PromoteIntRes_SExtOrZExtIntBinOp(N, true); break;
   case ISD::UMIN:
-  case ISD::UMAX:        Res = PromoteIntRes_SimpleIntBinOp(N); break;
+  case ISD::UMAX:        Res = PromoteIntRes_SExtOrZExtIntBinOp(N, false); break;
+
   case ISD::SHL:         Res = PromoteIntRes_SHL(N); break;
   case ISD::SIGN_EXTEND_INREG:
                          Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
@@ -660,6 +661,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
                      LHS.getValueType(), LHS, RHS);
 }
 
+/// Promote the result of a two-operand integer node whose promoted operands
+/// must be sign-extended (Signed) or zero-extended (!Signed).
+SDValue DAGTypeLegalizer::PromoteIntRes_SExtOrZExtIntBinOp(SDNode *N,
+                                                           bool Signed) {
+  // Extend each operand explicitly, operand 0 first and then operand 1,
+  // with the extension kind chosen by the caller.
+  SDValue Op0 = Signed ? SExtPromotedInteger(N->getOperand(0))
+                       : ZExtPromotedInteger(N->getOperand(0));
+  SDValue Op1 = Signed ? SExtPromotedInteger(N->getOperand(1))
+                       : ZExtPromotedInteger(N->getOperand(1));
+
+  // Rebuild the node with the same opcode, typed like the extended
+  // operand 0.
+  return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 267a1145a0a..e121e3bc6fa 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -276,6 +276,7 @@ private:
   SDValue PromoteIntRes_SETCC(SDNode *N);
   SDValue PromoteIntRes_SHL(SDNode *N);
   SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+  SDValue PromoteIntRes_SExtOrZExtIntBinOp(SDNode *N, bool Signed);
   SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
   SDValue PromoteIntRes_SRA(SDNode *N);
   SDValue PromoteIntRes_SRL(SDNode *N);
diff --git a/test/CodeGen/AMDGPU/max.ll b/test/CodeGen/AMDGPU/max.ll
index 3f8662d507e..eeb915c10a9 100644
--- a/test/CodeGen/AMDGPU/max.ll
+++ b/test/CodeGen/AMDGPU/max.ll
@@ -53,6 +53,23 @@ define void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
   ret void
 }
 
+; FUNC-LABEL: {{^}}v_test_imax_sge_i8:
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: v_max_i32_e32
+define void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone ; index applied to %aptr, %bptr and %out below
+  %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+  %a = load i8, i8 addrspace(1)* %gep0, align 1 ; both operands are i8 loaded from memory
+  %b = load i8, i8 addrspace(1)* %gep1, align 1
+  %cmp = icmp sge i8 %a, %b ; signed a >= b
+  %val = select i1 %cmp, i8 %a, i8 %b ; with the compare above: signed maximum of %a and %b
+  store i8 %val, i8 addrspace(1)* %outgep, align 1
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i32:
 ; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
 define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
@@ -132,6 +149,23 @@ define void @s_test_umax_uge_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, <
   ret void
 }
 
+; FUNC-LABEL: {{^}}v_test_umax_uge_i8:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: v_max_u32_e32
+define void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone ; index applied to %aptr, %bptr and %out below
+  %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+  %a = load i8, i8 addrspace(1)* %gep0, align 1 ; both operands are i8 loaded from memory
+  %b = load i8, i8 addrspace(1)* %gep1, align 1
+  %cmp = icmp uge i8 %a, %b ; unsigned a >= b
+  %val = select i1 %cmp, i8 %a, i8 %b ; with the compare above: unsigned maximum of %a and %b
+  store i8 %val, i8 addrspace(1)* %outgep, align 1
+  ret void
+}
+
 ; FUNC-LABEL: @v_test_umax_ugt_i32
 ; SI: v_max_u32_e32
 define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
@@ -203,6 +237,10 @@ define void @simplify_demanded_bits_test_max_slt_i16(i32 addrspace(1)* %out, i16
 }
 
 ; FUNC-LABEL: {{^}}s_test_imax_sge_i16:
+; SI: s_load_dword
+; SI: s_load_dword
+; SI: s_sext_i32_i16
+; SI: s_sext_i32_i16
 ; SI: s_max_i32
 define void @s_test_imax_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
   %cmp = icmp sge i16 %a, %b
diff --git a/test/CodeGen/AMDGPU/min.ll b/test/CodeGen/AMDGPU/min.ll
index fbc3d7dcea2..215dbeb4b2f 100644
--- a/test/CodeGen/AMDGPU/min.ll
+++ b/test/CodeGen/AMDGPU/min.ll
@@ -48,6 +48,10 @@ define void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <
 }
 
 ; FUNC-LABEL: {{^}}s_test_imin_sle_i8:
+; SI: s_load_dword
+; SI: s_load_dword
+; SI: s_sext_i32_i8
+; SI: s_sext_i32_i8
 ; SI: s_min_i32
 define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind {
   %cmp = icmp sle i8 %a, %b
@@ -60,10 +64,21 @@ define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind {
 ; extloads with mubuf instructions.
 
 ; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8:
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+
 ; SI: v_min_i32
 ; SI: v_min_i32
 ; SI: v_min_i32
 ; SI: v_min_i32
+
+; SI: s_endpgm
 define void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b) nounwind {
   %cmp = icmp sle <4 x i8> %a, %b
   %val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
@@ -192,6 +207,23 @@ define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
   ret void
 }
 
+; FUNC-LABEL: {{^}}v_test_umin_ult_i8:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: v_min_u32_e32
+define void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone ; index applied to %aptr, %bptr and %out below
+  %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+  %a = load i8, i8 addrspace(1)* %gep0, align 1 ; both operands are i8 loaded from memory
+  %b = load i8, i8 addrspace(1)* %gep1, align 1
+  %cmp = icmp ult i8 %a, %b ; unsigned a < b
+  %val = select i1 %cmp, i8 %a, i8 %b ; with the compare above: unsigned minimum of %a and %b
+  store i8 %val, i8 addrspace(1)* %outgep, align 1
+  ret void
+}
+
 ; FUNC-LABEL: @s_test_umin_ult_i32
 ; SI: s_min_u32
 define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {