AMDGPU/SI: Fix a GPU hang with POS_W_FLOAT enabled

author Marek Olsak <marek.olsak@amd.com>

Wed, 13 Jan 2016 17:23:20 +0000 (17:23 +0000)

committer Marek Olsak <marek.olsak@amd.com>

Wed, 13 Jan 2016 17:23:20 +0000 (17:23 +0000)
author Marek Olsak <marek.olsak@amd.com>
Wed, 13 Jan 2016 17:23:20 +0000 (17:23 +0000)
committer Marek Olsak <marek.olsak@amd.com>
Wed, 13 Jan 2016 17:23:20 +0000 (17:23 +0000)
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp

index 4206e6fb869fbab8b1cdf67d2704be2cbbd43fb5..544867513d9c541720560cae67e978e4eca156e6 100644 (file)
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -646,8 +646,15 @@ SDValue SITargetLowering::LowerFormalArguments(
    // based on run-time states. Since we can't know what the final PSInputEna
    // will look like, so we shouldn't do anything here and the user should take
    // responsibility for the correct programming.
+  //
+  // Otherwise, the following restrictions apply:
+  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
+  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
+  //   enabled too.
    if (Info->getShaderType() == ShaderType::PIXEL &&
-      (Info->getPSInputAddr() & 0x7F) == 0) {
+      ((Info->getPSInputAddr() & 0x7F) == 0 ||
+       ((Info->getPSInputAddr() & 0xF) == 0 &&
+       Info->isPSInputAllocated(11)))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
diff --git a/test/CodeGen/AMDGPU/ret.ll b/test/CodeGen/AMDGPU/ret.ll

index 26817729355269ca8b5f20a2550dbb2bfee403b5..2bd9fd6858fec90eeaa5465d2c2486dd11cf1fe6 100644 (file)
--- a/test/CodeGen/AMDGPU/ret.ll
+++ b/test/CodeGen/AMDGPU/ret.ll
@@ -76,6 +76,23 @@ define float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 in
  }
  
  
+; GCN: .long 165580
+; GCN-NEXT: .long 2081
+; GCN-NEXT: .long 165584
+; GCN-NEXT: .long 2081
+; GCN-LABEL: {{^}}ps_input_ena_pos_w:
+; GCN-DAG: v_mov_b32_e32 v0, v4
+; GCN-DAG: v_mov_b32_e32 v1, v2
+; GCN: v_mov_b32_e32 v2, v3
+; GCN-NOT: s_endpgm
+define {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
+  %f = bitcast <2 x i32> %8 to <2 x float>
+  %s = insertvalue {float, <2 x float>} undef, float %14, 0
+  %s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
+  ret {float, <2 x float>} %s1
+}
+
+
  ; GCN: .long 165580
  ; GCN-NEXT: .long 562
  ; GCN-NEXT: .long 165584
author	Marek Olsak <marek.olsak@amd.com>
	Wed, 13 Jan 2016 17:23:20 +0000 (17:23 +0000)
committer	Marek Olsak <marek.olsak@amd.com>
	Wed, 13 Jan 2016 17:23:20 +0000 (17:23 +0000)
lib/Target/AMDGPU/SIISelLowering.cpp		patch | blob | history
test/CodeGen/AMDGPU/ret.ll		patch | blob | history