AMDGPU/SI: Set the code object's private segment size when targeting HSA.

author Tom Stellard <thomas.stellard@amd.com>

Tue, 15 Dec 2015 22:55:30 +0000 (22:55 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Tue, 15 Dec 2015 22:55:30 +0000 (22:55 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Tue, 15 Dec 2015 22:55:30 +0000 (22:55 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Tue, 15 Dec 2015 22:55:30 +0000 (22:55 +0000)
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

index 0b7477db2f3aa5fb2138886aeece4fb960ced423..be0df57e9f0b6a892bab4eca857c0d9f913fbc99 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -619,6 +619,7 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
    header.kernarg_segment_byte_size = MFI->ABIArgOffset;
    header.wavefront_sgpr_count = KernelInfo.NumSGPR;
    header.workitem_vgpr_count = KernelInfo.NumVGPR;
+  header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
  
    AMDGPUTargetStreamer *TS =
        static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
diff --git a/test/CodeGen/AMDGPU/large-alloca-compute.ll b/test/CodeGen/AMDGPU/large-alloca-compute.ll

index c348a2e7980fdde890c5869f8da5f3e84c58059c..de802c49ed4a9a9e61dbef73ed41c95f9d86d926 100644 (file)
--- a/test/CodeGen/AMDGPU/large-alloca-compute.ll
+++ b/test/CodeGen/AMDGPU/large-alloca-compute.ll
@@ -34,7 +34,7 @@
  ; GCNHSA: enable_sgpr_grid_workgroup_count_x = 0
  ; GCNHSA: enable_sgpr_grid_workgroup_count_y = 0
  ; GCNHSA: enable_sgpr_grid_workgroup_count_z = 0
-; GCNHSA: workitem_private_segment_byte_size = 0
+; GCNHSA: workitem_private_segment_byte_size = 32772
  ; GCNHSA: private_segment_alignment = 4
  ; GCNHSA: .end_amd_kernel_code_t
  
diff --git a/test/CodeGen/AMDGPU/private-memory.ll b/test/CodeGen/AMDGPU/private-memory.ll

index 645dc04f44202d1bb2e107b1b59601bccb6c7781..628b8a21b819474d72dd340be38c3f0edfb33724 100644 (file)
--- a/test/CodeGen/AMDGPU/private-memory.ll
+++ b/test/CodeGen/AMDGPU/private-memory.ll
@@ -1,6 +1,7 @@
  ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
  ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
  ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA
  ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
  ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
  
@@ -18,6 +19,12 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
  ; SI-PROMOTE: ds_read_b32
  ; SI-PROMOTE: ds_read_b32
  
+; HSA-ALLOCA: .amd_kernel_code_t
+; FIXME: Creating the emergency stack slots causes us to over-estimate scratch
+; by 4 bytes.
+; HSA-ALLOCA: workitem_private_segment_byte_size = 24
+; HSA-ALLOCA: .end_amd_kernel_code_t
+
  ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
  ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
  define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
author	Tom Stellard <thomas.stellard@amd.com>
	Tue, 15 Dec 2015 22:55:30 +0000 (22:55 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Tue, 15 Dec 2015 22:55:30 +0000 (22:55 +0000)
lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/large-alloca-compute.ll		patch \| blob \| history
test/CodeGen/AMDGPU/private-memory.ll		patch \| blob \| history