From 09dd945fa5dce92b9acdd8772e90ef574274c1b4 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 15 Dec 2015 22:55:30 +0000 Subject: [PATCH] AMDGPU/SI: Set the code objects private segment size when targeting HSA. Summary: I'm not sure how things worked before without this. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D15492 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255692 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 1 + test/CodeGen/AMDGPU/large-alloca-compute.ll | 2 +- test/CodeGen/AMDGPU/private-memory.ll | 7 +++++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0b7477db2f3..be0df57e9f0 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -619,6 +619,7 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, header.kernarg_segment_byte_size = MFI->ABIArgOffset; header.wavefront_sgpr_count = KernelInfo.NumSGPR; header.workitem_vgpr_count = KernelInfo.NumVGPR; + header.workitem_private_segment_byte_size = KernelInfo.ScratchSize; AMDGPUTargetStreamer *TS = static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer()); diff --git a/test/CodeGen/AMDGPU/large-alloca-compute.ll b/test/CodeGen/AMDGPU/large-alloca-compute.ll index c348a2e7980..de802c49ed4 100644 --- a/test/CodeGen/AMDGPU/large-alloca-compute.ll +++ b/test/CodeGen/AMDGPU/large-alloca-compute.ll @@ -34,7 +34,7 @@ ; GCNHSA: enable_sgpr_grid_workgroup_count_x = 0 ; GCNHSA: enable_sgpr_grid_workgroup_count_y = 0 ; GCNHSA: enable_sgpr_grid_workgroup_count_z = 0 -; GCNHSA: workitem_private_segment_byte_size = 0 +; GCNHSA: workitem_private_segment_byte_size = 32772 ; GCNHSA: private_segment_alignment = 4 ; GCNHSA: .end_amd_kernel_code_t diff --git a/test/CodeGen/AMDGPU/private-memory.ll b/test/CodeGen/AMDGPU/private-memory.ll index 645dc04f442..628b8a21b81 
100644 --- a/test/CodeGen/AMDGPU/private-memory.ll +++ b/test/CodeGen/AMDGPU/private-memory.ll @@ -1,6 +1,7 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC +; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC @@ -18,6 +19,12 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; SI-PROMOTE: ds_read_b32 ; SI-PROMOTE: ds_read_b32 +; HSA-ALLOCA: .amd_kernel_code_t +; FIXME: Creating the emergency stack slots causes us to over-estimate scratch +; by 4 bytes. +; HSA-ALLOCA: workitem_private_segment_byte_size = 24 +; HSA-ALLOCA: .end_amd_kernel_code_t + ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0 ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { -- 2.34.1