From fac1bfe37da03ffee2674250475b3a9e0ba39e64 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Mon, 4 Jan 2016 23:35:53 +0000 Subject: [PATCH] AMDGPU: add +xnack feature Summary: Enabling this feature will account for the two SGPRs used by the hardware to store the XNACK_MASK physically. The hardware only requires this reservation when the XNACK feature is explicitly enabled. At some point, HSA will probably want to do that, but it does increase SGPR register pressure, so leave it disabled by default for now (but do add a small test). Reviewers: arsenm, tstellarAMD Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D15869 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256794 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPU.td | 5 ++++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 13 ++++++---- lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1 + lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 ++++ lib/Target/AMDGPU/SIRegisterInfo.cpp | 33 ++++++++++++++++++++----- test/CodeGen/AMDGPU/flat-scratch-reg.ll | 17 ++++++++----- 6 files changed, 57 insertions(+), 17 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index d4af8d2e48d..db869cf7dd8 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -118,6 +118,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", "true", "Support flat address space">; +def FeatureXNACK : SubtargetFeature<"xnack", + "EnableXNACK", + "true", + "Enable XNACK support">; + def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", "EnableVGPRSpilling", "true", diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index ba71dc05a8f..9c379026437 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -417,13 +417,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, } } - if (VCCUsed || FlatUsed) + if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) { MaxSGPR += 2; - if (FlatUsed) { - MaxSGPR += 2; - // 2 additional for VI+. - if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (FlatUsed) + MaxSGPR += 2; + + if (STM.isXNACKEnabled()) MaxSGPR += 2; } @@ -620,6 +620,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, if (MFI->hasDispatchPtr()) header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; + if (STM.isXNACKEnabled()) + header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED; + header.kernarg_segment_byte_size = MFI->ABIArgOffset; header.wavefront_sgpr_count = KernelInfo.NumSGPR; header.workitem_vgpr_count = KernelInfo.NumVGPR; diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 44e0c47877a..c6af5b93d25 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -73,6 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false), + EnableXNACK(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 9c7bb88f8f4..d3712276d5e 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -76,6 +76,7 @@ private: bool EnableIfCvt; bool EnableLoadStoreOpt; bool EnableUnsafeDSOffsetFolding; + bool EnableXNACK; unsigned WavefrontSize; bool CFALUBug; int LocalMemorySize; @@ -290,6 +291,10 @@ public: } bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const; + bool isXNACKEnabled() const { + return EnableXNACK; + } + unsigned getMaxWavesPerCU() const { if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) return 10; diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 353c2b93063..2afa0099660 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -37,13 +37,17 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const AMDGPUSubtarget &ST = MF.getSubtarget(); if (ST.hasSGPRInitBug()) { unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4; + if (ST.isXNACKEnabled()) + BaseIdx -= 4; + unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the - // next sgpr128 down. + // 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and + // 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down + // either way. return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95; } @@ -54,13 +58,25 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const { const AMDGPUSubtarget &ST = MF.getSubtarget(); if (ST.hasSGPRInitBug()) { - unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; + unsigned Idx; + + if (!ST.isXNACKEnabled()) + Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; + else + Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1; + return AMDGPU::SGPR_32RegClass.getRegister(Idx); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // Next register before reservations for flat_scr and vcc. - return AMDGPU::SGPR97; + if (!ST.isXNACKEnabled()) { + // Next register before reservations for flat_scr and vcc. + return AMDGPU::SGPR97; + } else { + // Next register before reservations for flat_scr, xnack_mask, vcc, + // and scratch resource. + return AMDGPU::SGPR91; + } } return AMDGPU::SGPR95; @@ -86,6 +102,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // for VCC/FLAT_SCR. reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99); reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101); + + if (ST.isXNACKEnabled()) + reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97); } // Tonga and Iceland can only allocate a fixed number of SGPRs due @@ -93,9 +112,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (ST.hasSGPRInitBug()) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). - // Assume XNACK_MASK is unused. unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4; + if (ST.isXNACKEnabled()) + Limit -= 2; + for (unsigned i = Limit; i < NumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); reserveRegisterTuples(Reserved, Reg); diff --git a/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/test/CodeGen/AMDGPU/flat-scratch-reg.ll index e2ae3353ae1..9aea7c77343 100644 --- a/test/CodeGen/AMDGPU/flat-scratch-reg.ll +++ b/test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -1,8 +1,10 @@ -; RUN: llc < %s -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=CI -; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI +; RUN: llc < %s -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=CI --check-prefix=NO-XNACK +; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=NO-XNACK +; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=XNACK ; GCN-LABEL: {{^}}no_vcc_no_flat: -; GCN: ; NumSgprs: 8 +; NO-XNACK: ; NumSgprs: 8 +; XNACK: ; NumSgprs: 12 define void @no_vcc_no_flat() { entry: call void asm sideeffect "", "~{SGPR7}"() @@ -10,7 +12,8 @@ entry: } ; GCN-LABEL: {{^}}vcc_no_flat: -; GCN: ; NumSgprs: 10 +; NO-XNACK: ; NumSgprs: 10 +; XNACK: ; NumSgprs: 12 define void @vcc_no_flat() { entry: call void asm sideeffect "", "~{SGPR7},~{VCC}"() @@ -19,7 +22,8 @@ entry: ; GCN-LABEL: {{^}}no_vcc_flat: ; CI: ; NumSgprs: 12 -; VI: ; NumSgprs: 14 +; VI: ; NumSgprs: 12 +; XNACK: ; NumSgprs: 14 define void @no_vcc_flat() { entry: call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"() @@ -28,7 +32,8 @@ entry: ; GCN-LABEL: {{^}}vcc_flat: ; CI: ; NumSgprs: 12 -; VI: ; NumSgprs: 14 +; VI: ; NumSgprs: 12 +; XNACK: ; NumSgprs: 14 define void @vcc_flat() { entry: call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"() -- 2.34.1