From aa23e0330d48ed41a552f13074e6b16e76c7c30c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 26 Nov 2015 00:43:29 +0000 Subject: [PATCH] AMDGPU: Add llvm.amdgcn.dispatch.ptr intrinsic Summary: This returns a pointer to the dispatch packet, which can be used to load information about the kernel dispatch. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D14898 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254116 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsAMDGPU.td | 4 ++++ .../AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 3 ++- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 3 +++ lib/Target/AMDGPU/SIISelLowering.cpp | 16 ++++++++++++++++ lib/Target/AMDGPU/SIRegisterInfo.cpp | 6 ++++++ lib/Target/AMDGPU/SIRegisterInfo.h | 1 + test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll | 16 ++++++++++++++++ 7 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index cad2b56a35f..0f87596d473 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -127,4 +127,8 @@ def int_amdgcn_s_dcache_wb_vol : GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">, Intrinsic<[], [], []>; +def int_amdgcn_dispatch_ptr : + GCCBuiltin<"__builtin_amdgcn_disptch_ptr">, + Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; + } diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index e57415f3b8e..37818392724 100644 --- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -105,7 +105,8 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { { "llvm.r600.read.global.size.x", "amdgpu-dispatch-ptr" }, { "llvm.r600.read.global.size.y", "amdgpu-dispatch-ptr" }, - { "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" } + { 
"llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" }, + { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" } }; // TODO: Intrinsics that require queue ptr. diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 39beb6a4f50..314ef721c1f 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -528,6 +528,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR | AMD_CODE_PROPERTY_IS_PTR64; + if (MFI->hasDispatchPtr()) + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; + header.kernarg_segment_byte_size = MFI->ABIArgOffset; header.wavefront_sgpr_count = KernelInfo.NumSGPR; header.workitem_vgpr_count = KernelInfo.NumVGPR; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 4ed9cf6c97e..5c67bf80c17 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -646,6 +646,18 @@ SDValue SITargetLowering::LowerFormalArguments( CCInfo.AllocateReg(ScratchPtrRegHi); MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass); MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass); + SIMachineFunctionInfo *MFI = MF.getInfo(); + if (Subtarget->isAmdHsaOS() && MFI->hasDispatchPtr()) { + unsigned DispatchPtrReg = + TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR); + unsigned DispatchPtrRegLo = + TRI->getPhysRegSubReg(DispatchPtrReg, &AMDGPU::SReg_32RegClass, 0); + unsigned DispatchPtrRegHi = + TRI->getPhysRegSubReg(DispatchPtrReg, &AMDGPU::SReg_32RegClass, 1); + CCInfo.AllocateReg(DispatchPtrRegLo); + CCInfo.AllocateReg(DispatchPtrRegHi); + MF.addLiveIn(DispatchPtrReg, &AMDGPU::SReg_64RegClass); + } } if (Info->getShaderType() == ShaderType::COMPUTE) { @@ -1053,6 +1065,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // TODO: Should this propagate fast-math-flags? 
switch (IntrinsicID) { + case Intrinsic::amdgcn_dispatch_ptr: + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, + TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT); + case Intrinsic::r600_read_ngroups_x: return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), SI::KernelInputOffsets::NGROUPS_X, false); diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 436808b5287..ab7539b6fb3 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -510,6 +510,7 @@ bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const { unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, enum PreloadedValue Value) const { + const AMDGPUSubtarget &ST = MF.getSubtarget(); const SIMachineFunctionInfo *MFI = MF.getInfo(); switch (Value) { case SIRegisterInfo::TGID_X: @@ -525,6 +526,11 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, case SIRegisterInfo::SCRATCH_PTR: return AMDGPU::SGPR2_SGPR3; case SIRegisterInfo::INPUT_PTR: + if (ST.isAmdHsaOS()) + return MFI->hasDispatchPtr() ? 
AMDGPU::SGPR2_SGPR3 : AMDGPU::SGPR0_SGPR1; + return AMDGPU::SGPR0_SGPR1; + case SIRegisterInfo::DISPATCH_PTR: + assert(MFI->hasDispatchPtr()); return AMDGPU::SGPR0_SGPR1; case SIRegisterInfo::TIDIG_X: return AMDGPU::VGPR0; diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index b1389533ec3..36f6d1c7a26 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -99,6 +99,7 @@ public: enum PreloadedValue { // SGPRS: SCRATCH_PTR = 0, + DISPATCH_PTR = 1, INPUT_PTR = 3, TGID_X = 10, TGID_Y = 11, diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll new file mode 100644 index 00000000000..719f7ffe0f1 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}test: +; GCN: enable_sgpr_dispatch_ptr = 1 +; GCN: s_load_dword s{{[0-9]+}}, s[0:1], 0x0 +define void @test(i32 addrspace(1)* %out) { + %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 + %header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)* + %value = load i32, i32 addrspace(2)* %header_ptr + store i32 %value, i32 addrspace(1)* %out + ret void +} + +declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 + +attributes #0 = { readnone } -- 2.34.1