From: Marek Olsak Date: Wed, 13 Jan 2016 17:23:09 +0000 (+0000) Subject: AMDGPU/SI: Add s_waitcnt at the end of non-void functions X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=fe319de414dd46a01c94f4dee1ed978dae7f2378;hp=7207597936bb29378fdc9cf5f5705fb715aa7ef3 AMDGPU/SI: Add s_waitcnt at the end of non-void functions Summary: v2: Make ReturnsVoid private, so that I can another 8 lines of code and look more productive. Reviewers: tstellarAMD, arsenm Subscribers: arsenm Differential Revision: http://reviews.llvm.org/D16034 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257622 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 0c678c4b73b..4206e6fb869 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -893,6 +893,8 @@ SDValue SITargetLowering::LowerReturn(SDValue Chain, return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs, OutVals, DL, DAG); + Info->setIfReturnsVoid(Outs.size() == 0); + SmallVector Splits; SmallVector SplitVals; diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp index 821aada526c..d3b41797871 100644 --- a/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -84,6 +84,9 @@ private: bool LastInstWritesM0; + /// \brief Whether the machine function returns void + bool ReturnsVoid; + /// \brief Get increment/decrement amount for this instruction. Counters getHwCounts(MachineInstr &MI); @@ -322,7 +325,9 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, const Counters &Required) { // End of program? No need to wait on anything - if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM) + // A function not returning void needs to wait, because other bytecode will + // be appended after it and we don't know what it will be. + if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid) return false; // Figure out if the async instructions execute in order @@ -465,6 +470,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { LastIssued = ZeroCounts; LastOpcodeType = OTHER; LastInstWritesM0 = false; + ReturnsVoid = MF.getInfo()->returnsVoid(); memset(&UsedRegs, 0, sizeof(UsedRegs)); memset(&DefinedRegs, 0, sizeof(DefinedRegs)); diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 878fd768bba..49677fc2b0a 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -47,6 +47,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkGroupInfoSystemSGPR(AMDGPU::NoRegister), PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), PSInputAddr(0), + ReturnsVoid(true), LDSWaveSpillSize(0), PSInputEna(0), NumUserSGPRs(0), diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 61d575385ff..846ee5de057 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -59,6 +59,7 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction { // Graphics info. unsigned PSInputAddr; + bool ReturnsVoid; public: // FIXME: Make private @@ -288,6 +289,14 @@ public: PSInputAddr |= 1 << Index; } + bool returnsVoid() const { + return ReturnsVoid; + } + + void setIfReturnsVoid(bool Value) { + ReturnsVoid = Value; + } + unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const; };