From: Nicolai Haehnle Date: Thu, 17 Dec 2015 16:46:42 +0000 (+0000) Subject: AMDGPU: Fix off-by-one in SIRegisterInfo::eliminateFrameIndex X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=7c502030bf075f11afe020ef11236ae417af7474;hp=02fd5dfa7c48314f0cdfc98f1b7efaa25693f052 AMDGPU: Fix off-by-one in SIRegisterInfo::eliminateFrameIndex Summary: The method insertNOPs expected the number of wait states to be passed as parameter, while eliminateFrameIndex passed the immediate argument for the S_NOP, leading to an off-by-one error. Rename the method to make the meaning of its parameter clearer. The number of 4 / 5 wait states (which is what the method has always _tried_ to do according to the comment) is correct according to the hardware docs. I stumbled upon this while trying to track down the cause of https://bugs.freedesktop.org/show_bug.cgi?id=93264. While clearly needed, this patch unfortunately does not fix that bug... Reviewers: arsenm, tstellarAMD Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D15542 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255906 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 65c4d032a51..2375432305a 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -742,8 +742,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB, return TmpReg; } -void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI, - int Count) const { +void SIInstrInfo::insertWaitStates(MachineBasicBlock::iterator MI, + int Count) const { while (Count > 0) { int Arg; if (Count >= 8) diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 8d18d29196f..307ef67ed26 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -441,7 +441,7 @@ public: void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I, unsigned SavReg, unsigned IndexReg) const; - void insertNOPs(MachineBasicBlock::iterator MI, int Count) const; + void insertWaitStates(MachineBasicBlock::iterator MI, int Count) const; /// \brief Returns the operand named \p Op. If \p MI does not have an /// operand named \c Op, this function returns nullptr. diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index bf87f022527..3cdffef0558 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -331,16 +331,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // TODO: only do this when it is needed switch (MF->getSubtarget().getGeneration()) { case AMDGPUSubtarget::SOUTHERN_ISLANDS: - // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI - TII->insertNOPs(MI, 3); + // "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states + // ("S_NOP 3") on SI + TII->insertWaitStates(MI, 4); break; case AMDGPUSubtarget::SEA_ISLANDS: break; default: // VOLCANIC_ISLANDS and later - // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI - // and later. This also applies to VALUs which write VCC, but we're - // unlikely to see VMEM use VCC. - TII->insertNOPs(MI, 4); + // "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states + // ("S_NOP 4") on VI and later. This also applies to VALUs which write + // VCC, but we're unlikely to see VMEM use VCC. + TII->insertWaitStates(MI, 5); } MI->eraseFromParent();