X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FR600%2FR600ControlFlowFinalizer.cpp;h=8e7bc1079b7bfdc1cd7b6a88f478e9a9e1c6d5be;hb=9f85dccfc64b5f0b0c63ddfa0a42d8615aa1fcb3;hp=6b42a7a9faf240f918d709d479f3fbbdfe47269d;hpb=5c0c884e423cd4dce8fbe3696fe935eaffdcffa7;p=oota-llvm.git diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index 6b42a7a9faf..8e7bc1079b7 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -12,9 +12,9 @@ /// computing their address on the fly ; it also sets STACK_SIZE info. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "r600cf" #include "llvm/Support/Debug.h" #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" @@ -26,6 +26,8 @@ using namespace llvm; +#define DEBUG_TYPE "r600cf" + namespace { struct CFStack { @@ -73,6 +75,44 @@ bool CFStack::branchStackContains(CFStack::StackItem Item) { return false; } +bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { + if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST.hasCaymanISA() && + getLoopDepth() > 1) + return true; + + if (!ST.hasCFAluBug()) + return false; + + switch(Opcode) { + default: return false; + case AMDGPU::CF_ALU_PUSH_BEFORE: + case AMDGPU::CF_ALU_ELSE_AFTER: + case AMDGPU::CF_ALU_BREAK: + case AMDGPU::CF_ALU_CONTINUE: + if (CurrentSubEntries == 0) + return false; + if (ST.getWavefrontSize() == 64) { + // We are being conservative here. We only require this work-around if + // CurrentSubEntries > 3 && + // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0) + // + // We have to be conservative, because we don't know for certain that + // our stack allocation algorithm for Evergreen/NI is correct. Applying this + // work-around when CurrentSubEntries > 3 allows us to over-allocate stack + // resources without any problems. + return CurrentSubEntries > 3; + } else { + assert(ST.getWavefrontSize() == 32); + // We are being conservative here. We only require the work-around if + // CurrentSubEntries > 7 && + // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0) + // See the comment on the wavefront size == 64 case for why we are + // being conservative. + return CurrentSubEntries > 7; + } + } +} + unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { switch(Item) { default: @@ -430,25 +470,27 @@ private: public: R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), - TII (0), TRI(0), + TII (nullptr), TRI(nullptr), ST(tm.getSubtarget()) { const AMDGPUSubtarget &ST = tm.getSubtarget(); MaxFetchInst = ST.getTexVTXClauseSize(); } - virtual bool runOnMachineFunction(MachineFunction &MF) { - TII=static_cast(MF.getTarget().getInstrInfo()); - TRI=static_cast(MF.getTarget().getRegisterInfo()); + bool runOnMachineFunction(MachineFunction &MF) override { + TII = static_cast( + MF.getTarget().getSubtargetImpl()->getInstrInfo()); + TRI = static_cast( + MF.getTarget().getSubtargetImpl()->getRegisterInfo()); R600MachineFunctionInfo *MFI = MF.getInfo(); - CFStack CFStack(ST, MFI->ShaderType); + CFStack CFStack(ST, MFI->getShaderType()); for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; ++MB) { MachineBasicBlock &MBB = *MB; unsigned CfCount = 0; std::vector > > LoopStack; std::vector IfThenElseStack; - if (MFI->ShaderType == 1) { + if (MFI->getShaderType() == ShaderType::VERTEX) { BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), getHWInstrDesc(CF_CALL_FS)); CfCount++; @@ -463,18 +505,22 @@ public: DEBUG(dbgs() << CfCount << ":"; I->dump();); FetchClauses.push_back(MakeFetchClause(MBB, I)); CfCount++; + LastAlu.back() = nullptr; continue; } MachineBasicBlock::iterator MI = I; if (MI->getOpcode() != AMDGPU::ENDIF) - LastAlu.back() = 0; + LastAlu.back() = nullptr; if (MI->getOpcode() == AMDGPU::CF_ALU) LastAlu.back() = MI; I++; + bool RequiresWorkAround = + CFStack.requiresWorkAroundForInst(MI->getOpcode()); switch (MI->getOpcode()) { case AMDGPU::CF_ALU_PUSH_BEFORE: - if (ST.hasCaymanISA() && CFStack.getLoopDepth() > 1) { + if (RequiresWorkAround) { + DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n"); BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG)) .addImm(CfCount + 1) .addImm(1); @@ -516,7 +562,7 @@ public: break; } case AMDGPU::IF_PREDICATE_SET: { - LastAlu.push_back(0); + LastAlu.push_back(nullptr); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP)) .addImm(0) @@ -623,7 +669,7 @@ public: return false; } - const char *getPassName() const { + const char *getPassName() const override { return "R600 Control Flow Finalizer Pass"; } };