+ unsigned getLoopDepth();
+ bool branchStackContains(CFStack::StackItem);
+ bool requiresWorkAroundForInst(unsigned Opcode);
+ unsigned getSubEntrySize(CFStack::StackItem Item);
+ void updateMaxStackSize();
+ void pushBranch(unsigned Opcode, bool isWQM = false);
+ void pushLoop();
+ void popBranch();
+ void popLoop();
+};
+
+/// Returns the number of elements currently held in LoopStack, i.e. how many
+/// loops have been pushed and not yet popped.
+unsigned CFStack::getLoopDepth() {
+  const unsigned Depth = LoopStack.size();
+  return Depth;
+}
+
+/// Returns true if some element of BranchStack compares equal to \p Item,
+/// false otherwise (including when BranchStack is empty).
+bool CFStack::branchStackContains(CFStack::StackItem Item) {
+  std::vector<CFStack::StackItem>::const_iterator Pos = BranchStack.begin();
+  const std::vector<CFStack::StackItem>::const_iterator End =
+      BranchStack.end();
+
+  // Advance until the first match or until the stack is exhausted.
+  while (Pos != End && !(*Pos == Item))
+    ++Pos;
+
+  return Pos != End;
+}
+
+/// Decides whether the instruction \p Opcode needs the stack work-around in
+/// the current stack state.
+///
+/// Returns true unconditionally for CF_ALU_PUSH_BEFORE when the subtarget
+/// reports the Cayman ISA and more than one loop is open. Otherwise the
+/// work-around is only considered on subtargets that report the CF ALU bug,
+/// only for CF_ALU_PUSH_BEFORE, CF_ALU_ELSE_AFTER, CF_ALU_BREAK and
+/// CF_ALU_CONTINUE, and only once CurrentSubEntries exceeds a threshold that
+/// depends on the wavefront size.
+bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
+  const bool IsNestedCaymanPush = Opcode == AMDGPU::CF_ALU_PUSH_BEFORE &&
+                                  ST.hasCaymanISA() && getLoopDepth() > 1;
+  if (IsNestedCaymanPush)
+    return true;
+
+  if (!ST.hasCFAluBug())
+    return false;
+
+  // Only these four opcodes are candidates for the CF ALU bug work-around.
+  switch (Opcode) {
+  case AMDGPU::CF_ALU_PUSH_BEFORE:
+  case AMDGPU::CF_ALU_ELSE_AFTER:
+  case AMDGPU::CF_ALU_BREAK:
+  case AMDGPU::CF_ALU_CONTINUE:
+    break;
+  default:
+    return false;
+  }
+
+  // Nothing on the sub-entry stack: no work-around needed.
+  if (CurrentSubEntries == 0)
+    return false;
+
+  if (ST.getWavefrontSize() == 64) {
+    // Deliberately conservative. Strictly, the work-around is only required
+    // when
+    //   CurrentSubEntries > 3 &&
+    //   (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
+    //
+    // The stack allocation algorithm for Evergreen/NI is not known for
+    // certain to be correct, so the work-around is applied whenever
+    // CurrentSubEntries > 3; this lets us over-allocate stack resources
+    // without any problems.
+    return CurrentSubEntries > 3;
+  }
+
+  assert(ST.getWavefrontSize() == 32);
+  // Deliberately conservative for the same reason as the wavefront size == 64
+  // case above. Strictly, the work-around is only required when
+  //   CurrentSubEntries > 7 &&
+  //   (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
+  return CurrentSubEntries > 7;
+}
+
+/// Returns how many sub-entries \p Item accounts for on the stack.
+///
+/// SUB_ENTRY counts as 1, FIRST_NON_WQM_PUSH_W_FULL_ENTRY as 2, and
+/// FIRST_NON_WQM_PUSH as 3 on generations up to and including R700 or 2 on
+/// later ones. Every other item kind yields 0.
+unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
+  if (Item == CFStack::SUB_ENTRY)
+    return 1;
+
+  if (Item == CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY) {
+    assert(ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+    // +1 for the push operation, +1 extra space required.
+    return 2;
+  }
+
+  if (Item == CFStack::FIRST_NON_WQM_PUSH) {
+    assert(!ST.hasCaymanISA());
+    const bool IsR700OrOlder = ST.getGeneration() <= AMDGPUSubtarget::R700;
+    // R700 and older: +1 for the push operation, +2 extra space required.
+    //
+    // Newer generations: +1 for the push operation, +1 extra space required.
+    // Some documentation says the extra sub-entry is not necessary on
+    // Evergreen, but experimentation has shown that one extra sub-entry must
+    // be allocated for the first non-WQM push.
+    return IsR700OrOlder ? 3 : 2;
+  }
+
+  // Any other item kind occupies no sub-entries.
+  return 0;
+}
+
+/// Recomputes the current stack size from CurrentEntries and
+/// CurrentSubEntries and raises MaxStackSize to it if it is larger.
+///
+/// The sub-entry contribution is RoundUpToAlignment(CurrentSubEntries, 4) / 4.
+void CFStack::updateMaxStackSize() {
+  const unsigned StackSizeNow =
+      CurrentEntries + RoundUpToAlignment(CurrentSubEntries, 4) / 4;
+
+  // MaxStackSize only ever grows.
+  if (MaxStackSize < StackSizeNow)
+    MaxStackSize = StackSizeNow;
+}
+
+/// Records a branch on BranchStack and updates the entry / sub-entry counters
+/// and MaxStackSize accordingly.
+///
+/// \param Opcode  Opcode of the instruction opening the branch. Only
+///                CF_PUSH_EG and CF_ALU_PUSH_BEFORE can produce something
+///                other than a full ENTRY.
+/// \param isWQM   When true, the branch is always recorded as a full ENTRY.
+void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
+  const bool IsPushOpcode = Opcode == AMDGPU::CF_PUSH_EG ||
+                            Opcode == AMDGPU::CF_ALU_PUSH_BEFORE;
+
+  // Everything that is not a non-WQM push is a full entry.
+  CFStack::StackItem Item = CFStack::ENTRY;
+
+  if (IsPushOpcode && !isWQM) {
+    if (!ST.hasCaymanISA() &&
+        !branchStackContains(CFStack::FIRST_NON_WQM_PUSH)) {
+      // May not be required on Evergreen/NI; see the comment in
+      // CFStack::getSubEntrySize().
+      Item = CFStack::FIRST_NON_WQM_PUSH;
+    } else if (CurrentEntries > 0 &&
+               ST.getGeneration() > AMDGPUSubtarget::EVERGREEN &&
+               !ST.hasCaymanISA() &&
+               !branchStackContains(
+                   CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY)) {
+      Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
+    } else {
+      Item = CFStack::SUB_ENTRY;
+    }
+  }
+
+  BranchStack.push_back(Item);
+
+  // Full entries and sub-entries are tracked by separate counters.
+  if (Item != CFStack::ENTRY)
+    CurrentSubEntries += getSubEntrySize(Item);
+  else
+    ++CurrentEntries;
+
+  updateMaxStackSize();
+}
+
+/// Records a loop: pushes a full ENTRY onto LoopStack, counts it in
+/// CurrentEntries and refreshes MaxStackSize.
+void CFStack::pushLoop() {
+  const CFStack::StackItem LoopItem = CFStack::ENTRY;
+  LoopStack.push_back(LoopItem);
+  ++CurrentEntries;
+  updateMaxStackSize();
+}
+
+/// Removes the most recently pushed branch from BranchStack and undoes its
+/// contribution to the counters: a full ENTRY decrements CurrentEntries, any
+/// other item subtracts its getSubEntrySize() from CurrentSubEntries.
+///
+/// MaxStackSize is left untouched.
+///
+/// Precondition: BranchStack is not empty. Calling back()/pop_back() on an
+/// empty vector is undefined behaviour, so an unbalanced pop is caught by an
+/// assertion instead.
+void CFStack::popBranch() {
+  assert(!BranchStack.empty() && "popBranch() without matching pushBranch()");
+  CFStack::StackItem Top = BranchStack.back();
+  if (Top == CFStack::ENTRY)
+    CurrentEntries--;
+  else
+    CurrentSubEntries -= getSubEntrySize(Top);
+  BranchStack.pop_back();
+}
+
+/// Removes the most recently pushed loop from LoopStack and decrements
+/// CurrentEntries to match.
+///
+/// MaxStackSize is left untouched.
+///
+/// Precondition: LoopStack is not empty. pop_back() on an empty vector is
+/// undefined behaviour and the counter would be decremented with nothing to
+/// account for, so an unbalanced pop is caught by an assertion instead.
+void CFStack::popLoop() {
+  assert(!LoopStack.empty() && "popLoop() without matching pushLoop()");
+  CurrentEntries--;
+  LoopStack.pop_back();
+}