1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
9 //==-----------------------------------------------------------------------===//
11 #define DEBUG_TYPE "structcfg"
14 #include "AMDGPUInstrInfo.h"
15 #include "llvm/Support/Debug.h"
16 #include "llvm/Support/raw_ostream.h"
17 #include "llvm/ADT/SCCIterator.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/DominatorInternals.h"
21 #include "llvm/Analysis/Dominators.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineJumpTableInfo.h"
28 #include "llvm/CodeGen/MachineLoopInfo.h"
29 #include "llvm/CodeGen/MachinePostDominators.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetMachine.h"
36 #define DEFAULT_VEC_SLOTS 8
40 //===----------------------------------------------------------------------===//
42 // Statistics for CFGStructurizer.
44 //===----------------------------------------------------------------------===//
46 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
48 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
50 STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
52 STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
54 STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
56 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
57 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
59 //===----------------------------------------------------------------------===//
61 // Miscellaneous utility for CFGStructurizer.
63 //===----------------------------------------------------------------------===//
65 #define SHOWNEWINSTR(i) \
66 DEBUG(dbgs() << "New instr: " << *i << "\n");
68 #define SHOWNEWBLK(b, msg) \
70 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
74 #define SHOWBLK_DETAIL(b, msg) \
77 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
83 #define INVALIDSCCNUM -1
84 #define INVALIDREGNUM 0
86 template<class LoopinfoT>
87 void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
88 for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
89 iterEnd = LoopInfo.end();
90 iter != iterEnd; ++iter) {
91 (*iter)->print(OS, 0);
96 void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
97 size_t sz = Src.size();
98 for (size_t i = 0; i < sz/2; ++i) {
100 Src[i] = Src[sz - i - 1];
105 } // end anonymous namespace
107 //===----------------------------------------------------------------------===//
109 // supporting data structure for CFGStructurizer
111 //===----------------------------------------------------------------------===//
114 template<class PassT>
115 struct CFGStructTraits {
118 template <class InstrT>
119 class BlockInformation {
123 //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
124 //Instructions defining the corresponding successor.
125 BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
128 template <class BlockT, class InstrT, class RegiT>
129 class LandInformation {
132 std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
133 //WHILELOOP(thisloop) init before entering
135 std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
136 //WHILELOOP(thisloop) init after entering
138 std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
139 //land block, branch cond on this reg.
140 std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
141 //endif" after ENDLOOP(thisloop) break
142 //outerLoopOf(thisLoop).
143 std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
144 //endif" after ENDLOOP(thisloop) continue on
145 //outerLoopOf(thisLoop).
146 LandInformation() : landBlk(NULL) {}
149 } // end anonymous namespace
151 //===----------------------------------------------------------------------===//
155 //===----------------------------------------------------------------------===//
158 // bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
159 template<class PassT>
160 class CFGStructurizer {
164 SinglePath_InPath = 1,
165 SinglePath_NotInPath = 2
169 typedef typename PassT::InstructionType InstrT;
170 typedef typename PassT::FunctionType FuncT;
171 typedef typename PassT::DominatortreeType DomTreeT;
172 typedef typename PassT::PostDominatortreeType PostDomTreeT;
173 typedef typename PassT::DomTreeNodeType DomTreeNodeT;
174 typedef typename PassT::LoopinfoType LoopInfoT;
176 typedef GraphTraits<FuncT *> FuncGTraits;
177 //typedef FuncGTraits::nodes_iterator BlockIterator;
178 typedef typename FuncT::iterator BlockIterator;
180 typedef typename FuncGTraits::NodeType BlockT;
181 typedef GraphTraits<BlockT *> BlockGTraits;
182 typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
183 //typedef BlockGTraits::succ_iterator InstructionIterator;
184 typedef typename BlockT::iterator InstrIterator;
186 typedef CFGStructTraits<PassT> CFGTraits;
187 typedef BlockInformation<InstrT> BlockInfo;
188 typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
191 typedef typename PassT::LoopType LoopT;
192 typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
193 typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
194 //landing info for loop break
195 typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
201 /// Perform the CFG structurization
202 bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
204 /// Perform the CFG preparation
205 bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
208 void reversePredicateSetter(typename BlockT::iterator);
210 void printOrderedBlocks(llvm::raw_ostream &OS);
211 int patternMatch(BlockT *CurBlock);
212 int patternMatchGroup(BlockT *CurBlock);
214 int serialPatternMatch(BlockT *CurBlock);
215 int ifPatternMatch(BlockT *CurBlock);
216 int switchPatternMatch(BlockT *CurBlock);
217 int loopendPatternMatch(BlockT *CurBlock);
218 int loopPatternMatch(BlockT *CurBlock);
220 int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
221 int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
222 //int loopWithoutBreak(BlockT *);
224 void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
225 BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
226 void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
227 BlockT *ContBlock, LoopT *contLoop);
228 bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
229 int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
231 int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
233 int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
234 BlockT *FalseBlock, BlockT **LandBlockPtr);
235 void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
236 BlockT *FalseBlock, BlockT *LandBlock,
237 bool Detail = false);
238 PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
239 bool AllowSideEntry = true);
240 BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
241 bool AllowSideEntry = true);
242 int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
243 void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
245 void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
246 BlockT *TrueBlock, BlockT *FalseBlock,
248 void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
249 void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
250 BlockT *ExitLandBlock, RegiT SetReg);
251 void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
253 BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
254 std::set<BlockT*> &ExitBlockSet,
255 BlockT *ExitLandBlk);
256 BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
257 BlockTSmallerVector &ExitingBlocks,
258 BlockTSmallerVector &ExitBlocks);
259 BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
260 void removeUnconditionalBranch(BlockT *SrcBlock);
261 void removeRedundantConditionalBranch(BlockT *SrcBlock);
262 void addDummyExitBlock(SmallVectorImpl<BlockT *> &RetBlocks);
264 void removeSuccessor(BlockT *SrcBlock);
265 BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
266 BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
268 void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
269 InstrIterator InsertPos);
271 void recordSccnum(BlockT *SrcBlock, int SCCNum);
272 int getSCCNum(BlockT *srcBlk);
274 void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
275 bool isRetiredBlock(BlockT *SrcBlock);
276 bool isActiveLoophead(BlockT *CurBlock);
277 bool needMigrateBlock(BlockT *Block);
279 BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
280 BlockTSmallerVector &exitBlocks,
281 std::set<BlockT*> &ExitBlockSet);
282 void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
283 BlockT *getLoopLandBlock(LoopT *LoopRep);
284 LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
286 void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
287 void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
288 void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
289 void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
290 void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
292 bool hasBackEdge(BlockT *curBlock);
293 unsigned getLoopDepth (LoopT *LoopRep);
294 int countActiveBlock(
295 typename SmallVectorImpl<BlockT *>::const_iterator IterStart,
296 typename SmallVectorImpl<BlockT *>::const_iterator IterEnd);
297 BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
298 BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
302 PostDomTreeT *postDomTree;
307 BlockInfoMap blockInfoMap;
308 LoopLandInfoMap loopLandInfoMap;
309 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
310 const AMDGPURegisterInfo *TRI;
312 }; //template class CFGStructurizer
314 template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
315 : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
318 template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
319 for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
320 E = blockInfoMap.end(); I != E; ++I) {
325 template<class PassT>
326 bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
327 const AMDGPURegisterInfo * tri) {
332 bool changed = false;
334 //FIXME: if not reducible flow graph, make it so ???
337 dbgs() << "AMDGPUCFGStructurizer::prepare\n";
340 loopInfo = CFGTraits::getLoopInfo(pass);
342 dbgs() << "LoopInfo:\n";
343 PrintLoopinfo(*loopInfo, dbgs());
348 for (typename SmallVectorImpl<BlockT *>::const_iterator
349 iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
350 iterBlk != iterBlkEnd;
354 dbgs() << "Ordered blocks:\n";
355 printOrderedBlocks(dbgs());
358 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
360 for (typename LoopInfoT::iterator iter = loopInfo->begin(),
361 iterEnd = loopInfo->end();
362 iter != iterEnd; ++iter) {
363 LoopT* loopRep = (*iter);
364 BlockTSmallerVector exitingBlks;
365 loopRep->getExitingBlocks(exitingBlks);
367 if (exitingBlks.size() == 0) {
368 BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
369 if (dummyExitBlk != NULL)
370 retBlks.push_back(dummyExitBlk);
374 // Remove unconditional branch instr.
375 // Add dummy exit block iff there are multiple returns.
377 for (typename SmallVectorImpl<BlockT *>::const_iterator
378 iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
379 iterBlk != iterEndBlk;
381 BlockT *curBlk = *iterBlk;
382 removeUnconditionalBranch(curBlk);
383 removeRedundantConditionalBranch(curBlk);
384 if (CFGTraits::isReturnBlock(curBlk)) {
385 retBlks.push_back(curBlk);
387 assert(curBlk->succ_size() <= 2);
390 if (retBlks.size() >= 2) {
391 addDummyExitBlock(retBlks);
396 } //CFGStructurizer::prepare
398 template<class PassT>
399 bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
400 const AMDGPURegisterInfo * tri) {
405 //Assume reducible CFG...
407 dbgs() << "AMDGPUCFGStructurizer::run\n";
411 domTree = CFGTraits::getDominatorTree(pass);
413 domTree->print(dbgs(), (const llvm::Module*)0);
416 postDomTree = CFGTraits::getPostDominatorTree(pass);
418 postDomTree->print(dbgs());
421 loopInfo = CFGTraits::getLoopInfo(pass);
423 dbgs() << "LoopInfo:\n";
424 PrintLoopinfo(*loopInfo, dbgs());
429 //Use the worse block ordering to test the algorithm.
430 ReverseVector(orderedBlks);
434 dbgs() << "Ordered blocks:\n";
435 printOrderedBlocks(dbgs());
440 bool makeProgress = false;
441 int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
447 dbgs() << "numIter = " << numIter
448 << ", numRemaintedBlk = " << numRemainedBlk << "\n";
451 typename SmallVectorImpl<BlockT *>::const_iterator
452 iterBlk = orderedBlks.begin();
453 typename SmallVectorImpl<BlockT *>::const_iterator
454 iterBlkEnd = orderedBlks.end();
456 typename SmallVectorImpl<BlockT *>::const_iterator
457 sccBeginIter = iterBlk;
458 BlockT *sccBeginBlk = NULL;
459 int sccNumBlk = 0; // The number of active blocks, init to a
460 // maximum possible number.
461 int sccNumIter; // Number of iteration in this SCC.
463 while (iterBlk != iterBlkEnd) {
466 if (sccBeginBlk == NULL) {
467 sccBeginIter = iterBlk;
468 sccBeginBlk = curBlk;
470 sccNumBlk = numRemainedBlk; // Init to maximum possible number.
472 dbgs() << "start processing SCC" << getSCCNum(sccBeginBlk);
477 if (!isRetiredBlock(curBlk)) {
478 patternMatch(curBlk);
483 bool contNextScc = true;
484 if (iterBlk == iterBlkEnd
485 || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
486 // Just finish one scc.
488 int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
489 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
491 dbgs() << "Can't reduce SCC " << getSCCNum(curBlk)
492 << ", sccNumIter = " << sccNumIter;
493 dbgs() << "doesn't make any progress\n";
496 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
497 sccNumBlk = sccRemainedNumBlk;
498 iterBlk = sccBeginIter;
501 dbgs() << "repeat processing SCC" << getSCCNum(curBlk)
502 << "sccNumIter = " << sccNumIter << "\n";
506 // Finish the current scc.
510 // Continue on next component in the current scc.
517 } //while, "one iteration" over the function.
519 BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
520 if (entryBlk->succ_size() == 0) {
523 dbgs() << "Reduce to one block\n";
526 int newnumRemainedBlk
527 = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
528 // consider cloned blocks ??
529 if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
531 numRemainedBlk = newnumRemainedBlk;
533 makeProgress = false;
535 dbgs() << "No progress\n";
539 } while (!finish && makeProgress);
541 // Misc wrap up to maintain the consistency of the Function representation.
542 CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
544 // Detach retired Block, release memory.
545 for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
546 iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
547 if ((*iterMap).second && (*iterMap).second->isRetired) {
548 assert(((*iterMap).first)->getNumber() != -1);
550 dbgs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
552 (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
554 delete (*iterMap).second;
556 blockInfoMap.clear();
558 // clear loopLandInfoMap
559 for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
560 iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
561 delete (*iterMap).second;
563 loopLandInfoMap.clear();
570 llvm_unreachable("IRREDUCIBL_CF");
574 } //CFGStructurizer::run
576 /// Print the ordered Blocks.
578 template<class PassT>
579 void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
581 for (typename SmallVectorImpl<BlockT *>::const_iterator
582 iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
583 iterBlk != iterBlkEnd;
585 os << "BB" << (*iterBlk)->getNumber();
586 os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
587 if (i != 0 && i % 10 == 0) {
593 } //printOrderedBlocks
595 /// Compute the reversed DFS post order of Blocks
597 template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
600 for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
601 sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
602 std::vector<BlockT *> &sccNext = *sccIter;
603 for (typename std::vector<BlockT *>::const_iterator
604 blockIter = sccNext.begin(), blockEnd = sccNext.end();
605 blockIter != blockEnd; ++blockIter) {
607 orderedBlks.push_back(bb);
608 recordSccnum(bb, sccNum);
612 //walk through all the block in func to check for unreachable
613 for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
614 blockEnd1 = FuncGTraits::nodes_end(funcRep);
615 blockIter1 != blockEnd1; ++blockIter1) {
616 BlockT *bb = &(*blockIter1);
617 sccNum = getSCCNum(bb);
618 if (sccNum == INVALIDSCCNUM) {
619 dbgs() << "unreachable block BB" << bb->getNumber() << "\n";
624 template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
629 dbgs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
632 while ((curMatch = patternMatchGroup(curBlk)) > 0) {
633 numMatch += curMatch;
637 dbgs() << "End patternMatch BB" << curBlk->getNumber()
638 << ", numMatch = " << numMatch << "\n";
644 template<class PassT>
645 int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
647 numMatch += serialPatternMatch(curBlk);
648 numMatch += ifPatternMatch(curBlk);
649 numMatch += loopendPatternMatch(curBlk);
650 numMatch += loopPatternMatch(curBlk);
654 template<class PassT>
655 int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
656 if (curBlk->succ_size() != 1) {
660 BlockT *childBlk = *curBlk->succ_begin();
661 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
665 mergeSerialBlock(curBlk, childBlk);
666 ++numSerialPatternMatch;
668 } //serialPatternMatch
670 template<class PassT>
671 int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
673 if (curBlk->succ_size() != 2) {
677 if (hasBackEdge(curBlk)) {
681 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
682 if (branchInstr == NULL) {
686 assert(CFGTraits::isCondBranch(branchInstr));
688 BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
689 BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
694 if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
695 && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
696 landBlk = *trueBlk->succ_begin();
697 } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
699 } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
702 } else if (falseBlk->succ_size() == 1
703 && *falseBlk->succ_begin() == trueBlk) {
706 } else if (falseBlk->succ_size() == 1
707 && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
708 landBlk = *falseBlk->succ_begin();
709 } else if (trueBlk->succ_size() == 1
710 && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
711 landBlk = *trueBlk->succ_begin();
713 return handleJumpintoIf(curBlk, trueBlk, falseBlk);
716 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
717 // new BB created for landBlk==NULL may introduce new challenge to the
718 // reduction process.
719 if (landBlk != NULL &&
720 ((trueBlk && trueBlk->pred_size() > 1)
721 || (falseBlk && falseBlk->pred_size() > 1))) {
722 cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
725 if (trueBlk && trueBlk->pred_size() > 1) {
726 trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
730 if (falseBlk && falseBlk->pred_size() > 1) {
731 falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
735 mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
739 numClonedBlock += cloned;
744 template<class PassT>
745 int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
747 } //switchPatternMatch
749 template<class PassT>
750 int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
751 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
752 typename std::vector<LoopT *> nestedLoops;
754 nestedLoops.push_back(loopRep);
755 loopRep = loopRep->getParentLoop();
758 if (nestedLoops.size() == 0) {
762 // Process nested loop outside->inside, so "continue" to a outside loop won't
763 // be mistaken as "break" of the current loop.
765 for (typename std::vector<LoopT *>::reverse_iterator
766 iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
767 iter != iterEnd; ++iter) {
770 if (getLoopLandBlock(loopRep) != NULL) {
774 BlockT *loopHeader = loopRep->getHeader();
776 int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
778 if (numBreak == -1) {
782 int numCont = loopcontPatternMatch(loopRep, loopHeader);
783 num += numBreak + numCont;
787 } //loopendPatternMatch
789 template<class PassT>
790 int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
791 if (curBlk->succ_size() != 0) {
796 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
797 while (loopRep && loopRep->getHeader() == curBlk) {
798 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
800 BlockT *landBlk = loopLand->landBlk;
802 if (!isRetiredBlock(landBlk)) {
803 mergeLooplandBlock(curBlk, loopLand);
807 loopRep = loopRep->getParentLoop();
810 numLoopPatternMatch += numLoop;
815 template<class PassT>
816 int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
817 BlockT *loopHeader) {
818 BlockTSmallerVector exitingBlks;
819 loopRep->getExitingBlocks(exitingBlks);
822 dbgs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
825 if (exitingBlks.size() == 0) {
826 setLoopLandBlock(loopRep);
830 // Compute the corresponding exitBlks and exit block set.
831 BlockTSmallerVector exitBlks;
832 std::set<BlockT *> exitBlkSet;
833 for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
834 iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
835 BlockT *exitingBlk = *iter;
836 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
837 exitBlks.push_back(exitBlk);
838 exitBlkSet.insert(exitBlk); //non-duplicate insert
841 assert(exitBlkSet.size() > 0);
842 assert(exitBlks.size() == exitingBlks.size());
845 dbgs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
849 BlockT *exitLandBlk = NULL;
853 if (exitBlkSet.size() == 1) {
854 exitLandBlk = *exitBlkSet.begin();
856 exitLandBlk = findNearestCommonPostDom(exitBlkSet);
858 if (exitLandBlk == NULL) {
862 bool allInPath = true;
863 bool allNotInPath = true;
864 for (typename std::set<BlockT*>::const_iterator
865 iter = exitBlkSet.begin(),
866 iterEnd = exitBlkSet.end();
867 iter != iterEnd; ++iter) {
868 BlockT *exitBlk = *iter;
870 PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
872 dbgs() << "BB" << exitBlk->getNumber()
873 << " to BB" << exitLandBlk->getNumber() << " PathToKind="
877 allInPath = allInPath && (pathKind == SinglePath_InPath);
878 allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
880 if (!allInPath && !allNotInPath) {
882 dbgs() << "singlePath check fail\n";
886 } // check all exit blocks
890 // TODO: Simplify, maybe separate function?
891 LoopT *parentLoopRep = loopRep->getParentLoop();
892 BlockT *parentLoopHeader = NULL;
894 parentLoopHeader = parentLoopRep->getHeader();
896 if (exitLandBlk == parentLoopHeader &&
897 (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
900 exitLandBlk)) != NULL) {
902 dbgs() << "relocateLoopcontBlock success\n";
904 } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
906 exitBlks)) != NULL) {
908 dbgs() << "insertEndbranchBlock success\n";
912 dbgs() << "loop exit fail\n";
918 // Handle side entry to exit path.
921 for (typename BlockTSmallerVector::iterator iterExiting =
923 iterExitingEnd = exitingBlks.end();
924 iterExiting != iterExitingEnd; ++iterExiting) {
925 BlockT *exitingBlk = *iterExiting;
926 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
927 BlockT *newExitBlk = exitBlk;
929 if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
930 newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
934 numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
936 exitBlks.push_back(newExitBlk);
937 exitBlkSet.insert(newExitBlk);
940 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
941 iterExitEnd = exitBlks.end();
942 iterExit != iterExitEnd; ++iterExit) {
943 BlockT *exitBlk = *iterExit;
944 numSerial += serialPatternMatch(exitBlk);
947 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
948 iterExitEnd = exitBlks.end();
949 iterExit != iterExitEnd; ++iterExit) {
950 BlockT *exitBlk = *iterExit;
951 if (exitBlk->pred_size() > 1) {
952 if (exitBlk != exitLandBlk) {
956 if (exitBlk != exitLandBlk &&
957 (exitBlk->succ_size() != 1 ||
958 *exitBlk->succ_begin() != exitLandBlk)) {
965 exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
967 // Fold break into the breaking block. Leverage across level breaks.
968 assert(exitingBlks.size() == exitBlks.size());
969 for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
970 iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
971 iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
972 BlockT *exitBlk = *iterExit;
973 BlockT *exitingBlk = *iterExiting;
974 assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
975 LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
976 handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
979 int numBreak = static_cast<int>(exitingBlks.size());
980 numLoopbreakPatternMatch += numBreak;
981 numClonedBlock += numCloned;
982 return numBreak + numSerial + numCloned;
983 } //loopbreakPatternMatch
985 template<class PassT>
986 int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
987 BlockT *loopHeader) {
989 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
990 for (typename InvBlockGTraits::ChildIteratorType iter =
991 InvBlockGTraits::child_begin(loopHeader),
992 iterEnd = InvBlockGTraits::child_end(loopHeader);
993 iter != iterEnd; ++iter) {
994 BlockT *curBlk = *iter;
995 if (loopRep->contains(curBlk)) {
996 handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
997 loopHeader, loopRep);
998 contBlk.push_back(curBlk);
1003 for (typename SmallVectorImpl<BlockT *>::iterator
1004 iter = contBlk.begin(), iterEnd = contBlk.end();
1005 iter != iterEnd; ++iter) {
1006 (*iter)->removeSuccessor(loopHeader);
1009 numLoopcontPatternMatch += numCont;
1012 } //loopcontPatternMatch
1015 template<class PassT>
1016 bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
1018 // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1019 // same loop with LoopLandInfo without explicitly keeping track of
1020 // loopContBlks and loopBreakBlks, this is a method to get the information.
1022 if (src1Blk->succ_size() == 0) {
1023 LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
1024 if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
1025 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
1026 if (theEntry != NULL) {
1028 dbgs() << "isLoopContBreakBlock yes src1 = BB"
1029 << src1Blk->getNumber()
1030 << " src2 = BB" << src2Blk->getNumber() << "\n";
1037 } //isSameloopDetachedContbreak
1039 template<class PassT>
1040 int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
1043 int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
1046 dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1048 num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
1053 template<class PassT>
1054 int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
1060 //trueBlk could be the common post dominator
1064 dbgs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
1065 << " true = BB" << trueBlk->getNumber()
1066 << ", numSucc=" << trueBlk->succ_size()
1067 << " false = BB" << falseBlk->getNumber() << "\n";
1072 dbgs() << "check down = BB" << downBlk->getNumber();
1075 if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
1077 dbgs() << " working\n";
1080 num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
1081 num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
1083 numClonedBlock += num;
1084 num += serialPatternMatch(*headBlk->succ_begin());
1085 num += serialPatternMatch(*(++headBlk->succ_begin()));
1086 num += ifPatternMatch(headBlk);
1092 dbgs() << " not working\n";
1094 downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
1095 } // walk down the postDomTree
1098 } //handleJumpintoIf
1100 template<class PassT>
1101 void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
1106 dbgs() << "head = BB" << headBlk->getNumber()
1107 << " size = " << headBlk->size();
1110 headBlk->print(dbgs());
1115 dbgs() << ", true = BB" << trueBlk->getNumber() << " size = "
1116 << trueBlk->size() << " numPred = " << trueBlk->pred_size();
1119 trueBlk->print(dbgs());
1124 dbgs() << ", false = BB" << falseBlk->getNumber() << " size = "
1125 << falseBlk->size() << " numPred = " << falseBlk->pred_size();
1128 falseBlk->print(dbgs());
1133 dbgs() << ", land = BB" << landBlk->getNumber() << " size = "
1134 << landBlk->size() << " numPred = " << landBlk->pred_size();
1137 landBlk->print(dbgs());
1143 } //showImproveSimpleJumpintoIf
1145 template<class PassT>
1146 int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
1149 BlockT **plandBlk) {
1150 bool migrateTrue = false;
1151 bool migrateFalse = false;
1153 BlockT *landBlk = *plandBlk;
1155 assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
1156 && (falseBlk == NULL || falseBlk->succ_size() <= 1));
1158 if (trueBlk == falseBlk) {
1162 migrateTrue = needMigrateBlock(trueBlk);
1163 migrateFalse = needMigrateBlock(falseBlk);
1165 if (!migrateTrue && !migrateFalse) {
1169 // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1170 // have more than one predecessors. without doing this, its predecessor
1171 // rather than headBlk will have undefined value in initReg.
1172 if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
1175 if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
1176 migrateFalse = true;
1180 dbgs() << "before improveSimpleJumpintoIf: ";
1181 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1184 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1186 // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1187 // {initReg = 0; org falseBlk branch }
1188 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1190 // if landBlk->pred_size() > 2, put the about if-else inside
1191 // if (initReg !=2) {...}
1193 // add initReg = initVal to headBlk
1195 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1197 funcRep->getRegInfo().createVirtualRegister(I32RC);
1198 if (!migrateTrue || !migrateFalse) {
1199 int initVal = migrateTrue ? 0 : 1;
1200 CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
1205 if (landBlk == NULL) {
1206 landBlk = funcRep->CreateMachineBasicBlock();
1207 funcRep->push_back(landBlk); //insert to function
1210 trueBlk->addSuccessor(landBlk);
1212 headBlk->addSuccessor(landBlk);
1216 falseBlk->addSuccessor(landBlk);
1218 headBlk->addSuccessor(landBlk);
1224 bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
1226 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1227 typename BlockT::iterator insertPos =
1228 CFGTraits::getInstrPos
1229 (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
1231 if (landBlkHasOtherPred) {
1233 funcRep->getRegInfo().createVirtualRegister(I32RC);
1234 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
1235 unsigned cmpResReg =
1236 funcRep->getRegInfo().createVirtualRegister(I32RC);
1238 CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
1240 CFGTraits::insertCondBranchBefore(landBlk, insertPos,
1241 AMDGPU::IF_PREDICATE_SET, passRep,
1242 cmpResReg, DebugLoc());
1245 CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
1246 passRep, initReg, DebugLoc());
1249 migrateInstruction(trueBlk, landBlk, insertPos);
1250 // need to uncondionally insert the assignment to ensure a path from its
1251 // predecessor rather than headBlk has valid value in initReg if
1253 CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
1255 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
1258 migrateInstruction(falseBlk, landBlk, insertPos);
1259 // need to uncondionally insert the assignment to ensure a path from its
1260 // predecessor rather than headBlk has valid value in initReg if
1262 CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
1265 if (landBlkHasOtherPred) {
1267 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1269 // put initReg = 2 to other predecessors of landBlk
1270 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
1271 predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
1273 BlockT *curBlk = *predIter;
1274 if (curBlk != trueBlk && curBlk != falseBlk) {
1275 CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
1280 dbgs() << "result from improveSimpleJumpintoIf: ";
1281 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1285 *plandBlk = landBlk;
1288 } //improveSimpleJumpintoIf
1290 template<class PassT>
1291 void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
1297 dbgs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
1298 << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
1300 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1302 RegiT initReg = INVALIDREGNUM;
1303 if (exitingLoop != exitLoop) {
1304 initReg = static_cast<int>
1305 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1306 assert(initReg != INVALIDREGNUM);
1307 addLoopBreakInitReg(exitLoop, initReg);
1308 while (exitingLoop != exitLoop && exitingLoop) {
1309 addLoopBreakOnReg(exitingLoop, initReg);
1310 exitingLoop = exitingLoop->getParentLoop();
1312 assert(exitingLoop == exitLoop);
1315 mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
1319 template<class PassT>
1320 void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
1325 dbgs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
1326 << " header = BB" << contBlk->getNumber() << "\n";
1328 dbgs() << "Trying to continue loop-depth = "
1329 << getLoopDepth(contLoop)
1330 << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
1333 RegiT initReg = INVALIDREGNUM;
1334 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1335 if (contingLoop != contLoop) {
1336 initReg = static_cast<int>
1337 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1338 assert(initReg != INVALIDREGNUM);
1339 addLoopContInitReg(contLoop, initReg);
1340 while (contingLoop && contingLoop->getParentLoop() != contLoop) {
1341 addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
1342 contingLoop = contingLoop->getParentLoop();
1344 assert(contingLoop && contingLoop->getParentLoop() == contLoop);
1345 addLoopContOnReg(contingLoop, initReg);
1348 settleLoopcontBlock(contingBlk, contBlk, initReg);
1349 } //handleLoopcontBlock
1351 template<class PassT>
1352 void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
1354 dbgs() << "serialPattern BB" << dstBlk->getNumber()
1355 << " <= BB" << srcBlk->getNumber() << "\n";
1357 dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
1359 dstBlk->removeSuccessor(srcBlk);
1360 CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
1362 removeSuccessor(srcBlk);
1363 retireBlock(dstBlk, srcBlk);
1364 } //mergeSerialBlock
1366 template<class PassT>
1367 void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
1373 dbgs() << "ifPattern BB" << curBlk->getNumber();
1376 dbgs() << "BB" << trueBlk->getNumber();
1378 dbgs() << " } else ";
1381 dbgs() << "BB" << falseBlk->getNumber();
1384 dbgs() << "landBlock: ";
1385 if (landBlk == NULL) {
1388 dbgs() << "BB" << landBlk->getNumber();
1393 int oldOpcode = branchInstr->getOpcode();
1394 DebugLoc branchDL = branchInstr->getDebugLoc();
1404 typename BlockT::iterator branchInstrPos =
1405 CFGTraits::getInstrPos(curBlk, branchInstr);
1406 CFGTraits::insertCondBranchBefore(branchInstrPos,
1407 CFGTraits::getBranchNzeroOpcode(oldOpcode),
1412 curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
1413 curBlk->removeSuccessor(trueBlk);
1414 if (landBlk && trueBlk->succ_size()!=0) {
1415 trueBlk->removeSuccessor(landBlk);
1417 retireBlock(curBlk, trueBlk);
1419 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
1422 curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
1424 curBlk->removeSuccessor(falseBlk);
1425 if (landBlk && falseBlk->succ_size() != 0) {
1426 falseBlk->removeSuccessor(landBlk);
1428 retireBlock(curBlk, falseBlk);
1430 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1432 branchInstr->eraseFromParent();
1434 if (landBlk && trueBlk && falseBlk) {
1435 curBlk->addSuccessor(landBlk);
1438 } //mergeIfthenelseBlock
1440 template<class PassT>
1441 void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
1442 LoopLandInfo *loopLand) {
1443 BlockT *landBlk = loopLand->landBlk;
1446 dbgs() << "loopPattern header = BB" << dstBlk->getNumber()
1447 << " land = BB" << landBlk->getNumber() << "\n";
1450 // Loop contInitRegs are init at the beginning of the loop.
1451 for (typename std::set<RegiT>::const_iterator iter =
1452 loopLand->contInitRegs.begin(),
1453 iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
1454 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1457 /* we last inserterd the DebugLoc in the
1458 * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
1459 * search for the DebugLoc in the that statement.
1460 * if not found, we have to insert the empty/default DebugLoc */
1461 InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
1462 DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
1464 CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
1465 // Loop breakInitRegs are init before entering the loop.
1466 for (typename std::set<RegiT>::const_iterator iter =
1467 loopLand->breakInitRegs.begin(),
1468 iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
1469 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1471 // Loop endbranchInitRegs are init before entering the loop.
1472 for (typename std::set<RegiT>::const_iterator iter =
1473 loopLand->endbranchInitRegs.begin(),
1474 iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
1475 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1478 /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
1479 * search for the DebugLoc in the continue statement.
1480 * if not found, we have to insert the empty/default DebugLoc */
1481 InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
1482 DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
1484 CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
1485 // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
1487 for (typename std::set<RegiT>::const_iterator iter =
1488 loopLand->breakOnRegs.begin(),
1489 iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
1490 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
1494 // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
1496 for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
1497 iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
1498 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
1502 dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
1504 for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
1505 iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
1506 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
1509 removeSuccessor(landBlk);
1510 retireBlock(dstBlk, landBlk);
1511 } //mergeLooplandBlock
1513 template<class PassT>
1514 void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
1516 if (I->getOpcode() == AMDGPU::PRED_X) {
1517 switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
1518 case OPCODE_IS_ZERO_INT:
1519 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
1521 case OPCODE_IS_NOT_ZERO_INT:
1522 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
1524 case OPCODE_IS_ZERO:
1525 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
1527 case OPCODE_IS_NOT_ZERO:
1528 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
1531 llvm_unreachable("PRED_X Opcode invalid!");
1537 template<class PassT>
1538 void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
1540 BlockT *exitLandBlk,
1543 dbgs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
1544 << " exit = BB" << exitBlk->getNumber()
1545 << " land = BB" << exitLandBlk->getNumber() << "\n";
1548 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
1549 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
1551 DebugLoc DL = branchInstr->getDebugLoc();
1553 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1555 // transform exitingBlk to
1557 // exitBlk (if exitBlk != exitLandBlk)
1561 // successor = {orgSuccessor(exitingBlk) - exitBlk}
1563 typename BlockT::iterator branchInstrPos =
1564 CFGTraits::getInstrPos(exitingBlk, branchInstr);
1566 if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
1569 if (trueBranch != exitBlk) {
1570 reversePredicateSetter(branchInstrPos);
1572 CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
1574 if (trueBranch != exitBlk) {
1575 reversePredicateSetter(branchInstr);
1577 CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
1578 if (exitBlk != exitLandBlk) {
1579 //splice is insert-before ...
1580 exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
1583 if (setReg != INVALIDREGNUM) {
1584 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1586 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
1589 //now branchInst can be erase safely
1590 branchInstr->eraseFromParent();
1592 //now take care of successors, retire blocks
1593 exitingBlk->removeSuccessor(exitBlk);
1594 if (exitBlk != exitLandBlk) {
1595 //splice is insert-before ...
1596 exitBlk->removeSuccessor(exitLandBlk);
1597 retireBlock(exitingBlk, exitBlk);
1600 } //mergeLoopbreakBlock
1602 template<class PassT>
1603 void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
1607 dbgs() << "settleLoopcontBlock conting = BB"
1608 << contingBlk->getNumber()
1609 << ", cont = BB" << contBlk->getNumber() << "\n";
1612 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
1614 assert(CFGTraits::isCondBranch(branchInstr));
1615 typename BlockT::iterator branchInstrPos =
1616 CFGTraits::getInstrPos(contingBlk, branchInstr);
1617 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1618 int oldOpcode = branchInstr->getOpcode();
1619 DebugLoc DL = branchInstr->getDebugLoc();
1621 // transform contingBlk to
1623 // move instr after branchInstr
1629 // successor = {orgSuccessor(contingBlk) - loopHeader}
1631 bool useContinueLogical =
1632 (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
1634 if (useContinueLogical == false) {
1636 trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
1637 : CFGTraits::getBranchZeroOpcode(oldOpcode);
1639 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1641 if (setReg != INVALIDREGNUM) {
1642 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1643 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1644 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
1646 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1647 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
1650 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
1653 trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
1654 : CFGTraits::getContinueZeroOpcode(oldOpcode);
1656 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1659 branchInstr->eraseFromParent();
1661 // if we've arrived here then we've already erased the branch instruction
1662 // travel back up the basic block to see the last reference of our debug location
1663 // we've just inserted that reference here so it should be representative
1664 if (setReg != INVALIDREGNUM) {
1665 CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
1666 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1667 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1669 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1670 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1674 } //settleLoopcontBlock
1676 // BBs in exitBlkSet are determined as in break-path for loopRep,
1677 // before we can put code for BBs as inside loop-body for loopRep
1678 // check whether those BBs are determined as cont-BB for parentLoopRep
1680 // If so, generate a new BB newBlk
1681 // (1) set newBlk common successor of BBs in exitBlkSet
1682 // (2) change the continue-instr in BBs in exitBlkSet to break-instr
1683 // (3) generate continue-instr in newBlk
1685 template<class PassT>
1686 typename CFGStructurizer<PassT>::BlockT *
1687 CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
1689 std::set<BlockT *> &exitBlkSet,
1690 BlockT *exitLandBlk) {
1691 std::set<BlockT *> endBlkSet;
1695 for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
1696 iterEnd = exitBlkSet.end();
1697 iter != iterEnd; ++iter) {
1698 BlockT *exitBlk = *iter;
1699 BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
1701 if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
1704 endBlkSet.insert(endBlk);
1707 BlockT *newBlk = funcRep->CreateMachineBasicBlock();
1708 funcRep->push_back(newBlk); //insert to function
1709 CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
1710 SHOWNEWBLK(newBlk, "New continue block: ");
1712 for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
1713 iterEnd = endBlkSet.end();
1714 iter != iterEnd; ++iter) {
1715 BlockT *endBlk = *iter;
1716 InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
1718 contInstr->eraseFromParent();
1720 endBlk->addSuccessor(newBlk);
1722 dbgs() << "Add new continue Block to BB"
1723 << endBlk->getNumber() << " successors\n";
1728 } //relocateLoopcontBlock
1731 // LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
1732 // LoopLandBlock. This BB branch on the loop endBranchInit register to the
1733 // pathes corresponding to the loop exiting branches.
1735 template<class PassT>
1736 typename CFGStructurizer<PassT>::BlockT *
1737 CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
1738 BlockTSmallerVector &exitingBlks,
1739 BlockTSmallerVector &exitBlks) {
1740 const AMDGPUInstrInfo *tii =
1741 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
1742 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1744 RegiT endBranchReg = static_cast<int>
1745 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1746 assert(endBranchReg >= 0);
1748 // reg = 0 before entering the loop
1749 addLoopEndbranchInitReg(loopRep, endBranchReg);
1751 uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
1752 assert(numBlks >=2 && numBlks == exitBlks.size());
1754 BlockT *preExitingBlk = exitingBlks[0];
1755 BlockT *preExitBlk = exitBlks[0];
1756 BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
1757 funcRep->push_back(preBranchBlk); //insert to function
1758 SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
1760 BlockT *newLandBlk = preBranchBlk;
1762 CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
1764 preExitingBlk->removeSuccessor(preExitBlk);
1765 preExitingBlk->addSuccessor(newLandBlk);
1767 //it is redundant to add reg = 0 to exitingBlks[0]
1769 // For 1..n th exiting path (the last iteration handles two pathes) create the
1770 // branch to the previous path and the current path.
1771 for (uint32_t i = 1; i < numBlks; ++i) {
1772 BlockT *curExitingBlk = exitingBlks[i];
1773 BlockT *curExitBlk = exitBlks[i];
1774 BlockT *curBranchBlk;
1776 if (i == numBlks - 1) {
1777 curBranchBlk = curExitBlk;
1779 curBranchBlk = funcRep->CreateMachineBasicBlock();
1780 funcRep->push_back(curBranchBlk); //insert to function
1781 SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
1784 // Add reg = i to exitingBlks[i].
1785 CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
1788 // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
1789 // (exitingBlks[i], newLandBlk).
1790 CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
1792 curExitingBlk->removeSuccessor(curExitBlk);
1793 curExitingBlk->addSuccessor(newLandBlk);
1795 // add to preBranchBlk the branch instruction:
1796 // if (endBranchReg == preVal)
1801 // preValReg = i - 1
1804 RegiT preValReg = static_cast<int>
1805 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1807 preBranchBlk->insert(preBranchBlk->begin(),
1808 tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
1811 // condResReg = (endBranchReg == preValReg)
1812 RegiT condResReg = static_cast<int>
1813 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1814 BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
1815 .addReg(endBranchReg).addReg(preValReg);
1817 BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
1818 .addMBB(preExitBlk).addReg(condResReg);
1820 preBranchBlk->addSuccessor(preExitBlk);
1821 preBranchBlk->addSuccessor(curBranchBlk);
1823 // Update preExitingBlk, preExitBlk, preBranchBlk.
1824 preExitingBlk = curExitingBlk;
1825 preExitBlk = curExitBlk;
1826 preBranchBlk = curBranchBlk;
1828 } //end for 1 .. n blocks
1831 } //addLoopEndbranchBlock
1833 template<class PassT>
1834 typename CFGStructurizer<PassT>::PathToKind
1835 CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
1836 bool allowSideEntry) {
1839 if (srcBlk == dstBlk) {
1840 return SinglePath_InPath;
1843 while (srcBlk && srcBlk->succ_size() == 1) {
1844 srcBlk = *srcBlk->succ_begin();
1845 if (srcBlk == dstBlk) {
1846 return SinglePath_InPath;
1849 if (!allowSideEntry && srcBlk->pred_size() > 1) {
1850 return Not_SinglePath;
1854 if (srcBlk && srcBlk->succ_size()==0) {
1855 return SinglePath_NotInPath;
1858 return Not_SinglePath;
1861 // If there is a single path from srcBlk to dstBlk, return the last block before
1862 // dstBlk If there is a single path from srcBlk->end without dstBlk, return the
1863 // last block in the path Otherwise, return NULL
1864 template<class PassT>
1865 typename CFGStructurizer<PassT>::BlockT *
1866 CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
1867 bool allowSideEntry) {
1870 if (srcBlk == dstBlk) {
1874 if (srcBlk->succ_size() == 0) {
1878 while (srcBlk && srcBlk->succ_size() == 1) {
1879 BlockT *preBlk = srcBlk;
1881 srcBlk = *srcBlk->succ_begin();
1882 if (srcBlk == NULL) {
1886 if (!allowSideEntry && srcBlk->pred_size() > 1) {
1891 if (srcBlk && srcBlk->succ_size()==0) {
1899 template<class PassT>
1900 int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
1903 assert(preBlk->isSuccessor(srcBlk));
1904 while (srcBlk && srcBlk != dstBlk) {
1905 assert(srcBlk->succ_size() == 1);
1906 if (srcBlk->pred_size() > 1) {
1907 srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
1912 srcBlk = *srcBlk->succ_begin();
1916 } //cloneOnSideEntryTo
1918 template<class PassT>
1919 typename CFGStructurizer<PassT>::BlockT *
1920 CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
1922 assert(predBlk->isSuccessor(curBlk) &&
1923 "succBlk is not a prececessor of curBlk");
1925 BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
1926 CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
1927 //srcBlk, oldBlk, newBlk
1929 predBlk->removeSuccessor(curBlk);
1930 predBlk->addSuccessor(cloneBlk);
1932 // add all successor to cloneBlk
1933 CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
1935 numClonedInstr += curBlk->size();
1938 dbgs() << "Cloned block: " << "BB"
1939 << curBlk->getNumber() << "size " << curBlk->size() << "\n";
1942 SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
1945 } //cloneBlockForPredecessor
1947 template<class PassT>
1948 typename CFGStructurizer<PassT>::BlockT *
1949 CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
1950 BlockT *exitingBlk) {
1951 BlockT *exitBlk = NULL;
1953 for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
1954 iterSuccEnd = exitingBlk->succ_end();
1955 iterSucc != iterSuccEnd; ++iterSucc) {
1956 BlockT *curBlk = *iterSucc;
1957 if (!loopRep->contains(curBlk)) {
1958 assert(exitBlk == NULL);
1963 assert(exitBlk != NULL);
1966 } //exitingBlock2ExitBlock
1968 template<class PassT>
1969 void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
1971 InstrIterator insertPos) {
1972 InstrIterator spliceEnd;
1973 //look for the input branchinstr, not the AMDGPU branchinstr
1974 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
1975 if (branchInstr == NULL) {
1977 dbgs() << "migrateInstruction don't see branch instr\n" ;
1979 spliceEnd = srcBlk->end();
1982 dbgs() << "migrateInstruction see branch instr\n" ;
1983 branchInstr->dump();
1985 spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
1988 dbgs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
1989 << "srcSize = " << srcBlk->size() << "\n";
1992 //splice insert before insertPos
1993 dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
1996 dbgs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
1997 << "srcSize = " << srcBlk->size() << "\n";
1999 } //migrateInstruction
2001 // normalizeInfiniteLoopExit change
2003 // uncond_br LoopHeader
2007 // cond_br 1 LoopHeader dummyExit
2008 // and return the newly added dummy exit block
2010 template<class PassT>
2011 typename CFGStructurizer<PassT>::BlockT *
2012 CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
2015 loopHeader = LoopRep->getHeader();
2016 loopLatch = LoopRep->getLoopLatch();
2017 BlockT *dummyExitBlk = NULL;
2018 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
2019 if (loopHeader!=NULL && loopLatch!=NULL) {
2020 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
2021 if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
2022 dummyExitBlk = funcRep->CreateMachineBasicBlock();
2023 funcRep->push_back(dummyExitBlk); //insert to function
2024 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
2026 DEBUG(dbgs() << "Old branch instr: " << *branchInstr << "\n";);
2028 typename BlockT::iterator insertPos =
2029 CFGTraits::getInstrPos(loopLatch, branchInstr);
2031 funcRep->getRegInfo().createVirtualRegister(I32RC);
2032 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
2034 CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
2035 MachineInstrBuilder MIB(*funcRep, newInstr);
2036 MIB.addMBB(loopHeader);
2037 MIB.addReg(immReg, false);
2039 SHOWNEWINSTR(newInstr);
2041 branchInstr->eraseFromParent();
2042 loopLatch->addSuccessor(dummyExitBlk);
2046 return dummyExitBlk;
2047 } //normalizeInfiniteLoopExit
2049 template<class PassT>
2050 void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
2051 InstrT *branchInstr;
2053 // I saw two unconditional branch in one basic block in example
2054 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
2055 while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
2056 && CFGTraits::isUncondBranch(branchInstr)) {
2058 dbgs() << "Removing unconditional branch instruction" ;
2059 branchInstr->dump();
2061 branchInstr->eraseFromParent();
2063 } //removeUnconditionalBranch
2065 template<class PassT>
2066 void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
2067 if (srcBlk->succ_size() == 2) {
2068 BlockT *blk1 = *srcBlk->succ_begin();
2069 BlockT *blk2 = *(++srcBlk->succ_begin());
2072 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2073 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
2075 dbgs() << "Removing unneeded conditional branch instruction" ;
2076 branchInstr->dump();
2078 branchInstr->eraseFromParent();
2079 SHOWNEWBLK(blk1, "Removing redundant successor");
2080 srcBlk->removeSuccessor(blk1);
2083 } //removeRedundantConditionalBranch
2085 template<class PassT>
2086 void CFGStructurizer<PassT>::addDummyExitBlock(SmallVectorImpl<BlockT *>
2088 BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
2089 funcRep->push_back(dummyExitBlk); //insert to function
2090 CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
2092 for (typename SmallVectorImpl<BlockT *>::iterator iter =
2094 iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
2095 BlockT *curBlk = *iter;
2096 InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
2098 curInstr->eraseFromParent();
2100 curBlk->addSuccessor(dummyExitBlk);
2102 dbgs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
2107 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
2108 } //addDummyExitBlock
2110 template<class PassT>
2111 void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
2112 while (srcBlk->succ_size()) {
2113 srcBlk->removeSuccessor(*srcBlk->succ_begin());
2117 template<class PassT>
2118 void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
2119 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2121 if (srcBlkInfo == NULL) {
2122 srcBlkInfo = new BlockInfo();
2125 srcBlkInfo->sccNum = sccNum;
2128 template<class PassT>
2129 int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
2130 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2131 return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
2134 template<class PassT>
2135 void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
2137 dbgs() << "Retiring BB" << srcBlk->getNumber() << "\n";
2140 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2142 if (srcBlkInfo == NULL) {
2143 srcBlkInfo = new BlockInfo();
2146 srcBlkInfo->isRetired = true;
2147 assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
2148 && "can't retire block yet");
2151 template<class PassT>
2152 bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
2153 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2154 return (srcBlkInfo && srcBlkInfo->isRetired);
2157 template<class PassT>
2158 bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
2159 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2160 while (loopRep && loopRep->getHeader() == curBlk) {
2161 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
2163 if(loopLand == NULL)
2166 BlockT *landBlk = loopLand->landBlk;
2168 if (!isRetiredBlock(landBlk)) {
2172 loopRep = loopRep->getParentLoop();
2176 } //isActiveLoophead
2178 template<class PassT>
2179 bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
2180 const unsigned blockSizeThreshold = 30;
2181 const unsigned cloneInstrThreshold = 100;
2183 bool multiplePreds = blk && (blk->pred_size() > 1);
2188 unsigned blkSize = blk->size();
2189 return ((blkSize > blockSizeThreshold)
2190 && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
2191 } //needMigrateBlock
2193 template<class PassT>
2194 typename CFGStructurizer<PassT>::BlockT *
2195 CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
2196 BlockTSmallerVector &exitBlks,
2197 std::set<BlockT *> &exitBlkSet) {
2198 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
2200 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
2201 predIterEnd = landBlk->pred_end();
2202 predIter != predIterEnd; ++predIter) {
2203 BlockT *curBlk = *predIter;
2204 if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
2205 inpathBlks.push_back(curBlk);
2209 //if landBlk has predecessors that are not in the given loop,
2210 //create a new block
2211 BlockT *newLandBlk = landBlk;
2212 if (inpathBlks.size() != landBlk->pred_size()) {
2213 newLandBlk = funcRep->CreateMachineBasicBlock();
2214 funcRep->push_back(newLandBlk); //insert to function
2215 newLandBlk->addSuccessor(landBlk);
2216 for (typename SmallVectorImpl<BlockT *>::iterator iter =
2218 iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
2219 BlockT *curBlk = *iter;
2220 CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
2221 //srcBlk, oldBlk, newBlk
2222 curBlk->removeSuccessor(landBlk);
2223 curBlk->addSuccessor(newLandBlk);
2225 for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
2226 if (exitBlks[i] == landBlk) {
2227 exitBlks[i] = newLandBlk;
2230 SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
2233 setLoopLandBlock(loopRep, newLandBlk);
2236 } // recordLoopbreakLand
2238 template<class PassT>
2239 void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
2240 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2242 if (theEntry == NULL) {
2243 theEntry = new LoopLandInfo();
2245 assert(theEntry->landBlk == NULL);
2248 blk = funcRep->CreateMachineBasicBlock();
2249 funcRep->push_back(blk); //insert to function
2250 SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
2253 theEntry->landBlk = blk;
2256 dbgs() << "setLoopLandBlock loop-header = BB"
2257 << loopRep->getHeader()->getNumber()
2258 << " landing-block = BB" << blk->getNumber() << "\n";
2260 } // setLoopLandBlock
2262 template<class PassT>
2263 void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
2264 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2266 if (theEntry == NULL) {
2267 theEntry = new LoopLandInfo();
2270 theEntry->breakOnRegs.insert(regNum);
2273 dbgs() << "addLoopBreakOnReg loop-header = BB"
2274 << loopRep->getHeader()->getNumber()
2275 << " regNum = " << regNum << "\n";
2277 } // addLoopBreakOnReg
2279 template<class PassT>
2280 void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
2281 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2283 if (theEntry == NULL) {
2284 theEntry = new LoopLandInfo();
2286 theEntry->contOnRegs.insert(regNum);
2289 dbgs() << "addLoopContOnReg loop-header = BB"
2290 << loopRep->getHeader()->getNumber()
2291 << " regNum = " << regNum << "\n";
2293 } // addLoopContOnReg
2295 template<class PassT>
2296 void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
2297 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2299 if (theEntry == NULL) {
2300 theEntry = new LoopLandInfo();
2302 theEntry->breakInitRegs.insert(regNum);
2305 dbgs() << "addLoopBreakInitReg loop-header = BB"
2306 << loopRep->getHeader()->getNumber()
2307 << " regNum = " << regNum << "\n";
2309 } // addLoopBreakInitReg
2311 template<class PassT>
2312 void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
2313 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2315 if (theEntry == NULL) {
2316 theEntry = new LoopLandInfo();
2318 theEntry->contInitRegs.insert(regNum);
2321 dbgs() << "addLoopContInitReg loop-header = BB"
2322 << loopRep->getHeader()->getNumber()
2323 << " regNum = " << regNum << "\n";
2325 } // addLoopContInitReg
2327 template<class PassT>
2328 void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
2330 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2332 if (theEntry == NULL) {
2333 theEntry = new LoopLandInfo();
2335 theEntry->endbranchInitRegs.insert(regNum);
2338 dbgs() << "addLoopEndbranchInitReg loop-header = BB"
2339 << loopRep->getHeader()->getNumber()
2340 << " regNum = " << regNum << "\n";
2342 } // addLoopEndbranchInitReg
2344 template<class PassT>
2345 typename CFGStructurizer<PassT>::LoopLandInfo *
2346 CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
2347 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2350 } // getLoopLandInfo
2352 template<class PassT>
2353 typename CFGStructurizer<PassT>::BlockT *
2354 CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
2355 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2357 return theEntry ? theEntry->landBlk : NULL;
2358 } // getLoopLandBlock
2361 template<class PassT>
2362 bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
2363 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2364 if (loopRep == NULL)
2367 BlockT *loopHeader = loopRep->getHeader();
2369 return curBlk->isSuccessor(loopHeader);
2373 template<class PassT>
2374 unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
2375 return loopRep ? loopRep->getLoopDepth() : 0;
2378 template<class PassT>
2379 int CFGStructurizer<PassT>::countActiveBlock
2380 (typename SmallVectorImpl<BlockT *>::const_iterator iterStart,
2381 typename SmallVectorImpl<BlockT *>::const_iterator iterEnd) {
2383 while (iterStart != iterEnd) {
2384 if (!isRetiredBlock(*iterStart)) {
2391 } //countActiveBlock
2393 // This is work around solution for findNearestCommonDominator not avaiable to
2394 // post dom a proper fix should go to Dominators.h.
2396 template<class PassT>
2397 typename CFGStructurizer<PassT>::BlockT*
2398 CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
2400 if (postDomTree->dominates(blk1, blk2)) {
2403 if (postDomTree->dominates(blk2, blk1)) {
2407 DomTreeNodeT *node1 = postDomTree->getNode(blk1);
2408 DomTreeNodeT *node2 = postDomTree->getNode(blk2);
2410 // Handle newly cloned node.
2411 if (node1 == NULL && blk1->succ_size() == 1) {
2412 return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
2414 if (node2 == NULL && blk2->succ_size() == 1) {
2415 return findNearestCommonPostDom(blk1, *blk2->succ_begin());
2418 if (node1 == NULL || node2 == NULL) {
2422 node1 = node1->getIDom();
2424 if (postDomTree->dominates(node1, node2)) {
2425 return node1->getBlock();
2427 node1 = node1->getIDom();
2433 template<class PassT>
2434 typename CFGStructurizer<PassT>::BlockT *
2435 CFGStructurizer<PassT>::findNearestCommonPostDom
2436 (typename std::set<BlockT *> &blks) {
2438 typename std::set<BlockT *>::const_iterator iter = blks.begin();
2439 typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
2440 for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
2441 BlockT *curBlk = *iter;
2442 if (curBlk != commonDom) {
2443 commonDom = findNearestCommonPostDom(curBlk, commonDom);
2448 dbgs() << "Common post dominator for exit blocks is ";
2450 dbgs() << "BB" << commonDom->getNumber() << "\n";
2457 } //findNearestCommonPostDom
2459 } // end anonymous namespace
2464 //===----------------------------------------------------------------------===//
2466 // CFGStructurizer for AMDGPU
2468 //===----------------------------------------------------------------------===//
2472 class AMDGPUCFGStructurizer : public MachineFunctionPass {
2474 typedef MachineInstr InstructionType;
2475 typedef MachineFunction FunctionType;
2476 typedef MachineBasicBlock BlockType;
2477 typedef MachineLoopInfo LoopinfoType;
2478 typedef MachineDominatorTree DominatortreeType;
2479 typedef MachinePostDominatorTree PostDominatortreeType;
2480 typedef MachineDomTreeNode DomTreeNodeType;
2481 typedef MachineLoop LoopType;
2487 AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
2488 const TargetInstrInfo *getTargetInstrInfo() const;
2489 const AMDGPURegisterInfo *getTargetRegisterInfo() const;
2492 } // end anonymous namespace
2493 AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
2494 : MachineFunctionPass(pid), TM(tm) {
2497 const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
2498 return TM.getInstrInfo();
2501 const AMDGPURegisterInfo *AMDGPUCFGStructurizer::getTargetRegisterInfo() const {
2502 return static_cast<const AMDGPURegisterInfo *>(TM.getRegisterInfo());
2505 //===----------------------------------------------------------------------===//
2509 //===----------------------------------------------------------------------===//
2513 class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
2518 AMDGPUCFGPrepare(TargetMachine &tm);
2520 virtual const char *getPassName() const;
2521 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2523 bool runOnMachineFunction(MachineFunction &F);
2526 char AMDGPUCFGPrepare::ID = 0;
2527 } // end anonymous namespace
2529 AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
2530 : AMDGPUCFGStructurizer(ID, tm ) {
2532 const char *AMDGPUCFGPrepare::getPassName() const {
2533 return "AMD IL Control Flow Graph Preparation Pass";
2536 void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
2537 AU.addPreserved<MachineFunctionAnalysis>();
2538 AU.addRequired<MachineFunctionAnalysis>();
2539 AU.addRequired<MachineDominatorTree>();
2540 AU.addRequired<MachinePostDominatorTree>();
2541 AU.addRequired<MachineLoopInfo>();
2544 //===----------------------------------------------------------------------===//
2548 //===----------------------------------------------------------------------===//
2552 class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
2557 AMDGPUCFGPerform(TargetMachine &tm);
2558 virtual const char *getPassName() const;
2559 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2560 bool runOnMachineFunction(MachineFunction &F);
2563 char AMDGPUCFGPerform::ID = 0;
2564 } // end anonymous namespace
2566 AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
2567 : AMDGPUCFGStructurizer(ID, tm) {
2570 const char *AMDGPUCFGPerform::getPassName() const {
2571 return "AMD IL Control Flow Graph structurizer Pass";
2574 void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
2575 AU.addPreserved<MachineFunctionAnalysis>();
2576 AU.addRequired<MachineFunctionAnalysis>();
2577 AU.addRequired<MachineDominatorTree>();
2578 AU.addRequired<MachinePostDominatorTree>();
2579 AU.addRequired<MachineLoopInfo>();
2582 //===----------------------------------------------------------------------===//
2584 // CFGStructTraits<AMDGPUCFGStructurizer>
2586 //===----------------------------------------------------------------------===//
2589 // this class is tailor to the AMDGPU backend
2591 struct CFGStructTraits<AMDGPUCFGStructurizer> {
2594 static int getBranchNzeroOpcode(int oldOpcode) {
2596 case AMDGPU::JUMP_COND:
2597 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
2598 case AMDGPU::BRANCH_COND_i32:
2599 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
2601 llvm_unreachable("internal error");
2606 static int getBranchZeroOpcode(int oldOpcode) {
2608 case AMDGPU::JUMP_COND:
2609 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
2610 case AMDGPU::BRANCH_COND_i32:
2611 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
2613 llvm_unreachable("internal error");
2618 static int getContinueNzeroOpcode(int oldOpcode) {
2620 case AMDGPU::JUMP_COND:
2621 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
2623 llvm_unreachable("internal error");
2628 static int getContinueZeroOpcode(int oldOpcode) {
2630 case AMDGPU::JUMP_COND:
2631 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
2633 llvm_unreachable("internal error");
2638 static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
2639 return instr->getOperand(0).getMBB();
2642 static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
2643 instr->getOperand(0).setMBB(blk);
2646 static MachineBasicBlock *
2647 getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
2648 assert(blk->succ_size() == 2);
2649 MachineBasicBlock *trueBranch = getTrueBranch(instr);
2650 MachineBasicBlock::succ_iterator iter = blk->succ_begin();
2651 MachineBasicBlock::succ_iterator iterNext = iter;
2654 return (*iter == trueBranch) ? *iterNext : *iter;
2657 static bool isCondBranch(MachineInstr *instr) {
2658 switch (instr->getOpcode()) {
2659 case AMDGPU::JUMP_COND:
2660 case AMDGPU::BRANCH_COND_i32:
2661 case AMDGPU::BRANCH_COND_f32:
2669 static bool isUncondBranch(MachineInstr *instr) {
2670 switch (instr->getOpcode()) {
2672 case AMDGPU::BRANCH:
2680 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
2681 //get DebugLoc from the first MachineBasicBlock instruction with debug info
2683 for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
2684 MachineInstr *instr = &(*iter);
2685 if (instr->getDebugLoc().isUnknown() == false) {
2686 DL = instr->getDebugLoc();
2692 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
2693 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2694 MachineInstr *instr = &*iter;
2695 if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
2701 // The correct naming for this is getPossibleLoopendBlockBranchInstr.
2703 // BB with backward-edge could have move instructions after the branch
2704 // instruction. Such move instruction "belong to" the loop backward-edge.
2706 static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
2707 const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
2708 blk->getParent()->getTarget().getInstrInfo());
2710 for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
2711 iterEnd = blk->rend(); iter != iterEnd; ++iter) {
2713 MachineInstr *instr = &*iter;
2715 if (isCondBranch(instr) || isUncondBranch(instr)) {
2717 } else if (!TII->isMov(instr->getOpcode())) {
2725 static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
2726 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2727 if (iter != blk->rend()) {
2728 MachineInstr *instr = &(*iter);
2729 if (instr->getOpcode() == AMDGPU::RETURN) {
2736 static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
2737 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2738 if (iter != blk->rend()) {
2739 MachineInstr *instr = &(*iter);
2740 if (instr->getOpcode() == AMDGPU::CONTINUE) {
2747 static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
2748 for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
2749 MachineInstr *instr = &(*iter);
2750 if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
2757 static bool isReturnBlock(MachineBasicBlock *blk) {
2758 MachineInstr *instr = getReturnInstr(blk);
2759 bool isReturn = (blk->succ_size() == 0);
2762 } else if (isReturn) {
2764 dbgs() << "BB" << blk->getNumber()
2765 <<" is return block without RETURN instr\n";
2772 static MachineBasicBlock::iterator
2773 getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
2774 assert(instr->getParent() == blk && "instruction doesn't belong to block");
2775 MachineBasicBlock::iterator iter = blk->begin();
2776 MachineBasicBlock::iterator iterEnd = blk->end();
2777 while (&(*iter) != instr && iter != iterEnd) {
2781 assert(iter != iterEnd);
2785 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2786 AMDGPUCFGStructurizer *passRep) {
2787 return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
2788 } //insertInstrBefore
2790 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2791 AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
2792 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2793 MachineInstr *newInstr =
2794 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
2796 MachineBasicBlock::iterator res;
2797 if (blk->begin() != blk->end()) {
2798 blk->insert(blk->begin(), newInstr);
2800 blk->push_back(newInstr);
2803 SHOWNEWINSTR(newInstr);
2806 } //insertInstrBefore
2808 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2809 AMDGPUCFGStructurizer *passRep) {
2810 insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
2813 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2814 AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
2815 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2816 MachineInstr *newInstr = blk->getParent()
2817 ->CreateMachineInstr(tii->get(newOpcode), DL);
2819 blk->push_back(newInstr);
2820 //assume the instruction doesn't take any reg operand ...
2822 SHOWNEWINSTR(newInstr);
2825 static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
2827 AMDGPUCFGStructurizer *passRep) {
2828 MachineInstr *oldInstr = &(*instrPos);
2829 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2830 MachineBasicBlock *blk = oldInstr->getParent();
2831 MachineInstr *newInstr =
2832 blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
2835 blk->insert(instrPos, newInstr);
2836 //assume the instruction doesn't take any reg operand ...
2838 SHOWNEWINSTR(newInstr);
2840 } //insertInstrBefore
2842 static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
2844 AMDGPUCFGStructurizer *passRep,
2846 MachineInstr *oldInstr = &(*instrPos);
2847 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2848 MachineBasicBlock *blk = oldInstr->getParent();
2849 MachineFunction *MF = blk->getParent();
2850 MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
2852 blk->insert(instrPos, newInstr);
2853 MachineInstrBuilder MIB(*MF, newInstr);
2854 MIB.addReg(oldInstr->getOperand(1).getReg(), false);
2856 SHOWNEWINSTR(newInstr);
2857 //erase later oldInstr->eraseFromParent();
2858 } //insertCondBranchBefore
2860 static void insertCondBranchBefore(MachineBasicBlock *blk,
2861 MachineBasicBlock::iterator insertPos,
2863 AMDGPUCFGStructurizer *passRep,
2866 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2867 MachineFunction *MF = blk->getParent();
2869 MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
2872 blk->insert(insertPos, newInstr);
2873 MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
2875 SHOWNEWINSTR(newInstr);
2876 } //insertCondBranchBefore
2878 static void insertCondBranchEnd(MachineBasicBlock *blk,
2880 AMDGPUCFGStructurizer *passRep,
2882 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2883 MachineFunction *MF = blk->getParent();
2884 MachineInstr *newInstr =
2885 MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
2887 blk->push_back(newInstr);
2888 MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
2890 SHOWNEWINSTR(newInstr);
2891 } //insertCondBranchEnd
2894 static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
2895 AMDGPUCFGStructurizer *passRep,
2896 RegiT regNum, int regVal) {
2897 MachineInstr *oldInstr = &(*instrPos);
2898 const AMDGPUInstrInfo *tii =
2899 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2900 MachineBasicBlock *blk = oldInstr->getParent();
2901 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
2903 blk->insert(instrPos, newInstr);
2905 SHOWNEWINSTR(newInstr);
2906 } //insertAssignInstrBefore
2908 static void insertAssignInstrBefore(MachineBasicBlock *blk,
2909 AMDGPUCFGStructurizer *passRep,
2910 RegiT regNum, int regVal) {
2911 const AMDGPUInstrInfo *tii =
2912 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2914 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
2916 if (blk->begin() != blk->end()) {
2917 blk->insert(blk->begin(), newInstr);
2919 blk->push_back(newInstr);
2922 SHOWNEWINSTR(newInstr);
2924 } //insertInstrBefore
2926 static void insertCompareInstrBefore(MachineBasicBlock *blk,
2927 MachineBasicBlock::iterator instrPos,
2928 AMDGPUCFGStructurizer *passRep,
2929 RegiT dstReg, RegiT src1Reg,
2931 const AMDGPUInstrInfo *tii =
2932 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2933 MachineFunction *MF = blk->getParent();
2934 MachineInstr *newInstr =
2935 MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
2937 MachineInstrBuilder MIB(*MF, newInstr);
2938 MIB.addReg(dstReg, RegState::Define); //set target
2939 MIB.addReg(src1Reg); //set src value
2940 MIB.addReg(src2Reg); //set src value
2942 blk->insert(instrPos, newInstr);
2943 SHOWNEWINSTR(newInstr);
2945 } //insertCompareInstrBefore
2947 static void cloneSuccessorList(MachineBasicBlock *dstBlk,
2948 MachineBasicBlock *srcBlk) {
2949 for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
2950 iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
2951 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
2953 } //cloneSuccessorList
2955 static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
2956 MachineFunction *func = srcBlk->getParent();
2957 MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
2958 func->push_back(newBlk); //insert to function
2959 for (MachineBasicBlock::iterator iter = srcBlk->begin(),
2960 iterEnd = srcBlk->end();
2961 iter != iterEnd; ++iter) {
2962 MachineInstr *instr = func->CloneMachineInstr(iter);
2963 newBlk->push_back(instr);
2968 //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
2969 //the AMDGPU instruction is not recognized as terminator fix this and retire
2971 static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
2972 MachineBasicBlock *oldBlk,
2973 MachineBasicBlock *newBlk) {
2974 MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
2975 if (branchInstr && isCondBranch(branchInstr) &&
2976 getTrueBranch(branchInstr) == oldBlk) {
2977 setTrueBranch(branchInstr, newBlk);
2981 static void wrapup(MachineBasicBlock *entryBlk) {
2982 assert((!entryBlk->getParent()->getJumpTableInfo()
2983 || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
2984 && "found a jump table");
2986 //collect continue right before endloop
2987 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
2988 MachineBasicBlock::iterator pre = entryBlk->begin();
2989 MachineBasicBlock::iterator iterEnd = entryBlk->end();
2990 MachineBasicBlock::iterator iter = pre;
2991 while (iter != iterEnd) {
2992 if (pre->getOpcode() == AMDGPU::CONTINUE
2993 && iter->getOpcode() == AMDGPU::ENDLOOP) {
2994 contInstr.push_back(pre);
3000 //delete continue right before endloop
3001 for (unsigned i = 0; i < contInstr.size(); ++i) {
3002 contInstr[i]->eraseFromParent();
3005 // TODO to fix up jump table so later phase won't be confused. if
3006 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
3007 // there isn't such an interface yet. alternatively, replace all the other
3008 // blocks in the jump table with the entryBlk //}
3012 static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
3013 return &pass.getAnalysis<MachineDominatorTree>();
3016 static MachinePostDominatorTree*
3017 getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
3018 return &pass.getAnalysis<MachinePostDominatorTree>();
3021 static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
3022 return &pass.getAnalysis<MachineLoopInfo>();
3024 }; // template class CFGStructTraits
3025 } // end anonymous namespace
3027 // createAMDGPUCFGPreparationPass- Returns a pass
3028 FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm) {
3029 return new AMDGPUCFGPrepare(tm);
3032 bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
3033 return CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func, *this,
3034 getTargetRegisterInfo());
3037 // createAMDGPUCFGStructurizerPass- Returns a pass
3038 FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) {
3039 return new AMDGPUCFGPerform(tm);
3042 bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
3043 return CFGStructurizer<AMDGPUCFGStructurizer>().run(func, *this,
3044 getTargetRegisterInfo());