1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
11 #define DEBUG_TYPE "structcfg"
14 #include "AMDILInstrInfo.h"
15 #include "AMDILRegisterInfo.h"
16 #include "AMDILUtilityFunctions.h"
17 #include "llvm/ADT/SCCIterator.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/DominatorInternals.h"
21 #include "llvm/Analysis/Dominators.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineDominators.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineJumpTableInfo.h"
30 #include "llvm/CodeGen/MachineLoopInfo.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/Target/TargetInstrInfo.h"
34 #define FirstNonDebugInstr(A) A->begin()
39 //===----------------------------------------------------------------------===//
41 // Statistics for CFGStructurizer.
43 //===----------------------------------------------------------------------===//
45 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
47 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
49 STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
51 STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
53 STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
55 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
56 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
58 //===----------------------------------------------------------------------===//
60 // Miscellaneous utility for CFGStructurizer.
62 //===----------------------------------------------------------------------===//
63 namespace llvmCFGStruct
65 #define SHOWNEWINSTR(i) \
66 if (DEBUGME) errs() << "New instr: " << *i << "\n"
68 #define SHOWNEWBLK(b, msg) \
70 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
74 #define SHOWBLK_DETAIL(b, msg) \
77 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
83 #define INVALIDSCCNUM -1
84 #define INVALIDREGNUM 0
86 template<class LoopinfoT>
87 void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
88 for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
89 iterEnd = LoopInfo.end();
90 iter != iterEnd; ++iter) {
91 (*iter)->print(OS, 0);
96 void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
97 size_t sz = Src.size();
98 for (size_t i = 0; i < sz/2; ++i) {
100 Src[i] = Src[sz - i - 1];
105 } //end namespace llvmCFGStruct
108 //===----------------------------------------------------------------------===//
110 // MachinePostDominatorTree
112 //===----------------------------------------------------------------------===//
116 /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
117 /// to compute the a post-dominator tree.
119 struct MachinePostDominatorTree : public MachineFunctionPass {
120 static char ID; // Pass identification, replacement for typeid
121 DominatorTreeBase<MachineBasicBlock> *DT;
122 MachinePostDominatorTree() : MachineFunctionPass(ID)
124 DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
128 ~MachinePostDominatorTree();
130 virtual bool runOnMachineFunction(MachineFunction &MF);
132 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
133 AU.setPreservesAll();
134 MachineFunctionPass::getAnalysisUsage(AU);
137 inline const std::vector<MachineBasicBlock *> &getRoots() const {
138 return DT->getRoots();
141 inline MachineDomTreeNode *getRootNode() const {
142 return DT->getRootNode();
145 inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
146 return DT->getNode(BB);
149 inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
150 return DT->getNode(BB);
153 inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
154 return DT->dominates(A, B);
157 inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
158 return DT->dominates(A, B);
162 properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
163 return DT->properlyDominates(A, B);
167 properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
168 return DT->properlyDominates(A, B);
171 inline MachineBasicBlock *
172 findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
173 return DT->findNearestCommonDominator(A, B);
176 virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
180 } //end of namespace llvm
182 char MachinePostDominatorTree::ID = 0;
183 static RegisterPass<MachinePostDominatorTree>
184 machinePostDominatorTreePass("machinepostdomtree",
185 "MachinePostDominator Tree Construction",
188 //const PassInfo *const llvm::MachinePostDominatorsID
189 //= &machinePostDominatorTreePass;
191 bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
197 MachinePostDominatorTree::~MachinePostDominatorTree() {
201 //===----------------------------------------------------------------------===//
203 // supporting data structure for CFGStructurizer
205 //===----------------------------------------------------------------------===//
207 namespace llvmCFGStruct
209 template<class PassT>
210 struct CFGStructTraits {
213 template <class InstrT>
214 class BlockInformation {
218 //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
219 //Instructions defining the corresponding successor.
220 BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
223 template <class BlockT, class InstrT, class RegiT>
224 class LandInformation {
227 std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
228 //WHILELOOP(thisloop) init before entering
230 std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
231 //WHILELOOP(thisloop) init after entering
233 std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
234 //land block, branch cond on this reg.
235 std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
236 //endif" after ENDLOOP(thisloop) break
237 //outerLoopOf(thisLoop).
238 std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
239 //endif" after ENDLOOP(thisloop) continue on
240 //outerLoopOf(thisLoop).
241 LandInformation() : landBlk(NULL) {}
244 } //end of namespace llvmCFGStruct
246 //===----------------------------------------------------------------------===//
250 //===----------------------------------------------------------------------===//
252 namespace llvmCFGStruct
254 // bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
255 template<class PassT>
256 class CFGStructurizer
261 SinglePath_InPath = 1,
262 SinglePath_NotInPath = 2
266 typedef typename PassT::InstructionType InstrT;
267 typedef typename PassT::FunctionType FuncT;
268 typedef typename PassT::DominatortreeType DomTreeT;
269 typedef typename PassT::PostDominatortreeType PostDomTreeT;
270 typedef typename PassT::DomTreeNodeType DomTreeNodeT;
271 typedef typename PassT::LoopinfoType LoopInfoT;
273 typedef GraphTraits<FuncT *> FuncGTraits;
274 //typedef FuncGTraits::nodes_iterator BlockIterator;
275 typedef typename FuncT::iterator BlockIterator;
277 typedef typename FuncGTraits::NodeType BlockT;
278 typedef GraphTraits<BlockT *> BlockGTraits;
279 typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
280 //typedef BlockGTraits::succ_iterator InstructionIterator;
281 typedef typename BlockT::iterator InstrIterator;
283 typedef CFGStructTraits<PassT> CFGTraits;
284 typedef BlockInformation<InstrT> BlockInfo;
285 typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
288 typedef typename PassT::LoopType LoopT;
289 typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
290 typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
291 //landing info for loop break
292 typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
298 /// Perform the CFG structurization
299 bool run(FuncT &Func, PassT &Pass, const AMDILRegisterInfo *tri);
301 /// Perform the CFG preparation
302 bool prepare(FuncT &Func, PassT &Pass, const AMDILRegisterInfo *tri);
306 void printOrderedBlocks(llvm::raw_ostream &OS);
307 int patternMatch(BlockT *CurBlock);
308 int patternMatchGroup(BlockT *CurBlock);
310 int serialPatternMatch(BlockT *CurBlock);
311 int ifPatternMatch(BlockT *CurBlock);
312 int switchPatternMatch(BlockT *CurBlock);
313 int loopendPatternMatch(BlockT *CurBlock);
314 int loopPatternMatch(BlockT *CurBlock);
316 int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
317 int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
318 //int loopWithoutBreak(BlockT *);
320 void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
321 BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
322 void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
323 BlockT *ContBlock, LoopT *contLoop);
324 bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
325 int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
327 int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
329 int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
330 BlockT *FalseBlock, BlockT **LandBlockPtr);
331 void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
332 BlockT *FalseBlock, BlockT *LandBlock,
333 bool Detail = false);
334 PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
335 bool AllowSideEntry = true);
336 BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
337 bool AllowSideEntry = true);
338 int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
339 void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
341 void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
342 BlockT *TrueBlock, BlockT *FalseBlock,
344 void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
345 void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
346 BlockT *ExitLandBlock, RegiT SetReg);
347 void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
349 BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
350 std::set<BlockT*> &ExitBlockSet,
351 BlockT *ExitLandBlk);
352 BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
353 BlockTSmallerVector &ExitingBlocks,
354 BlockTSmallerVector &ExitBlocks);
355 BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
356 void removeUnconditionalBranch(BlockT *SrcBlock);
357 void removeRedundantConditionalBranch(BlockT *SrcBlock);
358 void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
360 void removeSuccessor(BlockT *SrcBlock);
361 BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
362 BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
364 void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
365 InstrIterator InsertPos);
367 void recordSccnum(BlockT *SrcBlock, int SCCNum);
368 int getSCCNum(BlockT *srcBlk);
370 void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
371 bool isRetiredBlock(BlockT *SrcBlock);
372 bool isActiveLoophead(BlockT *CurBlock);
373 bool needMigrateBlock(BlockT *Block);
375 BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
376 BlockTSmallerVector &exitBlocks,
377 std::set<BlockT*> &ExitBlockSet);
378 void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
379 BlockT *getLoopLandBlock(LoopT *LoopRep);
380 LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
382 void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
383 void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
384 void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
385 void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
386 void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
388 bool hasBackEdge(BlockT *curBlock);
389 unsigned getLoopDepth (LoopT *LoopRep);
390 int countActiveBlock(
391 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
392 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
393 BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
394 BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
398 PostDomTreeT *postDomTree;
403 BlockInfoMap blockInfoMap;
404 LoopLandInfoMap loopLandInfoMap;
405 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
406 const AMDILRegisterInfo *TRI;
408 }; //template class CFGStructurizer
410 template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
411 : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
414 template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
415 for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
416 E = blockInfoMap.end(); I != E; ++I) {
421 template<class PassT>
422 bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
423 const AMDILRegisterInfo * tri) {
428 bool changed = false;
429 //func.RenumberBlocks();
431 //to do, if not reducible flow graph, make it so ???
434 errs() << "AMDILCFGStructurizer::prepare\n";
436 //func.viewCFGOnly();
440 //FIXME: gcc complains on this.
441 //domTree = &pass.getAnalysis<DomTreeT>();
442 //domTree = CFGTraits::getDominatorTree(pass);
444 // domTree->print(errs());
447 //FIXME: gcc complains on this.
448 //domTree = &pass.getAnalysis<DomTreeT>();
449 //postDomTree = CFGTraits::getPostDominatorTree(pass);
451 // postDomTree->print(errs());
454 //FIXME: gcc complains on this.
455 //loopInfo = &pass.getAnalysis<LoopInfoT>();
456 loopInfo = CFGTraits::getLoopInfo(pass);
458 errs() << "LoopInfo:\n";
459 PrintLoopinfo(*loopInfo, errs());
464 errs() << "Ordered blocks:\n";
465 printOrderedBlocks(errs());
468 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
470 for (typename LoopInfoT::iterator iter = loopInfo->begin(),
471 iterEnd = loopInfo->end();
472 iter != iterEnd; ++iter) {
473 LoopT* loopRep = (*iter);
474 BlockTSmallerVector exitingBlks;
475 loopRep->getExitingBlocks(exitingBlks);
477 if (exitingBlks.size() == 0) {
478 BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
479 if (dummyExitBlk != NULL)
480 retBlks.push_back(dummyExitBlk);
484 // Remove unconditional branch instr.
485 // Add dummy exit block iff there are multiple returns.
487 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
488 iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
489 iterBlk != iterEndBlk;
491 BlockT *curBlk = *iterBlk;
492 removeUnconditionalBranch(curBlk);
493 removeRedundantConditionalBranch(curBlk);
494 if (CFGTraits::isReturnBlock(curBlk)) {
495 retBlks.push_back(curBlk);
497 assert(curBlk->succ_size() <= 2);
498 //assert(curBlk->size() > 0);
499 //removeEmptyBlock(curBlk) ??
502 if (retBlks.size() >= 2) {
503 addDummyExitBlock(retBlks);
508 } //CFGStructurizer::prepare
510 template<class PassT>
511 bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
512 const AMDILRegisterInfo * tri) {
517 //func.RenumberBlocks();
519 //Assume reducible CFG...
521 errs() << "AMDILCFGStructurizer::run\n";
522 //errs() << func.getFunction()->getNameStr() << "\n";
524 //func.viewCFGOnly();
529 //FIXME: gcc complains on this.
530 //domTree = &pass.getAnalysis<DomTreeT>();
531 domTree = CFGTraits::getDominatorTree(pass);
533 domTree->print(errs(), (const llvm::Module*)0);
537 //FIXME: gcc complains on this.
538 //domTree = &pass.getAnalysis<DomTreeT>();
539 postDomTree = CFGTraits::getPostDominatorTree(pass);
541 postDomTree->print(errs());
544 //FIXME: gcc complains on this.
545 //loopInfo = &pass.getAnalysis<LoopInfoT>();
546 loopInfo = CFGTraits::getLoopInfo(pass);
548 errs() << "LoopInfo:\n";
549 PrintLoopinfo(*loopInfo, errs());
555 //Use the worse block ordering to test the algorithm.
556 ReverseVector(orderedBlks);
560 errs() << "Ordered blocks:\n";
561 printOrderedBlocks(errs());
566 bool makeProgress = false;
567 int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
573 errs() << "numIter = " << numIter
574 << ", numRemaintedBlk = " << numRemainedBlk << "\n";
577 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
578 iterBlk = orderedBlks.begin();
579 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
580 iterBlkEnd = orderedBlks.end();
582 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
583 sccBeginIter = iterBlk;
584 BlockT *sccBeginBlk = NULL;
585 int sccNumBlk = 0; // The number of active blocks, init to a
586 // maximum possible number.
587 int sccNumIter; // Number of iteration in this SCC.
589 while (iterBlk != iterBlkEnd) {
592 if (sccBeginBlk == NULL) {
593 sccBeginIter = iterBlk;
594 sccBeginBlk = curBlk;
596 sccNumBlk = numRemainedBlk; // Init to maximum possible number.
598 errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
603 if (!isRetiredBlock(curBlk)) {
604 patternMatch(curBlk);
609 bool contNextScc = true;
610 if (iterBlk == iterBlkEnd
611 || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
612 // Just finish one scc.
614 int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
615 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
617 errs() << "Can't reduce SCC " << getSCCNum(curBlk)
618 << ", sccNumIter = " << sccNumIter;
619 errs() << "doesn't make any progress\n";
622 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
623 sccNumBlk = sccRemainedNumBlk;
624 iterBlk = sccBeginIter;
627 errs() << "repeat processing SCC" << getSCCNum(curBlk)
628 << "sccNumIter = " << sccNumIter << "\n";
630 //func.viewCFGOnly();
633 // Finish the current scc.
637 // Continue on next component in the current scc.
644 } //while, "one iteration" over the function.
646 BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
647 if (entryBlk->succ_size() == 0) {
650 errs() << "Reduce to one block\n";
653 int newnumRemainedBlk
654 = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
655 // consider cloned blocks ??
656 if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
658 numRemainedBlk = newnumRemainedBlk;
660 makeProgress = false;
662 errs() << "No progress\n";
666 } while (!finish && makeProgress);
668 // Misc wrap up to maintain the consistency of the Function representation.
669 CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
671 // Detach retired Block, release memory.
672 for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
673 iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
674 if ((*iterMap).second && (*iterMap).second->isRetired) {
675 assert(((*iterMap).first)->getNumber() != -1);
677 errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
679 (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
681 delete (*iterMap).second;
683 blockInfoMap.clear();
685 // clear loopLandInfoMap
686 for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
687 iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
688 delete (*iterMap).second;
690 loopLandInfoMap.clear();
698 assert(!"IRREDUCIBL_CF");
702 } //CFGStructurizer::run
704 /// Print the ordered Blocks.
706 template<class PassT>
707 void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
709 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
710 iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
711 iterBlk != iterBlkEnd;
713 os << "BB" << (*iterBlk)->getNumber();
714 os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
715 if (i != 0 && i % 10 == 0) {
721 } //printOrderedBlocks
723 /// Compute the reversed DFS post order of Blocks
725 template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
728 for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
729 sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
730 std::vector<BlockT *> &sccNext = *sccIter;
731 for (typename std::vector<BlockT *>::const_iterator
732 blockIter = sccNext.begin(), blockEnd = sccNext.end();
733 blockIter != blockEnd; ++blockIter) {
735 orderedBlks.push_back(bb);
736 recordSccnum(bb, sccNum);
740 //walk through all the block in func to check for unreachable
741 for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
742 blockEnd1 = FuncGTraits::nodes_end(funcRep);
743 blockIter1 != blockEnd1; ++blockIter1) {
744 BlockT *bb = &(*blockIter1);
745 sccNum = getSCCNum(bb);
746 if (sccNum == INVALIDSCCNUM) {
747 errs() << "unreachable block BB" << bb->getNumber() << "\n";
752 template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
757 errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
760 while ((curMatch = patternMatchGroup(curBlk)) > 0) {
761 numMatch += curMatch;
765 errs() << "End patternMatch BB" << curBlk->getNumber()
766 << ", numMatch = " << numMatch << "\n";
772 template<class PassT>
773 int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
775 numMatch += serialPatternMatch(curBlk);
776 numMatch += ifPatternMatch(curBlk);
777 //numMatch += switchPatternMatch(curBlk);
778 numMatch += loopendPatternMatch(curBlk);
779 numMatch += loopPatternMatch(curBlk);
783 template<class PassT>
784 int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
785 if (curBlk->succ_size() != 1) {
789 BlockT *childBlk = *curBlk->succ_begin();
790 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
794 mergeSerialBlock(curBlk, childBlk);
795 ++numSerialPatternMatch;
797 } //serialPatternMatch
799 template<class PassT>
800 int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
802 if (curBlk->succ_size() != 2) {
806 if (hasBackEdge(curBlk)) {
810 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
811 if (branchInstr == NULL) {
815 assert(CFGTraits::isCondBranch(branchInstr));
817 BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
818 BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
823 if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
824 && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
825 landBlk = *trueBlk->succ_begin();
826 } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
828 } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
831 } else if (falseBlk->succ_size() == 1
832 && *falseBlk->succ_begin() == trueBlk) {
835 } else if (falseBlk->succ_size() == 1
836 && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
837 landBlk = *falseBlk->succ_begin();
838 } else if (trueBlk->succ_size() == 1
839 && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
840 landBlk = *trueBlk->succ_begin();
842 return handleJumpintoIf(curBlk, trueBlk, falseBlk);
845 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
846 // new BB created for landBlk==NULL may introduce new challenge to the
847 // reduction process.
848 if (landBlk != NULL &&
849 ((trueBlk && trueBlk->pred_size() > 1)
850 || (falseBlk && falseBlk->pred_size() > 1))) {
851 cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
854 if (trueBlk && trueBlk->pred_size() > 1) {
855 trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
859 if (falseBlk && falseBlk->pred_size() > 1) {
860 falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
864 mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
868 numClonedBlock += cloned;
873 template<class PassT>
874 int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
876 } //switchPatternMatch
878 template<class PassT>
879 int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
880 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
881 typename std::vector<LoopT *> nestedLoops;
883 nestedLoops.push_back(loopRep);
884 loopRep = loopRep->getParentLoop();
887 if (nestedLoops.size() == 0) {
891 // Process nested loop outside->inside, so "continue" to a outside loop won't
892 // be mistaken as "break" of the current loop.
894 for (typename std::vector<LoopT *>::reverse_iterator
895 iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
896 iter != iterEnd; ++iter) {
899 if (getLoopLandBlock(loopRep) != NULL) {
903 BlockT *loopHeader = loopRep->getHeader();
905 int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
907 if (numBreak == -1) {
911 int numCont = loopcontPatternMatch(loopRep, loopHeader);
912 num += numBreak + numCont;
916 } //loopendPatternMatch
918 template<class PassT>
919 int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
920 if (curBlk->succ_size() != 0) {
925 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
926 while (loopRep && loopRep->getHeader() == curBlk) {
927 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
929 BlockT *landBlk = loopLand->landBlk;
931 if (!isRetiredBlock(landBlk)) {
932 mergeLooplandBlock(curBlk, loopLand);
936 loopRep = loopRep->getParentLoop();
939 numLoopPatternMatch += numLoop;
944 template<class PassT>
945 int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
946 BlockT *loopHeader) {
947 BlockTSmallerVector exitingBlks;
948 loopRep->getExitingBlocks(exitingBlks);
951 errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
954 if (exitingBlks.size() == 0) {
955 setLoopLandBlock(loopRep);
959 // Compute the corresponding exitBlks and exit block set.
960 BlockTSmallerVector exitBlks;
961 std::set<BlockT *> exitBlkSet;
962 for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
963 iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
964 BlockT *exitingBlk = *iter;
965 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
966 exitBlks.push_back(exitBlk);
967 exitBlkSet.insert(exitBlk); //non-duplicate insert
970 assert(exitBlkSet.size() > 0);
971 assert(exitBlks.size() == exitingBlks.size());
974 errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
978 BlockT *exitLandBlk = NULL;
982 if (exitBlkSet.size() == 1)
984 exitLandBlk = *exitBlkSet.begin();
986 exitLandBlk = findNearestCommonPostDom(exitBlkSet);
988 if (exitLandBlk == NULL) {
992 bool allInPath = true;
993 bool allNotInPath = true;
994 for (typename std::set<BlockT*>::const_iterator
995 iter = exitBlkSet.begin(),
996 iterEnd = exitBlkSet.end();
997 iter != iterEnd; ++iter) {
998 BlockT *exitBlk = *iter;
1000 PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
1002 errs() << "BB" << exitBlk->getNumber()
1003 << " to BB" << exitLandBlk->getNumber() << " PathToKind="
1004 << pathKind << "\n";
1007 allInPath = allInPath && (pathKind == SinglePath_InPath);
1008 allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
1010 if (!allInPath && !allNotInPath) {
1012 errs() << "singlePath check fail\n";
1016 } // check all exit blocks
1021 // TODO: Simplify, maybe separate function?
1022 //funcRep->viewCFG();
1023 LoopT *parentLoopRep = loopRep->getParentLoop();
1024 BlockT *parentLoopHeader = NULL;
1026 parentLoopHeader = parentLoopRep->getHeader();
1028 if (exitLandBlk == parentLoopHeader &&
1029 (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
1032 exitLandBlk)) != NULL) {
1034 errs() << "relocateLoopcontBlock success\n";
1036 } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
1038 exitBlks)) != NULL) {
1040 errs() << "insertEndbranchBlock success\n";
1044 errs() << "loop exit fail\n";
1053 // Handle side entry to exit path.
1056 for (typename BlockTSmallerVector::iterator iterExiting =
1057 exitingBlks.begin(),
1058 iterExitingEnd = exitingBlks.end();
1059 iterExiting != iterExitingEnd; ++iterExiting) {
1060 BlockT *exitingBlk = *iterExiting;
1061 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
1062 BlockT *newExitBlk = exitBlk;
1064 if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
1065 newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
1069 numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
1071 exitBlks.push_back(newExitBlk);
1072 exitBlkSet.insert(newExitBlk);
1075 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
1076 iterExitEnd = exitBlks.end();
1077 iterExit != iterExitEnd; ++iterExit) {
1078 BlockT *exitBlk = *iterExit;
1079 numSerial += serialPatternMatch(exitBlk);
1082 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
1083 iterExitEnd = exitBlks.end();
1084 iterExit != iterExitEnd; ++iterExit) {
1085 BlockT *exitBlk = *iterExit;
1086 if (exitBlk->pred_size() > 1) {
1087 if (exitBlk != exitLandBlk) {
1091 if (exitBlk != exitLandBlk &&
1092 (exitBlk->succ_size() != 1 ||
1093 *exitBlk->succ_begin() != exitLandBlk)) {
1100 // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
1101 exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
1103 // Fold break into the breaking block. Leverage across level breaks.
1104 assert(exitingBlks.size() == exitBlks.size());
1105 for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
1106 iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
1107 iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
1108 BlockT *exitBlk = *iterExit;
1109 BlockT *exitingBlk = *iterExiting;
1110 assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
1111 LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
1112 handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
1115 int numBreak = static_cast<int>(exitingBlks.size());
1116 numLoopbreakPatternMatch += numBreak;
1117 numClonedBlock += numCloned;
1118 return numBreak + numSerial + numCloned;
1119 } //loopbreakPatternMatch
1121 template<class PassT>
1122 int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
1123 BlockT *loopHeader) {
1125 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
1126 for (typename InvBlockGTraits::ChildIteratorType iter =
1127 InvBlockGTraits::child_begin(loopHeader),
1128 iterEnd = InvBlockGTraits::child_end(loopHeader);
1129 iter != iterEnd; ++iter) {
1130 BlockT *curBlk = *iter;
1131 if (loopRep->contains(curBlk)) {
1132 handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
1133 loopHeader, loopRep);
1134 contBlk.push_back(curBlk);
1139 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
1140 iter = contBlk.begin(), iterEnd = contBlk.end();
1141 iter != iterEnd; ++iter) {
1142 (*iter)->removeSuccessor(loopHeader);
1145 numLoopcontPatternMatch += numCont;
1148 } //loopcontPatternMatch
1151 template<class PassT>
1152 bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
1154 // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1155 // same loop with LoopLandInfo without explicitly keeping track of
1156 // loopContBlks and loopBreakBlks, this is a method to get the information.
1158 if (src1Blk->succ_size() == 0) {
1159 LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
1160 if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
1161 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
1162 if (theEntry != NULL) {
1164 errs() << "isLoopContBreakBlock yes src1 = BB"
1165 << src1Blk->getNumber()
1166 << " src2 = BB" << src2Blk->getNumber() << "\n";
1173 } //isSameloopDetachedContbreak
1175 template<class PassT>
1176 int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
1179 int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
1182 errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1184 num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
1189 template<class PassT>
1190 int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
1196 //trueBlk could be the common post dominator
1200 errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
1201 << " true = BB" << trueBlk->getNumber()
1202 << ", numSucc=" << trueBlk->succ_size()
1203 << " false = BB" << falseBlk->getNumber() << "\n";
1208 errs() << "check down = BB" << downBlk->getNumber();
1211 if (//postDomTree->dominates(downBlk, falseBlk) &&
1212 singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
1214 errs() << " working\n";
1217 num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
1218 num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
1220 numClonedBlock += num;
1221 num += serialPatternMatch(*headBlk->succ_begin());
1222 num += serialPatternMatch(*(++headBlk->succ_begin()));
1223 num += ifPatternMatch(headBlk);
1229 errs() << " not working\n";
1231 downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
1232 } // walk down the postDomTree
1235 } //handleJumpintoIf
1237 template<class PassT>
1238 void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
1243 errs() << "head = BB" << headBlk->getNumber()
1244 << " size = " << headBlk->size();
1247 headBlk->print(errs());
1252 errs() << ", true = BB" << trueBlk->getNumber() << " size = "
1253 << trueBlk->size() << " numPred = " << trueBlk->pred_size();
1256 trueBlk->print(errs());
1261 errs() << ", false = BB" << falseBlk->getNumber() << " size = "
1262 << falseBlk->size() << " numPred = " << falseBlk->pred_size();
1265 falseBlk->print(errs());
1270 errs() << ", land = BB" << landBlk->getNumber() << " size = "
1271 << landBlk->size() << " numPred = " << landBlk->pred_size();
1274 landBlk->print(errs());
1280 } //showImproveSimpleJumpintoIf
1282 template<class PassT>
1283 int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
1286 BlockT **plandBlk) {
1287 bool migrateTrue = false;
1288 bool migrateFalse = false;
1290 BlockT *landBlk = *plandBlk;
1292 assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
1293 && (falseBlk == NULL || falseBlk->succ_size() <= 1));
1295 if (trueBlk == falseBlk) {
1301 errs() << "improveSimpleJumpintoIf: ";
1302 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1306 // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
1307 // May consider the # landBlk->pred_size() as it represents the number of
1308 // assignment initReg = .. needed to insert.
1309 migrateTrue = needMigrateBlock(trueBlk);
1310 migrateFalse = needMigrateBlock(falseBlk);
1312 if (!migrateTrue && !migrateFalse) {
1316 // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1317 // have more than one predecessors. without doing this, its predecessor
1318 // rather than headBlk will have undefined value in initReg.
1319 if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
1322 if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
1323 migrateFalse = true;
1327 errs() << "before improveSimpleJumpintoIf: ";
1328 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1329 //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
1332 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1334 // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1335 // {initReg = 0; org falseBlk branch }
1336 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1338 // if landBlk->pred_size() > 2, put the about if-else inside
1339 // if (initReg !=2) {...}
1341 // add initReg = initVal to headBlk
1343 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1345 funcRep->getRegInfo().createVirtualRegister(I32RC);
1346 if (!migrateTrue || !migrateFalse) {
1347 int initVal = migrateTrue ? 0 : 1;
1348 CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
1353 if (landBlk == NULL) {
1354 landBlk = funcRep->CreateMachineBasicBlock();
1355 funcRep->push_back(landBlk); //insert to function
1358 trueBlk->addSuccessor(landBlk);
1360 headBlk->addSuccessor(landBlk);
1364 falseBlk->addSuccessor(landBlk);
1366 headBlk->addSuccessor(landBlk);
1372 bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
1374 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1375 typename BlockT::iterator insertPos =
1376 CFGTraits::getInstrPos
1377 (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
1379 if (landBlkHasOtherPred) {
1381 funcRep->getRegInfo().createVirtualRegister(I32RC);
1382 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
1383 unsigned cmpResReg =
1384 funcRep->getRegInfo().createVirtualRegister(I32RC);
1386 CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
1388 CFGTraits::insertCondBranchBefore(landBlk, insertPos,
1389 AMDGPU::IF_LOGICALZ_i32, passRep,
1390 cmpResReg, DebugLoc());
1393 CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32,
1394 passRep, initReg, DebugLoc());
1397 migrateInstruction(trueBlk, landBlk, insertPos);
1398 // need to uncondionally insert the assignment to ensure a path from its
1399 // predecessor rather than headBlk has valid value in initReg if
1401 CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
1403 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
1406 migrateInstruction(falseBlk, landBlk, insertPos);
1407 // need to uncondionally insert the assignment to ensure a path from its
1408 // predecessor rather than headBlk has valid value in initReg if
1410 CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
1412 //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1414 if (landBlkHasOtherPred) {
1416 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1418 // put initReg = 2 to other predecessors of landBlk
1419 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
1420 predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
1422 BlockT *curBlk = *predIter;
1423 if (curBlk != trueBlk && curBlk != falseBlk) {
1424 CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
1429 errs() << "result from improveSimpleJumpintoIf: ";
1430 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1431 //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
1435 *plandBlk = landBlk;
1438 } //improveSimpleJumpintoIf
1440 template<class PassT>
1441 void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
1447 errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
1448 << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
1450 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1452 RegiT initReg = INVALIDREGNUM;
1453 if (exitingLoop != exitLoop) {
1454 initReg = static_cast<int>
1455 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1456 assert(initReg != INVALIDREGNUM);
1457 addLoopBreakInitReg(exitLoop, initReg);
1458 while (exitingLoop != exitLoop && exitingLoop) {
1459 addLoopBreakOnReg(exitingLoop, initReg);
1460 exitingLoop = exitingLoop->getParentLoop();
1462 assert(exitingLoop == exitLoop);
1465 mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
1469 template<class PassT>
1470 void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
1475 errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
1476 << " header = BB" << contBlk->getNumber() << "\n";
1478 errs() << "Trying to continue loop-depth = "
1479 << getLoopDepth(contLoop)
1480 << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
1483 RegiT initReg = INVALIDREGNUM;
1484 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1485 if (contingLoop != contLoop) {
1486 initReg = static_cast<int>
1487 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1488 assert(initReg != INVALIDREGNUM);
1489 addLoopContInitReg(contLoop, initReg);
1490 while (contingLoop && contingLoop->getParentLoop() != contLoop) {
1491 addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
1492 contingLoop = contingLoop->getParentLoop();
1494 assert(contingLoop && contingLoop->getParentLoop() == contLoop);
1495 addLoopContOnReg(contingLoop, initReg);
1498 settleLoopcontBlock(contingBlk, contBlk, initReg);
1499 //contingBlk->removeSuccessor(loopHeader);
1500 } //handleLoopcontBlock
1502 template<class PassT>
1503 void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
1505 errs() << "serialPattern BB" << dstBlk->getNumber()
1506 << " <= BB" << srcBlk->getNumber() << "\n";
1508 //removeUnconditionalBranch(dstBlk);
1509 dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
1511 dstBlk->removeSuccessor(srcBlk);
1512 CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
1514 removeSuccessor(srcBlk);
1515 retireBlock(dstBlk, srcBlk);
1516 } //mergeSerialBlock
1518 template<class PassT>
1519 void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
1525 errs() << "ifPattern BB" << curBlk->getNumber();
1528 errs() << "BB" << trueBlk->getNumber();
1530 errs() << " } else ";
1533 errs() << "BB" << falseBlk->getNumber();
1536 errs() << "landBlock: ";
1537 if (landBlk == NULL) {
1540 errs() << "BB" << landBlk->getNumber();
1545 int oldOpcode = branchInstr->getOpcode();
1546 DebugLoc branchDL = branchInstr->getDebugLoc();
1556 typename BlockT::iterator branchInstrPos =
1557 CFGTraits::getInstrPos(curBlk, branchInstr);
1558 CFGTraits::insertCondBranchBefore(branchInstrPos,
1559 CFGTraits::getBranchNzeroOpcode(oldOpcode),
1564 curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
1565 curBlk->removeSuccessor(trueBlk);
1566 if (landBlk && trueBlk->succ_size()!=0) {
1567 trueBlk->removeSuccessor(landBlk);
1569 retireBlock(curBlk, trueBlk);
1571 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
1574 curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
1576 curBlk->removeSuccessor(falseBlk);
1577 if (landBlk && falseBlk->succ_size() != 0) {
1578 falseBlk->removeSuccessor(landBlk);
1580 retireBlock(curBlk, falseBlk);
1582 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1584 //curBlk->remove(branchInstrPos);
1585 branchInstr->eraseFromParent();
1587 if (landBlk && trueBlk && falseBlk) {
1588 curBlk->addSuccessor(landBlk);
1591 } //mergeIfthenelseBlock
1593 template<class PassT>
1594 void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
1595 LoopLandInfo *loopLand) {
1596 BlockT *landBlk = loopLand->landBlk;
1599 errs() << "loopPattern header = BB" << dstBlk->getNumber()
1600 << " land = BB" << landBlk->getNumber() << "\n";
1603 // Loop contInitRegs are init at the beginning of the loop.
1604 for (typename std::set<RegiT>::const_iterator iter =
1605 loopLand->contInitRegs.begin(),
1606 iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
1607 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1610 /* we last inserterd the DebugLoc in the
1611 * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
1612 * search for the DebugLoc in the that statement.
1613 * if not found, we have to insert the empty/default DebugLoc */
1614 InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
1615 DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
1617 CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
1618 // Loop breakInitRegs are init before entering the loop.
1619 for (typename std::set<RegiT>::const_iterator iter =
1620 loopLand->breakInitRegs.begin(),
1621 iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
1623 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1625 // Loop endbranchInitRegs are init before entering the loop.
1626 for (typename std::set<RegiT>::const_iterator iter =
1627 loopLand->endbranchInitRegs.begin(),
1628 iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
1629 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1632 /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
1633 * search for the DebugLoc in the continue statement.
1634 * if not found, we have to insert the empty/default DebugLoc */
1635 InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
1636 DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
1638 CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
1639 // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
1641 for (typename std::set<RegiT>::const_iterator iter =
1642 loopLand->breakOnRegs.begin(),
1643 iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
1644 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep,
1648 // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
1650 for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
1651 iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
1652 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
1656 dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
1658 for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
1659 iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
1660 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
1663 removeSuccessor(landBlk);
1664 retireBlock(dstBlk, landBlk);
1665 } //mergeLooplandBlock
1667 template<class PassT>
1668 void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
1670 BlockT *exitLandBlk,
1673 errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
1674 << " exit = BB" << exitBlk->getNumber()
1675 << " land = BB" << exitLandBlk->getNumber() << "\n";
1678 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
1679 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
1681 DebugLoc DL = branchInstr->getDebugLoc();
1683 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1684 int oldOpcode = branchInstr->getOpcode();
1686 // transform exitingBlk to
1688 // exitBlk (if exitBlk != exitLandBlk)
1692 // successor = {orgSuccessor(exitingBlk) - exitBlk}
1694 typename BlockT::iterator branchInstrPos =
1695 CFGTraits::getInstrPos(exitingBlk, branchInstr);
1697 if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
1700 (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
1701 : CFGTraits::getBreakZeroOpcode(oldOpcode);
1702 CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
1705 (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
1706 : CFGTraits::getBranchZeroOpcode(oldOpcode);
1707 CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
1708 if (exitBlk != exitLandBlk) {
1709 //splice is insert-before ...
1710 exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
1713 if (setReg != INVALIDREGNUM) {
1714 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1716 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
1717 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1720 //now branchInst can be erase safely
1721 //exitingBlk->eraseFromParent(branchInstr);
1722 branchInstr->eraseFromParent();
1724 //now take care of successors, retire blocks
1725 exitingBlk->removeSuccessor(exitBlk);
1726 if (exitBlk != exitLandBlk) {
1727 //splice is insert-before ...
1728 exitBlk->removeSuccessor(exitLandBlk);
1729 retireBlock(exitingBlk, exitBlk);
1732 } //mergeLoopbreakBlock
1734 template<class PassT>
1735 void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
1739 errs() << "settleLoopcontBlock conting = BB"
1740 << contingBlk->getNumber()
1741 << ", cont = BB" << contBlk->getNumber() << "\n";
1744 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
1746 assert(CFGTraits::isCondBranch(branchInstr));
1747 typename BlockT::iterator branchInstrPos =
1748 CFGTraits::getInstrPos(contingBlk, branchInstr);
1749 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1750 int oldOpcode = branchInstr->getOpcode();
1751 DebugLoc DL = branchInstr->getDebugLoc();
1753 // transform contingBlk to
1755 // move instr after branchInstr
1761 // successor = {orgSuccessor(contingBlk) - loopHeader}
1763 bool useContinueLogical =
1764 (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
1766 if (useContinueLogical == false)
1769 trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
1770 : CFGTraits::getBranchZeroOpcode(oldOpcode);
1772 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1774 if (setReg != INVALIDREGNUM) {
1775 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1776 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1777 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
1779 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1780 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
1783 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
1786 trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
1787 : CFGTraits::getContinueZeroOpcode(oldOpcode);
1789 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1792 //contingBlk->eraseFromParent(branchInstr);
1793 branchInstr->eraseFromParent();
1795 /* if we've arrived here then we've already erased the branch instruction
1796 * travel back up the basic block to see the last reference of our debug location
1797 * we've just inserted that reference here so it should be representative */
1798 if (setReg != INVALIDREGNUM) {
1799 CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
1800 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1801 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1803 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1804 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1808 } //settleLoopcontBlock
1810 // BBs in exitBlkSet are determined as in break-path for loopRep,
1811 // before we can put code for BBs as inside loop-body for loopRep
1812 // check whether those BBs are determined as cont-BB for parentLoopRep
1814 // If so, generate a new BB newBlk
1815 // (1) set newBlk common successor of BBs in exitBlkSet
1816 // (2) change the continue-instr in BBs in exitBlkSet to break-instr
1817 // (3) generate continue-instr in newBlk
1819 template<class PassT>
1820 typename CFGStructurizer<PassT>::BlockT *
1821 CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
1823 std::set<BlockT *> &exitBlkSet,
1824 BlockT *exitLandBlk) {
1825 std::set<BlockT *> endBlkSet;
1827 // BlockT *parentLoopHead = parentLoopRep->getHeader();
1830 for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
1831 iterEnd = exitBlkSet.end();
1832 iter != iterEnd; ++iter) {
1833 BlockT *exitBlk = *iter;
1834 BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
1836 if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
1839 endBlkSet.insert(endBlk);
1842 BlockT *newBlk = funcRep->CreateMachineBasicBlock();
1843 funcRep->push_back(newBlk); //insert to function
1844 CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
1845 SHOWNEWBLK(newBlk, "New continue block: ");
1847 for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
1848 iterEnd = endBlkSet.end();
1849 iter != iterEnd; ++iter) {
1850 BlockT *endBlk = *iter;
1851 InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
1853 contInstr->eraseFromParent();
1855 endBlk->addSuccessor(newBlk);
1857 errs() << "Add new continue Block to BB"
1858 << endBlk->getNumber() << " successors\n";
1863 } //relocateLoopcontBlock
1866 // LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
1867 // LoopLandBlock. This BB branch on the loop endBranchInit register to the
1868 // pathes corresponding to the loop exiting branches.
1870 template<class PassT>
1871 typename CFGStructurizer<PassT>::BlockT *
1872 CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
1873 BlockTSmallerVector &exitingBlks,
1874 BlockTSmallerVector &exitBlks) {
1875 const AMDILInstrInfo *tii =
1876 static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
1877 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1879 RegiT endBranchReg = static_cast<int>
1880 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1881 assert(endBranchReg >= 0);
1883 // reg = 0 before entering the loop
1884 addLoopEndbranchInitReg(loopRep, endBranchReg);
1886 uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
1887 assert(numBlks >=2 && numBlks == exitBlks.size());
1889 BlockT *preExitingBlk = exitingBlks[0];
1890 BlockT *preExitBlk = exitBlks[0];
1891 BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
1892 funcRep->push_back(preBranchBlk); //insert to function
1893 SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
1895 BlockT *newLandBlk = preBranchBlk;
1897 CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
1899 preExitingBlk->removeSuccessor(preExitBlk);
1900 preExitingBlk->addSuccessor(newLandBlk);
1902 //it is redundant to add reg = 0 to exitingBlks[0]
1904 // For 1..n th exiting path (the last iteration handles two pathes) create the
1905 // branch to the previous path and the current path.
1906 for (uint32_t i = 1; i < numBlks; ++i) {
1907 BlockT *curExitingBlk = exitingBlks[i];
1908 BlockT *curExitBlk = exitBlks[i];
1909 BlockT *curBranchBlk;
1911 if (i == numBlks - 1) {
1912 curBranchBlk = curExitBlk;
1914 curBranchBlk = funcRep->CreateMachineBasicBlock();
1915 funcRep->push_back(curBranchBlk); //insert to function
1916 SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
1919 // Add reg = i to exitingBlks[i].
1920 CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
1923 // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
1924 // (exitingBlks[i], newLandBlk).
1925 CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
1927 curExitingBlk->removeSuccessor(curExitBlk);
1928 curExitingBlk->addSuccessor(newLandBlk);
1930 // add to preBranchBlk the branch instruction:
1931 // if (endBranchReg == preVal)
1936 // preValReg = i - 1
1939 RegiT preValReg = static_cast<int>
1940 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1942 preBranchBlk->insert(preBranchBlk->begin(),
1943 tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
1946 // condResReg = (endBranchReg == preValReg)
1947 RegiT condResReg = static_cast<int>
1948 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1949 BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
1950 .addReg(endBranchReg).addReg(preValReg);
1952 BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
1953 .addMBB(preExitBlk).addReg(condResReg);
1955 preBranchBlk->addSuccessor(preExitBlk);
1956 preBranchBlk->addSuccessor(curBranchBlk);
1958 // Update preExitingBlk, preExitBlk, preBranchBlk.
1959 preExitingBlk = curExitingBlk;
1960 preExitBlk = curExitBlk;
1961 preBranchBlk = curBranchBlk;
1963 } //end for 1 .. n blocks
1966 } //addLoopEndbranchBlock
1968 template<class PassT>
1969 typename CFGStructurizer<PassT>::PathToKind
1970 CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
1971 bool allowSideEntry) {
1974 if (srcBlk == dstBlk) {
1975 return SinglePath_InPath;
1978 while (srcBlk && srcBlk->succ_size() == 1) {
1979 srcBlk = *srcBlk->succ_begin();
1980 if (srcBlk == dstBlk) {
1981 return SinglePath_InPath;
1984 if (!allowSideEntry && srcBlk->pred_size() > 1) {
1985 return Not_SinglePath;
1989 if (srcBlk && srcBlk->succ_size()==0) {
1990 return SinglePath_NotInPath;
1993 return Not_SinglePath;
1996 // If there is a single path from srcBlk to dstBlk, return the last block before
1997 // dstBlk If there is a single path from srcBlk->end without dstBlk, return the
1998 // last block in the path Otherwise, return NULL
1999 template<class PassT>
2000 typename CFGStructurizer<PassT>::BlockT *
2001 CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
2002 bool allowSideEntry) {
2005 if (srcBlk == dstBlk) {
2009 if (srcBlk->succ_size() == 0) {
2013 while (srcBlk && srcBlk->succ_size() == 1) {
2014 BlockT *preBlk = srcBlk;
2016 srcBlk = *srcBlk->succ_begin();
2017 if (srcBlk == NULL) {
2021 if (!allowSideEntry && srcBlk->pred_size() > 1) {
2026 if (srcBlk && srcBlk->succ_size()==0) {
2034 template<class PassT>
2035 int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
2038 assert(preBlk->isSuccessor(srcBlk));
2039 while (srcBlk && srcBlk != dstBlk) {
2040 assert(srcBlk->succ_size() == 1);
2041 if (srcBlk->pred_size() > 1) {
2042 srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
2047 srcBlk = *srcBlk->succ_begin();
2051 } //cloneOnSideEntryTo
2053 template<class PassT>
2054 typename CFGStructurizer<PassT>::BlockT *
2055 CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
2057 assert(predBlk->isSuccessor(curBlk) &&
2058 "succBlk is not a prececessor of curBlk");
2060 BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
2061 CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
2062 //srcBlk, oldBlk, newBlk
2064 predBlk->removeSuccessor(curBlk);
2065 predBlk->addSuccessor(cloneBlk);
2067 // add all successor to cloneBlk
2068 CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
2070 numClonedInstr += curBlk->size();
2073 errs() << "Cloned block: " << "BB"
2074 << curBlk->getNumber() << "size " << curBlk->size() << "\n";
2077 SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
2080 } //cloneBlockForPredecessor
2082 template<class PassT>
2083 typename CFGStructurizer<PassT>::BlockT *
2084 CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
2085 BlockT *exitingBlk) {
2086 BlockT *exitBlk = NULL;
2088 for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
2089 iterSuccEnd = exitingBlk->succ_end();
2090 iterSucc != iterSuccEnd; ++iterSucc) {
2091 BlockT *curBlk = *iterSucc;
2092 if (!loopRep->contains(curBlk)) {
2093 assert(exitBlk == NULL);
2098 assert(exitBlk != NULL);
2101 } //exitingBlock2ExitBlock
2103 template<class PassT>
2104 void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
2106 InstrIterator insertPos) {
2107 InstrIterator spliceEnd;
2108 //look for the input branchinstr, not the AMDIL branchinstr
2109 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2110 if (branchInstr == NULL) {
2112 errs() << "migrateInstruction don't see branch instr\n" ;
2114 spliceEnd = srcBlk->end();
2117 errs() << "migrateInstruction see branch instr\n" ;
2118 branchInstr->dump();
2120 spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
2123 errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
2124 << "srcSize = " << srcBlk->size() << "\n";
2127 //splice insert before insertPos
2128 dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
2131 errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
2132 << "srcSize = " << srcBlk->size() << "\n";
2134 } //migrateInstruction
2136 // normalizeInfiniteLoopExit change
2138 // uncond_br LoopHeader
2142 // cond_br 1 LoopHeader dummyExit
2143 // and return the newly added dummy exit block
2145 template<class PassT>
2146 typename CFGStructurizer<PassT>::BlockT *
2147 CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
2150 loopHeader = LoopRep->getHeader();
2151 loopLatch = LoopRep->getLoopLatch();
2152 BlockT *dummyExitBlk = NULL;
2153 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
2154 if (loopHeader!=NULL && loopLatch!=NULL) {
2155 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
2156 if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
2157 dummyExitBlk = funcRep->CreateMachineBasicBlock();
2158 funcRep->push_back(dummyExitBlk); //insert to function
2159 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
2161 if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
2163 typename BlockT::iterator insertPos =
2164 CFGTraits::getInstrPos(loopLatch, branchInstr);
2166 funcRep->getRegInfo().createVirtualRegister(I32RC);
2167 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
2169 CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
2170 MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
2172 SHOWNEWINSTR(newInstr);
2174 branchInstr->eraseFromParent();
2175 loopLatch->addSuccessor(dummyExitBlk);
2179 return dummyExitBlk;
2180 } //normalizeInfiniteLoopExit
2182 template<class PassT>
2183 void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
2184 InstrT *branchInstr;
2186 // I saw two unconditional branch in one basic block in example
2187 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
2188 while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
2189 && CFGTraits::isUncondBranch(branchInstr)) {
2191 errs() << "Removing unconditional branch instruction" ;
2192 branchInstr->dump();
2194 branchInstr->eraseFromParent();
2196 } //removeUnconditionalBranch
2198 template<class PassT>
2199 void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
2200 if (srcBlk->succ_size() == 2) {
2201 BlockT *blk1 = *srcBlk->succ_begin();
2202 BlockT *blk2 = *(++srcBlk->succ_begin());
2205 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2206 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
2208 errs() << "Removing unneeded conditional branch instruction" ;
2209 branchInstr->dump();
2211 branchInstr->eraseFromParent();
2212 SHOWNEWBLK(blk1, "Removing redundant successor");
2213 srcBlk->removeSuccessor(blk1);
2216 } //removeRedundantConditionalBranch
2218 template<class PassT>
2219 void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
2220 DEFAULT_VEC_SLOTS> &retBlks) {
2221 BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
2222 funcRep->push_back(dummyExitBlk); //insert to function
2223 CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
2225 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
2227 iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
2228 BlockT *curBlk = *iter;
2229 InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
2231 curInstr->eraseFromParent();
2234 if (curBlk->size()==0 && curBlk->pred_size() == 1) {
2236 errs() << "Replace empty block BB" << curBlk->getNumber()
2237 << " with dummyExitBlock\n";
2239 BlockT *predb = *curBlk->pred_begin();
2240 predb->removeSuccessor(curBlk);
2242 } //handle empty curBlk
2244 curBlk->addSuccessor(dummyExitBlk);
2246 errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
2251 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
2252 } //addDummyExitBlock
2254 template<class PassT>
2255 void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
2256 while (srcBlk->succ_size()) {
2257 srcBlk->removeSuccessor(*srcBlk->succ_begin());
2261 template<class PassT>
2262 void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
2263 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2265 if (srcBlkInfo == NULL) {
2266 srcBlkInfo = new BlockInfo();
2269 srcBlkInfo->sccNum = sccNum;
2272 template<class PassT>
2273 int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
2274 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2275 return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
2278 template<class PassT>
2279 void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
2281 errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
2284 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2286 if (srcBlkInfo == NULL) {
2287 srcBlkInfo = new BlockInfo();
2290 srcBlkInfo->isRetired = true;
2291 //int i = srcBlk->succ_size();
2292 //int j = srcBlk->pred_size();
2293 assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
2294 && "can't retire block yet");
2297 template<class PassT>
2298 bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
2299 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2300 return (srcBlkInfo && srcBlkInfo->isRetired);
2303 template<class PassT>
2304 bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
2305 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2306 while (loopRep && loopRep->getHeader() == curBlk) {
2307 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
2309 if(loopLand == NULL)
2312 BlockT *landBlk = loopLand->landBlk;
2314 if (!isRetiredBlock(landBlk)) {
2318 loopRep = loopRep->getParentLoop();
2322 } //isActiveLoophead
2324 template<class PassT>
2325 bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
2326 const unsigned blockSizeThreshold = 30;
2327 const unsigned cloneInstrThreshold = 100;
2329 bool multiplePreds = blk && (blk->pred_size() > 1);
2334 unsigned blkSize = blk->size();
2335 return ((blkSize > blockSizeThreshold)
2336 && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
2337 } //needMigrateBlock
2339 template<class PassT>
2340 typename CFGStructurizer<PassT>::BlockT *
2341 CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
2342 BlockTSmallerVector &exitBlks,
2343 std::set<BlockT *> &exitBlkSet) {
2344 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
2346 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
2347 predIterEnd = landBlk->pred_end();
2348 predIter != predIterEnd; ++predIter) {
2349 BlockT *curBlk = *predIter;
2350 if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
2351 inpathBlks.push_back(curBlk);
2355 //if landBlk has predecessors that are not in the given loop,
2356 //create a new block
2357 BlockT *newLandBlk = landBlk;
2358 if (inpathBlks.size() != landBlk->pred_size()) {
2359 newLandBlk = funcRep->CreateMachineBasicBlock();
2360 funcRep->push_back(newLandBlk); //insert to function
2361 newLandBlk->addSuccessor(landBlk);
2362 for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
2364 iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
2365 BlockT *curBlk = *iter;
2366 CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
2367 //srcBlk, oldBlk, newBlk
2368 curBlk->removeSuccessor(landBlk);
2369 curBlk->addSuccessor(newLandBlk);
2371 for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
2372 if (exitBlks[i] == landBlk) {
2373 exitBlks[i] = newLandBlk;
2376 SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
2379 setLoopLandBlock(loopRep, newLandBlk);
2382 } // recordLoopbreakLand
2384 template<class PassT>
2385 void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
2386 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2388 if (theEntry == NULL) {
2389 theEntry = new LoopLandInfo();
2391 assert(theEntry->landBlk == NULL);
2394 blk = funcRep->CreateMachineBasicBlock();
2395 funcRep->push_back(blk); //insert to function
2396 SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
2399 theEntry->landBlk = blk;
2402 errs() << "setLoopLandBlock loop-header = BB"
2403 << loopRep->getHeader()->getNumber()
2404 << " landing-block = BB" << blk->getNumber() << "\n";
2406 } // setLoopLandBlock
2408 template<class PassT>
2409 void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
2410 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2412 if (theEntry == NULL) {
2413 theEntry = new LoopLandInfo();
2416 theEntry->breakOnRegs.insert(regNum);
2419 errs() << "addLoopBreakOnReg loop-header = BB"
2420 << loopRep->getHeader()->getNumber()
2421 << " regNum = " << regNum << "\n";
2423 } // addLoopBreakOnReg
2425 template<class PassT>
2426 void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
2427 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2429 if (theEntry == NULL) {
2430 theEntry = new LoopLandInfo();
2432 theEntry->contOnRegs.insert(regNum);
2435 errs() << "addLoopContOnReg loop-header = BB"
2436 << loopRep->getHeader()->getNumber()
2437 << " regNum = " << regNum << "\n";
2439 } // addLoopContOnReg
2441 template<class PassT>
2442 void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
2443 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2445 if (theEntry == NULL) {
2446 theEntry = new LoopLandInfo();
2448 theEntry->breakInitRegs.insert(regNum);
2451 errs() << "addLoopBreakInitReg loop-header = BB"
2452 << loopRep->getHeader()->getNumber()
2453 << " regNum = " << regNum << "\n";
2455 } // addLoopBreakInitReg
2457 template<class PassT>
2458 void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
2459 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2461 if (theEntry == NULL) {
2462 theEntry = new LoopLandInfo();
2464 theEntry->contInitRegs.insert(regNum);
2467 errs() << "addLoopContInitReg loop-header = BB"
2468 << loopRep->getHeader()->getNumber()
2469 << " regNum = " << regNum << "\n";
2471 } // addLoopContInitReg
2473 template<class PassT>
2474 void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
2476 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2478 if (theEntry == NULL) {
2479 theEntry = new LoopLandInfo();
2481 theEntry->endbranchInitRegs.insert(regNum);
2485 errs() << "addLoopEndbranchInitReg loop-header = BB"
2486 << loopRep->getHeader()->getNumber()
2487 << " regNum = " << regNum << "\n";
2489 } // addLoopEndbranchInitReg
2491 template<class PassT>
2492 typename CFGStructurizer<PassT>::LoopLandInfo *
2493 CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
2494 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2497 } // getLoopLandInfo
2499 template<class PassT>
2500 typename CFGStructurizer<PassT>::BlockT *
2501 CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
2502 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2504 return theEntry ? theEntry->landBlk : NULL;
2505 } // getLoopLandBlock
2508 template<class PassT>
2509 bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
2510 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2511 if (loopRep == NULL)
2514 BlockT *loopHeader = loopRep->getHeader();
2516 return curBlk->isSuccessor(loopHeader);
2520 template<class PassT>
2521 unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
2522 return loopRep ? loopRep->getLoopDepth() : 0;
2525 template<class PassT>
2526 int CFGStructurizer<PassT>::countActiveBlock
2527 (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
2528 typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
2530 while (iterStart != iterEnd) {
2531 if (!isRetiredBlock(*iterStart)) {
2538 } //countActiveBlock
2540 // This is work around solution for findNearestCommonDominator not avaiable to
2541 // post dom a proper fix should go to Dominators.h.
2543 template<class PassT>
2544 typename CFGStructurizer<PassT>::BlockT*
2545 CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
2547 if (postDomTree->dominates(blk1, blk2)) {
2550 if (postDomTree->dominates(blk2, blk1)) {
2554 DomTreeNodeT *node1 = postDomTree->getNode(blk1);
2555 DomTreeNodeT *node2 = postDomTree->getNode(blk2);
2557 // Handle newly cloned node.
2558 if (node1 == NULL && blk1->succ_size() == 1) {
2559 return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
2561 if (node2 == NULL && blk2->succ_size() == 1) {
2562 return findNearestCommonPostDom(blk1, *blk2->succ_begin());
2565 if (node1 == NULL || node2 == NULL) {
2569 node1 = node1->getIDom();
2571 if (postDomTree->dominates(node1, node2)) {
2572 return node1->getBlock();
2574 node1 = node1->getIDom();
2580 template<class PassT>
2581 typename CFGStructurizer<PassT>::BlockT *
2582 CFGStructurizer<PassT>::findNearestCommonPostDom
2583 (typename std::set<BlockT *> &blks) {
2585 typename std::set<BlockT *>::const_iterator iter = blks.begin();
2586 typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
2587 for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
2588 BlockT *curBlk = *iter;
2589 if (curBlk != commonDom) {
2590 commonDom = findNearestCommonPostDom(curBlk, commonDom);
2595 errs() << "Common post dominator for exit blocks is ";
2597 errs() << "BB" << commonDom->getNumber() << "\n";
2604 } //findNearestCommonPostDom
2606 } //end namespace llvm
2611 //===----------------------------------------------------------------------===//
2613 // CFGStructurizer for AMDIL
2615 //===----------------------------------------------------------------------===//
2618 using namespace llvmCFGStruct;
2622 class AMDILCFGStructurizer : public MachineFunctionPass
2625 typedef MachineInstr InstructionType;
2626 typedef MachineFunction FunctionType;
2627 typedef MachineBasicBlock BlockType;
2628 typedef MachineLoopInfo LoopinfoType;
2629 typedef MachineDominatorTree DominatortreeType;
2630 typedef MachinePostDominatorTree PostDominatortreeType;
2631 typedef MachineDomTreeNode DomTreeNodeType;
2632 typedef MachineLoop LoopType;
2636 const TargetInstrInfo *TII;
2637 const AMDILRegisterInfo *TRI;
2640 AMDILCFGStructurizer(char &pid, TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
2641 const TargetInstrInfo *getTargetInstrInfo() const;
2642 //bool runOnMachineFunction(MachineFunction &F);
2646 }; //end of class AMDILCFGStructurizer
2648 //char AMDILCFGStructurizer::ID = 0;
2649 } //end of namespace llvm
2650 AMDILCFGStructurizer::AMDILCFGStructurizer(char &pid, TargetMachine &tm
2651 AMDIL_OPT_LEVEL_DECL)
2652 : MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
2653 TRI(static_cast<const AMDILRegisterInfo *>(tm.getRegisterInfo())
2657 const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const {
2660 //===----------------------------------------------------------------------===//
2664 //===----------------------------------------------------------------------===//
2667 using namespace llvmCFGStruct;
2671 class AMDILCFGPrepare : public AMDILCFGStructurizer
2677 AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
2679 virtual const char *getPassName() const;
2680 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2682 bool runOnMachineFunction(MachineFunction &F);
2686 }; //end of class AMDILCFGPrepare
2688 char AMDILCFGPrepare::ID = 0;
2689 } //end of namespace llvm
2691 AMDILCFGPrepare::AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
2692 : AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR)
2695 const char *AMDILCFGPrepare::getPassName() const {
2696 return "AMD IL Control Flow Graph Preparation Pass";
2699 void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
2700 AU.addPreserved<MachineFunctionAnalysis>();
2701 AU.addRequired<MachineFunctionAnalysis>();
2702 AU.addRequired<MachineDominatorTree>();
2703 AU.addRequired<MachinePostDominatorTree>();
2704 AU.addRequired<MachineLoopInfo>();
2707 //===----------------------------------------------------------------------===//
2711 //===----------------------------------------------------------------------===//
2714 using namespace llvmCFGStruct;
2718 class AMDILCFGPerform : public AMDILCFGStructurizer
2724 AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
2725 virtual const char *getPassName() const;
2726 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2727 bool runOnMachineFunction(MachineFunction &F);
2731 }; //end of class AMDILCFGPerform
2733 char AMDILCFGPerform::ID = 0;
2734 } //end of namespace llvm
2736 AMDILCFGPerform::AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
2737 : AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR)
2741 const char *AMDILCFGPerform::getPassName() const {
2742 return "AMD IL Control Flow Graph structurizer Pass";
2745 void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
2746 AU.addPreserved<MachineFunctionAnalysis>();
2747 AU.addRequired<MachineFunctionAnalysis>();
2748 AU.addRequired<MachineDominatorTree>();
2749 AU.addRequired<MachinePostDominatorTree>();
2750 AU.addRequired<MachineLoopInfo>();
2753 //===----------------------------------------------------------------------===//
2755 // CFGStructTraits<AMDILCFGStructurizer>
2757 //===----------------------------------------------------------------------===//
2759 namespace llvmCFGStruct
2761 // this class is tailor to the AMDIL backend
2763 struct CFGStructTraits<AMDILCFGStructurizer>
2767 static int getBreakNzeroOpcode(int oldOpcode) {
2769 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALNZ);
2771 assert(0 && "internal error");
2776 static int getBreakZeroOpcode(int oldOpcode) {
2778 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALZ);
2780 assert(0 && "internal error");
2785 static int getBranchNzeroOpcode(int oldOpcode) {
2787 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
2789 assert(0 && "internal error");
2794 static int getBranchZeroOpcode(int oldOpcode) {
2796 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
2798 assert(0 && "internal error");
2803 static int getContinueNzeroOpcode(int oldOpcode)
2806 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALNZ);
2808 assert(0 && "internal error");
2813 static int getContinueZeroOpcode(int oldOpcode) {
2815 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALZ);
2817 assert(0 && "internal error");
2822 // the explicitly represented branch target is the true branch target
2823 #define getExplicitBranch getTrueBranch
2824 #define setExplicitBranch setTrueBranch
2826 static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
2827 return instr->getOperand(0).getMBB();
2830 static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
2831 instr->getOperand(0).setMBB(blk);
2834 static MachineBasicBlock *
2835 getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
2836 assert(blk->succ_size() == 2);
2837 MachineBasicBlock *trueBranch = getTrueBranch(instr);
2838 MachineBasicBlock::succ_iterator iter = blk->succ_begin();
2839 MachineBasicBlock::succ_iterator iterNext = iter;
2842 return (*iter == trueBranch) ? *iterNext : *iter;
2845 static bool isCondBranch(MachineInstr *instr) {
2846 switch (instr->getOpcode()) {
2847 ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
2855 static bool isUncondBranch(MachineInstr *instr) {
2856 switch (instr->getOpcode()) {
2857 case AMDGPU::BRANCH:
2865 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
2866 //get DebugLoc from the first MachineBasicBlock instruction with debug info
2868 for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
2869 MachineInstr *instr = &(*iter);
2870 if (instr->getDebugLoc().isUnknown() == false) {
2871 DL = instr->getDebugLoc();
2877 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
2878 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2879 MachineInstr *instr = &*iter;
2880 if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
2886 // The correct naming for this is getPossibleLoopendBlockBranchInstr.
2888 // BB with backward-edge could have move instructions after the branch
2889 // instruction. Such move instruction "belong to" the loop backward-edge.
2891 static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
2892 const AMDILInstrInfo * TII = static_cast<const AMDILInstrInfo *>(
2893 blk->getParent()->getTarget().getInstrInfo());
2895 for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
2896 iterEnd = blk->rend(); iter != iterEnd; ++iter) {
2898 MachineInstr *instr = &*iter;
2900 if (isCondBranch(instr) || isUncondBranch(instr)) {
2902 } else if (!TII->isMov(instr->getOpcode())) {
2910 static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
2911 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2912 if (iter != blk->rend()) {
2913 MachineInstr *instr = &(*iter);
2914 if (instr->getOpcode() == AMDGPU::RETURN) {
2921 static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
2922 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2923 if (iter != blk->rend()) {
2924 MachineInstr *instr = &(*iter);
2925 if (instr->getOpcode() == AMDGPU::CONTINUE) {
2932 static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
2933 for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
2934 MachineInstr *instr = &(*iter);
2935 if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) {
2942 static bool isReturnBlock(MachineBasicBlock *blk) {
2943 MachineInstr *instr = getReturnInstr(blk);
2944 bool isReturn = (blk->succ_size() == 0);
2947 } else if (isReturn) {
2949 errs() << "BB" << blk->getNumber()
2950 <<" is return block without RETURN instr\n";
2957 static MachineBasicBlock::iterator
2958 getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
2959 assert(instr->getParent() == blk && "instruction doesn't belong to block");
2960 MachineBasicBlock::iterator iter = blk->begin();
2961 MachineBasicBlock::iterator iterEnd = blk->end();
2962 while (&(*iter) != instr && iter != iterEnd) {
2966 assert(iter != iterEnd);
2970 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2971 AMDILCFGStructurizer *passRep) {
2972 return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
2973 } //insertInstrBefore
2975 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2976 AMDILCFGStructurizer *passRep, DebugLoc DL) {
2977 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2978 MachineInstr *newInstr =
2979 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
2981 MachineBasicBlock::iterator res;
2982 if (blk->begin() != blk->end()) {
2983 blk->insert(blk->begin(), newInstr);
2985 blk->push_back(newInstr);
2988 SHOWNEWINSTR(newInstr);
2991 } //insertInstrBefore
2993 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2994 AMDILCFGStructurizer *passRep) {
2995 insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
2998 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2999 AMDILCFGStructurizer *passRep, DebugLoc DL) {
3000 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3001 MachineInstr *newInstr = blk->getParent()
3002 ->CreateMachineInstr(tii->get(newOpcode), DL);
3004 blk->push_back(newInstr);
3005 //assume the instruction doesn't take any reg operand ...
3007 SHOWNEWINSTR(newInstr);
3010 static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
3012 AMDILCFGStructurizer *passRep) {
3013 MachineInstr *oldInstr = &(*instrPos);
3014 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3015 MachineBasicBlock *blk = oldInstr->getParent();
3016 MachineInstr *newInstr =
3017 blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
3020 blk->insert(instrPos, newInstr);
3021 //assume the instruction doesn't take any reg operand ...
3023 SHOWNEWINSTR(newInstr);
3025 } //insertInstrBefore
3027 static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
3029 AMDILCFGStructurizer *passRep,
3031 MachineInstr *oldInstr = &(*instrPos);
3032 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3033 MachineBasicBlock *blk = oldInstr->getParent();
3034 MachineInstr *newInstr =
3035 blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
3038 blk->insert(instrPos, newInstr);
3039 MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
3042 SHOWNEWINSTR(newInstr);
3043 //erase later oldInstr->eraseFromParent();
3044 } //insertCondBranchBefore
3046 static void insertCondBranchBefore(MachineBasicBlock *blk,
3047 MachineBasicBlock::iterator insertPos,
3049 AMDILCFGStructurizer *passRep,
3052 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3054 MachineInstr *newInstr =
3055 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
3058 blk->insert(insertPos, newInstr);
3059 MachineInstrBuilder(newInstr).addReg(regNum, false);
3061 SHOWNEWINSTR(newInstr);
3062 } //insertCondBranchBefore
3064 static void insertCondBranchEnd(MachineBasicBlock *blk,
3066 AMDILCFGStructurizer *passRep,
3068 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3069 MachineInstr *newInstr =
3070 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
3072 blk->push_back(newInstr);
3073 MachineInstrBuilder(newInstr).addReg(regNum, false);
3075 SHOWNEWINSTR(newInstr);
3076 } //insertCondBranchEnd
3079 static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
3080 AMDILCFGStructurizer *passRep,
3081 RegiT regNum, int regVal) {
3082 MachineInstr *oldInstr = &(*instrPos);
3083 const AMDILInstrInfo *tii =
3084 static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
3085 MachineBasicBlock *blk = oldInstr->getParent();
3086 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
3088 blk->insert(instrPos, newInstr);
3090 SHOWNEWINSTR(newInstr);
3091 } //insertAssignInstrBefore
3093 static void insertAssignInstrBefore(MachineBasicBlock *blk,
3094 AMDILCFGStructurizer *passRep,
3095 RegiT regNum, int regVal) {
3096 const AMDILInstrInfo *tii =
3097 static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
3099 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
3101 if (blk->begin() != blk->end()) {
3102 blk->insert(blk->begin(), newInstr);
3104 blk->push_back(newInstr);
3107 SHOWNEWINSTR(newInstr);
3109 } //insertInstrBefore
3111 static void insertCompareInstrBefore(MachineBasicBlock *blk,
3112 MachineBasicBlock::iterator instrPos,
3113 AMDILCFGStructurizer *passRep,
3114 RegiT dstReg, RegiT src1Reg,
3116 const AMDILInstrInfo *tii =
3117 static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
3118 MachineInstr *newInstr =
3119 blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
3121 MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
3122 MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
3123 MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
3125 blk->insert(instrPos, newInstr);
3126 SHOWNEWINSTR(newInstr);
3128 } //insertCompareInstrBefore
3130 static void cloneSuccessorList(MachineBasicBlock *dstBlk,
3131 MachineBasicBlock *srcBlk) {
3132 for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
3133 iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
3134 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
3136 } //cloneSuccessorList
3138 static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
3139 MachineFunction *func = srcBlk->getParent();
3140 MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
3141 func->push_back(newBlk); //insert to function
3142 //newBlk->setNumber(srcBlk->getNumber());
3143 for (MachineBasicBlock::iterator iter = srcBlk->begin(),
3144 iterEnd = srcBlk->end();
3145 iter != iterEnd; ++iter) {
3146 MachineInstr *instr = func->CloneMachineInstr(iter);
3147 newBlk->push_back(instr);
3152 //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
3153 //the AMDIL instruction is not recognized as terminator fix this and retire
3155 static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
3156 MachineBasicBlock *oldBlk,
3157 MachineBasicBlock *newBlk) {
3158 MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
3159 if (branchInstr && isCondBranch(branchInstr) &&
3160 getExplicitBranch(branchInstr) == oldBlk) {
3161 setExplicitBranch(branchInstr, newBlk);
3165 static void wrapup(MachineBasicBlock *entryBlk) {
3166 assert((!entryBlk->getParent()->getJumpTableInfo()
3167 || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
3168 && "found a jump table");
3170 //collect continue right before endloop
3171 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
3172 MachineBasicBlock::iterator pre = entryBlk->begin();
3173 MachineBasicBlock::iterator iterEnd = entryBlk->end();
3174 MachineBasicBlock::iterator iter = pre;
3175 while (iter != iterEnd) {
3176 if (pre->getOpcode() == AMDGPU::CONTINUE
3177 && iter->getOpcode() == AMDGPU::ENDLOOP) {
3178 contInstr.push_back(pre);
3184 //delete continue right before endloop
3185 for (unsigned i = 0; i < contInstr.size(); ++i) {
3186 contInstr[i]->eraseFromParent();
3189 // TODO to fix up jump table so later phase won't be confused. if
3190 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
3191 // there isn't such an interface yet. alternatively, replace all the other
3192 // blocks in the jump table with the entryBlk //}
3196 static MachineDominatorTree *getDominatorTree(AMDILCFGStructurizer &pass) {
3197 return &pass.getAnalysis<MachineDominatorTree>();
3200 static MachinePostDominatorTree*
3201 getPostDominatorTree(AMDILCFGStructurizer &pass) {
3202 return &pass.getAnalysis<MachinePostDominatorTree>();
3205 static MachineLoopInfo *getLoopInfo(AMDILCFGStructurizer &pass) {
3206 return &pass.getAnalysis<MachineLoopInfo>();
3208 }; // template class CFGStructTraits
3209 } //end of namespace llvm
3211 // createAMDILCFGPreparationPass- Returns a pass
3212 FunctionPass *llvm::createAMDILCFGPreparationPass(TargetMachine &tm
3213 AMDIL_OPT_LEVEL_DECL) {
3214 return new AMDILCFGPrepare(tm AMDIL_OPT_LEVEL_VAR);
3217 bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func) {
3218 return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().prepare(func,
3223 // createAMDILCFGStructurizerPass- Returns a pass
3224 FunctionPass *llvm::createAMDILCFGStructurizerPass(TargetMachine &tm
3225 AMDIL_OPT_LEVEL_DECL) {
3226 return new AMDILCFGPerform(tm AMDIL_OPT_LEVEL_VAR);
3229 bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func) {
3230 return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().run(func,
3235 //end of file newline goes below