X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FR600%2FAMDILCFGStructurizer.cpp;h=d72de276c52a2ca31a20ba4badea9e866c746ad9;hb=88ed6409303676b83800b4d69d2b891b6a39cc9e;hp=9de97b6269940c61e3be9a01cc477ae54a1e8b69;hpb=6b207d3cfa6b7be87ebde25c6c002f776f3d1595;p=oota-llvm.git diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp index 9de97b62699..d72de276c52 100644 --- a/lib/Target/R600/AMDILCFGStructurizer.cpp +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -8,17 +8,16 @@ /// \file //==-----------------------------------------------------------------------===// -#define DEBUGME 0 -#define DEBUG_TYPE "structcfg" +#include +#include "AMDGPU.h" #include "AMDGPUInstrInfo.h" -#include "AMDIL.h" +#include "R600InstrInfo.h" +#include "AMDGPUSubtarget.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/DominatorInternals.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -26,11 +25,20 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "structcfg" + +#define DEFAULT_VEC_SLOTS 8 + // TODO: move-begin. //===----------------------------------------------------------------------===// @@ -43,53 +51,43 @@ STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " "matched"); STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " "matched"); -STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break " - "pattern matched"); STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue " "pattern matched"); -STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern " - "matched"); STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); +namespace llvm { + void initializeAMDGPUCFGStructurizerPass(PassRegistry&); +} + //===----------------------------------------------------------------------===// // // Miscellaneous utility for CFGStructurizer. // //===----------------------------------------------------------------------===// -namespace llvmCFGStruct { +namespace { #define SHOWNEWINSTR(i) \ - if (DEBUGME) errs() << "New instr: " << *i << "\n" + DEBUG(dbgs() << "New instr: " << *i << "\n"); #define SHOWNEWBLK(b, msg) \ -if (DEBUGME) { \ - errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ - errs() << "\n"; \ -} +DEBUG( \ + dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ + dbgs() << "\n"; \ +); #define SHOWBLK_DETAIL(b, msg) \ -if (DEBUGME) { \ +DEBUG( \ if (b) { \ - errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ - b->print(errs()); \ - errs() << "\n"; \ + dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ + b->print(dbgs()); \ + dbgs() << "\n"; \ } \ -} +); #define INVALIDSCCNUM -1 -#define INVALIDREGNUM 0 - -template -void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) { - for (typename LoopinfoT::iterator iter = LoopInfo.begin(), - iterEnd = LoopInfo.end(); - iter != iterEnd; ++iter) { - (*iter)->print(OS, 0); - } -} template -void ReverseVector(SmallVector &Src) { +void ReverseVector(SmallVectorImpl &Src) { size_t sz = Src.size(); for (size_t i = 0; i < sz/2; ++i) { NodeT *t = Src[i]; @@ -98,7 +96,7 @@ void ReverseVector(SmallVector &Src) { } } -} //end namespace llvmCFGStruct +} // end anonymous namespace //===----------------------------------------------------------------------===// // @@ -106,43 +104,17 @@ void ReverseVector(SmallVector &Src) { // //===----------------------------------------------------------------------===// -namespace llvmCFGStruct { -template -struct CFGStructTraits { -}; -template -class BlockInformation { -public: - bool isRetired; - int sccNum; - //SmallVector succInstr; - //Instructions defining the corresponding successor. - BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {} -}; +namespace { -template -class LandInformation { +class BlockInformation { public: - BlockT *landBlk; - std::set breakInitRegs; //Registers that need to "reg = 0", before - //WHILELOOP(thisloop) init before entering - //thisloop. - std::set contInitRegs; //Registers that need to "reg = 0", after - //WHILELOOP(thisloop) init after entering - //thisloop. - std::set endbranchInitRegs; //Init before entering this loop, at loop - //land block, branch cond on this reg. - std::set breakOnRegs; //registers that need to "if (reg) break - //endif" after ENDLOOP(thisloop) break - //outerLoopOf(thisLoop). - std::set contOnRegs; //registers that need to "if (reg) continue - //endif" after ENDLOOP(thisloop) continue on - //outerLoopOf(thisLoop). - LandInformation() : landBlk(NULL) {} + bool IsRetired; + int SccNum; + BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {} }; -} //end of namespace llvmCFGStruct +} // end anonymous namespace //===----------------------------------------------------------------------===// // @@ -150,1026 +122,1208 @@ public: // //===----------------------------------------------------------------------===// -namespace llvmCFGStruct { -// bixia TODO: port it to BasicBlock, not just MachineBasicBlock. -template -class CFGStructurizer { +namespace { +class AMDGPUCFGStructurizer : public MachineFunctionPass { public: - typedef enum { + typedef SmallVector MBBVector; + typedef std::map MBBInfoMap; + typedef std::map LoopLandInfoMap; + + enum PathToKind { Not_SinglePath = 0, SinglePath_InPath = 1, SinglePath_NotInPath = 2 - } PathToKind; + }; -public: - typedef typename PassT::InstructionType InstrT; - typedef typename PassT::FunctionType FuncT; - typedef typename PassT::DominatortreeType DomTreeT; - typedef typename PassT::PostDominatortreeType PostDomTreeT; - typedef typename PassT::DomTreeNodeType DomTreeNodeT; - typedef typename PassT::LoopinfoType LoopInfoT; - - typedef GraphTraits FuncGTraits; - //typedef FuncGTraits::nodes_iterator BlockIterator; - typedef typename FuncT::iterator BlockIterator; - - typedef typename FuncGTraits::NodeType BlockT; - typedef GraphTraits BlockGTraits; - typedef GraphTraits > InvBlockGTraits; - //typedef BlockGTraits::succ_iterator InstructionIterator; - typedef typename BlockT::iterator InstrIterator; - - typedef CFGStructTraits CFGTraits; - typedef BlockInformation BlockInfo; - typedef std::map BlockInfoMap; - - typedef int RegiT; - typedef typename PassT::LoopType LoopT; - typedef LandInformation LoopLandInfo; - typedef std::map LoopLandInfoMap; - //landing info for loop break - typedef SmallVector BlockTSmallerVector; + static char ID; -public: - CFGStructurizer(); - ~CFGStructurizer(); + AMDGPUCFGStructurizer() : + MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) { + initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry()); + } + + const char *getPassName() const override { + return "AMDGPU Control Flow Graph structurizer Pass"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } /// Perform the CFG structurization - bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); + bool run(); /// Perform the CFG preparation - bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); + /// This step will remove every unconditionnal/dead jump instructions and make + /// sure all loops have an exit block + bool prepare(); + + bool runOnMachineFunction(MachineFunction &MF) override { + TII = static_cast(MF.getSubtarget().getInstrInfo()); + TRI = &TII->getRegisterInfo(); + DEBUG(MF.dump();); + OrderedBlks.clear(); + Visited.clear(); + FuncRep = &MF; + MLI = &getAnalysis(); + DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); + MDT = &getAnalysis(); + DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr);); + PDT = &getAnalysis(); + DEBUG(PDT->print(dbgs());); + prepare(); + run(); + DEBUG(MF.dump();); + return true; + } -private: - void reversePredicateSetter(typename BlockT::iterator); - void orderBlocks(); - void printOrderedBlocks(llvm::raw_ostream &OS); - int patternMatch(BlockT *CurBlock); - int patternMatchGroup(BlockT *CurBlock); - - int serialPatternMatch(BlockT *CurBlock); - int ifPatternMatch(BlockT *CurBlock); - int switchPatternMatch(BlockT *CurBlock); - int loopendPatternMatch(BlockT *CurBlock); - int loopPatternMatch(BlockT *CurBlock); - - int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); - int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); - //int loopWithoutBreak(BlockT *); - - void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop, - BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock); - void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop, - BlockT *ContBlock, LoopT *contLoop); - bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block); - int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, - BlockT *FalseBlock); - int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock, - BlockT *FalseBlock); - int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, - BlockT *FalseBlock, BlockT **LandBlockPtr); - void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, - BlockT *FalseBlock, BlockT *LandBlock, - bool Detail = false); - PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock, - bool AllowSideEntry = true); - BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock, - bool AllowSideEntry = true); - int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock); - void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock); - - void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock, - BlockT *TrueBlock, BlockT *FalseBlock, - BlockT *LandBlock); - void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand); - void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock, - BlockT *ExitLandBlock, RegiT SetReg); - void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock, - RegiT SetReg); - BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep, - std::set &ExitBlockSet, - BlockT *ExitLandBlk); - BlockT *addLoopEndbranchBlock(LoopT *LoopRep, - BlockTSmallerVector &ExitingBlocks, - BlockTSmallerVector &ExitBlocks); - BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep); - void removeUnconditionalBranch(BlockT *SrcBlock); - void removeRedundantConditionalBranch(BlockT *SrcBlock); - void addDummyExitBlock(SmallVector &RetBlocks); - - void removeSuccessor(BlockT *SrcBlock); - BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock); - BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock); - - void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock, - InstrIterator InsertPos); - - void recordSccnum(BlockT *SrcBlock, int SCCNum); - int getSCCNum(BlockT *srcBlk); - - void retireBlock(BlockT *DstBlock, BlockT *SrcBlock); - bool isRetiredBlock(BlockT *SrcBlock); - bool isActiveLoophead(BlockT *CurBlock); - bool needMigrateBlock(BlockT *Block); - - BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock, - BlockTSmallerVector &exitBlocks, - std::set &ExitBlockSet); - void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL); - BlockT *getLoopLandBlock(LoopT *LoopRep); - LoopLandInfo *getLoopLandInfo(LoopT *LoopRep); - - void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum); - void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum); - void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum); - void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum); - void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum); - - bool hasBackEdge(BlockT *curBlock); - unsigned getLoopDepth (LoopT *LoopRep); - int countActiveBlock( - typename SmallVector::const_iterator IterStart, - typename SmallVector::const_iterator IterEnd); - BlockT *findNearestCommonPostDom(std::set&); - BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2); +protected: + MachineDominatorTree *MDT; + MachinePostDominatorTree *PDT; + MachineLoopInfo *MLI; + const R600InstrInfo *TII; + const AMDGPURegisterInfo *TRI; + + // PRINT FUNCTIONS + /// Print the ordered Blocks. + void printOrderedBlocks() const { + size_t i = 0; + for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(), + iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { + dbgs() << "BB" << (*iterBlk)->getNumber(); + dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; + if (i != 0 && i % 10 == 0) { + dbgs() << "\n"; + } else { + dbgs() << " "; + } + } + } + static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) { + for (MachineLoop::iterator iter = LoopInfo.begin(), + iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) { + (*iter)->print(dbgs(), 0); + } + } + + // UTILITY FUNCTIONS + int getSCCNum(MachineBasicBlock *MBB) const; + MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const; + bool hasBackEdge(MachineBasicBlock *MBB) const; + static unsigned getLoopDepth(MachineLoop *LoopRep); + bool isRetiredBlock(MachineBasicBlock *MBB) const; + bool isActiveLoophead(MachineBasicBlock *MBB) const; + PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, + bool AllowSideEntry = true) const; + int countActiveBlock(MBBVector::const_iterator It, + MBBVector::const_iterator E) const; + bool needMigrateBlock(MachineBasicBlock *MBB) const; + + // Utility Functions + void reversePredicateSetter(MachineBasicBlock::iterator I); + /// Compute the reversed DFS post order of Blocks + void orderBlocks(MachineFunction *MF); + + // Function originally from CFGStructTraits + void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode, + DebugLoc DL = DebugLoc()); + MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode, + DebugLoc DL = DebugLoc()); + MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode); + void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode, + DebugLoc DL); + void insertCondBranchBefore(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, int NewOpcode, int RegNum, + DebugLoc DL); + void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum); + static int getBranchNzeroOpcode(int OldOpcode); + static int getBranchZeroOpcode(int OldOpcode); + static int getContinueNzeroOpcode(int OldOpcode); + static int getContinueZeroOpcode(int OldOpcode); + static MachineBasicBlock *getTrueBranch(MachineInstr *MI); + static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB); + static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB, + MachineInstr *MI); + static bool isCondBranch(MachineInstr *MI); + static bool isUncondBranch(MachineInstr *MI); + static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); + static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); + /// The correct naming for this is getPossibleLoopendBlockBranchInstr. + /// + /// BB with backward-edge could have move instructions after the branch + /// instruction. Such move instruction "belong to" the loop backward-edge. + MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB); + static MachineInstr *getReturnInstr(MachineBasicBlock *MBB); + static MachineInstr *getContinueInstr(MachineBasicBlock *MBB); + static bool isReturnBlock(MachineBasicBlock *MBB); + static void cloneSuccessorList(MachineBasicBlock *DstMBB, + MachineBasicBlock *SrcMBB) ; + static MachineBasicBlock *clone(MachineBasicBlock *MBB); + /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose + /// because the AMDGPU instruction is not recognized as terminator fix this + /// and retire this routine + void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB, + MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk); + static void wrapup(MachineBasicBlock *MBB); + + + int patternMatch(MachineBasicBlock *MBB); + int patternMatchGroup(MachineBasicBlock *MBB); + int serialPatternMatch(MachineBasicBlock *MBB); + int ifPatternMatch(MachineBasicBlock *MBB); + int loopendPatternMatch(); + int mergeLoop(MachineLoop *LoopRep); + int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader); + + void handleLoopcontBlock(MachineBasicBlock *ContingMBB, + MachineLoop *ContingLoop, MachineBasicBlock *ContMBB, + MachineLoop *ContLoop); + /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in + /// the same loop with LoopLandInfo without explicitly keeping track of + /// loopContBlks and loopBreakBlks, this is a method to get the information. + bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB, + MachineBasicBlock *Src2MBB); + int handleJumpintoIf(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); + int handleJumpintoIfImp(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); + int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, + MachineBasicBlock **LandMBBPtr); + void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, + MachineBasicBlock *LandMBB, bool Detail = false); + int cloneOnSideEntryTo(MachineBasicBlock *PreMBB, + MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB); + void mergeSerialBlock(MachineBasicBlock *DstMBB, + MachineBasicBlock *SrcMBB); + + void mergeIfthenelseBlock(MachineInstr *BranchMI, + MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB); + void mergeLooplandBlock(MachineBasicBlock *DstMBB, + MachineBasicBlock *LandMBB); + void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, + MachineBasicBlock *LandMBB); + void settleLoopcontBlock(MachineBasicBlock *ContingMBB, + MachineBasicBlock *ContMBB); + /// normalizeInfiniteLoopExit change + /// B1: + /// uncond_br LoopHeader + /// + /// to + /// B1: + /// cond_br 1 LoopHeader dummyExit + /// and return the newly added dummy exit block + MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep); + void removeUnconditionalBranch(MachineBasicBlock *MBB); + /// Remove duplicate branches instructions in a block. + /// For instance + /// B0: + /// cond_br X B1 B2 + /// cond_br X B1 B2 + /// is transformed to + /// B0: + /// cond_br X B1 B2 + void removeRedundantConditionalBranch(MachineBasicBlock *MBB); + void addDummyExitBlock(SmallVectorImpl &RetMBB); + void removeSuccessor(MachineBasicBlock *MBB); + MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, + MachineBasicBlock *PredMBB); + void migrateInstruction(MachineBasicBlock *SrcMBB, + MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); + void recordSccnum(MachineBasicBlock *MBB, int SCCNum); + void retireBlock(MachineBasicBlock *MBB); + void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = nullptr); + + MachineBasicBlock *findNearestCommonPostDom(std::set&); + /// This is work around solution for findNearestCommonDominator not available + /// to post dom a proper fix should go to Dominators.h. + MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2); private: - DomTreeT *domTree; - PostDomTreeT *postDomTree; - LoopInfoT *loopInfo; - PassT *passRep; - FuncT *funcRep; - - BlockInfoMap blockInfoMap; - LoopLandInfoMap loopLandInfoMap; - SmallVector orderedBlks; - const AMDGPURegisterInfo *TRI; + MBBInfoMap BlockInfoMap; + LoopLandInfoMap LLInfoMap; + std::map Visited; + MachineFunction *FuncRep; + SmallVector OrderedBlks; +}; + +int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { + MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); + if (It == BlockInfoMap.end()) + return INVALIDSCCNUM; + return (*It).second->SccNum; +} + +MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) + const { + LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep); + if (It == LLInfoMap.end()) + return nullptr; + return (*It).second; +} + +bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const { + MachineLoop *LoopRep = MLI->getLoopFor(MBB); + if (!LoopRep) + return false; + MachineBasicBlock *LoopHeader = LoopRep->getHeader(); + return MBB->isSuccessor(LoopHeader); +} -}; //template class CFGStructurizer +unsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) { + return LoopRep ? LoopRep->getLoopDepth() : 0; +} -template CFGStructurizer::CFGStructurizer() - : domTree(NULL), postDomTree(NULL), loopInfo(NULL) { +bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const { + MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); + if (It == BlockInfoMap.end()) + return false; + return (*It).second->IsRetired; } -template CFGStructurizer::~CFGStructurizer() { - for (typename BlockInfoMap::iterator I = blockInfoMap.begin(), - E = blockInfoMap.end(); I != E; ++I) { - delete I->second; +bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const { + MachineLoop *LoopRep = MLI->getLoopFor(MBB); + while (LoopRep && LoopRep->getHeader() == MBB) { + MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep); + if(!LoopLand) + return true; + if (!isRetiredBlock(LoopLand)) + return true; + LoopRep = LoopRep->getParentLoop(); + } + return false; +} +AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo( + MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, + bool AllowSideEntry) const { + assert(DstMBB); + if (SrcMBB == DstMBB) + return SinglePath_InPath; + while (SrcMBB && SrcMBB->succ_size() == 1) { + SrcMBB = *SrcMBB->succ_begin(); + if (SrcMBB == DstMBB) + return SinglePath_InPath; + if (!AllowSideEntry && SrcMBB->pred_size() > 1) + return Not_SinglePath; } + if (SrcMBB && SrcMBB->succ_size()==0) + return SinglePath_NotInPath; + return Not_SinglePath; } -template -bool CFGStructurizer::prepare(FuncT &func, PassT &pass, - const AMDGPURegisterInfo * tri) { - passRep = &pass; - funcRep = &func; - TRI = tri; +int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It, + MBBVector::const_iterator E) const { + int Count = 0; + while (It != E) { + if (!isRetiredBlock(*It)) + ++Count; + ++It; + } + return Count; +} - bool changed = false; +bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const { + unsigned BlockSizeThreshold = 30; + unsigned CloneInstrThreshold = 100; + bool MultiplePreds = MBB && (MBB->pred_size() > 1); - //FIXME: if not reducible flow graph, make it so ??? + if(!MultiplePreds) + return false; + unsigned BlkSize = MBB->size(); + return ((BlkSize > BlockSizeThreshold) && + (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold)); +} - if (DEBUGME) { - errs() << "AMDGPUCFGStructurizer::prepare\n"; +void AMDGPUCFGStructurizer::reversePredicateSetter( + MachineBasicBlock::iterator I) { + while (I--) { + if (I->getOpcode() == AMDGPU::PRED_X) { + switch (static_cast(I)->getOperand(2).getImm()) { + case OPCODE_IS_ZERO_INT: + static_cast(I)->getOperand(2) + .setImm(OPCODE_IS_NOT_ZERO_INT); + return; + case OPCODE_IS_NOT_ZERO_INT: + static_cast(I)->getOperand(2) + .setImm(OPCODE_IS_ZERO_INT); + return; + case OPCODE_IS_ZERO: + static_cast(I)->getOperand(2) + .setImm(OPCODE_IS_NOT_ZERO); + return; + case OPCODE_IS_NOT_ZERO: + static_cast(I)->getOperand(2) + .setImm(OPCODE_IS_ZERO); + return; + default: + llvm_unreachable("PRED_X Opcode invalid!"); + } + } } +} + +void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB, + int NewOpcode, DebugLoc DL) { + MachineInstr *MI = MBB->getParent() + ->CreateMachineInstr(TII->get(NewOpcode), DL); + MBB->push_back(MI); + //assume the instruction doesn't take any reg operand ... + SHOWNEWINSTR(MI); +} + +MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB, + int NewOpcode, DebugLoc DL) { + MachineInstr *MI = + MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); + if (MBB->begin() != MBB->end()) + MBB->insert(MBB->begin(), MI); + else + MBB->push_back(MI); + SHOWNEWINSTR(MI); + return MI; +} + +MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore( + MachineBasicBlock::iterator I, int NewOpcode) { + MachineInstr *OldMI = &(*I); + MachineBasicBlock *MBB = OldMI->getParent(); + MachineInstr *NewMBB = + MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); + MBB->insert(I, NewMBB); + //assume the instruction doesn't take any reg operand ... + SHOWNEWINSTR(NewMBB); + return NewMBB; +} + +void AMDGPUCFGStructurizer::insertCondBranchBefore( + MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) { + MachineInstr *OldMI = &(*I); + MachineBasicBlock *MBB = OldMI->getParent(); + MachineFunction *MF = MBB->getParent(); + MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL); + MBB->insert(I, NewMI); + MachineInstrBuilder MIB(*MF, NewMI); + MIB.addReg(OldMI->getOperand(1).getReg(), false); + SHOWNEWINSTR(NewMI); + //erase later oldInstr->eraseFromParent(); +} + +void AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk, + MachineBasicBlock::iterator I, int NewOpcode, int RegNum, + DebugLoc DL) { + MachineFunction *MF = blk->getParent(); + MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL); + //insert before + blk->insert(I, NewInstr); + MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); + SHOWNEWINSTR(NewInstr); +} + +void AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB, + int NewOpcode, int RegNum) { + MachineFunction *MF = MBB->getParent(); + MachineInstr *NewInstr = + MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); + MBB->push_back(NewInstr); + MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); + SHOWNEWINSTR(NewInstr); +} - loopInfo = CFGTraits::getLoopInfo(pass); - if (DEBUGME) { - errs() << "LoopInfo:\n"; - PrintLoopinfo(*loopInfo, errs()); +int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { + switch(OldOpcode) { + case AMDGPU::JUMP_COND: + case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; + case AMDGPU::BRANCH_COND_i32: + case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; + default: llvm_unreachable("internal error"); } + return -1; +} - orderBlocks(); - if (DEBUGME) { - errs() << "Ordered blocks:\n"; - printOrderedBlocks(errs()); +int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { + switch(OldOpcode) { + case AMDGPU::JUMP_COND: + case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; + case AMDGPU::BRANCH_COND_i32: + case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; + default: llvm_unreachable("internal error"); } + return -1; +} - SmallVector retBlks; - - for (typename LoopInfoT::iterator iter = loopInfo->begin(), - iterEnd = loopInfo->end(); - iter != iterEnd; ++iter) { - LoopT* loopRep = (*iter); - BlockTSmallerVector exitingBlks; - loopRep->getExitingBlocks(exitingBlks); - - if (exitingBlks.size() == 0) { - BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep); - if (dummyExitBlk != NULL) - retBlks.push_back(dummyExitBlk); - } +int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { + switch(OldOpcode) { + case AMDGPU::JUMP_COND: + case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; + default: llvm_unreachable("internal error"); + }; + return -1; +} + +int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { + switch(OldOpcode) { + case AMDGPU::JUMP_COND: + case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; + default: llvm_unreachable("internal error"); } + return -1; +} - // Remove unconditional branch instr. - // Add dummy exit block iff there are multiple returns. +MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) { + return MI->getOperand(0).getMBB(); +} + +void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI, + MachineBasicBlock *MBB) { + MI->getOperand(0).setMBB(MBB); +} + +MachineBasicBlock * +AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB, + MachineInstr *MI) { + assert(MBB->succ_size() == 2); + MachineBasicBlock *TrueBranch = getTrueBranch(MI); + MachineBasicBlock::succ_iterator It = MBB->succ_begin(); + MachineBasicBlock::succ_iterator Next = It; + ++Next; + return (*It == TrueBranch) ? *Next : *It; +} + +bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) { + switch (MI->getOpcode()) { + case AMDGPU::JUMP_COND: + case AMDGPU::BRANCH_COND_i32: + case AMDGPU::BRANCH_COND_f32: return true; + default: + return false; + } + return false; +} - for (typename SmallVector::const_iterator - iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end(); - iterBlk != iterEndBlk; - ++iterBlk) { - BlockT *curBlk = *iterBlk; - removeUnconditionalBranch(curBlk); - removeRedundantConditionalBranch(curBlk); - if (CFGTraits::isReturnBlock(curBlk)) { - retBlks.push_back(curBlk); +bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) { + switch (MI->getOpcode()) { + case AMDGPU::JUMP: + case AMDGPU::BRANCH: + return true; + default: + return false; + } + return false; +} + +DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { + //get DebugLoc from the first MachineBasicBlock instruction with debug info + DebugLoc DL; + for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end(); + ++It) { + MachineInstr *instr = &(*It); + if (instr->getDebugLoc().isUnknown() == false) + DL = instr->getDebugLoc(); + } + return DL; +} + +MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr( + MachineBasicBlock *MBB) { + MachineBasicBlock::reverse_iterator It = MBB->rbegin(); + MachineInstr *MI = &*It; + if (MI && (isCondBranch(MI) || isUncondBranch(MI))) + return MI; + return nullptr; +} + +MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr( + MachineBasicBlock *MBB) { + for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend(); + It != E; ++It) { + // FIXME: Simplify + MachineInstr *MI = &*It; + if (MI) { + if (isCondBranch(MI) || isUncondBranch(MI)) + return MI; + else if (!TII->isMov(MI->getOpcode())) + break; } - assert(curBlk->succ_size() <= 2); - } //for + } + return nullptr; +} + +MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { + MachineBasicBlock::reverse_iterator It = MBB->rbegin(); + if (It != MBB->rend()) { + MachineInstr *instr = &(*It); + if (instr->getOpcode() == AMDGPU::RETURN) + return instr; + } + return nullptr; +} - if (retBlks.size() >= 2) { - addDummyExitBlock(retBlks); - changed = true; +MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) { + MachineBasicBlock::reverse_iterator It = MBB->rbegin(); + if (It != MBB->rend()) { + MachineInstr *MI = &(*It); + if (MI->getOpcode() == AMDGPU::CONTINUE) + return MI; } + return nullptr; +} - return changed; -} //CFGStructurizer::prepare +bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { + MachineInstr *MI = getReturnInstr(MBB); + bool IsReturn = (MBB->succ_size() == 0); + if (MI) + assert(IsReturn); + else if (IsReturn) + DEBUG( + dbgs() << "BB" << MBB->getNumber() + <<" is return block without RETURN instr\n";); + return IsReturn; +} -template -bool CFGStructurizer::run(FuncT &func, PassT &pass, - const AMDGPURegisterInfo * tri) { - passRep = &pass; - funcRep = &func; - TRI = tri; +void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, + MachineBasicBlock *SrcMBB) { + for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(), + iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It) + DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of +} - //Assume reducible CFG... - if (DEBUGME) { - errs() << "AMDGPUCFGStructurizer::run\n"; - func.viewCFG(); +MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) { + MachineFunction *Func = MBB->getParent(); + MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock(); + Func->push_back(NewMBB); //insert to function + for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end(); + It != E; ++It) { + MachineInstr *MI = Func->CloneMachineInstr(It); + NewMBB->push_back(MI); } + return NewMBB; +} + +void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith( + MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, + MachineBasicBlock *NewBlk) { + MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB); + if (BranchMI && isCondBranch(BranchMI) && + getTrueBranch(BranchMI) == OldMBB) + setTrueBranch(BranchMI, NewBlk); +} + +void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) { + assert((!MBB->getParent()->getJumpTableInfo() + || MBB->getParent()->getJumpTableInfo()->isEmpty()) + && "found a jump table"); + + //collect continue right before endloop + SmallVector ContInstr; + MachineBasicBlock::iterator Pre = MBB->begin(); + MachineBasicBlock::iterator E = MBB->end(); + MachineBasicBlock::iterator It = Pre; + while (It != E) { + if (Pre->getOpcode() == AMDGPU::CONTINUE + && It->getOpcode() == AMDGPU::ENDLOOP) + ContInstr.push_back(Pre); + Pre = It; + ++It; + } + + //delete continue right before endloop + for (unsigned i = 0; i < ContInstr.size(); ++i) + ContInstr[i]->eraseFromParent(); + + // TODO to fix up jump table so later phase won't be confused. if + // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but + // there isn't such an interface yet. alternatively, replace all the other + // blocks in the jump table with the entryBlk //} + +} + + +bool AMDGPUCFGStructurizer::prepare() { + bool Changed = false; + + //FIXME: if not reducible flow graph, make it so ??? - domTree = CFGTraits::getDominatorTree(pass); - if (DEBUGME) { - domTree->print(errs(), (const llvm::Module*)0); + DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";); + + orderBlocks(FuncRep); + + SmallVector RetBlks; + + // Add an ExitBlk to loop that don't have one + for (MachineLoopInfo::iterator It = MLI->begin(), + E = MLI->end(); It != E; ++It) { + MachineLoop *LoopRep = (*It); + MBBVector ExitingMBBs; + LoopRep->getExitingBlocks(ExitingMBBs); + + if (ExitingMBBs.size() == 0) { + MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep); + if (DummyExitBlk) + RetBlks.push_back(DummyExitBlk); + } } - postDomTree = CFGTraits::getPostDominatorTree(pass); - if (DEBUGME) { - postDomTree->print(errs()); + // Remove unconditional branch instr. + // Add dummy exit block iff there are multiple returns. + for (SmallVectorImpl::const_iterator + It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) { + MachineBasicBlock *MBB = *It; + removeUnconditionalBranch(MBB); + removeRedundantConditionalBranch(MBB); + if (isReturnBlock(MBB)) { + RetBlks.push_back(MBB); + } + assert(MBB->succ_size() <= 2); } - loopInfo = CFGTraits::getLoopInfo(pass); - if (DEBUGME) { - errs() << "LoopInfo:\n"; - PrintLoopinfo(*loopInfo, errs()); + if (RetBlks.size() >= 2) { + addDummyExitBlock(RetBlks); + Changed = true; } - orderBlocks(); + return Changed; +} + +bool AMDGPUCFGStructurizer::run() { + + //Assume reducible CFG... + DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n"); + #ifdef STRESSTEST //Use the worse block ordering to test the algorithm. ReverseVector(orderedBlks); #endif - if (DEBUGME) { - errs() << "Ordered blocks:\n"; - printOrderedBlocks(errs()); - } - int numIter = 0; - bool finish = false; - BlockT *curBlk; - bool makeProgress = false; - int numRemainedBlk = countActiveBlock(orderedBlks.begin(), - orderedBlks.end()); + DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks();); + int NumIter = 0; + bool Finish = false; + MachineBasicBlock *MBB; + bool MakeProgress = false; + int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(), + OrderedBlks.end()); do { - ++numIter; - if (DEBUGME) { - errs() << "numIter = " << numIter - << ", numRemaintedBlk = " << numRemainedBlk << "\n"; - } - - typename SmallVector::const_iterator - iterBlk = orderedBlks.begin(); - typename SmallVector::const_iterator - iterBlkEnd = orderedBlks.end(); - - typename SmallVector::const_iterator - sccBeginIter = iterBlk; - BlockT *sccBeginBlk = NULL; - int sccNumBlk = 0; // The number of active blocks, init to a + ++NumIter; + DEBUG( + dbgs() << "numIter = " << NumIter + << ", numRemaintedBlk = " << NumRemainedBlk << "\n"; + ); + + SmallVectorImpl::const_iterator It = + OrderedBlks.begin(); + SmallVectorImpl::const_iterator E = + OrderedBlks.end(); + + SmallVectorImpl::const_iterator SccBeginIter = + It; + MachineBasicBlock *SccBeginMBB = nullptr; + int SccNumBlk = 0; // The number of active blocks, init to a // maximum possible number. - int sccNumIter; // Number of iteration in this SCC. - - while (iterBlk != iterBlkEnd) { - curBlk = *iterBlk; - - if (sccBeginBlk == NULL) { - sccBeginIter = iterBlk; - sccBeginBlk = curBlk; - sccNumIter = 0; - sccNumBlk = numRemainedBlk; // Init to maximum possible number. - if (DEBUGME) { - errs() << "start processing SCC" << getSCCNum(sccBeginBlk); - errs() << "\n"; - } + int SccNumIter; // Number of iteration in this SCC. + + while (It != E) { + MBB = *It; + + if (!SccBeginMBB) { + SccBeginIter = It; + SccBeginMBB = MBB; + SccNumIter = 0; + SccNumBlk = NumRemainedBlk; // Init to maximum possible number. + DEBUG( + dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); + dbgs() << "\n"; + ); } - if (!isRetiredBlock(curBlk)) { - patternMatch(curBlk); - } + if (!isRetiredBlock(MBB)) + patternMatch(MBB); - ++iterBlk; + ++It; - bool contNextScc = true; - if (iterBlk == iterBlkEnd - || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) { + bool ContNextScc = true; + if (It == E + || getSCCNum(SccBeginMBB) != getSCCNum(*It)) { // Just finish one scc. - ++sccNumIter; - int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk); - if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) { - if (DEBUGME) { - errs() << "Can't reduce SCC " << getSCCNum(curBlk) - << ", sccNumIter = " << sccNumIter; - errs() << "doesn't make any progress\n"; - } - contNextScc = true; - } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) { - sccNumBlk = sccRemainedNumBlk; - iterBlk = sccBeginIter; - contNextScc = false; - if (DEBUGME) { - errs() << "repeat processing SCC" << getSCCNum(curBlk) - << "sccNumIter = " << sccNumIter << "\n"; - func.viewCFG(); - } + ++SccNumIter; + int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It); + if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { + DEBUG( + dbgs() << "Can't reduce SCC " << getSCCNum(MBB) + << ", sccNumIter = " << SccNumIter; + dbgs() << "doesn't make any progress\n"; + ); + ContNextScc = true; + } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { + SccNumBlk = sccRemainedNumBlk; + It = SccBeginIter; + ContNextScc = false; + DEBUG( + dbgs() << "repeat processing SCC" << getSCCNum(MBB) + << "sccNumIter = " << SccNumIter << '\n'; + ); } else { // Finish the current scc. - contNextScc = true; + ContNextScc = true; } } else { // Continue on next component in the current scc. - contNextScc = false; + ContNextScc = false; } - if (contNextScc) { - sccBeginBlk = NULL; - } + if (ContNextScc) + SccBeginMBB = nullptr; } //while, "one iteration" over the function. - BlockT *entryBlk = FuncGTraits::nodes_begin(&func); - if (entryBlk->succ_size() == 0) { - finish = true; - if (DEBUGME) { - errs() << "Reduce to one block\n"; - } + MachineBasicBlock *EntryMBB = + GraphTraits::nodes_begin(FuncRep); + if (EntryMBB->succ_size() == 0) { + Finish = true; + DEBUG( + dbgs() << "Reduce to one block\n"; + ); } else { - int newnumRemainedBlk - = countActiveBlock(orderedBlks.begin(), orderedBlks.end()); + int NewnumRemainedBlk + = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end()); // consider cloned blocks ?? - if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) { - makeProgress = true; - numRemainedBlk = newnumRemainedBlk; + if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) { + MakeProgress = true; + NumRemainedBlk = NewnumRemainedBlk; } else { - makeProgress = false; - if (DEBUGME) { - errs() << "No progress\n"; - } + MakeProgress = false; + DEBUG( + dbgs() << "No progress\n"; + ); } } - } while (!finish && makeProgress); + } while (!Finish && MakeProgress); // Misc wrap up to maintain the consistency of the Function representation. - CFGTraits::wrapup(FuncGTraits::nodes_begin(&func)); + wrapup(GraphTraits::nodes_begin(FuncRep)); // Detach retired Block, release memory. - for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(), - iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) { - if ((*iterMap).second && (*iterMap).second->isRetired) { - assert(((*iterMap).first)->getNumber() != -1); - if (DEBUGME) { - errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n"; - } - (*iterMap).first->eraseFromParent(); //Remove from the parent Function. + for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end(); + It != E; ++It) { + if ((*It).second && (*It).second->IsRetired) { + assert(((*It).first)->getNumber() != -1); + DEBUG( + dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n"; + ); + (*It).first->eraseFromParent(); //Remove from the parent Function. } - delete (*iterMap).second; + delete (*It).second; } - blockInfoMap.clear(); + BlockInfoMap.clear(); + LLInfoMap.clear(); - // clear loopLandInfoMap - for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(), - iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) { - delete (*iterMap).second; + if (!Finish) { + DEBUG(FuncRep->viewCFG()); + llvm_unreachable("IRREDUCIBLE_CFG"); } - loopLandInfoMap.clear(); - if (DEBUGME) { - func.viewCFG(); - } + return true; +} - if (!finish) { - assert(!"IRREDUCIBL_CF"); - } - return true; -} //CFGStructurizer::run - -/// Print the ordered Blocks. -/// -template -void CFGStructurizer::printOrderedBlocks(llvm::raw_ostream &os) { - size_t i = 0; - for (typename SmallVector::const_iterator - iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); - iterBlk != iterBlkEnd; - ++iterBlk, ++i) { - os << "BB" << (*iterBlk)->getNumber(); - os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; - if (i != 0 && i % 10 == 0) { - os << "\n"; - } else { - os << " "; - } - } -} //printOrderedBlocks - -/// Compute the reversed DFS post order of Blocks -/// -template void CFGStructurizer::orderBlocks() { - int sccNum = 0; - BlockT *bb; - for (scc_iterator sccIter = scc_begin(funcRep), - sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) { - std::vector &sccNext = *sccIter; - for (typename std::vector::const_iterator - blockIter = sccNext.begin(), blockEnd = sccNext.end(); + +void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) { + int SccNum = 0; + MachineBasicBlock *MBB; + for (scc_iterator It = scc_begin(MF); !It.isAtEnd(); + ++It, ++SccNum) { + const std::vector &SccNext = *It; + for (std::vector::const_iterator + blockIter = SccNext.begin(), blockEnd = SccNext.end(); blockIter != blockEnd; ++blockIter) { - bb = *blockIter; - orderedBlks.push_back(bb); - recordSccnum(bb, sccNum); + MBB = *blockIter; + OrderedBlks.push_back(MBB); + recordSccnum(MBB, SccNum); } } //walk through all the block in func to check for unreachable - for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep), - blockEnd1 = FuncGTraits::nodes_end(funcRep); - blockIter1 != blockEnd1; ++blockIter1) { - BlockT *bb = &(*blockIter1); - sccNum = getSCCNum(bb); - if (sccNum == INVALIDSCCNUM) { - errs() << "unreachable block BB" << bb->getNumber() << "\n"; - } + typedef GraphTraits GTM; + MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF); + for (; It != E; ++It) { + MachineBasicBlock *MBB = &(*It); + SccNum = getSCCNum(MBB); + if (SccNum == INVALIDSCCNUM) + dbgs() << "unreachable block BB" << MBB->getNumber() << "\n"; } -} //orderBlocks +} -template int CFGStructurizer::patternMatch(BlockT *curBlk) { - int numMatch = 0; - int curMatch; +int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { + int NumMatch = 0; + int CurMatch; - if (DEBUGME) { - errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n"; - } + DEBUG( + dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n"; + ); - while ((curMatch = patternMatchGroup(curBlk)) > 0) { - numMatch += curMatch; - } + while ((CurMatch = patternMatchGroup(MBB)) > 0) + NumMatch += CurMatch; + + DEBUG( + dbgs() << "End patternMatch BB" << MBB->getNumber() + << ", numMatch = " << NumMatch << "\n"; + ); + + return NumMatch; +} + +int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) { + int NumMatch = 0; + NumMatch += loopendPatternMatch(); + NumMatch += serialPatternMatch(MBB); + NumMatch += ifPatternMatch(MBB); + return NumMatch; +} - if (DEBUGME) { - errs() << "End patternMatch BB" << curBlk->getNumber() - << ", numMatch = " << numMatch << "\n"; - } - return numMatch; -} //patternMatch - -template -int CFGStructurizer::patternMatchGroup(BlockT *curBlk) { - int numMatch = 0; - numMatch += serialPatternMatch(curBlk); - numMatch += ifPatternMatch(curBlk); - numMatch += loopendPatternMatch(curBlk); - numMatch += loopPatternMatch(curBlk); - return numMatch; -}//patternMatchGroup - -template -int CFGStructurizer::serialPatternMatch(BlockT *curBlk) { - if (curBlk->succ_size() != 1) { +int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) { + if (MBB->succ_size() != 1) return 0; - } - BlockT *childBlk = *curBlk->succ_begin(); - if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) { + MachineBasicBlock *childBlk = *MBB->succ_begin(); + if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) return 0; - } - mergeSerialBlock(curBlk, childBlk); + mergeSerialBlock(MBB, childBlk); ++numSerialPatternMatch; return 1; -} //serialPatternMatch +} -template -int CFGStructurizer::ifPatternMatch(BlockT *curBlk) { +int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { //two edges - if (curBlk->succ_size() != 2) { + if (MBB->succ_size() != 2) return 0; - } - - if (hasBackEdge(curBlk)) { + if (hasBackEdge(MBB)) return 0; - } - - InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk); - if (branchInstr == NULL) { + MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); + if (!BranchMI) return 0; - } - assert(CFGTraits::isCondBranch(branchInstr)); + assert(isCondBranch(BranchMI)); + int NumMatch = 0; - BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr); - BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr); - BlockT *landBlk; - int cloned = 0; + MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI); + NumMatch += serialPatternMatch(TrueMBB); + NumMatch += ifPatternMatch(TrueMBB); + MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI); + NumMatch += serialPatternMatch(FalseMBB); + NumMatch += ifPatternMatch(FalseMBB); + MachineBasicBlock *LandBlk; + int Cloned = 0; + assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty()); // TODO: Simplify - if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1 - && *trueBlk->succ_begin() == *falseBlk->succ_begin()) { - landBlk = *trueBlk->succ_begin(); - } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) { - landBlk = NULL; - } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) { - landBlk = falseBlk; - falseBlk = NULL; - } else if (falseBlk->succ_size() == 1 - && *falseBlk->succ_begin() == trueBlk) { - landBlk = trueBlk; - trueBlk = NULL; - } else if (falseBlk->succ_size() == 1 - && isSameloopDetachedContbreak(trueBlk, falseBlk)) { - landBlk = *falseBlk->succ_begin(); - } else if (trueBlk->succ_size() == 1 - && isSameloopDetachedContbreak(falseBlk, trueBlk)) { - landBlk = *trueBlk->succ_begin(); + if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1 + && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) { + // Diamond pattern + LandBlk = *TrueMBB->succ_begin(); + } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { + // Triangle pattern, false is empty + LandBlk = FalseMBB; + FalseMBB = nullptr; + } else if (FalseMBB->succ_size() == 1 + && *FalseMBB->succ_begin() == TrueMBB) { + // Triangle pattern, true is empty + // We reverse the predicate to make a triangle, empty false pattern; + std::swap(TrueMBB, FalseMBB); + reversePredicateSetter(MBB->end()); + LandBlk = FalseMBB; + FalseMBB = nullptr; + } else if (FalseMBB->succ_size() == 1 + && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { + LandBlk = *FalseMBB->succ_begin(); + } else if (TrueMBB->succ_size() == 1 + && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) { + LandBlk = *TrueMBB->succ_begin(); } else { - return handleJumpintoIf(curBlk, trueBlk, falseBlk); + return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB); } // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the // new BB created for landBlk==NULL may introduce new challenge to the // reduction process. - if (landBlk != NULL && - ((trueBlk && trueBlk->pred_size() > 1) - || (falseBlk && falseBlk->pred_size() > 1))) { - cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk); + if (LandBlk && + ((TrueMBB && TrueMBB->pred_size() > 1) + || (FalseMBB && FalseMBB->pred_size() > 1))) { + Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk); } - if (trueBlk && trueBlk->pred_size() > 1) { - trueBlk = cloneBlockForPredecessor(trueBlk, curBlk); - ++cloned; + if (TrueMBB && TrueMBB->pred_size() > 1) { + TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB); + ++Cloned; } - if (falseBlk && falseBlk->pred_size() > 1) { - falseBlk = cloneBlockForPredecessor(falseBlk, curBlk); - ++cloned; + if (FalseMBB && FalseMBB->pred_size() > 1) { + FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB); + ++Cloned; } - mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk); + mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk); ++numIfPatternMatch; - numClonedBlock += cloned; + numClonedBlock += Cloned; - return 1 + cloned; -} //ifPatternMatch - -template -int CFGStructurizer::switchPatternMatch(BlockT *curBlk) { - return 0; -} //switchPatternMatch + return 1 + Cloned + NumMatch; +} -template -int CFGStructurizer::loopendPatternMatch(BlockT *curBlk) { - LoopT *loopRep = loopInfo->getLoopFor(curBlk); - typename std::vector nestedLoops; - while (loopRep) { - nestedLoops.push_back(loopRep); - loopRep = loopRep->getParentLoop(); - } +int AMDGPUCFGStructurizer::loopendPatternMatch() { + std::deque NestedLoops; + for (auto &It: *MLI) + for (MachineLoop *ML : depth_first(It)) + NestedLoops.push_front(ML); - if (nestedLoops.size() == 0) { + if (NestedLoops.size() == 0) return 0; - } - // Process nested loop outside->inside, so "continue" to a outside loop won't - // be mistaken as "break" of the current loop. - int num = 0; - for (typename std::vector::reverse_iterator - iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend(); - iter != iterEnd; ++iter) { - loopRep = *iter; - - if (getLoopLandBlock(loopRep) != NULL) { + // Process nested loop outside->inside (we did push_front), + // so "continue" to a outside loop won't be mistaken as "break" + // of the current loop. + int Num = 0; + for (MachineLoop *ExaminedLoop : NestedLoops) { + if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) continue; - } - - BlockT *loopHeader = loopRep->getHeader(); - - int numBreak = loopbreakPatternMatch(loopRep, loopHeader); - - if (numBreak == -1) { + DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); + int NumBreak = mergeLoop(ExaminedLoop); + if (NumBreak == -1) break; - } - - int numCont = loopcontPatternMatch(loopRep, loopHeader); - num += numBreak + numCont; + Num += NumBreak; } + return Num; +} - return num; -} //loopendPatternMatch - -template -int CFGStructurizer::loopPatternMatch(BlockT *curBlk) { - if (curBlk->succ_size() != 0) { - return 0; - } +int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) { + MachineBasicBlock *LoopHeader = LoopRep->getHeader(); + MBBVector ExitingMBBs; + LoopRep->getExitingBlocks(ExitingMBBs); + assert(!ExitingMBBs.empty() && "Infinite Loop not supported"); + DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";); + // We assume a single ExitBlk + MBBVector ExitBlks; + LoopRep->getExitBlocks(ExitBlks); + SmallPtrSet ExitBlkSet; + for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i) + ExitBlkSet.insert(ExitBlks[i]); + assert(ExitBlkSet.size() == 1); + MachineBasicBlock *ExitBlk = *ExitBlks.begin(); + assert(ExitBlk && "Loop has several exit block"); + MBBVector LatchBlks; + typedef GraphTraits > InvMBBTraits; + InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader), + PE = InvMBBTraits::child_end(LoopHeader); + for (; PI != PE; PI++) { + if (LoopRep->contains(*PI)) + LatchBlks.push_back(*PI); + } + + for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i) + mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk); + for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i) + settleLoopcontBlock(LatchBlks[i], LoopHeader); + int Match = 0; + do { + Match = 0; + Match += serialPatternMatch(LoopHeader); + Match += ifPatternMatch(LoopHeader); + } while (Match > 0); + mergeLooplandBlock(LoopHeader, ExitBlk); + MachineLoop *ParentLoop = LoopRep->getParentLoop(); + if (ParentLoop) + MLI->changeLoopFor(LoopHeader, ParentLoop); + else + MLI->removeBlock(LoopHeader); + Visited[LoopRep] = true; + return 1; +} - int numLoop = 0; - LoopT *loopRep = loopInfo->getLoopFor(curBlk); - while (loopRep && loopRep->getHeader() == curBlk) { - LoopLandInfo *loopLand = getLoopLandInfo(loopRep); - if (loopLand) { - BlockT *landBlk = loopLand->landBlk; - assert(landBlk); - if (!isRetiredBlock(landBlk)) { - mergeLooplandBlock(curBlk, loopLand); - ++numLoop; - } +int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep, + MachineBasicBlock *LoopHeader) { + int NumCont = 0; + SmallVector ContMBB; + typedef GraphTraits > GTIM; + GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader), + E = GTIM::child_end(LoopHeader); + for (; It != E; ++It) { + MachineBasicBlock *MBB = *It; + if (LoopRep->contains(MBB)) { + handleLoopcontBlock(MBB, MLI->getLoopFor(MBB), + LoopHeader, LoopRep); + ContMBB.push_back(MBB); + ++NumCont; } - loopRep = loopRep->getParentLoop(); } - numLoopPatternMatch += numLoop; - - return numLoop; -} //loopPatternMatch - -template -int CFGStructurizer::loopbreakPatternMatch(LoopT *loopRep, - BlockT *loopHeader) { - BlockTSmallerVector exitingBlks; - loopRep->getExitingBlocks(exitingBlks); - - if (DEBUGME) { - errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n"; + for (SmallVectorImpl::iterator It = ContMBB.begin(), + E = ContMBB.end(); It != E; ++It) { + (*It)->removeSuccessor(LoopHeader); } - if (exitingBlks.size() == 0) { - setLoopLandBlock(loopRep); - return 0; - } + numLoopcontPatternMatch += NumCont; - // Compute the corresponding exitBlks and exit block set. - BlockTSmallerVector exitBlks; - std::set exitBlkSet; - for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(), - iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) { - BlockT *exitingBlk = *iter; - BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); - exitBlks.push_back(exitBlk); - exitBlkSet.insert(exitBlk); //non-duplicate insert - } + return NumCont; +} - assert(exitBlkSet.size() > 0); - assert(exitBlks.size() == exitingBlks.size()); - if (DEBUGME) { - errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n"; +bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak( + MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { + if (Src1MBB->succ_size() == 0) { + MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB); + if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { + MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; + if (TheEntry) { + DEBUG( + dbgs() << "isLoopContBreakBlock yes src1 = BB" + << Src1MBB->getNumber() + << " src2 = BB" << Src2MBB->getNumber() << "\n"; + ); + return true; + } + } } + return false; +} - // Find exitLandBlk. - BlockT *exitLandBlk = NULL; - int numCloned = 0; - int numSerial = 0; - - if (exitBlkSet.size() == 1) { - exitLandBlk = *exitBlkSet.begin(); - } else { - exitLandBlk = findNearestCommonPostDom(exitBlkSet); - - if (exitLandBlk == NULL) { - return -1; - } +int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { + int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); + if (Num == 0) { + DEBUG( + dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; + ); + Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); + } + return Num; +} - bool allInPath = true; - bool allNotInPath = true; - for (typename std::set::const_iterator - iter = exitBlkSet.begin(), - iterEnd = exitBlkSet.end(); - iter != iterEnd; ++iter) { - BlockT *exitBlk = *iter; - - PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true); - if (DEBUGME) { - errs() << "BB" << exitBlk->getNumber() - << " to BB" << exitLandBlk->getNumber() << " PathToKind=" - << pathKind << "\n"; - } +int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { + int Num = 0; + MachineBasicBlock *DownBlk; - allInPath = allInPath && (pathKind == SinglePath_InPath); - allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath); + //trueBlk could be the common post dominator + DownBlk = TrueMBB; + + DEBUG( + dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() + << " true = BB" << TrueMBB->getNumber() + << ", numSucc=" << TrueMBB->succ_size() + << " false = BB" << FalseMBB->getNumber() << "\n"; + ); + + while (DownBlk) { + DEBUG( + dbgs() << "check down = BB" << DownBlk->getNumber(); + ); + + if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { + DEBUG( + dbgs() << " working\n"; + ); + + Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); + Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); + + numClonedBlock += Num; + Num += serialPatternMatch(*HeadMBB->succ_begin()); + Num += serialPatternMatch(*std::next(HeadMBB->succ_begin())); + Num += ifPatternMatch(HeadMBB); + assert(Num > 0); - if (!allInPath && !allNotInPath) { - if (DEBUGME) { - errs() << "singlePath check fail\n"; - } - return -1; - } - } // check all exit blocks - - if (allNotInPath) { - - // TODO: Simplify, maybe separate function? - LoopT *parentLoopRep = loopRep->getParentLoop(); - BlockT *parentLoopHeader = NULL; - if (parentLoopRep) - parentLoopHeader = parentLoopRep->getHeader(); - - if (exitLandBlk == parentLoopHeader && - (exitLandBlk = relocateLoopcontBlock(parentLoopRep, - loopRep, - exitBlkSet, - exitLandBlk)) != NULL) { - if (DEBUGME) { - errs() << "relocateLoopcontBlock success\n"; - } - } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep, - exitingBlks, - exitBlks)) != NULL) { - if (DEBUGME) { - errs() << "insertEndbranchBlock success\n"; - } - } else { - if (DEBUGME) { - errs() << "loop exit fail\n"; - } - return -1; - } + break; } + DEBUG( + dbgs() << " not working\n"; + ); + DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; + } // walk down the postDomTree - // Handle side entry to exit path. - exitBlks.clear(); - exitBlkSet.clear(); - for (typename BlockTSmallerVector::iterator iterExiting = - exitingBlks.begin(), - iterExitingEnd = exitingBlks.end(); - iterExiting != iterExitingEnd; ++iterExiting) { - BlockT *exitingBlk = *iterExiting; - BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); - BlockT *newExitBlk = exitBlk; - - if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) { - newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk); - ++numCloned; - } - - numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk); + return Num; +} - exitBlks.push_back(newExitBlk); - exitBlkSet.insert(newExitBlk); - } +void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf( + MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { + dbgs() << "head = BB" << HeadMBB->getNumber() + << " size = " << HeadMBB->size(); + if (Detail) { + dbgs() << "\n"; + HeadMBB->print(dbgs()); + dbgs() << "\n"; + } - for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), - iterExitEnd = exitBlks.end(); - iterExit != iterExitEnd; ++iterExit) { - BlockT *exitBlk = *iterExit; - numSerial += serialPatternMatch(exitBlk); + if (TrueMBB) { + dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " + << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); + if (Detail) { + dbgs() << "\n"; + TrueMBB->print(dbgs()); + dbgs() << "\n"; } - - for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), - iterExitEnd = exitBlks.end(); - iterExit != iterExitEnd; ++iterExit) { - BlockT *exitBlk = *iterExit; - if (exitBlk->pred_size() > 1) { - if (exitBlk != exitLandBlk) { - return -1; - } - } else { - if (exitBlk != exitLandBlk && - (exitBlk->succ_size() != 1 || - *exitBlk->succ_begin() != exitLandBlk)) { - return -1; - } - } + } + if (FalseMBB) { + dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " + << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); + if (Detail) { + dbgs() << "\n"; + FalseMBB->print(dbgs()); + dbgs() << "\n"; } - } // else - - exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet); - - // Fold break into the breaking block. Leverage across level breaks. - assert(exitingBlks.size() == exitBlks.size()); - for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(), - iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end(); - iterExit != iterExitEnd; ++iterExit, ++iterExiting) { - BlockT *exitBlk = *iterExit; - BlockT *exitingBlk = *iterExiting; - assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk); - LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk); - handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk); } - - int numBreak = static_cast(exitingBlks.size()); - numLoopbreakPatternMatch += numBreak; - numClonedBlock += numCloned; - return numBreak + numSerial + numCloned; -} //loopbreakPatternMatch - -template -int CFGStructurizer::loopcontPatternMatch(LoopT *loopRep, - BlockT *loopHeader) { - int numCont = 0; - SmallVector contBlk; - for (typename InvBlockGTraits::ChildIteratorType iter = - InvBlockGTraits::child_begin(loopHeader), - iterEnd = InvBlockGTraits::child_end(loopHeader); - iter != iterEnd; ++iter) { - BlockT *curBlk = *iter; - if (loopRep->contains(curBlk)) { - handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk), - loopHeader, loopRep); - contBlk.push_back(curBlk); - ++numCont; + if (LandMBB) { + dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " + << LandMBB->size() << " numPred = " << LandMBB->pred_size(); + if (Detail) { + dbgs() << "\n"; + LandMBB->print(dbgs()); + dbgs() << "\n"; } } - for (typename SmallVector::iterator - iter = contBlk.begin(), iterEnd = contBlk.end(); - iter != iterEnd; ++iter) { - (*iter)->removeSuccessor(loopHeader); - } + dbgs() << "\n"; +} - numLoopcontPatternMatch += numCont; +int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, + MachineBasicBlock **LandMBBPtr) { + bool MigrateTrue = false; + bool MigrateFalse = false; - return numCont; -} //loopcontPatternMatch + MachineBasicBlock *LandBlk = *LandMBBPtr; + assert((!TrueMBB || TrueMBB->succ_size() <= 1) + && (!FalseMBB || FalseMBB->succ_size() <= 1)); -template -bool CFGStructurizer::isSameloopDetachedContbreak(BlockT *src1Blk, - BlockT *src2Blk) { - // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the - // same loop with LoopLandInfo without explicitly keeping track of - // loopContBlks and loopBreakBlks, this is a method to get the information. - // - if (src1Blk->succ_size() == 0) { - LoopT *loopRep = loopInfo->getLoopFor(src1Blk); - if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - if (theEntry != NULL) { - if (DEBUGME) { - errs() << "isLoopContBreakBlock yes src1 = BB" - << src1Blk->getNumber() - << " src2 = BB" << src2Blk->getNumber() << "\n"; - } - return true; - } - } - } - return false; -} //isSameloopDetachedContbreak - -template -int CFGStructurizer::handleJumpintoIf(BlockT *headBlk, - BlockT *trueBlk, - BlockT *falseBlk) { - int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk); - if (num == 0) { - if (DEBUGME) { - errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; - } - num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk); - } - return num; -} - -template -int CFGStructurizer::handleJumpintoIfImp(BlockT *headBlk, - BlockT *trueBlk, - BlockT *falseBlk) { - int num = 0; - BlockT *downBlk; - - //trueBlk could be the common post dominator - downBlk = trueBlk; - - if (DEBUGME) { - errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber() - << " true = BB" << trueBlk->getNumber() - << ", numSucc=" << trueBlk->succ_size() - << " false = BB" << falseBlk->getNumber() << "\n"; - } - - while (downBlk) { - if (DEBUGME) { - errs() << "check down = BB" << downBlk->getNumber(); - } - - if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) { - if (DEBUGME) { - errs() << " working\n"; - } - - num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk); - num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk); - - numClonedBlock += num; - num += serialPatternMatch(*headBlk->succ_begin()); - num += serialPatternMatch(*(++headBlk->succ_begin())); - num += ifPatternMatch(headBlk); - assert(num > 0); - - break; - } - if (DEBUGME) { - errs() << " not working\n"; - } - downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL; - } // walk down the postDomTree - - return num; -} //handleJumpintoIf - -template -void CFGStructurizer::showImproveSimpleJumpintoIf(BlockT *headBlk, - BlockT *trueBlk, - BlockT *falseBlk, - BlockT *landBlk, - bool detail) { - errs() << "head = BB" << headBlk->getNumber() - << " size = " << headBlk->size(); - if (detail) { - errs() << "\n"; - headBlk->print(errs()); - errs() << "\n"; - } - - if (trueBlk) { - errs() << ", true = BB" << trueBlk->getNumber() << " size = " - << trueBlk->size() << " numPred = " << trueBlk->pred_size(); - if (detail) { - errs() << "\n"; - trueBlk->print(errs()); - errs() << "\n"; - } - } - if (falseBlk) { - errs() << ", false = BB" << falseBlk->getNumber() << " size = " - << falseBlk->size() << " numPred = " << falseBlk->pred_size(); - if (detail) { - errs() << "\n"; - falseBlk->print(errs()); - errs() << "\n"; - } - } - if (landBlk) { - errs() << ", land = BB" << landBlk->getNumber() << " size = " - << landBlk->size() << " numPred = " << landBlk->pred_size(); - if (detail) { - errs() << "\n"; - landBlk->print(errs()); - errs() << "\n"; - } - } - - errs() << "\n"; -} //showImproveSimpleJumpintoIf - -template -int CFGStructurizer::improveSimpleJumpintoIf(BlockT *headBlk, - BlockT *trueBlk, - BlockT *falseBlk, - BlockT **plandBlk) { - bool migrateTrue = false; - bool migrateFalse = false; - - BlockT *landBlk = *plandBlk; - - assert((trueBlk == NULL || trueBlk->succ_size() <= 1) - && (falseBlk == NULL || falseBlk->succ_size() <= 1)); - - if (trueBlk == falseBlk) { + if (TrueMBB == FalseMBB) return 0; - } - migrateTrue = needMigrateBlock(trueBlk); - migrateFalse = needMigrateBlock(falseBlk); + MigrateTrue = needMigrateBlock(TrueMBB); + MigrateFalse = needMigrateBlock(FalseMBB); - if (!migrateTrue && !migrateFalse) { + if (!MigrateTrue && !MigrateFalse) return 0; - } // If we need to migrate either trueBlk and falseBlk, migrate the rest that // have more than one predecessors. without doing this, its predecessor // rather than headBlk will have undefined value in initReg. - if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) { - migrateTrue = true; - } - if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) { - migrateFalse = true; - } + if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) + MigrateTrue = true; + if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) + MigrateFalse = true; - if (DEBUGME) { - errs() << "before improveSimpleJumpintoIf: "; - showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); - } + DEBUG( + dbgs() << "before improveSimpleJumpintoIf: "; + showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); + ); // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk // @@ -1183,205 +1337,192 @@ int CFGStructurizer::improveSimpleJumpintoIf(BlockT *headBlk, // add initReg = initVal to headBlk const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - unsigned initReg = - funcRep->getRegInfo().createVirtualRegister(I32RC); - if (!migrateTrue || !migrateFalse) { - int initVal = migrateTrue ? 0 : 1; - CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal); + if (!MigrateTrue || !MigrateFalse) { + // XXX: We have an opportunity here to optimize the "branch into if" case + // here. Branch into if looks like this: + // entry + // / | + // diamond_head branch_from + // / \ | + // diamond_false diamond_true + // \ / + // done + // + // The diamond_head block begins the "if" and the diamond_true block + // is the block being "branched into". + // + // If MigrateTrue is true, then TrueBB is the block being "branched into" + // and if MigrateFalse is true, then FalseBB is the block being + // "branched into" + // + // Here is the pseudo code for how I think the optimization should work: + // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. + // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. + // 3. Move the branch instruction from diamond_head into its own basic + // block (new_block). + // 4. Add an unconditional branch from diamond_head to new_block + // 5. Replace the branch instruction in branch_from with an unconditional + // branch to new_block. If branch_from has multiple predecessors, then + // we need to replace the True/False block in the branch + // instruction instead of replacing it. + // 6. Change the condition of the branch instruction in new_block from + // COND to (COND || GPR0) + // + // In order insert these MOV instruction, we will need to use the + // RegisterScavenger. Usually liveness stops being tracked during + // the late machine optimization passes, however if we implement + // bool TargetRegisterInfo::requiresRegisterScavenging( + // const MachineFunction &MF) + // and have it return true, liveness will be tracked correctly + // by generic optimization passes. We will also need to make sure that + // all of our target-specific passes that run after regalloc and before + // the CFGStructurizer track liveness and we will need to modify this pass + // to correctly track liveness. + // + // After the above changes, the new CFG should look like this: + // entry + // / | + // diamond_head branch_from + // \ / + // new_block + // / | + // diamond_false diamond_true + // \ / + // done + // + // Without this optimization, we are forced to duplicate the diamond_true + // block and we will end up with a CFG like this: + // + // entry + // / | + // diamond_head branch_from + // / \ | + // diamond_false diamond_true diamond_true (duplicate) + // \ / | + // done --------------------| + // + // Duplicating diamond_true can be very costly especially if it has a + // lot of instructions. + return 0; } - int numNewBlk = 0; - - if (landBlk == NULL) { - landBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(landBlk); //insert to function - - if (trueBlk) { - trueBlk->addSuccessor(landBlk); - } else { - headBlk->addSuccessor(landBlk); - } - - if (falseBlk) { - falseBlk->addSuccessor(landBlk); - } else { - headBlk->addSuccessor(landBlk); - } - - numNewBlk ++; - } + int NumNewBlk = 0; - bool landBlkHasOtherPred = (landBlk->pred_size() > 2); + bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" - typename BlockT::iterator insertPos = - CFGTraits::getInstrPos - (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep)); - - if (landBlkHasOtherPred) { - unsigned immReg = - funcRep->getRegInfo().createVirtualRegister(I32RC); - CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2); - unsigned cmpResReg = - funcRep->getRegInfo().createVirtualRegister(I32RC); - - CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg, - initReg, immReg); - CFGTraits::insertCondBranchBefore(landBlk, insertPos, - AMDGPU::IF_PREDICATE_SET, passRep, - cmpResReg, DebugLoc()); - } - - CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET, - passRep, initReg, DebugLoc()); - - if (migrateTrue) { - migrateInstruction(trueBlk, landBlk, insertPos); + MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF); + + if (LandBlkHasOtherPred) { + llvm_unreachable("Extra register needed to handle CFG"); + unsigned CmpResReg = + HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); + llvm_unreachable("Extra compare instruction needed to handle CFG"); + insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, + CmpResReg, DebugLoc()); + } + + // XXX: We are running this after RA, so creating virtual registers will + // cause an assertion failure in the PostRA scheduling pass. + unsigned InitReg = + HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); + insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg, + DebugLoc()); + + if (MigrateTrue) { + migrateInstruction(TrueMBB, LandBlk, I); // need to uncondionally insert the assignment to ensure a path from its // predecessor rather than headBlk has valid value in initReg if // (initVal != 1). - CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1); + llvm_unreachable("Extra register needed to handle CFG"); } - CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep); + insertInstrBefore(I, AMDGPU::ELSE); - if (migrateFalse) { - migrateInstruction(falseBlk, landBlk, insertPos); + if (MigrateFalse) { + migrateInstruction(FalseMBB, LandBlk, I); // need to uncondionally insert the assignment to ensure a path from its // predecessor rather than headBlk has valid value in initReg if // (initVal != 0) - CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0); + llvm_unreachable("Extra register needed to handle CFG"); } - if (landBlkHasOtherPred) { + if (LandBlkHasOtherPred) { // add endif - CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep); + insertInstrBefore(I, AMDGPU::ENDIF); // put initReg = 2 to other predecessors of landBlk - for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), - predIterEnd = landBlk->pred_end(); predIter != predIterEnd; - ++predIter) { - BlockT *curBlk = *predIter; - if (curBlk != trueBlk && curBlk != falseBlk) { - CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2); - } - } //for - } - if (DEBUGME) { - errs() << "result from improveSimpleJumpintoIf: "; - showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); - } - - // update landBlk - *plandBlk = landBlk; - - return numNewBlk; -} //improveSimpleJumpintoIf - -template -void CFGStructurizer::handleLoopbreak(BlockT *exitingBlk, - LoopT *exitingLoop, - BlockT *exitBlk, - LoopT *exitLoop, - BlockT *landBlk) { - if (DEBUGME) { - errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop) - << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n"; - } - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - - RegiT initReg = INVALIDREGNUM; - if (exitingLoop != exitLoop) { - initReg = static_cast - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - assert(initReg != INVALIDREGNUM); - addLoopBreakInitReg(exitLoop, initReg); - while (exitingLoop != exitLoop && exitingLoop) { - addLoopBreakOnReg(exitingLoop, initReg); - exitingLoop = exitingLoop->getParentLoop(); + for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), + PE = LandBlk->pred_end(); PI != PE; ++PI) { + MachineBasicBlock *MBB = *PI; + if (MBB != TrueMBB && MBB != FalseMBB) + llvm_unreachable("Extra register needed to handle CFG"); } - assert(exitingLoop == exitLoop); } + DEBUG( + dbgs() << "result from improveSimpleJumpintoIf: "; + showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); + ); - mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg); + // update landBlk + *LandMBBPtr = LandBlk; -} //handleLoopbreak + return NumNewBlk; +} -template -void CFGStructurizer::handleLoopcontBlock(BlockT *contingBlk, - LoopT *contingLoop, - BlockT *contBlk, - LoopT *contLoop) { - if (DEBUGME) { - errs() << "loopcontPattern cont = BB" << contingBlk->getNumber() - << " header = BB" << contBlk->getNumber() << "\n"; +void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB, + MachineLoop *ContingLoop, MachineBasicBlock *ContMBB, + MachineLoop *ContLoop) { + DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber() + << " header = BB" << ContMBB->getNumber() << "\n"; + dbgs() << "Trying to continue loop-depth = " + << getLoopDepth(ContLoop) + << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";); + settleLoopcontBlock(ContingMBB, ContMBB); +} - errs() << "Trying to continue loop-depth = " - << getLoopDepth(contLoop) - << " from loop-depth = " << getLoopDepth(contingLoop) << "\n"; - } +void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, + MachineBasicBlock *SrcMBB) { + DEBUG( + dbgs() << "serialPattern BB" << DstMBB->getNumber() + << " <= BB" << SrcMBB->getNumber() << "\n"; + ); + DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); - RegiT initReg = INVALIDREGNUM; - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - if (contingLoop != contLoop) { - initReg = static_cast - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - assert(initReg != INVALIDREGNUM); - addLoopContInitReg(contLoop, initReg); - while (contingLoop && contingLoop->getParentLoop() != contLoop) { - addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg - contingLoop = contingLoop->getParentLoop(); - } - assert(contingLoop && contingLoop->getParentLoop() == contLoop); - addLoopContOnReg(contingLoop, initReg); - } + DstMBB->removeSuccessor(SrcMBB); + cloneSuccessorList(DstMBB, SrcMBB); - settleLoopcontBlock(contingBlk, contBlk, initReg); -} //handleLoopcontBlock + removeSuccessor(SrcMBB); + MLI->removeBlock(SrcMBB); + retireBlock(SrcMBB); +} -template -void CFGStructurizer::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) { - if (DEBUGME) { - errs() << "serialPattern BB" << dstBlk->getNumber() - << " <= BB" << srcBlk->getNumber() << "\n"; - } - dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end()); - - dstBlk->removeSuccessor(srcBlk); - CFGTraits::cloneSuccessorList(dstBlk, srcBlk); - - removeSuccessor(srcBlk); - retireBlock(dstBlk, srcBlk); -} //mergeSerialBlock - -template -void CFGStructurizer::mergeIfthenelseBlock(InstrT *branchInstr, - BlockT *curBlk, - BlockT *trueBlk, - BlockT *falseBlk, - BlockT *landBlk) { - if (DEBUGME) { - errs() << "ifPattern BB" << curBlk->getNumber(); - errs() << "{ "; - if (trueBlk) { - errs() << "BB" << trueBlk->getNumber(); - } - errs() << " } else "; - errs() << "{ "; - if (falseBlk) { - errs() << "BB" << falseBlk->getNumber(); - } - errs() << " }\n "; - errs() << "landBlock: "; - if (landBlk == NULL) { - errs() << "NULL"; +void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, + MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { + assert (TrueMBB); + DEBUG( + dbgs() << "ifPattern BB" << MBB->getNumber(); + dbgs() << "{ "; + if (TrueMBB) { + dbgs() << "BB" << TrueMBB->getNumber(); + } + dbgs() << " } else "; + dbgs() << "{ "; + if (FalseMBB) { + dbgs() << "BB" << FalseMBB->getNumber(); + } + dbgs() << " }\n "; + dbgs() << "landBlock: "; + if (!LandMBB) { + dbgs() << "NULL"; } else { - errs() << "BB" << landBlk->getNumber(); + dbgs() << "BB" << LandMBB->getNumber(); } - errs() << "\n"; - } + dbgs() << "\n"; + ); - int oldOpcode = branchInstr->getOpcode(); - DebugLoc branchDL = branchInstr->getDebugLoc(); + int OldOpcode = BranchMI->getOpcode(); + DebugLoc BranchDL = BranchMI->getDebugLoc(); // transform to // if cond @@ -1391,1659 +1532,382 @@ void CFGStructurizer::mergeIfthenelseBlock(InstrT *branchInstr, // endif // landBlk - typename BlockT::iterator branchInstrPos = - CFGTraits::getInstrPos(curBlk, branchInstr); - CFGTraits::insertCondBranchBefore(branchInstrPos, - CFGTraits::getBranchNzeroOpcode(oldOpcode), - passRep, - branchDL); - - if (trueBlk) { - curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end()); - curBlk->removeSuccessor(trueBlk); - if (landBlk && trueBlk->succ_size()!=0) { - trueBlk->removeSuccessor(landBlk); - } - retireBlock(curBlk, trueBlk); - } - CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep); - - if (falseBlk) { - curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(), - falseBlk->end()); - curBlk->removeSuccessor(falseBlk); - if (landBlk && falseBlk->succ_size() != 0) { - falseBlk->removeSuccessor(landBlk); - } - retireBlock(curBlk, falseBlk); - } - CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); - - branchInstr->eraseFromParent(); - - if (landBlk && trueBlk && falseBlk) { - curBlk->addSuccessor(landBlk); - } - -} //mergeIfthenelseBlock - -template -void CFGStructurizer::mergeLooplandBlock(BlockT *dstBlk, - LoopLandInfo *loopLand) { - BlockT *landBlk = loopLand->landBlk; - - if (DEBUGME) { - errs() << "loopPattern header = BB" << dstBlk->getNumber() - << " land = BB" << landBlk->getNumber() << "\n"; - } - - // Loop contInitRegs are init at the beginning of the loop. - for (typename std::set::const_iterator iter = - loopLand->contInitRegs.begin(), - iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); - } - - /* we last inserterd the DebugLoc in the - * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk. - * search for the DebugLoc in the that statement. - * if not found, we have to insert the empty/default DebugLoc */ - InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk); - DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc(); - - CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak); - // Loop breakInitRegs are init before entering the loop. - for (typename std::set::const_iterator iter = - loopLand->breakInitRegs.begin(), - iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); - } - // Loop endbranchInitRegs are init before entering the loop. - for (typename std::set::const_iterator iter = - loopLand->endbranchInitRegs.begin(), - iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); - } + MachineBasicBlock::iterator I = BranchMI; + insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode), + BranchDL); - /* we last inserterd the DebugLoc in the continue statement in the current dstBlk - * search for the DebugLoc in the continue statement. - * if not found, we have to insert the empty/default DebugLoc */ - InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk); - DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc(); - - CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue); - // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this - // loop. - for (typename std::set::const_iterator iter = - loopLand->breakOnRegs.begin(), - iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep, - *iter); + if (TrueMBB) { + MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); + MBB->removeSuccessor(TrueMBB); + if (LandMBB && TrueMBB->succ_size()!=0) + TrueMBB->removeSuccessor(LandMBB); + retireBlock(TrueMBB); + MLI->removeBlock(TrueMBB); } - // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this - // loop. - for (std::set::const_iterator iter = loopLand->contOnRegs.begin(), - iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32, - passRep, *iter); + if (FalseMBB) { + insertInstrBefore(I, AMDGPU::ELSE); + MBB->splice(I, FalseMBB, FalseMBB->begin(), + FalseMBB->end()); + MBB->removeSuccessor(FalseMBB); + if (LandMBB && FalseMBB->succ_size() != 0) + FalseMBB->removeSuccessor(LandMBB); + retireBlock(FalseMBB); + MLI->removeBlock(FalseMBB); } + insertInstrBefore(I, AMDGPU::ENDIF); - dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end()); - - for (typename BlockT::succ_iterator iter = landBlk->succ_begin(), - iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) { - dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of. - } + BranchMI->eraseFromParent(); - removeSuccessor(landBlk); - retireBlock(dstBlk, landBlk); -} //mergeLooplandBlock + if (LandMBB && TrueMBB && FalseMBB) + MBB->addSuccessor(LandMBB); -template -void CFGStructurizer::reversePredicateSetter(typename BlockT::iterator I) { - while (I--) { - if (I->getOpcode() == AMDGPU::PRED_X) { - switch (static_cast(I)->getOperand(2).getImm()) { - case OPCODE_IS_ZERO_INT: - static_cast(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT); - return; - case OPCODE_IS_NOT_ZERO_INT: - static_cast(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT); - return; - case OPCODE_IS_ZERO: - static_cast(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO); - return; - case OPCODE_IS_NOT_ZERO: - static_cast(I)->getOperand(2).setImm(OPCODE_IS_ZERO); - return; - default: - assert(0 && "PRED_X Opcode invalid!"); - } - } - } } -template -void CFGStructurizer::mergeLoopbreakBlock(BlockT *exitingBlk, - BlockT *exitBlk, - BlockT *exitLandBlk, - RegiT setReg) { - if (DEBUGME) { - errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber() - << " exit = BB" << exitBlk->getNumber() - << " land = BB" << exitLandBlk->getNumber() << "\n"; - } - - InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk); - assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); +void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, + MachineBasicBlock *LandMBB) { + DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() + << " land = BB" << LandMBB->getNumber() << "\n";); - DebugLoc DL = branchInstr->getDebugLoc(); - - BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); - - // transform exitingBlk to - // if ( ) { - // exitBlk (if exitBlk != exitLandBlk) - // setReg = 1 - // break - // }endif - // successor = {orgSuccessor(exitingBlk) - exitBlk} - - typename BlockT::iterator branchInstrPos = - CFGTraits::getInstrPos(exitingBlk, branchInstr); + insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc()); + insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc()); + DstBlk->addSuccessor(LandMBB); + DstBlk->removeSuccessor(DstBlk); +} - if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) { - //break_logical - - if (trueBranch != exitBlk) { - reversePredicateSetter(branchInstrPos); - } - CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL); - } else { - if (trueBranch != exitBlk) { - reversePredicateSetter(branchInstr); - } - CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL); - if (exitBlk != exitLandBlk) { - //splice is insert-before ... - exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(), - exitBlk->end()); - } - if (setReg != INVALIDREGNUM) { - CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); - } - CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep); - } //if_logical +void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, + MachineBasicBlock *LandMBB) { + DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber() + << " land = BB" << LandMBB->getNumber() << "\n";); + MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); + assert(BranchMI && isCondBranch(BranchMI)); + DebugLoc DL = BranchMI->getDebugLoc(); + MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI); + MachineBasicBlock::iterator I = BranchMI; + if (TrueBranch != LandMBB) + reversePredicateSetter(I); + insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL); + insertInstrBefore(I, AMDGPU::BREAK); + insertInstrBefore(I, AMDGPU::ENDIF); //now branchInst can be erase safely - branchInstr->eraseFromParent(); - + BranchMI->eraseFromParent(); //now take care of successors, retire blocks - exitingBlk->removeSuccessor(exitBlk); - if (exitBlk != exitLandBlk) { - //splice is insert-before ... - exitBlk->removeSuccessor(exitLandBlk); - retireBlock(exitingBlk, exitBlk); - } - -} //mergeLoopbreakBlock - -template -void CFGStructurizer::settleLoopcontBlock(BlockT *contingBlk, - BlockT *contBlk, - RegiT setReg) { - if (DEBUGME) { - errs() << "settleLoopcontBlock conting = BB" - << contingBlk->getNumber() - << ", cont = BB" << contBlk->getNumber() << "\n"; - } - - InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk); - if (branchInstr) { - assert(CFGTraits::isCondBranch(branchInstr)); - typename BlockT::iterator branchInstrPos = - CFGTraits::getInstrPos(contingBlk, branchInstr); - BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); - int oldOpcode = branchInstr->getOpcode(); - DebugLoc DL = branchInstr->getDebugLoc(); - - // transform contingBlk to - // if () { - // move instr after branchInstr - // continue - // or - // setReg = 1 - // break - // }endif - // successor = {orgSuccessor(contingBlk) - loopHeader} - - bool useContinueLogical = - (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr); - - if (useContinueLogical == false) { - int branchOpcode = - trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode) - : CFGTraits::getBranchZeroOpcode(oldOpcode); - - CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); - - if (setReg != INVALIDREGNUM) { - CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); - // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL); - } else { - // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL); - } + ExitingMBB->removeSuccessor(LandMBB); +} - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL); +void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, + MachineBasicBlock *ContMBB) { + DEBUG(dbgs() << "settleLoopcontBlock conting = BB" + << ContingMBB->getNumber() + << ", cont = BB" << ContMBB->getNumber() << "\n";); + + MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); + if (MI) { + assert(isCondBranch(MI)); + MachineBasicBlock::iterator I = MI; + MachineBasicBlock *TrueBranch = getTrueBranch(MI); + int OldOpcode = MI->getOpcode(); + DebugLoc DL = MI->getDebugLoc(); + + bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); + + if (UseContinueLogical == false) { + int BranchOpcode = + TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : + getBranchZeroOpcode(OldOpcode); + insertCondBranchBefore(I, BranchOpcode, DL); + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL); + insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL); } else { - int branchOpcode = - trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode) - : CFGTraits::getContinueZeroOpcode(oldOpcode); - - CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); + int BranchOpcode = + TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : + getContinueZeroOpcode(OldOpcode); + insertCondBranchBefore(I, BranchOpcode, DL); } - branchInstr->eraseFromParent(); + MI->eraseFromParent(); } else { // if we've arrived here then we've already erased the branch instruction - // travel back up the basic block to see the last reference of our debug location - // we've just inserted that reference here so it should be representative - if (setReg != INVALIDREGNUM) { - CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1); - // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); - } else { - // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); - } - } //else - -} //settleLoopcontBlock - -// BBs in exitBlkSet are determined as in break-path for loopRep, -// before we can put code for BBs as inside loop-body for loopRep -// check whether those BBs are determined as cont-BB for parentLoopRep -// earlier. -// If so, generate a new BB newBlk -// (1) set newBlk common successor of BBs in exitBlkSet -// (2) change the continue-instr in BBs in exitBlkSet to break-instr -// (3) generate continue-instr in newBlk -// -template -typename CFGStructurizer::BlockT * -CFGStructurizer::relocateLoopcontBlock(LoopT *parentLoopRep, - LoopT *loopRep, - std::set &exitBlkSet, - BlockT *exitLandBlk) { - std::set endBlkSet; - - - - for (typename std::set::const_iterator iter = exitBlkSet.begin(), - iterEnd = exitBlkSet.end(); - iter != iterEnd; ++iter) { - BlockT *exitBlk = *iter; - BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk); - - if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL) - return NULL; - - endBlkSet.insert(endBlk); - } - - BlockT *newBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(newBlk); //insert to function - CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep); - SHOWNEWBLK(newBlk, "New continue block: "); - - for (typename std::set::const_iterator iter = endBlkSet.begin(), - iterEnd = endBlkSet.end(); - iter != iterEnd; ++iter) { - BlockT *endBlk = *iter; - InstrT *contInstr = CFGTraits::getContinueInstr(endBlk); - if (contInstr) { - contInstr->eraseFromParent(); - } - endBlk->addSuccessor(newBlk); - if (DEBUGME) { - errs() << "Add new continue Block to BB" - << endBlk->getNumber() << " successors\n"; - } - } - - return newBlk; -} //relocateLoopcontBlock - - -// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as -// LoopLandBlock. This BB branch on the loop endBranchInit register to the -// pathes corresponding to the loop exiting branches. - -template -typename CFGStructurizer::BlockT * -CFGStructurizer::addLoopEndbranchBlock(LoopT *loopRep, - BlockTSmallerVector &exitingBlks, - BlockTSmallerVector &exitBlks) { - const AMDGPUInstrInfo *tii = - static_cast(passRep->getTargetInstrInfo()); - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - - RegiT endBranchReg = static_cast - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - assert(endBranchReg >= 0); - - // reg = 0 before entering the loop - addLoopEndbranchInitReg(loopRep, endBranchReg); - - uint32_t numBlks = static_cast(exitingBlks.size()); - assert(numBlks >=2 && numBlks == exitBlks.size()); - - BlockT *preExitingBlk = exitingBlks[0]; - BlockT *preExitBlk = exitBlks[0]; - BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(preBranchBlk); //insert to function - SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: "); - - BlockT *newLandBlk = preBranchBlk; - - CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk, - newLandBlk); - preExitingBlk->removeSuccessor(preExitBlk); - preExitingBlk->addSuccessor(newLandBlk); - - //it is redundant to add reg = 0 to exitingBlks[0] - - // For 1..n th exiting path (the last iteration handles two pathes) create the - // branch to the previous path and the current path. - for (uint32_t i = 1; i < numBlks; ++i) { - BlockT *curExitingBlk = exitingBlks[i]; - BlockT *curExitBlk = exitBlks[i]; - BlockT *curBranchBlk; - - if (i == numBlks - 1) { - curBranchBlk = curExitBlk; - } else { - curBranchBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(curBranchBlk); //insert to function - SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: "); - } - - // Add reg = i to exitingBlks[i]. - CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep, - endBranchReg, i); - - // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge - // (exitingBlks[i], newLandBlk). - CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk, - newLandBlk); - curExitingBlk->removeSuccessor(curExitBlk); - curExitingBlk->addSuccessor(newLandBlk); - - // add to preBranchBlk the branch instruction: - // if (endBranchReg == preVal) - // preExitBlk - // else - // curBranchBlk - // - // preValReg = i - 1 - - DebugLoc DL; - RegiT preValReg = static_cast - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - - preBranchBlk->insert(preBranchBlk->begin(), - tii->getMovImmInstr(preBranchBlk->getParent(), preValReg, - i - 1)); - - // condResReg = (endBranchReg == preValReg) - RegiT condResReg = static_cast - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg) - .addReg(endBranchReg).addReg(preValReg); - - BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32)) - .addMBB(preExitBlk).addReg(condResReg); - - preBranchBlk->addSuccessor(preExitBlk); - preBranchBlk->addSuccessor(curBranchBlk); - - // Update preExitingBlk, preExitBlk, preBranchBlk. - preExitingBlk = curExitingBlk; - preExitBlk = curExitBlk; - preBranchBlk = curBranchBlk; - - } //end for 1 .. n blocks - - return newLandBlk; -} //addLoopEndbranchBlock - -template -typename CFGStructurizer::PathToKind -CFGStructurizer::singlePathTo(BlockT *srcBlk, BlockT *dstBlk, - bool allowSideEntry) { - assert(dstBlk); - - if (srcBlk == dstBlk) { - return SinglePath_InPath; + // travel back up the basic block to see the last reference of our debug + // location we've just inserted that reference here so it should be + // representative insertEnd to ensure phi-moves, if exist, go before the + // continue-instr. + insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, + getLastDebugLocInBB(ContingMBB)); } +} - while (srcBlk && srcBlk->succ_size() == 1) { - srcBlk = *srcBlk->succ_begin(); - if (srcBlk == dstBlk) { - return SinglePath_InPath; - } - - if (!allowSideEntry && srcBlk->pred_size() > 1) { - return Not_SinglePath; - } - } - - if (srcBlk && srcBlk->succ_size()==0) { - return SinglePath_NotInPath; - } - - return Not_SinglePath; -} //singlePathTo - -// If there is a single path from srcBlk to dstBlk, return the last block before -// dstBlk If there is a single path from srcBlk->end without dstBlk, return the -// last block in the path Otherwise, return NULL -template -typename CFGStructurizer::BlockT * -CFGStructurizer::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk, - bool allowSideEntry) { - assert(dstBlk); - - if (srcBlk == dstBlk) { - return srcBlk; - } - - if (srcBlk->succ_size() == 0) { - return srcBlk; - } - - while (srcBlk && srcBlk->succ_size() == 1) { - BlockT *preBlk = srcBlk; - - srcBlk = *srcBlk->succ_begin(); - if (srcBlk == NULL) { - return preBlk; - } - - if (!allowSideEntry && srcBlk->pred_size() > 1) { - return NULL; - } - } - - if (srcBlk && srcBlk->succ_size()==0) { - return srcBlk; - } - - return NULL; - -} //singlePathEnd - -template -int CFGStructurizer::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk, - BlockT *dstBlk) { - int cloned = 0; - assert(preBlk->isSuccessor(srcBlk)); - while (srcBlk && srcBlk != dstBlk) { - assert(srcBlk->succ_size() == 1); - if (srcBlk->pred_size() > 1) { - srcBlk = cloneBlockForPredecessor(srcBlk, preBlk); - ++cloned; +int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, + MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { + int Cloned = 0; + assert(PreMBB->isSuccessor(SrcMBB)); + while (SrcMBB && SrcMBB != DstMBB) { + assert(SrcMBB->succ_size() == 1); + if (SrcMBB->pred_size() > 1) { + SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB); + ++Cloned; } - preBlk = srcBlk; - srcBlk = *srcBlk->succ_begin(); + PreMBB = SrcMBB; + SrcMBB = *SrcMBB->succ_begin(); } - return cloned; -} //cloneOnSideEntryTo + return Cloned; +} -template -typename CFGStructurizer::BlockT * -CFGStructurizer::cloneBlockForPredecessor(BlockT *curBlk, - BlockT *predBlk) { - assert(predBlk->isSuccessor(curBlk) && +MachineBasicBlock * +AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, + MachineBasicBlock *PredMBB) { + assert(PredMBB->isSuccessor(MBB) && "succBlk is not a prececessor of curBlk"); - BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions - CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk); + MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions + replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); //srcBlk, oldBlk, newBlk - predBlk->removeSuccessor(curBlk); - predBlk->addSuccessor(cloneBlk); + PredMBB->removeSuccessor(MBB); + PredMBB->addSuccessor(CloneMBB); // add all successor to cloneBlk - CFGTraits::cloneSuccessorList(cloneBlk, curBlk); - - numClonedInstr += curBlk->size(); - - if (DEBUGME) { - errs() << "Cloned block: " << "BB" - << curBlk->getNumber() << "size " << curBlk->size() << "\n"; - } + cloneSuccessorList(CloneMBB, MBB); - SHOWNEWBLK(cloneBlk, "result of Cloned block: "); + numClonedInstr += MBB->size(); - return cloneBlk; -} //cloneBlockForPredecessor + DEBUG( + dbgs() << "Cloned block: " << "BB" + << MBB->getNumber() << "size " << MBB->size() << "\n"; + ); -template -typename CFGStructurizer::BlockT * -CFGStructurizer::exitingBlock2ExitBlock(LoopT *loopRep, - BlockT *exitingBlk) { - BlockT *exitBlk = NULL; + SHOWNEWBLK(CloneMBB, "result of Cloned block: "); - for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(), - iterSuccEnd = exitingBlk->succ_end(); - iterSucc != iterSuccEnd; ++iterSucc) { - BlockT *curBlk = *iterSucc; - if (!loopRep->contains(curBlk)) { - assert(exitBlk == NULL); - exitBlk = curBlk; - } - } - - assert(exitBlk != NULL); - - return exitBlk; -} //exitingBlock2ExitBlock + return CloneMBB; +} -template -void CFGStructurizer::migrateInstruction(BlockT *srcBlk, - BlockT *dstBlk, - InstrIterator insertPos) { - InstrIterator spliceEnd; +void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, + MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { + MachineBasicBlock::iterator SpliceEnd; //look for the input branchinstr, not the AMDGPU branchinstr - InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); - if (branchInstr == NULL) { - if (DEBUGME) { - errs() << "migrateInstruction don't see branch instr\n" ; - } - spliceEnd = srcBlk->end(); + MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); + if (!BranchMI) { + DEBUG( + dbgs() << "migrateInstruction don't see branch instr\n" ; + ); + SpliceEnd = SrcMBB->end(); } else { - if (DEBUGME) { - errs() << "migrateInstruction see branch instr\n" ; - branchInstr->dump(); - } - spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr); - } - if (DEBUGME) { - errs() << "migrateInstruction before splice dstSize = " << dstBlk->size() - << "srcSize = " << srcBlk->size() << "\n"; + DEBUG( + dbgs() << "migrateInstruction see branch instr\n" ; + BranchMI->dump(); + ); + SpliceEnd = BranchMI; } + DEBUG( + dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size() + << "srcSize = " << SrcMBB->size() << "\n"; + ); //splice insert before insertPos - dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd); + DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); - if (DEBUGME) { - errs() << "migrateInstruction after splice dstSize = " << dstBlk->size() - << "srcSize = " << srcBlk->size() << "\n"; - } -} //migrateInstruction + DEBUG( + dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size() + << "srcSize = " << SrcMBB->size() << "\n"; + ); +} -// normalizeInfiniteLoopExit change -// B1: -// uncond_br LoopHeader -// -// to -// B1: -// cond_br 1 LoopHeader dummyExit -// and return the newly added dummy exit block -// -template -typename CFGStructurizer::BlockT * -CFGStructurizer::normalizeInfiniteLoopExit(LoopT* LoopRep) { - BlockT *loopHeader; - BlockT *loopLatch; - loopHeader = LoopRep->getHeader(); - loopLatch = LoopRep->getLoopLatch(); - BlockT *dummyExitBlk = NULL; +MachineBasicBlock * +AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { + MachineBasicBlock *LoopHeader = LoopRep->getHeader(); + MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - if (loopHeader!=NULL && loopLatch!=NULL) { - InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch); - if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) { - dummyExitBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(dummyExitBlk); //insert to function - SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); - - if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n"; - - typename BlockT::iterator insertPos = - CFGTraits::getInstrPos(loopLatch, branchInstr); - unsigned immReg = - funcRep->getRegInfo().createVirtualRegister(I32RC); - CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1); - InstrT *newInstr = - CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep); - MachineInstrBuilder MIB(*funcRep, newInstr); - MIB.addMBB(loopHeader); - MIB.addReg(immReg, false); - - SHOWNEWINSTR(newInstr); - - branchInstr->eraseFromParent(); - loopLatch->addSuccessor(dummyExitBlk); - } - } - return dummyExitBlk; -} //normalizeInfiniteLoopExit + if (!LoopHeader || !LoopLatch) + return nullptr; + MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); + // Is LoopRep an infinite loop ? + if (!BranchMI || !isUncondBranch(BranchMI)) + return nullptr; + + MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); + FuncRep->push_back(DummyExitBlk); //insert to function + SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); + DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); + MachineBasicBlock::iterator I = BranchMI; + unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC); + llvm_unreachable("Extra register needed to handle CFG"); + MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32); + MachineInstrBuilder MIB(*FuncRep, NewMI); + MIB.addMBB(LoopHeader); + MIB.addReg(ImmReg, false); + SHOWNEWINSTR(NewMI); + BranchMI->eraseFromParent(); + LoopLatch->addSuccessor(DummyExitBlk); + + return DummyExitBlk; +} -template -void CFGStructurizer::removeUnconditionalBranch(BlockT *srcBlk) { - InstrT *branchInstr; +void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { + MachineInstr *BranchMI; // I saw two unconditional branch in one basic block in example // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. - while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk)) - && CFGTraits::isUncondBranch(branchInstr)) { - if (DEBUGME) { - errs() << "Removing unconditional branch instruction" ; - branchInstr->dump(); - } - branchInstr->eraseFromParent(); - } -} //removeUnconditionalBranch - -template -void CFGStructurizer::removeRedundantConditionalBranch(BlockT *srcBlk) { - if (srcBlk->succ_size() == 2) { - BlockT *blk1 = *srcBlk->succ_begin(); - BlockT *blk2 = *(++srcBlk->succ_begin()); - - if (blk1 == blk2) { - InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); - assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); - if (DEBUGME) { - errs() << "Removing unneeded conditional branch instruction" ; - branchInstr->dump(); - } - branchInstr->eraseFromParent(); - SHOWNEWBLK(blk1, "Removing redundant successor"); - srcBlk->removeSuccessor(blk1); - } - } -} //removeRedundantConditionalBranch - -template -void CFGStructurizer::addDummyExitBlock(SmallVector &retBlks) { - BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(dummyExitBlk); //insert to function - CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep); - - for (typename SmallVector::iterator iter = - retBlks.begin(), - iterEnd = retBlks.end(); iter != iterEnd; ++iter) { - BlockT *curBlk = *iter; - InstrT *curInstr = CFGTraits::getReturnInstr(curBlk); - if (curInstr) { - curInstr->eraseFromParent(); - } - curBlk->addSuccessor(dummyExitBlk); - if (DEBUGME) { - errs() << "Add dummyExitBlock to BB" << curBlk->getNumber() - << " successors\n"; - } - } //for - - SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: "); -} //addDummyExitBlock - -template -void CFGStructurizer::removeSuccessor(BlockT *srcBlk) { - while (srcBlk->succ_size()) { - srcBlk->removeSuccessor(*srcBlk->succ_begin()); + while ((BranchMI = getLoopendBlockBranchInstr(MBB)) + && isUncondBranch(BranchMI)) { + DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump();); + BranchMI->eraseFromParent(); } } -template -void CFGStructurizer::recordSccnum(BlockT *srcBlk, int sccNum) { - BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; +void AMDGPUCFGStructurizer::removeRedundantConditionalBranch( + MachineBasicBlock *MBB) { + if (MBB->succ_size() != 2) + return; + MachineBasicBlock *MBB1 = *MBB->succ_begin(); + MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin()); + if (MBB1 != MBB2) + return; + + MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); + assert(BranchMI && isCondBranch(BranchMI)); + DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump();); + BranchMI->eraseFromParent(); + SHOWNEWBLK(MBB1, "Removing redundant successor"); + MBB->removeSuccessor(MBB1); +} - if (srcBlkInfo == NULL) { - srcBlkInfo = new BlockInfo(); +void AMDGPUCFGStructurizer::addDummyExitBlock( + SmallVectorImpl &RetMBB) { + MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); + FuncRep->push_back(DummyExitBlk); //insert to function + insertInstrEnd(DummyExitBlk, AMDGPU::RETURN); + + for (SmallVectorImpl::iterator It = RetMBB.begin(), + E = RetMBB.end(); It != E; ++It) { + MachineBasicBlock *MBB = *It; + MachineInstr *MI = getReturnInstr(MBB); + if (MI) + MI->eraseFromParent(); + MBB->addSuccessor(DummyExitBlk); + DEBUG( + dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() + << " successors\n"; + ); } + SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); +} - srcBlkInfo->sccNum = sccNum; +void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { + while (MBB->succ_size()) + MBB->removeSuccessor(*MBB->succ_begin()); } -template -int CFGStructurizer::getSCCNum(BlockT *srcBlk) { - BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; - return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM; +void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, + int SccNum) { + BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; + if (!srcBlkInfo) + srcBlkInfo = new BlockInformation(); + srcBlkInfo->SccNum = SccNum; } -template -void CFGStructurizer::retireBlock(BlockT *dstBlk, BlockT *srcBlk) { - if (DEBUGME) { - errs() << "Retiring BB" << srcBlk->getNumber() << "\n"; - } +void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { + DEBUG( + dbgs() << "Retiring BB" << MBB->getNumber() << "\n"; + ); - BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; + BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; - if (srcBlkInfo == NULL) { - srcBlkInfo = new BlockInfo(); - } + if (!SrcBlkInfo) + SrcBlkInfo = new BlockInformation(); - srcBlkInfo->isRetired = true; - assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0 + SrcBlkInfo->IsRetired = true; + assert(MBB->succ_size() == 0 && MBB->pred_size() == 0 && "can't retire block yet"); } -template -bool CFGStructurizer::isRetiredBlock(BlockT *srcBlk) { - BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; - return (srcBlkInfo && srcBlkInfo->isRetired); -} - -template -bool CFGStructurizer::isActiveLoophead(BlockT *curBlk) { - LoopT *loopRep = loopInfo->getLoopFor(curBlk); - while (loopRep && loopRep->getHeader() == curBlk) { - LoopLandInfo *loopLand = getLoopLandInfo(loopRep); - - if(loopLand == NULL) - return true; - - BlockT *landBlk = loopLand->landBlk; - assert(landBlk); - if (!isRetiredBlock(landBlk)) { - return true; - } - - loopRep = loopRep->getParentLoop(); - } - - return false; -} //isActiveLoophead - -template -bool CFGStructurizer::needMigrateBlock(BlockT *blk) { - const unsigned blockSizeThreshold = 30; - const unsigned cloneInstrThreshold = 100; - - bool multiplePreds = blk && (blk->pred_size() > 1); - - if(!multiplePreds) - return false; - - unsigned blkSize = blk->size(); - return ((blkSize > blockSizeThreshold) - && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold)); -} //needMigrateBlock - -template -typename CFGStructurizer::BlockT * -CFGStructurizer::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk, - BlockTSmallerVector &exitBlks, - std::set &exitBlkSet) { - SmallVector inpathBlks; //in exit path blocks - - for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), - predIterEnd = landBlk->pred_end(); - predIter != predIterEnd; ++predIter) { - BlockT *curBlk = *predIter; - if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) { - inpathBlks.push_back(curBlk); - } - } //for - - //if landBlk has predecessors that are not in the given loop, - //create a new block - BlockT *newLandBlk = landBlk; - if (inpathBlks.size() != landBlk->pred_size()) { - newLandBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(newLandBlk); //insert to function - newLandBlk->addSuccessor(landBlk); - for (typename SmallVector::iterator iter = - inpathBlks.begin(), - iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) { - BlockT *curBlk = *iter; - CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk); - //srcBlk, oldBlk, newBlk - curBlk->removeSuccessor(landBlk); - curBlk->addSuccessor(newLandBlk); - } - for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) { - if (exitBlks[i] == landBlk) { - exitBlks[i] = newLandBlk; - } - } - SHOWNEWBLK(newLandBlk, "NewLandingBlock: "); - } - - setLoopLandBlock(loopRep, newLandBlk); - - return newLandBlk; -} // recordLoopbreakLand - -template -void CFGStructurizer::setLoopLandBlock(LoopT *loopRep, BlockT *blk) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - assert(theEntry->landBlk == NULL); - - if (blk == NULL) { - blk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(blk); //insert to function - SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: "); - } - - theEntry->landBlk = blk; - - if (DEBUGME) { - errs() << "setLoopLandBlock loop-header = BB" - << loopRep->getHeader()->getNumber() - << " landing-block = BB" << blk->getNumber() << "\n"; - } -} // setLoopLandBlock - -template -void CFGStructurizer::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - - theEntry->breakOnRegs.insert(regNum); - - if (DEBUGME) { - errs() << "addLoopBreakOnReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - } -} // addLoopBreakOnReg - -template -void CFGStructurizer::addLoopContOnReg(LoopT *loopRep, RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - theEntry->contOnRegs.insert(regNum); - - if (DEBUGME) { - errs() << "addLoopContOnReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - } -} // addLoopContOnReg - -template -void CFGStructurizer::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - theEntry->breakInitRegs.insert(regNum); - - if (DEBUGME) { - errs() << "addLoopBreakInitReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - } -} // addLoopBreakInitReg - -template -void CFGStructurizer::addLoopContInitReg(LoopT *loopRep, RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - theEntry->contInitRegs.insert(regNum); - - if (DEBUGME) { - errs() << "addLoopContInitReg loop-header = BB" +void AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep, + MachineBasicBlock *MBB) { + MachineBasicBlock *&TheEntry = LLInfoMap[loopRep]; + if (!MBB) { + MBB = FuncRep->CreateMachineBasicBlock(); + FuncRep->push_back(MBB); //insert to function + SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: "); + } + TheEntry = MBB; + DEBUG( + dbgs() << "setLoopLandBlock loop-header = BB" << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - } -} // addLoopContInitReg - -template -void CFGStructurizer::addLoopEndbranchInitReg(LoopT *loopRep, - RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - theEntry->endbranchInitRegs.insert(regNum); - - if (DEBUGME) { - errs() << "addLoopEndbranchInitReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - } -} // addLoopEndbranchInitReg - -template -typename CFGStructurizer::LoopLandInfo * -CFGStructurizer::getLoopLandInfo(LoopT *loopRep) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - return theEntry; -} // getLoopLandInfo - -template -typename CFGStructurizer::BlockT * -CFGStructurizer::getLoopLandBlock(LoopT *loopRep) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - return theEntry ? theEntry->landBlk : NULL; -} // getLoopLandBlock - - -template -bool CFGStructurizer::hasBackEdge(BlockT *curBlk) { - LoopT *loopRep = loopInfo->getLoopFor(curBlk); - if (loopRep == NULL) - return false; - - BlockT *loopHeader = loopRep->getHeader(); - - return curBlk->isSuccessor(loopHeader); - -} //hasBackEdge - -template -unsigned CFGStructurizer::getLoopDepth(LoopT *loopRep) { - return loopRep ? loopRep->getLoopDepth() : 0; -} //getLoopDepth - -template -int CFGStructurizer::countActiveBlock -(typename SmallVector::const_iterator iterStart, - typename SmallVector::const_iterator iterEnd) { - int count = 0; - while (iterStart != iterEnd) { - if (!isRetiredBlock(*iterStart)) { - ++count; - } - ++iterStart; - } - - return count; -} //countActiveBlock - -// This is work around solution for findNearestCommonDominator not avaiable to -// post dom a proper fix should go to Dominators.h. + << " landing-block = BB" << MBB->getNumber() << "\n"; + ); +} -template -typename CFGStructurizer::BlockT* -CFGStructurizer::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) { +MachineBasicBlock * +AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2) { - if (postDomTree->dominates(blk1, blk2)) { - return blk1; - } - if (postDomTree->dominates(blk2, blk1)) { - return blk2; - } + if (PDT->dominates(MBB1, MBB2)) + return MBB1; + if (PDT->dominates(MBB2, MBB1)) + return MBB2; - DomTreeNodeT *node1 = postDomTree->getNode(blk1); - DomTreeNodeT *node2 = postDomTree->getNode(blk2); + MachineDomTreeNode *Node1 = PDT->getNode(MBB1); + MachineDomTreeNode *Node2 = PDT->getNode(MBB2); // Handle newly cloned node. - if (node1 == NULL && blk1->succ_size() == 1) { - return findNearestCommonPostDom(*blk1->succ_begin(), blk2); - } - if (node2 == NULL && blk2->succ_size() == 1) { - return findNearestCommonPostDom(blk1, *blk2->succ_begin()); - } + if (!Node1 && MBB1->succ_size() == 1) + return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2); + if (!Node2 && MBB2->succ_size() == 1) + return findNearestCommonPostDom(MBB1, *MBB2->succ_begin()); - if (node1 == NULL || node2 == NULL) { - return NULL; - } - - node1 = node1->getIDom(); - while (node1) { - if (postDomTree->dominates(node1, node2)) { - return node1->getBlock(); - } - node1 = node1->getIDom(); - } - - return NULL; -} - -template -typename CFGStructurizer::BlockT * -CFGStructurizer::findNearestCommonPostDom -(typename std::set &blks) { - BlockT *commonDom; - typename std::set::const_iterator iter = blks.begin(); - typename std::set::const_iterator iterEnd = blks.end(); - for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) { - BlockT *curBlk = *iter; - if (curBlk != commonDom) { - commonDom = findNearestCommonPostDom(curBlk, commonDom); - } - } + if (!Node1 || !Node2) + return nullptr; - if (DEBUGME) { - errs() << "Common post dominator for exit blocks is "; - if (commonDom) { - errs() << "BB" << commonDom->getNumber() << "\n"; - } else { - errs() << "NULL\n"; - } + Node1 = Node1->getIDom(); + while (Node1) { + if (PDT->dominates(Node1, Node2)) + return Node1->getBlock(); + Node1 = Node1->getIDom(); } - return commonDom; -} //findNearestCommonPostDom - -} //end namespace llvm - -//todo: move-end - - -//===----------------------------------------------------------------------===// -// -// CFGStructurizer for AMDGPU -// -//===----------------------------------------------------------------------===// - - -using namespace llvmCFGStruct; - -namespace llvm { -class AMDGPUCFGStructurizer : public MachineFunctionPass { -public: - typedef MachineInstr InstructionType; - typedef MachineFunction FunctionType; - typedef MachineBasicBlock BlockType; - typedef MachineLoopInfo LoopinfoType; - typedef MachineDominatorTree DominatortreeType; - typedef MachinePostDominatorTree PostDominatortreeType; - typedef MachineDomTreeNode DomTreeNodeType; - typedef MachineLoop LoopType; - -protected: - TargetMachine &TM; - const TargetInstrInfo *TII; - const AMDGPURegisterInfo *TRI; - -public: - AMDGPUCFGStructurizer(char &pid, TargetMachine &tm); - const TargetInstrInfo *getTargetInstrInfo() const; - -private: - -}; - -} //end of namespace llvm -AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm) -: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()), - TRI(static_cast(tm.getRegisterInfo())) { + return nullptr; } -const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const { - return TII; +MachineBasicBlock * +AMDGPUCFGStructurizer::findNearestCommonPostDom( + std::set &MBBs) { + MachineBasicBlock *CommonDom; + std::set::const_iterator It = MBBs.begin(); + std::set::const_iterator E = MBBs.end(); + for (CommonDom = *It; It != E && CommonDom; ++It) { + MachineBasicBlock *MBB = *It; + if (MBB != CommonDom) + CommonDom = findNearestCommonPostDom(MBB, CommonDom); + } + + DEBUG( + dbgs() << "Common post dominator for exit blocks is "; + if (CommonDom) + dbgs() << "BB" << CommonDom->getNumber() << "\n"; + else + dbgs() << "NULL\n"; + ); + + return CommonDom; } -//===----------------------------------------------------------------------===// -// -// CFGPrepare -// -//===----------------------------------------------------------------------===// - -using namespace llvmCFGStruct; +char AMDGPUCFGStructurizer::ID = 0; -namespace llvm { -class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer { -public: - static char ID; - -public: - AMDGPUCFGPrepare(TargetMachine &tm); - - virtual const char *getPassName() const; - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - - bool runOnMachineFunction(MachineFunction &F); - -private: - -}; - -char AMDGPUCFGPrepare::ID = 0; -} //end of namespace llvm +} // end anonymous namespace -AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm) - : AMDGPUCFGStructurizer(ID, tm ) { -} -const char *AMDGPUCFGPrepare::getPassName() const { - return "AMD IL Control Flow Graph Preparation Pass"; -} - -void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); -} -//===----------------------------------------------------------------------===// -// -// CFGPerform -// -//===----------------------------------------------------------------------===// - - -using namespace llvmCFGStruct; - -namespace llvm { -class AMDGPUCFGPerform : public AMDGPUCFGStructurizer { -public: - static char ID; - -public: - AMDGPUCFGPerform(TargetMachine &tm); - virtual const char *getPassName() const; - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - bool runOnMachineFunction(MachineFunction &F); - -private: - -}; - -char AMDGPUCFGPerform::ID = 0; -} //end of namespace llvm - - AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm) -: AMDGPUCFGStructurizer(ID, tm) { -} - -const char *AMDGPUCFGPerform::getPassName() const { - return "AMD IL Control Flow Graph structurizer Pass"; -} - -void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); -} - -//===----------------------------------------------------------------------===// -// -// CFGStructTraits -// -//===----------------------------------------------------------------------===// - -namespace llvmCFGStruct { -// this class is tailor to the AMDGPU backend -template<> -struct CFGStructTraits { - typedef int RegiT; - - static int getBranchNzeroOpcode(int oldOpcode) { - switch(oldOpcode) { - case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; - case AMDGPU::BRANCH_COND_i32: - case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; - default: - assert(0 && "internal error"); - } - return -1; - } - - static int getBranchZeroOpcode(int oldOpcode) { - switch(oldOpcode) { - case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; - case AMDGPU::BRANCH_COND_i32: - case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; - default: - assert(0 && "internal error"); - } - return -1; - } - - static int getContinueNzeroOpcode(int oldOpcode) { - switch(oldOpcode) { - case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; - default: - assert(0 && "internal error"); - }; - return -1; - } - - static int getContinueZeroOpcode(int oldOpcode) { - switch(oldOpcode) { - case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; - default: - assert(0 && "internal error"); - } - return -1; - } - - static MachineBasicBlock *getTrueBranch(MachineInstr *instr) { - return instr->getOperand(0).getMBB(); - } - - static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) { - instr->getOperand(0).setMBB(blk); - } - - static MachineBasicBlock * - getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) { - assert(blk->succ_size() == 2); - MachineBasicBlock *trueBranch = getTrueBranch(instr); - MachineBasicBlock::succ_iterator iter = blk->succ_begin(); - MachineBasicBlock::succ_iterator iterNext = iter; - ++iterNext; - - return (*iter == trueBranch) ? *iterNext : *iter; - } - - static bool isCondBranch(MachineInstr *instr) { - switch (instr->getOpcode()) { - case AMDGPU::JUMP: - return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0; - case AMDGPU::BRANCH_COND_i32: - case AMDGPU::BRANCH_COND_f32: - break; - default: - return false; - } - return true; - } - - static bool isUncondBranch(MachineInstr *instr) { - switch (instr->getOpcode()) { - case AMDGPU::JUMP: - return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0; - case AMDGPU::BRANCH: - return true; - default: - return false; - } - return true; - } - - static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) { - //get DebugLoc from the first MachineBasicBlock instruction with debug info - DebugLoc DL; - for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) { - MachineInstr *instr = &(*iter); - if (instr->getDebugLoc().isUnknown() == false) { - DL = instr->getDebugLoc(); - } - } - return DL; - } - - static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) { - MachineBasicBlock::reverse_iterator iter = blk->rbegin(); - MachineInstr *instr = &*iter; - if (instr && (isCondBranch(instr) || isUncondBranch(instr))) { - return instr; - } - return NULL; - } - - // The correct naming for this is getPossibleLoopendBlockBranchInstr. - // - // BB with backward-edge could have move instructions after the branch - // instruction. Such move instruction "belong to" the loop backward-edge. - // - static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) { - const AMDGPUInstrInfo * TII = static_cast( - blk->getParent()->getTarget().getInstrInfo()); - - for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(), - iterEnd = blk->rend(); iter != iterEnd; ++iter) { - // FIXME: Simplify - MachineInstr *instr = &*iter; - if (instr) { - if (isCondBranch(instr) || isUncondBranch(instr)) { - return instr; - } else if (!TII->isMov(instr->getOpcode())) { - break; - } - } - } - return NULL; - } - - static MachineInstr *getReturnInstr(MachineBasicBlock *blk) { - MachineBasicBlock::reverse_iterator iter = blk->rbegin(); - if (iter != blk->rend()) { - MachineInstr *instr = &(*iter); - if (instr->getOpcode() == AMDGPU::RETURN) { - return instr; - } - } - return NULL; - } - - static MachineInstr *getContinueInstr(MachineBasicBlock *blk) { - MachineBasicBlock::reverse_iterator iter = blk->rbegin(); - if (iter != blk->rend()) { - MachineInstr *instr = &(*iter); - if (instr->getOpcode() == AMDGPU::CONTINUE) { - return instr; - } - } - return NULL; - } - - static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) { - for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) { - MachineInstr *instr = &(*iter); - if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) { - return instr; - } - } - return NULL; - } - - static bool isReturnBlock(MachineBasicBlock *blk) { - MachineInstr *instr = getReturnInstr(blk); - bool isReturn = (blk->succ_size() == 0); - if (instr) { - assert(isReturn); - } else if (isReturn) { - if (DEBUGME) { - errs() << "BB" << blk->getNumber() - <<" is return block without RETURN instr\n"; - } - } - - return isReturn; - } - - static MachineBasicBlock::iterator - getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) { - assert(instr->getParent() == blk && "instruction doesn't belong to block"); - MachineBasicBlock::iterator iter = blk->begin(); - MachineBasicBlock::iterator iterEnd = blk->end(); - while (&(*iter) != instr && iter != iterEnd) { - ++iter; - } - - assert(iter != iterEnd); - return iter; - }//getInstrPos - - static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, - AMDGPUCFGStructurizer *passRep) { - return insertInstrBefore(blk,newOpcode,passRep,DebugLoc()); - } //insertInstrBefore - - static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, - AMDGPUCFGStructurizer *passRep, DebugLoc DL) { - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineInstr *newInstr = - blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); - - MachineBasicBlock::iterator res; - if (blk->begin() != blk->end()) { - blk->insert(blk->begin(), newInstr); - } else { - blk->push_back(newInstr); - } - - SHOWNEWINSTR(newInstr); - - return newInstr; - } //insertInstrBefore - - static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, - AMDGPUCFGStructurizer *passRep) { - insertInstrEnd(blk,newOpcode,passRep,DebugLoc()); - } //insertInstrEnd - - static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, - AMDGPUCFGStructurizer *passRep, DebugLoc DL) { - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineInstr *newInstr = blk->getParent() - ->CreateMachineInstr(tii->get(newOpcode), DL); - - blk->push_back(newInstr); - //assume the instruction doesn't take any reg operand ... - - SHOWNEWINSTR(newInstr); - } //insertInstrEnd - - static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos, - int newOpcode, - AMDGPUCFGStructurizer *passRep) { - MachineInstr *oldInstr = &(*instrPos); - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineBasicBlock *blk = oldInstr->getParent(); - MachineInstr *newInstr = - blk->getParent()->CreateMachineInstr(tii->get(newOpcode), - DebugLoc()); - - blk->insert(instrPos, newInstr); - //assume the instruction doesn't take any reg operand ... - - SHOWNEWINSTR(newInstr); - return newInstr; - } //insertInstrBefore - - static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos, - int newOpcode, - AMDGPUCFGStructurizer *passRep, - DebugLoc DL) { - MachineInstr *oldInstr = &(*instrPos); - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineBasicBlock *blk = oldInstr->getParent(); - MachineFunction *MF = blk->getParent(); - MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL); - - blk->insert(instrPos, newInstr); - MachineInstrBuilder MIB(*MF, newInstr); - MIB.addReg(oldInstr->getOperand(1).getReg(), false); - - SHOWNEWINSTR(newInstr); - //erase later oldInstr->eraseFromParent(); - } //insertCondBranchBefore - - static void insertCondBranchBefore(MachineBasicBlock *blk, - MachineBasicBlock::iterator insertPos, - int newOpcode, - AMDGPUCFGStructurizer *passRep, - RegiT regNum, - DebugLoc DL) { - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineFunction *MF = blk->getParent(); - - MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL); - - //insert before - blk->insert(insertPos, newInstr); - MachineInstrBuilder(*MF, newInstr).addReg(regNum, false); - - SHOWNEWINSTR(newInstr); - } //insertCondBranchBefore - - static void insertCondBranchEnd(MachineBasicBlock *blk, - int newOpcode, - AMDGPUCFGStructurizer *passRep, - RegiT regNum) { - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineFunction *MF = blk->getParent(); - MachineInstr *newInstr = - MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc()); - - blk->push_back(newInstr); - MachineInstrBuilder(*MF, newInstr).addReg(regNum, false); - - SHOWNEWINSTR(newInstr); - } //insertCondBranchEnd - - - static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos, - AMDGPUCFGStructurizer *passRep, - RegiT regNum, int regVal) { - MachineInstr *oldInstr = &(*instrPos); - const AMDGPUInstrInfo *tii = - static_cast(passRep->getTargetInstrInfo()); - MachineBasicBlock *blk = oldInstr->getParent(); - MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, - regVal); - blk->insert(instrPos, newInstr); - - SHOWNEWINSTR(newInstr); - } //insertAssignInstrBefore - - static void insertAssignInstrBefore(MachineBasicBlock *blk, - AMDGPUCFGStructurizer *passRep, - RegiT regNum, int regVal) { - const AMDGPUInstrInfo *tii = - static_cast(passRep->getTargetInstrInfo()); - - MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, - regVal); - if (blk->begin() != blk->end()) { - blk->insert(blk->begin(), newInstr); - } else { - blk->push_back(newInstr); - } - - SHOWNEWINSTR(newInstr); - - } //insertInstrBefore - - static void insertCompareInstrBefore(MachineBasicBlock *blk, - MachineBasicBlock::iterator instrPos, - AMDGPUCFGStructurizer *passRep, - RegiT dstReg, RegiT src1Reg, - RegiT src2Reg) { - const AMDGPUInstrInfo *tii = - static_cast(passRep->getTargetInstrInfo()); - MachineFunction *MF = blk->getParent(); - MachineInstr *newInstr = - MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc()); - - MachineInstrBuilder MIB(*MF, newInstr); - MIB.addReg(dstReg, RegState::Define); //set target - MIB.addReg(src1Reg); //set src value - MIB.addReg(src2Reg); //set src value - - blk->insert(instrPos, newInstr); - SHOWNEWINSTR(newInstr); - - } //insertCompareInstrBefore - - static void cloneSuccessorList(MachineBasicBlock *dstBlk, - MachineBasicBlock *srcBlk) { - for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(), - iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) { - dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of - } - } //cloneSuccessorList - - static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) { - MachineFunction *func = srcBlk->getParent(); - MachineBasicBlock *newBlk = func->CreateMachineBasicBlock(); - func->push_back(newBlk); //insert to function - for (MachineBasicBlock::iterator iter = srcBlk->begin(), - iterEnd = srcBlk->end(); - iter != iterEnd; ++iter) { - MachineInstr *instr = func->CloneMachineInstr(iter); - newBlk->push_back(instr); - } - return newBlk; - } - - //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because - //the AMDGPU instruction is not recognized as terminator fix this and retire - //this routine - static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk, - MachineBasicBlock *oldBlk, - MachineBasicBlock *newBlk) { - MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk); - if (branchInstr && isCondBranch(branchInstr) && - getTrueBranch(branchInstr) == oldBlk) { - setTrueBranch(branchInstr, newBlk); - } - } - - static void wrapup(MachineBasicBlock *entryBlk) { - assert((!entryBlk->getParent()->getJumpTableInfo() - || entryBlk->getParent()->getJumpTableInfo()->isEmpty()) - && "found a jump table"); - - //collect continue right before endloop - SmallVector contInstr; - MachineBasicBlock::iterator pre = entryBlk->begin(); - MachineBasicBlock::iterator iterEnd = entryBlk->end(); - MachineBasicBlock::iterator iter = pre; - while (iter != iterEnd) { - if (pre->getOpcode() == AMDGPU::CONTINUE - && iter->getOpcode() == AMDGPU::ENDLOOP) { - contInstr.push_back(pre); - } - pre = iter; - ++iter; - } //end while - - //delete continue right before endloop - for (unsigned i = 0; i < contInstr.size(); ++i) { - contInstr[i]->eraseFromParent(); - } - - // TODO to fix up jump table so later phase won't be confused. if - // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but - // there isn't such an interface yet. alternatively, replace all the other - // blocks in the jump table with the entryBlk //} - - } //wrapup - - static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) { - return &pass.getAnalysis(); - } - - static MachinePostDominatorTree* - getPostDominatorTree(AMDGPUCFGStructurizer &pass) { - return &pass.getAnalysis(); - } - - static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) { - return &pass.getAnalysis(); - } -}; // template class CFGStructTraits -} //end of namespace llvm - -// createAMDGPUCFGPreparationPass- Returns a pass -FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm - ) { - return new AMDGPUCFGPrepare(tm ); -} - -bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) { - return llvmCFGStruct::CFGStructurizer().prepare(func, - *this, - TRI); -} - -// createAMDGPUCFGStructurizerPass- Returns a pass -FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm - ) { - return new AMDGPUCFGPerform(tm ); -} +INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", + "AMDGPU CFG Structurizer", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer", + "AMDGPU CFG Structurizer", false, false) -bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) { - return llvmCFGStruct::CFGStructurizer().run(func, - *this, - TRI); +FunctionPass *llvm::createAMDGPUCFGStructurizerPass() { + return new AMDGPUCFGStructurizer(); }