R600: Replace legacy debug code in AMDILCFGStructurizer.cpp
[oota-llvm.git] / lib / Target / R600 / AMDILCFGStructurizer.cpp
1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 /// \file
9 //==-----------------------------------------------------------------------===//
10
11 #define DEBUG_TYPE "structcfg"
12
13 #include "AMDGPU.h"
14 #include "AMDGPUInstrInfo.h"
15 #include "llvm/Support/Debug.h"
16 #include "llvm/Support/raw_ostream.h"
17 #include "llvm/ADT/SCCIterator.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/DominatorInternals.h"
21 #include "llvm/Analysis/Dominators.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineJumpTableInfo.h"
28 #include "llvm/CodeGen/MachineLoopInfo.h"
29 #include "llvm/CodeGen/MachinePostDominators.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetMachine.h"
33
34 using namespace llvm;
35
36 #define DEFAULT_VEC_SLOTS 8
37
38 // TODO: move-begin.
39
40 //===----------------------------------------------------------------------===//
41 //
42 // Statistics for CFGStructurizer.
43 //
44 //===----------------------------------------------------------------------===//
45
46 STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
47     "matched");
48 STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
49     "matched");
50 STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
51     "pattern matched");
52 STATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
53     "pattern matched");
54 STATISTIC(numLoopPatternMatch,      "CFGStructurizer number of loop pattern "
55     "matched");
56 STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
57 STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
58
59 //===----------------------------------------------------------------------===//
60 //
61 // Miscellaneous utility for CFGStructurizer.
62 //
63 //===----------------------------------------------------------------------===//
64 namespace {
65 #define SHOWNEWINSTR(i) \
66   DEBUG(dbgs() << "New instr: " << *i << "\n");
67
68 #define SHOWNEWBLK(b, msg) \
69 DEBUG( \
70   dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
71   dbgs() << "\n"; \
72 );
73
74 #define SHOWBLK_DETAIL(b, msg) \
75 DEBUG( \
76   if (b) { \
77   dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
78   b->print(dbgs()); \
79   dbgs() << "\n"; \
80   } \
81 );
82
83 #define INVALIDSCCNUM -1
84 #define INVALIDREGNUM 0
85
86 template<class LoopinfoT>
87 void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
88   for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
89        iterEnd = LoopInfo.end();
90        iter != iterEnd; ++iter) {
91     (*iter)->print(OS, 0);
92   }
93 }
94
95 template<class NodeT>
96 void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
97   size_t sz = Src.size();
98   for (size_t i = 0; i < sz/2; ++i) {
99     NodeT *t = Src[i];
100     Src[i] = Src[sz - i - 1];
101     Src[sz - i - 1] = t;
102   }
103 }
104
105 } // end anonymous namespace
106
107 //===----------------------------------------------------------------------===//
108 //
109 // supporting data structure for CFGStructurizer
110 //
111 //===----------------------------------------------------------------------===//
112
113 namespace {
114 template<class PassT>
115 struct CFGStructTraits {
116 };
117
118 template <class InstrT>
119 class BlockInformation {
120 public:
121   bool isRetired;
122   int  sccNum;
123   //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
124   //Instructions defining the corresponding successor.
125   BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
126 };
127
128 template <class BlockT, class InstrT, class RegiT>
129 class LandInformation {
130 public:
131   BlockT *landBlk;
132   std::set<RegiT> breakInitRegs;  //Registers that need to "reg = 0", before
133                                   //WHILELOOP(thisloop) init before entering
134                                   //thisloop.
135   std::set<RegiT> contInitRegs;   //Registers that need to "reg = 0", after
136                                   //WHILELOOP(thisloop) init after entering
137                                   //thisloop.
138   std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
139                                      //land block, branch cond on this reg.
140   std::set<RegiT> breakOnRegs;       //registers that need to "if (reg) break
141                                      //endif" after ENDLOOP(thisloop) break
142                                      //outerLoopOf(thisLoop).
143   std::set<RegiT> contOnRegs;       //registers that need to "if (reg) continue
144                                     //endif" after ENDLOOP(thisloop) continue on
145                                     //outerLoopOf(thisLoop).
146   LandInformation() : landBlk(NULL) {}
147 };
148
149 } // end anonymous namespace
150
151 //===----------------------------------------------------------------------===//
152 //
153 // CFGStructurizer
154 //
155 //===----------------------------------------------------------------------===//
156
157 namespace {
158 // bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
159 template<class PassT>
160 class  CFGStructurizer {
161 public:
162   typedef enum {
163     Not_SinglePath = 0,
164     SinglePath_InPath = 1,
165     SinglePath_NotInPath = 2
166   } PathToKind;
167
168 public:
169   typedef typename PassT::InstructionType         InstrT;
170   typedef typename PassT::FunctionType            FuncT;
171   typedef typename PassT::DominatortreeType       DomTreeT;
172   typedef typename PassT::PostDominatortreeType   PostDomTreeT;
173   typedef typename PassT::DomTreeNodeType         DomTreeNodeT;
174   typedef typename PassT::LoopinfoType            LoopInfoT;
175
176   typedef GraphTraits<FuncT *>                    FuncGTraits;
177   //typedef FuncGTraits::nodes_iterator BlockIterator;
178   typedef typename FuncT::iterator                BlockIterator;
179
180   typedef typename FuncGTraits::NodeType          BlockT;
181   typedef GraphTraits<BlockT *>                   BlockGTraits;
182   typedef GraphTraits<Inverse<BlockT *> >         InvBlockGTraits;
183   //typedef BlockGTraits::succ_iterator InstructionIterator;
184   typedef typename BlockT::iterator               InstrIterator;
185
186   typedef CFGStructTraits<PassT>                  CFGTraits;
187   typedef BlockInformation<InstrT>                BlockInfo;
188   typedef std::map<BlockT *, BlockInfo *>         BlockInfoMap;
189
190   typedef int                                     RegiT;
191   typedef typename PassT::LoopType                LoopT;
192   typedef LandInformation<BlockT, InstrT, RegiT>  LoopLandInfo;
193         typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
194         //landing info for loop break
195   typedef SmallVector<BlockT *, 32>               BlockTSmallerVector;
196
197 public:
198   CFGStructurizer();
199   ~CFGStructurizer();
200
201   /// Perform the CFG structurization
202   bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
203
204   /// Perform the CFG preparation
205   bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
206
207 private:
208   void reversePredicateSetter(typename BlockT::iterator);
209   void   orderBlocks();
210   void   printOrderedBlocks(llvm::raw_ostream &OS);
211   int patternMatch(BlockT *CurBlock);
212   int patternMatchGroup(BlockT *CurBlock);
213
214   int serialPatternMatch(BlockT *CurBlock);
215   int ifPatternMatch(BlockT *CurBlock);
216   int switchPatternMatch(BlockT *CurBlock);
217   int loopendPatternMatch(BlockT *CurBlock);
218   int loopPatternMatch(BlockT *CurBlock);
219
220   int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
221   int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
222   //int loopWithoutBreak(BlockT *);
223
224   void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
225                         BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
226   void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
227                            BlockT *ContBlock, LoopT *contLoop);
228   bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
229   int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
230                        BlockT *FalseBlock);
231   int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
232                           BlockT *FalseBlock);
233   int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
234                               BlockT *FalseBlock, BlockT **LandBlockPtr);
235   void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
236                                    BlockT *FalseBlock, BlockT *LandBlock,
237                                    bool Detail = false);
238   PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
239                           bool AllowSideEntry = true);
240   BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
241                         bool AllowSideEntry = true);
242   int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
243   void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
244
245   void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
246                             BlockT *TrueBlock, BlockT *FalseBlock,
247                             BlockT *LandBlock);
248   void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
249   void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
250                            BlockT *ExitLandBlock, RegiT SetReg);
251   void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
252                            RegiT SetReg);
253   BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
254                                 std::set<BlockT*> &ExitBlockSet,
255                                 BlockT *ExitLandBlk);
256   BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
257                                 BlockTSmallerVector &ExitingBlocks,
258                                 BlockTSmallerVector &ExitBlocks);
259   BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
260   void removeUnconditionalBranch(BlockT *SrcBlock);
261   void removeRedundantConditionalBranch(BlockT *SrcBlock);
262   void addDummyExitBlock(SmallVectorImpl<BlockT *> &RetBlocks);
263
264   void removeSuccessor(BlockT *SrcBlock);
265   BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
266   BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
267
268   void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
269                           InstrIterator InsertPos);
270
271   void recordSccnum(BlockT *SrcBlock, int SCCNum);
272   int getSCCNum(BlockT *srcBlk);
273
274   void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
275   bool isRetiredBlock(BlockT *SrcBlock);
276   bool isActiveLoophead(BlockT *CurBlock);
277   bool needMigrateBlock(BlockT *Block);
278
279   BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
280                               BlockTSmallerVector &exitBlocks,
281                               std::set<BlockT*> &ExitBlockSet);
282   void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
283   BlockT *getLoopLandBlock(LoopT *LoopRep);
284   LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
285
286   void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
287   void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
288   void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
289   void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
290   void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
291
292   bool hasBackEdge(BlockT *curBlock);
293   unsigned getLoopDepth  (LoopT *LoopRep);
294   int countActiveBlock(
295     typename SmallVectorImpl<BlockT *>::const_iterator IterStart,
296     typename SmallVectorImpl<BlockT *>::const_iterator IterEnd);
297     BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
298   BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
299
300 private:
301   DomTreeT *domTree;
302   PostDomTreeT *postDomTree;
303   LoopInfoT *loopInfo;
304   PassT *passRep;
305   FuncT *funcRep;
306
307   BlockInfoMap blockInfoMap;
308   LoopLandInfoMap loopLandInfoMap;
309   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
310   const AMDGPURegisterInfo *TRI;
311
312 };  //template class CFGStructurizer
313
314 template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
315   : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
316 }
317
318 template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
319   for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
320        E = blockInfoMap.end(); I != E; ++I) {
321     delete I->second;
322   }
323 }
324
325 template<class PassT>
326 bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
327                                      const AMDGPURegisterInfo * tri) {
328   passRep = &pass;
329   funcRep = &func;
330   TRI = tri;
331
332   bool changed = false;
333
334   //FIXME: if not reducible flow graph, make it so ???
335
336   DEBUG(
337         dbgs() << "AMDGPUCFGStructurizer::prepare\n";
338   );
339
340   loopInfo = CFGTraits::getLoopInfo(pass);
341   DEBUG(
342     dbgs() << "LoopInfo:\n";
343     PrintLoopinfo(*loopInfo, dbgs());
344   );
345
346   orderBlocks();
347   DEBUG(
348     for (typename SmallVectorImpl<BlockT *>::const_iterator
349         iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
350         iterBlk != iterBlkEnd;
351         ++iterBlk) {
352       (*iterBlk)->dump();
353     }
354     dbgs() << "Ordered blocks:\n";
355     printOrderedBlocks(dbgs());
356   );
357
358   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
359
360   for (typename LoopInfoT::iterator iter = loopInfo->begin(),
361        iterEnd = loopInfo->end();
362        iter != iterEnd; ++iter) {
363     LoopT* loopRep = (*iter);
364     BlockTSmallerVector exitingBlks;
365     loopRep->getExitingBlocks(exitingBlks);
366     
367     if (exitingBlks.size() == 0) {
368       BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
369       if (dummyExitBlk != NULL)
370         retBlks.push_back(dummyExitBlk);
371     }
372   }
373
374   // Remove unconditional branch instr.
375   // Add dummy exit block iff there are multiple returns.
376
377   for (typename SmallVectorImpl<BlockT *>::const_iterator
378        iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
379        iterBlk != iterEndBlk;
380        ++iterBlk) {
381     BlockT *curBlk = *iterBlk;
382     removeUnconditionalBranch(curBlk);
383     removeRedundantConditionalBranch(curBlk);
384     if (CFGTraits::isReturnBlock(curBlk)) {
385       retBlks.push_back(curBlk);
386     }
387     assert(curBlk->succ_size() <= 2);
388   } //for
389
390   if (retBlks.size() >= 2) {
391     addDummyExitBlock(retBlks);
392     changed = true;
393   }
394
395   return changed;
396 } //CFGStructurizer::prepare
397
398 template<class PassT>
399 bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
400     const AMDGPURegisterInfo * tri) {
401   passRep = &pass;
402   funcRep = &func;
403   TRI = tri;
404
405   //Assume reducible CFG...
406   DEBUG(
407     dbgs() << "AMDGPUCFGStructurizer::run\n";
408     func.viewCFG();
409   );
410
411   domTree = CFGTraits::getDominatorTree(pass);
412   DEBUG(
413     domTree->print(dbgs(), (const llvm::Module*)0);
414   );
415
416   postDomTree = CFGTraits::getPostDominatorTree(pass);
417   DEBUG(
418     postDomTree->print(dbgs());
419   );
420
421   loopInfo = CFGTraits::getLoopInfo(pass);
422   DEBUG(
423     dbgs() << "LoopInfo:\n";
424     PrintLoopinfo(*loopInfo, dbgs());
425   );
426
427   orderBlocks();
428 #ifdef STRESSTEST
429   //Use the worse block ordering to test the algorithm.
430   ReverseVector(orderedBlks);
431 #endif
432
433   DEBUG(
434     dbgs() << "Ordered blocks:\n";
435     printOrderedBlocks(dbgs());
436   );
437   int numIter = 0;
438   bool finish = false;
439   BlockT *curBlk;
440   bool makeProgress = false;
441   int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
442                                         orderedBlks.end());
443
444   do {
445     ++numIter;
446     DEBUG(
447       dbgs() << "numIter = " << numIter
448              << ", numRemaintedBlk = " << numRemainedBlk << "\n";
449     );
450
451     typename SmallVectorImpl<BlockT *>::const_iterator
452       iterBlk = orderedBlks.begin();
453     typename SmallVectorImpl<BlockT *>::const_iterator
454       iterBlkEnd = orderedBlks.end();
455
456     typename SmallVectorImpl<BlockT *>::const_iterator
457       sccBeginIter = iterBlk;
458     BlockT *sccBeginBlk = NULL;
459     int sccNumBlk = 0;  // The number of active blocks, init to a
460                         // maximum possible number.
461     int sccNumIter;     // Number of iteration in this SCC.
462
463     while (iterBlk != iterBlkEnd) {
464       curBlk = *iterBlk;
465
466       if (sccBeginBlk == NULL) {
467         sccBeginIter = iterBlk;
468         sccBeginBlk = curBlk;
469         sccNumIter = 0;
470         sccNumBlk = numRemainedBlk; // Init to maximum possible number.
471         DEBUG(
472               dbgs() << "start processing SCC" << getSCCNum(sccBeginBlk);
473               dbgs() << "\n";
474         );
475       }
476
477       if (!isRetiredBlock(curBlk)) {
478         patternMatch(curBlk);
479       }
480
481       ++iterBlk;
482
483       bool contNextScc = true;
484       if (iterBlk == iterBlkEnd
485           || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
486         // Just finish one scc.
487         ++sccNumIter;
488         int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
489         if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
490           DEBUG(
491             dbgs() << "Can't reduce SCC " << getSCCNum(curBlk)
492                    << ", sccNumIter = " << sccNumIter;
493             dbgs() << "doesn't make any progress\n";
494           );
495           contNextScc = true;
496         } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
497           sccNumBlk = sccRemainedNumBlk;
498           iterBlk = sccBeginIter;
499           contNextScc = false;
500           DEBUG(
501             dbgs() << "repeat processing SCC" << getSCCNum(curBlk)
502                    << "sccNumIter = " << sccNumIter << "\n";
503             func.viewCFG();
504           );
505         } else {
506           // Finish the current scc.
507           contNextScc = true;
508         }
509       } else {
510         // Continue on next component in the current scc.
511         contNextScc = false;
512       }
513
514       if (contNextScc) {
515         sccBeginBlk = NULL;
516       }
517     } //while, "one iteration" over the function.
518
519     BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
520     if (entryBlk->succ_size() == 0) {
521       finish = true;
522       DEBUG(
523         dbgs() << "Reduce to one block\n";
524       );
525     } else {
526       int newnumRemainedBlk
527         = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
528       // consider cloned blocks ??
529       if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
530         makeProgress = true;
531         numRemainedBlk = newnumRemainedBlk;
532       } else {
533         makeProgress = false;
534         DEBUG(
535           dbgs() << "No progress\n";
536         );
537       }
538     }
539   } while (!finish && makeProgress);
540
541   // Misc wrap up to maintain the consistency of the Function representation.
542   CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
543
544   // Detach retired Block, release memory.
545   for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
546        iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
547     if ((*iterMap).second && (*iterMap).second->isRetired) {
548       assert(((*iterMap).first)->getNumber() != -1);
549       DEBUG(
550         dbgs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
551       );
552       (*iterMap).first->eraseFromParent();  //Remove from the parent Function.
553     }
554     delete (*iterMap).second;
555   }
556   blockInfoMap.clear();
557
558   // clear loopLandInfoMap
559   for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
560        iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
561     delete (*iterMap).second;
562   }
563   loopLandInfoMap.clear();
564
565   DEBUG(
566     func.viewCFG();
567   );
568
569   if (!finish) {
570     llvm_unreachable("IRREDUCIBL_CF");
571   }
572
573   return true;
574 } //CFGStructurizer::run
575
576 /// Print the ordered Blocks.
577 ///
578 template<class PassT>
579 void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
580   size_t i = 0;
581   for (typename SmallVectorImpl<BlockT *>::const_iterator
582       iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
583        iterBlk != iterBlkEnd;
584        ++iterBlk, ++i) {
585     os << "BB" << (*iterBlk)->getNumber();
586     os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
587     if (i != 0 && i % 10 == 0) {
588       os << "\n";
589     } else {
590       os << " ";
591     }
592   }
593 } //printOrderedBlocks
594
595 /// Compute the reversed DFS post order of Blocks
596 ///
597 template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
598   int sccNum = 0;
599   BlockT *bb;
600   for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
601        sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
602     std::vector<BlockT *> &sccNext = *sccIter;
603     for (typename std::vector<BlockT *>::const_iterator
604          blockIter = sccNext.begin(), blockEnd = sccNext.end();
605          blockIter != blockEnd; ++blockIter) {
606       bb = *blockIter;
607       orderedBlks.push_back(bb);
608       recordSccnum(bb, sccNum);
609     }
610   }
611
612   //walk through all the block in func to check for unreachable
613   for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
614        blockEnd1 = FuncGTraits::nodes_end(funcRep);
615        blockIter1 != blockEnd1; ++blockIter1) {
616     BlockT *bb = &(*blockIter1);
617     sccNum = getSCCNum(bb);
618     if (sccNum == INVALIDSCCNUM) {
619       dbgs() << "unreachable block BB" << bb->getNumber() << "\n";
620     }
621   }
622 } //orderBlocks
623
624 template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
625   int numMatch = 0;
626   int curMatch;
627
628   DEBUG(
629         dbgs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
630   );
631
632   while ((curMatch = patternMatchGroup(curBlk)) > 0) {
633     numMatch += curMatch;
634   }
635
636   DEBUG(
637         dbgs() << "End patternMatch BB" << curBlk->getNumber()
638       << ", numMatch = " << numMatch << "\n";
639   );
640
641   return numMatch;
642 } //patternMatch
643
644 template<class PassT>
645 int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
646   int numMatch = 0;
647   numMatch += serialPatternMatch(curBlk);
648   numMatch += ifPatternMatch(curBlk);
649   numMatch += loopendPatternMatch(curBlk);
650   numMatch += loopPatternMatch(curBlk);
651   return numMatch;
652 }//patternMatchGroup
653
654 template<class PassT>
655 int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
656   if (curBlk->succ_size() != 1) {
657     return 0;
658   }
659
660   BlockT *childBlk = *curBlk->succ_begin();
661   if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
662     return 0;
663   }
664
665   mergeSerialBlock(curBlk, childBlk);
666   ++numSerialPatternMatch;
667   return 1;
668 } //serialPatternMatch
669
670 template<class PassT>
671 int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
672   //two edges
673   if (curBlk->succ_size() != 2) {
674     return 0;
675   }
676
677   if (hasBackEdge(curBlk)) {
678     return 0;
679   }
680
681   InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
682   if (branchInstr == NULL) {
683     return 0;
684   }
685
686   assert(CFGTraits::isCondBranch(branchInstr));
687
688   BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
689   BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
690   BlockT *landBlk;
691   int cloned = 0;
692
693   // TODO: Simplify
694   if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
695     && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
696     landBlk = *trueBlk->succ_begin();
697   } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
698     landBlk = NULL;
699   } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
700     landBlk = falseBlk;
701     falseBlk = NULL;
702   } else if (falseBlk->succ_size() == 1
703              && *falseBlk->succ_begin() == trueBlk) {
704     landBlk = trueBlk;
705     trueBlk = NULL;
706   } else if (falseBlk->succ_size() == 1
707              && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
708     landBlk = *falseBlk->succ_begin();
709   } else if (trueBlk->succ_size() == 1
710     && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
711     landBlk = *trueBlk->succ_begin();
712   } else {
713     return handleJumpintoIf(curBlk, trueBlk, falseBlk);
714   }
715
716   // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
717   // new BB created for landBlk==NULL may introduce new challenge to the
718   // reduction process.
719   if (landBlk != NULL &&
720       ((trueBlk && trueBlk->pred_size() > 1)
721       || (falseBlk && falseBlk->pred_size() > 1))) {
722      cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
723   }
724
725   if (trueBlk && trueBlk->pred_size() > 1) {
726     trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
727     ++cloned;
728   }
729
730   if (falseBlk && falseBlk->pred_size() > 1) {
731     falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
732     ++cloned;
733   }
734
735   mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
736
737   ++numIfPatternMatch;
738
739   numClonedBlock += cloned;
740
741   return 1 + cloned;
742 } //ifPatternMatch
743
744 template<class PassT>
745 int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
746   return 0;
747 } //switchPatternMatch
748
749 template<class PassT>
750 int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
751   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
752   typename std::vector<LoopT *> nestedLoops;
753   while (loopRep) {
754     nestedLoops.push_back(loopRep);
755     loopRep = loopRep->getParentLoop();
756   }
757
758   if (nestedLoops.size() == 0) {
759     return 0;
760   }
761
762   // Process nested loop outside->inside, so "continue" to a outside loop won't
763   // be mistaken as "break" of the current loop.
764   int num = 0;
765   for (typename std::vector<LoopT *>::reverse_iterator
766        iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
767        iter != iterEnd; ++iter) {
768     loopRep = *iter;
769
770     if (getLoopLandBlock(loopRep) != NULL) {
771       continue;
772     }
773
774     BlockT *loopHeader = loopRep->getHeader();
775
776     int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
777
778     if (numBreak == -1) {
779       break;
780     }
781
782     int numCont = loopcontPatternMatch(loopRep, loopHeader);
783     num += numBreak + numCont;
784   }
785
786   return num;
787 } //loopendPatternMatch
788
789 template<class PassT>
790 int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
791   if (curBlk->succ_size() != 0) {
792     return 0;
793   }
794
795   int numLoop = 0;
796   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
797   while (loopRep && loopRep->getHeader() == curBlk) {
798     LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
799     if (loopLand) {
800       BlockT *landBlk = loopLand->landBlk;
801       assert(landBlk);
802       if (!isRetiredBlock(landBlk)) {
803         mergeLooplandBlock(curBlk, loopLand);
804         ++numLoop;
805       }
806     }
807     loopRep = loopRep->getParentLoop();
808   }
809
810   numLoopPatternMatch += numLoop;
811
812   return numLoop;
813 } //loopPatternMatch
814
815 template<class PassT>
816 int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
817                                                   BlockT *loopHeader) {
818   BlockTSmallerVector exitingBlks;
819   loopRep->getExitingBlocks(exitingBlks);
820
821   DEBUG(
822     dbgs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
823   );
824
825   if (exitingBlks.size() == 0) {
826     setLoopLandBlock(loopRep);
827     return 0;
828   }
829
830   // Compute the corresponding exitBlks and exit block set.
831   BlockTSmallerVector exitBlks;
832   std::set<BlockT *> exitBlkSet;
833   for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
834        iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
835     BlockT *exitingBlk = *iter;
836     BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
837     exitBlks.push_back(exitBlk);
838     exitBlkSet.insert(exitBlk);  //non-duplicate insert
839   }
840
841   assert(exitBlkSet.size() > 0);
842   assert(exitBlks.size() == exitingBlks.size());
843
844   DEBUG(
845     dbgs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
846   );
847
848   // Find exitLandBlk.
849   BlockT *exitLandBlk = NULL;
850   int numCloned = 0;
851   int numSerial = 0;
852
853   if (exitBlkSet.size() == 1) {
854     exitLandBlk = *exitBlkSet.begin();
855   } else {
856     exitLandBlk = findNearestCommonPostDom(exitBlkSet);
857
858     if (exitLandBlk == NULL) {
859       return -1;
860     }
861
862     bool allInPath = true;
863     bool allNotInPath = true;
864     for (typename std::set<BlockT*>::const_iterator
865          iter = exitBlkSet.begin(),
866          iterEnd = exitBlkSet.end();
867          iter != iterEnd; ++iter) {
868       BlockT *exitBlk = *iter;
869
870       PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
871       DEBUG(
872         dbgs() << "BB" << exitBlk->getNumber()
873                << " to BB" << exitLandBlk->getNumber() << " PathToKind="
874                << pathKind << "\n";
875       );
876
877       allInPath = allInPath && (pathKind == SinglePath_InPath);
878       allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
879
880       if (!allInPath && !allNotInPath) {
881         DEBUG(
882               dbgs() << "singlePath check fail\n";
883         );
884         return -1;
885       }
886     } // check all exit blocks
887
888     if (allNotInPath) {
889
890       // TODO: Simplify, maybe separate function?
891       LoopT *parentLoopRep = loopRep->getParentLoop();
892       BlockT *parentLoopHeader = NULL;
893       if (parentLoopRep)
894         parentLoopHeader = parentLoopRep->getHeader();
895
896       if (exitLandBlk == parentLoopHeader &&
897           (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
898                                                loopRep,
899                                                exitBlkSet,
900                                                exitLandBlk)) != NULL) {
901         DEBUG(
902           dbgs() << "relocateLoopcontBlock success\n";
903         );
904       } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
905                                                       exitingBlks,
906                                                       exitBlks)) != NULL) {
907         DEBUG(
908           dbgs() << "insertEndbranchBlock success\n";
909         );
910       } else {
911         DEBUG(
912           dbgs() << "loop exit fail\n";
913         );
914         return -1;
915       }
916     }
917
918     // Handle side entry to exit path.
919     exitBlks.clear();
920     exitBlkSet.clear();
921     for (typename BlockTSmallerVector::iterator iterExiting =
922            exitingBlks.begin(),
923          iterExitingEnd = exitingBlks.end();
924          iterExiting != iterExitingEnd; ++iterExiting) {
925       BlockT *exitingBlk = *iterExiting;
926       BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
927       BlockT *newExitBlk = exitBlk;
928
929       if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
930         newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
931         ++numCloned;
932       }
933
934       numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
935
936       exitBlks.push_back(newExitBlk);
937       exitBlkSet.insert(newExitBlk);
938     }
939
940     for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
941          iterExitEnd = exitBlks.end();
942          iterExit != iterExitEnd; ++iterExit) {
943       BlockT *exitBlk = *iterExit;
944       numSerial += serialPatternMatch(exitBlk);
945     }
946
947     for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
948          iterExitEnd = exitBlks.end();
949          iterExit != iterExitEnd; ++iterExit) {
950       BlockT *exitBlk = *iterExit;
951       if (exitBlk->pred_size() > 1) {
952         if (exitBlk != exitLandBlk) {
953           return -1;
954         }
955       } else {
956         if (exitBlk != exitLandBlk &&
957             (exitBlk->succ_size() != 1 ||
958             *exitBlk->succ_begin() != exitLandBlk)) {
959           return -1;
960         }
961       }
962     }
963   } // else
964
965   exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
966
967   // Fold break into the breaking block. Leverage across level breaks.
968   assert(exitingBlks.size() == exitBlks.size());
969   for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
970        iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
971        iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
972     BlockT *exitBlk = *iterExit;
973     BlockT *exitingBlk = *iterExiting;
974     assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
975     LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
976     handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
977   }
978
979   int numBreak = static_cast<int>(exitingBlks.size());
980   numLoopbreakPatternMatch += numBreak;
981   numClonedBlock += numCloned;
982   return numBreak + numSerial + numCloned;
983 } //loopbreakPatternMatch
984
985 template<class PassT>
986 int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
987                                                  BlockT *loopHeader) {
988   int numCont = 0;
989   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
990   for (typename InvBlockGTraits::ChildIteratorType iter =
991        InvBlockGTraits::child_begin(loopHeader),
992        iterEnd = InvBlockGTraits::child_end(loopHeader);
993        iter != iterEnd; ++iter) {
994     BlockT *curBlk = *iter;
995     if (loopRep->contains(curBlk)) {
996       handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
997                           loopHeader, loopRep);
998       contBlk.push_back(curBlk);
999       ++numCont;
1000     }
1001   }
1002
1003   for (typename SmallVectorImpl<BlockT *>::iterator
1004        iter = contBlk.begin(), iterEnd = contBlk.end();
1005        iter != iterEnd; ++iter) {
1006     (*iter)->removeSuccessor(loopHeader);
1007   }
1008
1009   numLoopcontPatternMatch += numCont;
1010
1011   return numCont;
1012 } //loopcontPatternMatch
1013
1014
1015 template<class PassT>
1016 bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
1017                                                          BlockT *src2Blk) {
1018   // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1019   // same loop with LoopLandInfo without explicitly keeping track of
1020   // loopContBlks and loopBreakBlks, this is a method to get the information.
1021   //
1022   if (src1Blk->succ_size() == 0) {
1023     LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
1024     if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
1025       LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
1026       if (theEntry != NULL) {
1027         DEBUG(
1028           dbgs() << "isLoopContBreakBlock yes src1 = BB"
1029                  << src1Blk->getNumber()
1030                  << " src2 = BB" << src2Blk->getNumber() << "\n";
1031         );
1032         return true;
1033       }
1034     }
1035   }
1036   return false;
1037 }  //isSameloopDetachedContbreak
1038
1039 template<class PassT>
1040 int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
1041                                              BlockT *trueBlk,
1042                                              BlockT *falseBlk) {
1043   int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
1044   if (num == 0) {
1045     DEBUG(
1046       dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1047     );
1048     num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
1049   }
1050   return num;
1051 }
1052
1053 template<class PassT>
1054 int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
1055                                                 BlockT *trueBlk,
1056                                                 BlockT *falseBlk) {
1057   int num = 0;
1058   BlockT *downBlk;
1059
1060   //trueBlk could be the common post dominator
1061   downBlk = trueBlk;
1062
1063   DEBUG(
1064     dbgs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
1065            << " true = BB" << trueBlk->getNumber()
1066            << ", numSucc=" << trueBlk->succ_size()
1067            << " false = BB" << falseBlk->getNumber() << "\n";
1068   );
1069
1070   while (downBlk) {
1071     DEBUG(
1072       dbgs() << "check down = BB" << downBlk->getNumber();
1073     );
1074
1075     if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
1076       DEBUG(
1077         dbgs() << " working\n";
1078       );
1079
1080       num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
1081       num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
1082
1083       numClonedBlock += num;
1084       num += serialPatternMatch(*headBlk->succ_begin());
1085       num += serialPatternMatch(*(++headBlk->succ_begin()));
1086       num += ifPatternMatch(headBlk);
1087       assert(num > 0);
1088
1089       break;
1090     }
1091     DEBUG(
1092       dbgs() << " not working\n";
1093     );
1094     downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
1095   } // walk down the postDomTree
1096
1097   return num;
1098 } //handleJumpintoIf
1099
1100 template<class PassT>
1101 void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
1102                                                          BlockT *trueBlk,
1103                                                          BlockT *falseBlk,
1104                                                          BlockT *landBlk,
1105                                                          bool detail) {
1106   dbgs() << "head = BB" << headBlk->getNumber()
1107          << " size = " << headBlk->size();
1108   if (detail) {
1109     dbgs() << "\n";
1110     headBlk->print(dbgs());
1111     dbgs() << "\n";
1112   }
1113
1114   if (trueBlk) {
1115     dbgs() << ", true = BB" << trueBlk->getNumber() << " size = "
1116            << trueBlk->size() << " numPred = " << trueBlk->pred_size();
1117     if (detail) {
1118       dbgs() << "\n";
1119       trueBlk->print(dbgs());
1120       dbgs() << "\n";
1121     }
1122   }
1123   if (falseBlk) {
1124     dbgs() << ", false = BB" << falseBlk->getNumber() << " size = "
1125            << falseBlk->size() << " numPred = " << falseBlk->pred_size();
1126     if (detail) {
1127       dbgs() << "\n";
1128       falseBlk->print(dbgs());
1129       dbgs() << "\n";
1130     }
1131   }
1132   if (landBlk) {
1133     dbgs() << ", land = BB" << landBlk->getNumber() << " size = "
1134            << landBlk->size() << " numPred = " << landBlk->pred_size();
1135     if (detail) {
1136       dbgs() << "\n";
1137       landBlk->print(dbgs());
1138       dbgs() << "\n";
1139     }
1140   }
1141
1142     dbgs() << "\n";
1143 } //showImproveSimpleJumpintoIf
1144
1145 template<class PassT>
1146 int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
1147                                                     BlockT *trueBlk,
1148                                                     BlockT *falseBlk,
1149                                                     BlockT **plandBlk) {
1150   bool migrateTrue = false;
1151   bool migrateFalse = false;
1152
1153   BlockT *landBlk = *plandBlk;
1154
1155   assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
1156          && (falseBlk == NULL || falseBlk->succ_size() <= 1));
1157
1158   if (trueBlk == falseBlk) {
1159     return 0;
1160   }
1161
1162   migrateTrue = needMigrateBlock(trueBlk);
1163   migrateFalse = needMigrateBlock(falseBlk);
1164
1165   if (!migrateTrue && !migrateFalse) {
1166     return 0;
1167   }
1168
1169   // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1170   // have more than one predecessors.  without doing this, its predecessor
1171   // rather than headBlk will have undefined value in initReg.
1172   if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
1173     migrateTrue = true;
1174   }
1175   if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
1176     migrateFalse = true;
1177   }
1178
1179   DEBUG(
1180     dbgs() << "before improveSimpleJumpintoIf: ";
1181     showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1182   );
1183
1184   // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1185   //
1186   // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1187   //      {initReg = 0; org falseBlk branch }
1188   //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1189   //      => org landBlk
1190   //      if landBlk->pred_size() > 2, put the about if-else inside
1191   //      if (initReg !=2) {...}
1192   //
1193   // add initReg = initVal to headBlk
1194
1195   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1196   unsigned initReg =
1197     funcRep->getRegInfo().createVirtualRegister(I32RC);
1198   if (!migrateTrue || !migrateFalse) {
1199     int initVal = migrateTrue ? 0 : 1;
1200     CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
1201   }
1202
1203   int numNewBlk = 0;
1204
1205   if (landBlk == NULL) {
1206     landBlk = funcRep->CreateMachineBasicBlock();
1207     funcRep->push_back(landBlk);  //insert to function
1208
1209     if (trueBlk) {
1210       trueBlk->addSuccessor(landBlk);
1211     } else {
1212       headBlk->addSuccessor(landBlk);
1213     }
1214
1215     if (falseBlk) {
1216       falseBlk->addSuccessor(landBlk);
1217     } else {
1218       headBlk->addSuccessor(landBlk);
1219     }
1220
1221     numNewBlk ++;
1222   }
1223
1224   bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
1225
1226   //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1227   typename BlockT::iterator insertPos =
1228     CFGTraits::getInstrPos
1229     (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
1230
1231   if (landBlkHasOtherPred) {
1232     unsigned immReg =
1233       funcRep->getRegInfo().createVirtualRegister(I32RC);
1234     CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
1235     unsigned cmpResReg =
1236       funcRep->getRegInfo().createVirtualRegister(I32RC);
1237
1238     CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
1239                                         initReg, immReg);
1240     CFGTraits::insertCondBranchBefore(landBlk, insertPos,
1241                                       AMDGPU::IF_PREDICATE_SET, passRep,
1242                                       cmpResReg, DebugLoc());
1243   }
1244
1245   CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
1246                                     passRep, initReg, DebugLoc());
1247
1248   if (migrateTrue) {
1249     migrateInstruction(trueBlk, landBlk, insertPos);
1250     // need to uncondionally insert the assignment to ensure a path from its
1251     // predecessor rather than headBlk has valid value in initReg if
1252     // (initVal != 1).
1253     CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
1254   }
1255   CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
1256
1257   if (migrateFalse) {
1258     migrateInstruction(falseBlk, landBlk, insertPos);
1259     // need to uncondionally insert the assignment to ensure a path from its
1260     // predecessor rather than headBlk has valid value in initReg if
1261     // (initVal != 0)
1262     CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
1263   }
1264
1265   if (landBlkHasOtherPred) {
1266     // add endif
1267     CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1268
1269     // put initReg = 2 to other predecessors of landBlk
1270     for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
1271          predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
1272          ++predIter) {
1273       BlockT *curBlk = *predIter;
1274       if (curBlk != trueBlk && curBlk != falseBlk) {
1275         CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
1276       }
1277     } //for
1278   }
1279   DEBUG(
1280     dbgs() << "result from improveSimpleJumpintoIf: ";
1281     showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1282   );
1283
1284   // update landBlk
1285   *plandBlk = landBlk;
1286
1287   return numNewBlk;
1288 } //improveSimpleJumpintoIf
1289
1290 template<class PassT>
1291 void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
1292                                               LoopT *exitingLoop,
1293                                              BlockT *exitBlk,
1294                                               LoopT *exitLoop,
1295                                              BlockT *landBlk) {
1296   DEBUG(
1297     dbgs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
1298            << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
1299   );
1300   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1301
1302   RegiT initReg = INVALIDREGNUM;
1303   if (exitingLoop != exitLoop) {
1304     initReg = static_cast<int>
1305       (funcRep->getRegInfo().createVirtualRegister(I32RC));
1306     assert(initReg != INVALIDREGNUM);
1307     addLoopBreakInitReg(exitLoop, initReg);
1308     while (exitingLoop != exitLoop && exitingLoop) {
1309       addLoopBreakOnReg(exitingLoop, initReg);
1310       exitingLoop = exitingLoop->getParentLoop();
1311     }
1312     assert(exitingLoop == exitLoop);
1313   }
1314
1315   mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
1316
1317 } //handleLoopbreak
1318
1319 template<class PassT>
1320 void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
1321                                                   LoopT *contingLoop,
1322                                                  BlockT *contBlk,
1323                                                   LoopT *contLoop) {
1324   DEBUG(
1325     dbgs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
1326            << " header = BB" << contBlk->getNumber() << "\n";
1327
1328     dbgs() << "Trying to continue loop-depth = "
1329            << getLoopDepth(contLoop)
1330            << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
1331   );
1332
1333   RegiT initReg = INVALIDREGNUM;
1334   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1335   if (contingLoop != contLoop) {
1336     initReg = static_cast<int>
1337       (funcRep->getRegInfo().createVirtualRegister(I32RC));
1338     assert(initReg != INVALIDREGNUM);
1339     addLoopContInitReg(contLoop, initReg);
1340     while (contingLoop && contingLoop->getParentLoop() != contLoop) {
1341       addLoopBreakOnReg(contingLoop, initReg);  //not addLoopContOnReg
1342       contingLoop = contingLoop->getParentLoop();
1343     }
1344     assert(contingLoop && contingLoop->getParentLoop() == contLoop);
1345     addLoopContOnReg(contingLoop, initReg);
1346   }
1347
1348   settleLoopcontBlock(contingBlk, contBlk, initReg);
1349 } //handleLoopcontBlock
1350
1351 template<class PassT>
1352 void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
1353   DEBUG(
1354     dbgs() << "serialPattern BB" << dstBlk->getNumber()
1355            << " <= BB" << srcBlk->getNumber() << "\n";
1356   );
1357   dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
1358
1359   dstBlk->removeSuccessor(srcBlk);
1360   CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
1361
1362   removeSuccessor(srcBlk);
1363   retireBlock(dstBlk, srcBlk);
1364 } //mergeSerialBlock
1365
1366 template<class PassT>
1367 void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
1368                                                   BlockT *curBlk,
1369                                                   BlockT *trueBlk,
1370                                                   BlockT *falseBlk,
1371                                                   BlockT *landBlk) {
1372   DEBUG(
1373     dbgs() << "ifPattern BB" << curBlk->getNumber();
1374     dbgs() << "{  ";
1375     if (trueBlk) {
1376       dbgs() << "BB" << trueBlk->getNumber();
1377     }
1378     dbgs() << "  } else ";
1379     dbgs() << "{  ";
1380     if (falseBlk) {
1381       dbgs() << "BB" << falseBlk->getNumber();
1382     }
1383     dbgs() << "  }\n ";
1384     dbgs() << "landBlock: ";
1385     if (landBlk == NULL) {
1386       dbgs() << "NULL";
1387     } else {
1388       dbgs() << "BB" << landBlk->getNumber();
1389     }
1390     dbgs() << "\n";
1391   );
1392
1393   int oldOpcode = branchInstr->getOpcode();
1394   DebugLoc branchDL = branchInstr->getDebugLoc();
1395
1396 //    transform to
1397 //    if cond
1398 //       trueBlk
1399 //    else
1400 //       falseBlk
1401 //    endif
1402 //    landBlk
1403
1404   typename BlockT::iterator branchInstrPos =
1405     CFGTraits::getInstrPos(curBlk, branchInstr);
1406   CFGTraits::insertCondBranchBefore(branchInstrPos,
1407                                     CFGTraits::getBranchNzeroOpcode(oldOpcode),
1408                                     passRep,
1409                                     branchDL);
1410
1411   if (trueBlk) {
1412     curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
1413     curBlk->removeSuccessor(trueBlk);
1414     if (landBlk && trueBlk->succ_size()!=0) {
1415       trueBlk->removeSuccessor(landBlk);
1416     }
1417     retireBlock(curBlk, trueBlk);
1418   }
1419   CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
1420
1421   if (falseBlk) {
1422     curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
1423                    falseBlk->end());
1424     curBlk->removeSuccessor(falseBlk);
1425     if (landBlk && falseBlk->succ_size() != 0) {
1426       falseBlk->removeSuccessor(landBlk);
1427     }
1428     retireBlock(curBlk, falseBlk);
1429   }
1430   CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1431
1432   branchInstr->eraseFromParent();
1433
1434   if (landBlk && trueBlk && falseBlk) {
1435     curBlk->addSuccessor(landBlk);
1436   }
1437
1438 } //mergeIfthenelseBlock
1439
1440 template<class PassT>
1441 void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
1442                                                 LoopLandInfo *loopLand) {
1443   BlockT *landBlk = loopLand->landBlk;
1444
1445   DEBUG(
1446     dbgs() << "loopPattern header = BB" << dstBlk->getNumber()
1447            << " land = BB" << landBlk->getNumber() << "\n";
1448   );
1449
1450   // Loop contInitRegs are init at the beginning of the loop.
1451   for (typename std::set<RegiT>::const_iterator iter =
1452          loopLand->contInitRegs.begin(),
1453        iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
1454     CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1455   }
1456
1457   /* we last inserterd the DebugLoc in the
1458    * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
1459    * search for the DebugLoc in the that statement.
1460    * if not found, we have to insert the empty/default DebugLoc */
1461   InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
1462   DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
1463
1464   CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
1465   // Loop breakInitRegs are init before entering the loop.
1466   for (typename std::set<RegiT>::const_iterator iter =
1467          loopLand->breakInitRegs.begin(),
1468        iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
1469     CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1470   }
1471   // Loop endbranchInitRegs are init before entering the loop.
1472   for (typename std::set<RegiT>::const_iterator iter =
1473          loopLand->endbranchInitRegs.begin(),
1474        iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
1475     CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1476   }
1477
1478   /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
1479    * search for the DebugLoc in the continue statement.
1480    * if not found, we have to insert the empty/default DebugLoc */
1481   InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
1482   DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
1483
1484   CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
1485   // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
1486   // loop.
1487   for (typename std::set<RegiT>::const_iterator iter =
1488          loopLand->breakOnRegs.begin(),
1489        iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
1490     CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
1491                                    *iter);
1492   }
1493
1494   // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
1495   // loop.
1496   for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
1497        iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
1498     CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
1499                                    passRep, *iter);
1500   }
1501
1502   dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
1503
1504   for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
1505        iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
1506     dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of.
1507   }
1508
1509   removeSuccessor(landBlk);
1510   retireBlock(dstBlk, landBlk);
1511 } //mergeLooplandBlock
1512
1513 template<class PassT>
1514 void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
1515   while (I--) {
1516     if (I->getOpcode() == AMDGPU::PRED_X) {
1517       switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
1518       case OPCODE_IS_ZERO_INT:
1519         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
1520         return;
1521       case OPCODE_IS_NOT_ZERO_INT:
1522         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
1523         return;
1524       case OPCODE_IS_ZERO:
1525         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
1526         return;
1527       case OPCODE_IS_NOT_ZERO:
1528         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
1529         return;
1530       default:
1531         llvm_unreachable("PRED_X Opcode invalid!");
1532       }
1533     }
1534   }
1535 }
1536
1537 template<class PassT>
1538 void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
1539                                                  BlockT *exitBlk,
1540                                                  BlockT *exitLandBlk,
1541                                                  RegiT  setReg) {
1542   DEBUG(
1543     dbgs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
1544            << " exit = BB" << exitBlk->getNumber()
1545            << " land = BB" << exitLandBlk->getNumber() << "\n";
1546   );
1547
1548   InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
1549   assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
1550
1551   DebugLoc DL = branchInstr->getDebugLoc();
1552
1553   BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1554
1555   //    transform exitingBlk to
1556   //    if ( ) {
1557   //       exitBlk (if exitBlk != exitLandBlk)
1558   //       setReg = 1
1559   //       break
1560   //    }endif
1561   //    successor = {orgSuccessor(exitingBlk) - exitBlk}
1562
1563   typename BlockT::iterator branchInstrPos =
1564     CFGTraits::getInstrPos(exitingBlk, branchInstr);
1565
1566   if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
1567     //break_logical
1568
1569     if (trueBranch != exitBlk) {
1570       reversePredicateSetter(branchInstrPos);
1571     }
1572     CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
1573   } else {
1574     if (trueBranch != exitBlk) {
1575       reversePredicateSetter(branchInstr);
1576     }
1577     CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
1578     if (exitBlk != exitLandBlk) {
1579       //splice is insert-before ...
1580       exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
1581                          exitBlk->end());
1582     }
1583     if (setReg != INVALIDREGNUM) {
1584       CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1585     }
1586     CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
1587   } //if_logical
1588
1589   //now branchInst can be erase safely
1590   branchInstr->eraseFromParent();
1591
1592   //now take care of successors, retire blocks
1593   exitingBlk->removeSuccessor(exitBlk);
1594   if (exitBlk != exitLandBlk) {
1595     //splice is insert-before ...
1596     exitBlk->removeSuccessor(exitLandBlk);
1597     retireBlock(exitingBlk, exitBlk);
1598   }
1599
1600 } //mergeLoopbreakBlock
1601
1602 template<class PassT>
1603 void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
1604                                                  BlockT *contBlk,
1605                                                  RegiT   setReg) {
1606   DEBUG(
1607     dbgs() << "settleLoopcontBlock conting = BB"
1608            << contingBlk->getNumber()
1609            << ", cont = BB" << contBlk->getNumber() << "\n";
1610   );
1611
1612   InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
1613   if (branchInstr) {
1614     assert(CFGTraits::isCondBranch(branchInstr));
1615     typename BlockT::iterator branchInstrPos =
1616       CFGTraits::getInstrPos(contingBlk, branchInstr);
1617     BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1618     int oldOpcode = branchInstr->getOpcode();
1619     DebugLoc DL = branchInstr->getDebugLoc();
1620
1621     //    transform contingBlk to
1622     //     if () {
1623     //          move instr after branchInstr
1624     //          continue
1625     //        or
1626     //          setReg = 1
1627     //          break
1628     //     }endif
1629     //     successor = {orgSuccessor(contingBlk) - loopHeader}
1630
1631     bool useContinueLogical = 
1632       (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
1633
1634     if (useContinueLogical == false) {
1635       int branchOpcode =
1636         trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
1637                               : CFGTraits::getBranchZeroOpcode(oldOpcode);
1638
1639       CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1640
1641       if (setReg != INVALIDREGNUM) {
1642         CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1643         // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1644         CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
1645       } else {
1646         // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1647         CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
1648       }
1649
1650       CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
1651     } else {
1652       int branchOpcode =
1653         trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
1654                               : CFGTraits::getContinueZeroOpcode(oldOpcode);
1655
1656       CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1657     }
1658
1659     branchInstr->eraseFromParent();
1660   } else {
1661     // if we've arrived here then we've already erased the branch instruction
1662     // travel back up the basic block to see the last reference of our debug location
1663     // we've just inserted that reference here so it should be representative
1664     if (setReg != INVALIDREGNUM) {
1665       CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
1666       // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1667       CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1668     } else {
1669       // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1670       CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1671     }
1672   } //else
1673
1674 } //settleLoopcontBlock
1675
1676 // BBs in exitBlkSet are determined as in break-path for loopRep,
1677 // before we can put code for BBs as inside loop-body for loopRep
1678 // check whether those BBs are determined as cont-BB for parentLoopRep
1679 // earlier.
1680 // If so, generate a new BB newBlk
1681 //    (1) set newBlk common successor of BBs in exitBlkSet
1682 //    (2) change the continue-instr in BBs in exitBlkSet to break-instr
1683 //    (3) generate continue-instr in newBlk
1684 //
1685 template<class PassT>
1686 typename CFGStructurizer<PassT>::BlockT *
1687 CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
1688                                               LoopT *loopRep,
1689                                               std::set<BlockT *> &exitBlkSet,
1690                                               BlockT *exitLandBlk) {
1691   std::set<BlockT *> endBlkSet;
1692
1693
1694
1695   for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
1696        iterEnd = exitBlkSet.end();
1697        iter != iterEnd; ++iter) {
1698     BlockT *exitBlk = *iter;
1699     BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
1700
1701     if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
1702       return NULL;
1703
1704     endBlkSet.insert(endBlk);
1705   }
1706
1707   BlockT *newBlk = funcRep->CreateMachineBasicBlock();
1708   funcRep->push_back(newBlk);  //insert to function
1709   CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
1710   SHOWNEWBLK(newBlk, "New continue block: ");
1711
1712   for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
1713        iterEnd = endBlkSet.end();
1714        iter != iterEnd; ++iter) {
1715       BlockT *endBlk = *iter;
1716       InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
1717       if (contInstr) {
1718         contInstr->eraseFromParent();
1719       }
1720       endBlk->addSuccessor(newBlk);
1721       DEBUG(
1722         dbgs() << "Add new continue Block to BB"
1723                << endBlk->getNumber() << " successors\n";
1724       );
1725   }
1726
1727   return newBlk;
1728 } //relocateLoopcontBlock
1729
1730
1731 // LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
1732 // LoopLandBlock. This BB branch on the loop endBranchInit register to the
1733 // pathes corresponding to the loop exiting branches.
1734
1735 template<class PassT>
1736 typename CFGStructurizer<PassT>::BlockT *
1737 CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
1738                                               BlockTSmallerVector &exitingBlks,
1739                                               BlockTSmallerVector &exitBlks) {
1740   const AMDGPUInstrInfo *tii =
1741              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
1742   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1743
1744   RegiT endBranchReg = static_cast<int>
1745     (funcRep->getRegInfo().createVirtualRegister(I32RC));
1746   assert(endBranchReg >= 0);
1747
1748   // reg = 0 before entering the loop
1749   addLoopEndbranchInitReg(loopRep, endBranchReg);
1750
1751   uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
1752   assert(numBlks >=2 && numBlks == exitBlks.size());
1753
1754   BlockT *preExitingBlk = exitingBlks[0];
1755   BlockT *preExitBlk = exitBlks[0];
1756   BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
1757   funcRep->push_back(preBranchBlk);  //insert to function
1758   SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
1759
1760   BlockT *newLandBlk = preBranchBlk;
1761
1762       CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
1763         newLandBlk);
1764   preExitingBlk->removeSuccessor(preExitBlk);
1765   preExitingBlk->addSuccessor(newLandBlk);
1766
1767   //it is redundant to add reg = 0 to exitingBlks[0]
1768
1769   // For 1..n th exiting path (the last iteration handles two pathes) create the
1770   // branch to the previous path and the current path.
1771   for (uint32_t i = 1; i < numBlks; ++i) {
1772     BlockT *curExitingBlk = exitingBlks[i];
1773     BlockT *curExitBlk = exitBlks[i];
1774     BlockT *curBranchBlk;
1775
1776     if (i == numBlks - 1) {
1777       curBranchBlk = curExitBlk;
1778     } else {
1779       curBranchBlk = funcRep->CreateMachineBasicBlock();
1780       funcRep->push_back(curBranchBlk);  //insert to function
1781       SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
1782     }
1783
1784     // Add reg = i to exitingBlks[i].
1785     CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
1786                                        endBranchReg, i);
1787
1788     // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
1789     // (exitingBlks[i], newLandBlk).
1790     CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
1791                                           newLandBlk);
1792     curExitingBlk->removeSuccessor(curExitBlk);
1793     curExitingBlk->addSuccessor(newLandBlk);
1794
1795     // add to preBranchBlk the branch instruction:
1796     // if (endBranchReg == preVal)
1797     //    preExitBlk
1798     // else
1799     //    curBranchBlk
1800     //
1801     // preValReg = i - 1
1802
1803   DebugLoc DL;
1804   RegiT preValReg = static_cast<int>
1805     (funcRep->getRegInfo().createVirtualRegister(I32RC));
1806
1807   preBranchBlk->insert(preBranchBlk->begin(),
1808                        tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
1809                        i - 1));
1810
1811   // condResReg = (endBranchReg == preValReg)
1812     RegiT condResReg = static_cast<int>
1813       (funcRep->getRegInfo().createVirtualRegister(I32RC));
1814     BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
1815       .addReg(endBranchReg).addReg(preValReg);
1816
1817     BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
1818       .addMBB(preExitBlk).addReg(condResReg);
1819
1820     preBranchBlk->addSuccessor(preExitBlk);
1821     preBranchBlk->addSuccessor(curBranchBlk);
1822
1823     // Update preExitingBlk, preExitBlk, preBranchBlk.
1824     preExitingBlk = curExitingBlk;
1825     preExitBlk = curExitBlk;
1826     preBranchBlk = curBranchBlk;
1827
1828   }  //end for 1 .. n blocks
1829
1830   return newLandBlk;
1831 } //addLoopEndbranchBlock
1832
1833 template<class PassT>
1834 typename CFGStructurizer<PassT>::PathToKind
1835 CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
1836                                      bool allowSideEntry) {
1837   assert(dstBlk);
1838
1839   if (srcBlk == dstBlk) {
1840     return SinglePath_InPath;
1841   }
1842
1843   while (srcBlk && srcBlk->succ_size() == 1) {
1844     srcBlk = *srcBlk->succ_begin();
1845     if (srcBlk == dstBlk) {
1846       return SinglePath_InPath;
1847     }
1848
1849     if (!allowSideEntry && srcBlk->pred_size() > 1) {
1850       return Not_SinglePath;
1851     }
1852   }
1853
1854   if (srcBlk && srcBlk->succ_size()==0) {
1855     return SinglePath_NotInPath;
1856   }
1857
1858   return Not_SinglePath;
1859 } //singlePathTo
1860
1861 // If there is a single path from srcBlk to dstBlk, return the last block before
1862 // dstBlk If there is a single path from srcBlk->end without dstBlk, return the
1863 // last block in the path Otherwise, return NULL
1864 template<class PassT>
1865 typename CFGStructurizer<PassT>::BlockT *
1866 CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
1867                                       bool allowSideEntry) {
1868   assert(dstBlk);
1869
1870   if (srcBlk == dstBlk) {
1871     return srcBlk;
1872   }
1873
1874   if (srcBlk->succ_size() == 0) {
1875     return srcBlk;
1876   }
1877
1878   while (srcBlk && srcBlk->succ_size() == 1) {
1879     BlockT *preBlk = srcBlk;
1880
1881     srcBlk = *srcBlk->succ_begin();
1882     if (srcBlk == NULL) {
1883       return preBlk;
1884     }
1885
1886     if (!allowSideEntry && srcBlk->pred_size() > 1) {
1887       return NULL;
1888     }
1889   }
1890
1891   if (srcBlk && srcBlk->succ_size()==0) {
1892     return srcBlk;
1893   }
1894
1895   return NULL;
1896
1897 } //singlePathEnd
1898
1899 template<class PassT>
1900 int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
1901                                                BlockT *dstBlk) {
1902   int cloned = 0;
1903   assert(preBlk->isSuccessor(srcBlk));
1904   while (srcBlk && srcBlk != dstBlk) {
1905     assert(srcBlk->succ_size() == 1);
1906     if (srcBlk->pred_size() > 1) {
1907       srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
1908       ++cloned;
1909     }
1910
1911     preBlk = srcBlk;
1912     srcBlk = *srcBlk->succ_begin();
1913   }
1914
1915   return cloned;
1916 } //cloneOnSideEntryTo
1917
1918 template<class PassT>
1919 typename CFGStructurizer<PassT>::BlockT *
1920 CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
1921                                                  BlockT *predBlk) {
1922   assert(predBlk->isSuccessor(curBlk) &&
1923          "succBlk is not a prececessor of curBlk");
1924
1925   BlockT *cloneBlk = CFGTraits::clone(curBlk);  //clone instructions
1926   CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
1927   //srcBlk, oldBlk, newBlk
1928
1929   predBlk->removeSuccessor(curBlk);
1930   predBlk->addSuccessor(cloneBlk);
1931
1932   // add all successor to cloneBlk
1933   CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
1934
1935   numClonedInstr += curBlk->size();
1936
1937   DEBUG(
1938     dbgs() << "Cloned block: " << "BB"
1939            << curBlk->getNumber() << "size " << curBlk->size() << "\n";
1940   );
1941
1942   SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
1943
1944   return cloneBlk;
1945 } //cloneBlockForPredecessor
1946
1947 template<class PassT>
1948 typename CFGStructurizer<PassT>::BlockT *
1949 CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
1950                                                BlockT *exitingBlk) {
1951   BlockT *exitBlk = NULL;
1952
1953   for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
1954        iterSuccEnd = exitingBlk->succ_end();
1955        iterSucc != iterSuccEnd; ++iterSucc) {
1956     BlockT *curBlk = *iterSucc;
1957     if (!loopRep->contains(curBlk)) {
1958       assert(exitBlk == NULL);
1959       exitBlk = curBlk;
1960     }
1961   }
1962
1963   assert(exitBlk != NULL);
1964
1965   return exitBlk;
1966 } //exitingBlock2ExitBlock
1967
1968 template<class PassT>
1969 void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
1970                                                 BlockT *dstBlk,
1971                                                 InstrIterator insertPos) {
1972   InstrIterator spliceEnd;
1973   //look for the input branchinstr, not the AMDGPU branchinstr
1974   InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
1975   if (branchInstr == NULL) {
1976     DEBUG(
1977       dbgs() << "migrateInstruction don't see branch instr\n" ;
1978     );
1979     spliceEnd = srcBlk->end();
1980   } else {
1981     DEBUG(
1982       dbgs() << "migrateInstruction see branch instr\n" ;
1983       branchInstr->dump();
1984     );
1985     spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
1986   }
1987   DEBUG(
1988     dbgs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
1989       << "srcSize = " << srcBlk->size() << "\n";
1990   );
1991
1992   //splice insert before insertPos
1993   dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
1994
1995   DEBUG(
1996     dbgs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
1997       << "srcSize = " << srcBlk->size() << "\n";
1998   );
1999 } //migrateInstruction
2000
2001 // normalizeInfiniteLoopExit change
2002 //   B1:
2003 //        uncond_br LoopHeader
2004 //
2005 // to
2006 //   B1:
2007 //        cond_br 1 LoopHeader dummyExit
2008 // and return the newly added dummy exit block
2009 // 
2010 template<class PassT>
2011 typename CFGStructurizer<PassT>::BlockT *
2012 CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
2013   BlockT *loopHeader;
2014   BlockT *loopLatch;
2015   loopHeader = LoopRep->getHeader();
2016   loopLatch = LoopRep->getLoopLatch();
2017   BlockT *dummyExitBlk = NULL;
2018   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
2019   if (loopHeader!=NULL && loopLatch!=NULL) {
2020     InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
2021     if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
2022       dummyExitBlk = funcRep->CreateMachineBasicBlock();
2023       funcRep->push_back(dummyExitBlk);  //insert to function
2024       SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
2025
2026       DEBUG(dbgs() << "Old branch instr: " << *branchInstr << "\n";);
2027
2028       typename BlockT::iterator insertPos =
2029         CFGTraits::getInstrPos(loopLatch, branchInstr);
2030       unsigned immReg =
2031         funcRep->getRegInfo().createVirtualRegister(I32RC);
2032       CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
2033       InstrT *newInstr = 
2034         CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
2035       MachineInstrBuilder MIB(*funcRep, newInstr);
2036       MIB.addMBB(loopHeader);
2037       MIB.addReg(immReg, false);
2038
2039       SHOWNEWINSTR(newInstr);
2040
2041       branchInstr->eraseFromParent();
2042       loopLatch->addSuccessor(dummyExitBlk);
2043     }
2044   }
2045
2046   return dummyExitBlk;
2047 } //normalizeInfiniteLoopExit
2048
2049 template<class PassT>
2050 void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
2051   InstrT *branchInstr;
2052
2053   // I saw two unconditional branch in one basic block in example
2054   // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
2055   while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
2056           && CFGTraits::isUncondBranch(branchInstr)) {
2057     DEBUG(
2058           dbgs() << "Removing unconditional branch instruction" ;
2059       branchInstr->dump();
2060     );
2061     branchInstr->eraseFromParent();
2062   }
2063 } //removeUnconditionalBranch
2064
2065 template<class PassT>
2066 void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
2067   if (srcBlk->succ_size() == 2) {
2068     BlockT *blk1 = *srcBlk->succ_begin();
2069     BlockT *blk2 = *(++srcBlk->succ_begin());
2070
2071     if (blk1 == blk2) {
2072       InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2073       assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
2074       DEBUG(
2075         dbgs() << "Removing unneeded conditional branch instruction" ;
2076         branchInstr->dump();
2077       );
2078       branchInstr->eraseFromParent();
2079       SHOWNEWBLK(blk1, "Removing redundant successor");
2080       srcBlk->removeSuccessor(blk1);
2081     }
2082   }
2083 } //removeRedundantConditionalBranch
2084
2085 template<class PassT>
2086 void CFGStructurizer<PassT>::addDummyExitBlock(SmallVectorImpl<BlockT *>
2087                                                &retBlks) {
2088   BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
2089   funcRep->push_back(dummyExitBlk);  //insert to function
2090   CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
2091
2092   for (typename SmallVectorImpl<BlockT *>::iterator iter =
2093          retBlks.begin(),
2094        iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
2095     BlockT *curBlk = *iter;
2096     InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
2097     if (curInstr) {
2098       curInstr->eraseFromParent();
2099     }
2100     curBlk->addSuccessor(dummyExitBlk);
2101     DEBUG(
2102       dbgs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
2103              << " successors\n";
2104     );
2105   } //for
2106
2107   SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
2108 } //addDummyExitBlock
2109
2110 template<class PassT>
2111 void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
2112   while (srcBlk->succ_size()) {
2113     srcBlk->removeSuccessor(*srcBlk->succ_begin());
2114   }
2115 }
2116
2117 template<class PassT>
2118 void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
2119   BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2120
2121   if (srcBlkInfo == NULL) {
2122     srcBlkInfo = new BlockInfo();
2123   }
2124
2125   srcBlkInfo->sccNum = sccNum;
2126 }
2127
2128 template<class PassT>
2129 int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
2130   BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2131   return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
2132 }
2133
2134 template<class PassT>
2135 void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
2136   DEBUG(
2137         dbgs() << "Retiring BB" << srcBlk->getNumber() << "\n";
2138   );
2139
2140   BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2141
2142   if (srcBlkInfo == NULL) {
2143     srcBlkInfo = new BlockInfo();
2144   }
2145
2146   srcBlkInfo->isRetired = true;
2147   assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
2148          && "can't retire block yet");
2149 }
2150
2151 template<class PassT>
2152 bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
2153   BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2154   return (srcBlkInfo && srcBlkInfo->isRetired);
2155 }
2156
2157 template<class PassT>
2158 bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
2159   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2160   while (loopRep && loopRep->getHeader() == curBlk) {
2161     LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
2162
2163     if(loopLand == NULL)
2164       return true;
2165
2166     BlockT *landBlk = loopLand->landBlk;
2167     assert(landBlk);
2168     if (!isRetiredBlock(landBlk)) {
2169       return true;
2170     }
2171
2172     loopRep = loopRep->getParentLoop();
2173   }
2174
2175   return false;
2176 } //isActiveLoophead
2177
2178 template<class PassT>
2179 bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
2180   const unsigned blockSizeThreshold = 30;
2181   const unsigned cloneInstrThreshold = 100;
2182
2183   bool multiplePreds = blk && (blk->pred_size() > 1);
2184
2185   if(!multiplePreds)
2186     return false;
2187
2188   unsigned blkSize = blk->size();
2189   return ((blkSize > blockSizeThreshold)
2190           && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
2191 } //needMigrateBlock
2192
2193 template<class PassT>
2194 typename CFGStructurizer<PassT>::BlockT *
2195 CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
2196                                             BlockTSmallerVector &exitBlks,
2197                                             std::set<BlockT *> &exitBlkSet) {
2198   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks;  //in exit path blocks
2199
2200   for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
2201        predIterEnd = landBlk->pred_end();
2202        predIter != predIterEnd; ++predIter) {
2203     BlockT *curBlk = *predIter;
2204     if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
2205       inpathBlks.push_back(curBlk);
2206     }
2207   } //for
2208
2209   //if landBlk has predecessors that are not in the given loop,
2210   //create a new block
2211   BlockT *newLandBlk = landBlk;
2212   if (inpathBlks.size() != landBlk->pred_size()) {
2213     newLandBlk = funcRep->CreateMachineBasicBlock();
2214     funcRep->push_back(newLandBlk);  //insert to function
2215     newLandBlk->addSuccessor(landBlk);
2216     for (typename SmallVectorImpl<BlockT *>::iterator iter =
2217          inpathBlks.begin(),
2218          iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
2219       BlockT *curBlk = *iter;
2220       CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
2221       //srcBlk, oldBlk, newBlk
2222       curBlk->removeSuccessor(landBlk);
2223       curBlk->addSuccessor(newLandBlk);
2224     }
2225     for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
2226       if (exitBlks[i] == landBlk) {
2227         exitBlks[i] = newLandBlk;
2228       }
2229     }
2230     SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
2231   }
2232
2233   setLoopLandBlock(loopRep, newLandBlk);
2234
2235   return newLandBlk;
2236 } // recordLoopbreakLand
2237
2238 template<class PassT>
2239 void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
2240   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2241
2242   if (theEntry == NULL) {
2243     theEntry = new LoopLandInfo();
2244   }
2245   assert(theEntry->landBlk == NULL);
2246
2247   if (blk == NULL) {
2248     blk = funcRep->CreateMachineBasicBlock();
2249     funcRep->push_back(blk);  //insert to function
2250     SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
2251   }
2252
2253   theEntry->landBlk = blk;
2254
2255   DEBUG(
2256     dbgs() << "setLoopLandBlock loop-header = BB"
2257            << loopRep->getHeader()->getNumber()
2258            << "  landing-block = BB" << blk->getNumber() << "\n";
2259   );
2260 } // setLoopLandBlock
2261
2262 template<class PassT>
2263 void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
2264   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2265
2266   if (theEntry == NULL) {
2267     theEntry = new LoopLandInfo();
2268   }
2269
2270   theEntry->breakOnRegs.insert(regNum);
2271
2272   DEBUG(
2273     dbgs() << "addLoopBreakOnReg loop-header = BB"
2274            << loopRep->getHeader()->getNumber()
2275            << "  regNum = " << regNum << "\n";
2276   );
2277 } // addLoopBreakOnReg
2278
2279 template<class PassT>
2280 void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
2281   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2282
2283   if (theEntry == NULL) {
2284     theEntry = new LoopLandInfo();
2285   }
2286   theEntry->contOnRegs.insert(regNum);
2287
2288   DEBUG(
2289     dbgs() << "addLoopContOnReg loop-header = BB"
2290            << loopRep->getHeader()->getNumber()
2291            << "  regNum = " << regNum << "\n";
2292   );
2293 } // addLoopContOnReg
2294
2295 template<class PassT>
2296 void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
2297   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2298
2299   if (theEntry == NULL) {
2300     theEntry = new LoopLandInfo();
2301   }
2302   theEntry->breakInitRegs.insert(regNum);
2303
2304   DEBUG(
2305     dbgs() << "addLoopBreakInitReg loop-header = BB"
2306            << loopRep->getHeader()->getNumber()
2307            << "  regNum = " << regNum << "\n";
2308   );
2309 } // addLoopBreakInitReg
2310
2311 template<class PassT>
2312 void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
2313   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2314
2315   if (theEntry == NULL) {
2316     theEntry = new LoopLandInfo();
2317   }
2318   theEntry->contInitRegs.insert(regNum);
2319
2320   DEBUG(
2321     dbgs() << "addLoopContInitReg loop-header = BB"
2322            << loopRep->getHeader()->getNumber()
2323            << "  regNum = " << regNum << "\n";
2324   );
2325 } // addLoopContInitReg
2326
2327 template<class PassT>
2328 void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
2329                                                      RegiT regNum) {
2330   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2331
2332   if (theEntry == NULL) {
2333     theEntry = new LoopLandInfo();
2334   }
2335   theEntry->endbranchInitRegs.insert(regNum);
2336
2337   DEBUG(
2338         dbgs() << "addLoopEndbranchInitReg loop-header = BB"
2339       << loopRep->getHeader()->getNumber()
2340       << "  regNum = " << regNum << "\n";
2341   );
2342 } // addLoopEndbranchInitReg
2343
2344 template<class PassT>
2345 typename CFGStructurizer<PassT>::LoopLandInfo *
2346 CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
2347   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2348
2349   return theEntry;
2350 } // getLoopLandInfo
2351
2352 template<class PassT>
2353 typename CFGStructurizer<PassT>::BlockT *
2354 CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
2355   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2356
2357   return theEntry ? theEntry->landBlk : NULL;
2358 } // getLoopLandBlock
2359
2360
2361 template<class PassT>
2362 bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
2363   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2364   if (loopRep == NULL)
2365     return false;
2366
2367   BlockT *loopHeader = loopRep->getHeader();
2368
2369   return curBlk->isSuccessor(loopHeader);
2370
2371 } //hasBackEdge
2372
2373 template<class PassT>
2374 unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
2375   return loopRep ? loopRep->getLoopDepth() : 0;
2376 } //getLoopDepth
2377
2378 template<class PassT>
2379 int CFGStructurizer<PassT>::countActiveBlock
2380 (typename SmallVectorImpl<BlockT *>::const_iterator iterStart,
2381  typename SmallVectorImpl<BlockT *>::const_iterator iterEnd) {
2382   int count = 0;
2383   while (iterStart != iterEnd) {
2384     if (!isRetiredBlock(*iterStart)) {
2385       ++count;
2386     }
2387     ++iterStart;
2388   }
2389
2390   return count;
2391 } //countActiveBlock
2392
2393 // This is work around solution for findNearestCommonDominator not avaiable to
2394 // post dom a proper fix should go to Dominators.h.
2395
2396 template<class PassT>
2397 typename CFGStructurizer<PassT>::BlockT*
2398 CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
2399
2400   if (postDomTree->dominates(blk1, blk2)) {
2401     return blk1;
2402   }
2403   if (postDomTree->dominates(blk2, blk1)) {
2404     return blk2;
2405   }
2406
2407   DomTreeNodeT *node1 = postDomTree->getNode(blk1);
2408   DomTreeNodeT *node2 = postDomTree->getNode(blk2);
2409
2410   // Handle newly cloned node.
2411   if (node1 == NULL && blk1->succ_size() == 1) {
2412     return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
2413   }
2414   if (node2 == NULL && blk2->succ_size() == 1) {
2415     return findNearestCommonPostDom(blk1, *blk2->succ_begin());
2416   }
2417
2418   if (node1 == NULL || node2 == NULL) {
2419     return NULL;
2420   }
2421
2422   node1 = node1->getIDom();
2423   while (node1) {
2424     if (postDomTree->dominates(node1, node2)) {
2425       return node1->getBlock();
2426     }
2427     node1 = node1->getIDom();
2428   }
2429
2430   return NULL;
2431 }
2432
2433 template<class PassT>
2434 typename CFGStructurizer<PassT>::BlockT *
2435 CFGStructurizer<PassT>::findNearestCommonPostDom
2436 (typename std::set<BlockT *> &blks) {
2437   BlockT *commonDom;
2438   typename std::set<BlockT *>::const_iterator iter = blks.begin();
2439   typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
2440   for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
2441     BlockT *curBlk = *iter;
2442     if (curBlk != commonDom) {
2443       commonDom = findNearestCommonPostDom(curBlk, commonDom);
2444     }
2445   }
2446
2447   DEBUG(
2448     dbgs() << "Common post dominator for exit blocks is ";
2449     if (commonDom) {
2450           dbgs() << "BB" << commonDom->getNumber() << "\n";
2451     } else {
2452       dbgs() << "NULL\n";
2453     }
2454   );
2455
2456   return commonDom;
2457 } //findNearestCommonPostDom
2458
2459 } // end anonymous namespace
2460
2461 //todo: move-end
2462
2463
2464 //===----------------------------------------------------------------------===//
2465 //
2466 // CFGStructurizer for AMDGPU
2467 //
2468 //===----------------------------------------------------------------------===//
2469
2470
2471 namespace {
2472 class AMDGPUCFGStructurizer : public MachineFunctionPass {
2473 public:
2474   typedef MachineInstr              InstructionType;
2475   typedef MachineFunction           FunctionType;
2476   typedef MachineBasicBlock         BlockType;
2477   typedef MachineLoopInfo           LoopinfoType;
2478   typedef MachineDominatorTree      DominatortreeType;
2479   typedef MachinePostDominatorTree  PostDominatortreeType;
2480   typedef MachineDomTreeNode        DomTreeNodeType;
2481   typedef MachineLoop               LoopType;
2482
2483 protected:
2484   TargetMachine &TM;
2485
2486 public:
2487   AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
2488   const TargetInstrInfo *getTargetInstrInfo() const;
2489   const AMDGPURegisterInfo *getTargetRegisterInfo() const;
2490 };
2491
2492 } // end anonymous namespace
2493 AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
2494   : MachineFunctionPass(pid), TM(tm) {
2495 }
2496
2497 const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
2498   return TM.getInstrInfo();
2499 }
2500
2501 const AMDGPURegisterInfo *AMDGPUCFGStructurizer::getTargetRegisterInfo() const {
2502   return static_cast<const AMDGPURegisterInfo *>(TM.getRegisterInfo());
2503 }
2504
2505 //===----------------------------------------------------------------------===//
2506 //
2507 // CFGPrepare
2508 //
2509 //===----------------------------------------------------------------------===//
2510
2511
2512 namespace {
2513 class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
2514 public:
2515   static char ID;
2516
2517 public:
2518   AMDGPUCFGPrepare(TargetMachine &tm);
2519
2520   virtual const char *getPassName() const;
2521   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2522
2523   bool runOnMachineFunction(MachineFunction &F);
2524 };
2525
2526 char AMDGPUCFGPrepare::ID = 0;
2527 } // end anonymous namespace
2528
2529 AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
2530   : AMDGPUCFGStructurizer(ID, tm )  {
2531 }
2532 const char *AMDGPUCFGPrepare::getPassName() const {
2533   return "AMD IL Control Flow Graph Preparation Pass";
2534 }
2535
2536 void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
2537   AU.addPreserved<MachineFunctionAnalysis>();
2538   AU.addRequired<MachineFunctionAnalysis>();
2539   AU.addRequired<MachineDominatorTree>();
2540   AU.addRequired<MachinePostDominatorTree>();
2541   AU.addRequired<MachineLoopInfo>();
2542 }
2543
2544 //===----------------------------------------------------------------------===//
2545 //
2546 // CFGPerform
2547 //
2548 //===----------------------------------------------------------------------===//
2549
2550
2551 namespace {
2552 class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
2553 public:
2554   static char ID;
2555
2556 public:
2557   AMDGPUCFGPerform(TargetMachine &tm);
2558   virtual const char *getPassName() const;
2559   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2560   bool runOnMachineFunction(MachineFunction &F);
2561 };
2562
2563 char AMDGPUCFGPerform::ID = 0;
2564 } // end anonymous namespace
2565
2566   AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
2567 : AMDGPUCFGStructurizer(ID, tm) {
2568 }
2569
2570 const char *AMDGPUCFGPerform::getPassName() const {
2571   return "AMD IL Control Flow Graph structurizer Pass";
2572 }
2573
2574 void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
2575   AU.addPreserved<MachineFunctionAnalysis>();
2576   AU.addRequired<MachineFunctionAnalysis>();
2577   AU.addRequired<MachineDominatorTree>();
2578   AU.addRequired<MachinePostDominatorTree>();
2579   AU.addRequired<MachineLoopInfo>();
2580 }
2581
2582 //===----------------------------------------------------------------------===//
2583 //
2584 // CFGStructTraits<AMDGPUCFGStructurizer>
2585 //
2586 //===----------------------------------------------------------------------===//
2587
2588 namespace {
2589 // this class is tailor to the AMDGPU backend
2590 template<>
2591 struct CFGStructTraits<AMDGPUCFGStructurizer> {
2592   typedef int RegiT;
2593
2594   static int getBranchNzeroOpcode(int oldOpcode) {
2595     switch(oldOpcode) {
2596     case AMDGPU::JUMP_COND:
2597     case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
2598     case AMDGPU::BRANCH_COND_i32:
2599     case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
2600     default:
2601       llvm_unreachable("internal error");
2602     }
2603     return -1;
2604   }
2605
2606   static int getBranchZeroOpcode(int oldOpcode) {
2607     switch(oldOpcode) {
2608     case AMDGPU::JUMP_COND:
2609     case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
2610     case AMDGPU::BRANCH_COND_i32:
2611     case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
2612     default:
2613       llvm_unreachable("internal error");
2614     }
2615     return -1;
2616   }
2617
2618   static int getContinueNzeroOpcode(int oldOpcode) {
2619     switch(oldOpcode) {
2620     case AMDGPU::JUMP_COND:
2621     case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
2622     default:
2623       llvm_unreachable("internal error");
2624     };
2625     return -1;
2626   }
2627
2628   static int getContinueZeroOpcode(int oldOpcode) {
2629     switch(oldOpcode) {
2630     case AMDGPU::JUMP_COND:
2631     case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
2632     default:
2633       llvm_unreachable("internal error");
2634     }
2635     return -1;
2636   }
2637
2638   static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
2639     return instr->getOperand(0).getMBB();
2640   }
2641
2642   static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
2643     instr->getOperand(0).setMBB(blk);
2644   }
2645
2646   static MachineBasicBlock *
2647   getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
2648     assert(blk->succ_size() == 2);
2649     MachineBasicBlock *trueBranch = getTrueBranch(instr);
2650     MachineBasicBlock::succ_iterator iter = blk->succ_begin();
2651     MachineBasicBlock::succ_iterator iterNext = iter;
2652     ++iterNext;
2653
2654     return (*iter == trueBranch) ? *iterNext : *iter;
2655   }
2656
2657   static bool isCondBranch(MachineInstr *instr) {
2658     switch (instr->getOpcode()) {
2659       case AMDGPU::JUMP_COND:
2660       case AMDGPU::BRANCH_COND_i32:
2661       case AMDGPU::BRANCH_COND_f32:
2662       break;
2663     default:
2664       return false;
2665     }
2666     return true;
2667   }
2668
2669   static bool isUncondBranch(MachineInstr *instr) {
2670     switch (instr->getOpcode()) {
2671     case AMDGPU::JUMP:
2672     case AMDGPU::BRANCH:
2673       return true;
2674     default:
2675       return false;
2676     }
2677     return true;
2678   }
2679
2680   static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
2681     //get DebugLoc from the first MachineBasicBlock instruction with debug info
2682     DebugLoc DL;
2683     for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
2684       MachineInstr *instr = &(*iter);
2685       if (instr->getDebugLoc().isUnknown() == false) {
2686         DL = instr->getDebugLoc();
2687       }
2688     }
2689     return DL;
2690   }
2691
2692   static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
2693     MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2694     MachineInstr *instr = &*iter;
2695     if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
2696       return instr;
2697     }
2698     return NULL;
2699   }
2700
2701   // The correct naming for this is getPossibleLoopendBlockBranchInstr.
2702   //
2703   // BB with backward-edge could have move instructions after the branch
2704   // instruction.  Such move instruction "belong to" the loop backward-edge.
2705   //
2706   static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
2707     const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
2708                                   blk->getParent()->getTarget().getInstrInfo());
2709
2710     for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
2711          iterEnd = blk->rend(); iter != iterEnd; ++iter) {
2712       // FIXME: Simplify
2713       MachineInstr *instr = &*iter;
2714       if (instr) {
2715         if (isCondBranch(instr) || isUncondBranch(instr)) {
2716           return instr;
2717         } else if (!TII->isMov(instr->getOpcode())) {
2718           break;
2719         }
2720       }
2721     }
2722     return NULL;
2723   }
2724
2725   static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
2726     MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2727     if (iter != blk->rend()) {
2728       MachineInstr *instr = &(*iter);
2729       if (instr->getOpcode() == AMDGPU::RETURN) {
2730         return instr;
2731       }
2732     }
2733     return NULL;
2734   }
2735
2736   static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
2737     MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2738     if (iter != blk->rend()) {
2739       MachineInstr *instr = &(*iter);
2740       if (instr->getOpcode() == AMDGPU::CONTINUE) {
2741         return instr;
2742       }
2743     }
2744     return NULL;
2745   }
2746
2747   static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
2748     for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
2749       MachineInstr *instr = &(*iter);
2750       if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
2751         return instr;
2752       }
2753     }
2754     return NULL;
2755   }
2756
2757   static bool isReturnBlock(MachineBasicBlock *blk) {
2758     MachineInstr *instr = getReturnInstr(blk);
2759     bool isReturn = (blk->succ_size() == 0);
2760     if (instr) {
2761       assert(isReturn);
2762     } else if (isReturn) {
2763       DEBUG(
2764         dbgs() << "BB" << blk->getNumber()
2765                <<" is return block without RETURN instr\n";
2766       );
2767     }
2768
2769     return  isReturn;
2770   }
2771
2772   static MachineBasicBlock::iterator
2773   getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
2774     assert(instr->getParent() == blk && "instruction doesn't belong to block");
2775     MachineBasicBlock::iterator iter = blk->begin();
2776     MachineBasicBlock::iterator iterEnd = blk->end();
2777     while (&(*iter) != instr && iter != iterEnd) {
2778       ++iter;
2779     }
2780
2781     assert(iter != iterEnd);
2782     return iter;
2783   }//getInstrPos
2784
2785   static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2786                                          AMDGPUCFGStructurizer *passRep) {
2787     return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
2788   } //insertInstrBefore
2789
2790   static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2791                                          AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
2792     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2793     MachineInstr *newInstr =
2794       blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
2795
2796     MachineBasicBlock::iterator res;
2797     if (blk->begin() != blk->end()) {
2798       blk->insert(blk->begin(), newInstr);
2799     } else {
2800       blk->push_back(newInstr);
2801     }
2802
2803     SHOWNEWINSTR(newInstr);
2804
2805     return newInstr;
2806   } //insertInstrBefore
2807
2808   static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2809                              AMDGPUCFGStructurizer *passRep) {
2810     insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
2811   } //insertInstrEnd
2812
2813   static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2814                              AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
2815     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2816    MachineInstr *newInstr = blk->getParent()
2817       ->CreateMachineInstr(tii->get(newOpcode), DL);
2818
2819     blk->push_back(newInstr);
2820     //assume the instruction doesn't take any reg operand ...
2821
2822     SHOWNEWINSTR(newInstr);
2823   } //insertInstrEnd
2824
2825   static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
2826                                          int newOpcode, 
2827                                          AMDGPUCFGStructurizer *passRep) {
2828     MachineInstr *oldInstr = &(*instrPos);
2829     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2830     MachineBasicBlock *blk = oldInstr->getParent();
2831     MachineInstr *newInstr =
2832       blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
2833                                            DebugLoc());
2834
2835     blk->insert(instrPos, newInstr);
2836     //assume the instruction doesn't take any reg operand ...
2837
2838     SHOWNEWINSTR(newInstr);
2839     return newInstr;
2840   } //insertInstrBefore
2841
2842   static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
2843                                      int newOpcode,
2844                                      AMDGPUCFGStructurizer *passRep,
2845                                      DebugLoc DL) {
2846     MachineInstr *oldInstr = &(*instrPos);
2847     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2848     MachineBasicBlock *blk = oldInstr->getParent();
2849     MachineFunction *MF = blk->getParent();
2850     MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
2851
2852     blk->insert(instrPos, newInstr);
2853     MachineInstrBuilder MIB(*MF, newInstr);
2854     MIB.addReg(oldInstr->getOperand(1).getReg(), false);
2855
2856     SHOWNEWINSTR(newInstr);
2857     //erase later oldInstr->eraseFromParent();
2858   } //insertCondBranchBefore
2859
2860   static void insertCondBranchBefore(MachineBasicBlock *blk,
2861                                      MachineBasicBlock::iterator insertPos,
2862                                      int newOpcode,
2863                                      AMDGPUCFGStructurizer *passRep,
2864                                      RegiT regNum,
2865                                      DebugLoc DL) {
2866     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2867     MachineFunction *MF = blk->getParent();
2868
2869     MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
2870
2871     //insert before
2872     blk->insert(insertPos, newInstr);
2873     MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
2874
2875     SHOWNEWINSTR(newInstr);
2876   } //insertCondBranchBefore
2877
2878   static void insertCondBranchEnd(MachineBasicBlock *blk,
2879                                   int newOpcode,
2880                                   AMDGPUCFGStructurizer *passRep,
2881                                   RegiT regNum) {
2882     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2883     MachineFunction *MF = blk->getParent();
2884     MachineInstr *newInstr =
2885       MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
2886
2887     blk->push_back(newInstr);
2888     MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
2889
2890     SHOWNEWINSTR(newInstr);
2891   } //insertCondBranchEnd
2892
2893
2894   static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
2895                                       AMDGPUCFGStructurizer *passRep,
2896                                       RegiT regNum, int regVal) {
2897     MachineInstr *oldInstr = &(*instrPos);
2898     const AMDGPUInstrInfo *tii =
2899              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2900     MachineBasicBlock *blk = oldInstr->getParent();
2901     MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
2902                                                  regVal);
2903     blk->insert(instrPos, newInstr);
2904
2905     SHOWNEWINSTR(newInstr);
2906   } //insertAssignInstrBefore
2907
2908   static void insertAssignInstrBefore(MachineBasicBlock *blk,
2909                                       AMDGPUCFGStructurizer *passRep,
2910                                       RegiT regNum, int regVal) {
2911     const AMDGPUInstrInfo *tii =
2912              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2913
2914     MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
2915                                                  regVal);
2916     if (blk->begin() != blk->end()) {
2917       blk->insert(blk->begin(), newInstr);
2918     } else {
2919       blk->push_back(newInstr);
2920     }
2921
2922     SHOWNEWINSTR(newInstr);
2923
2924   } //insertInstrBefore
2925
2926   static void insertCompareInstrBefore(MachineBasicBlock *blk,
2927                                        MachineBasicBlock::iterator instrPos,
2928                                        AMDGPUCFGStructurizer *passRep,
2929                                        RegiT dstReg, RegiT src1Reg,
2930                                        RegiT src2Reg) {
2931     const AMDGPUInstrInfo *tii =
2932              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2933     MachineFunction *MF = blk->getParent();
2934     MachineInstr *newInstr =
2935       MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
2936
2937     MachineInstrBuilder MIB(*MF, newInstr);
2938     MIB.addReg(dstReg, RegState::Define); //set target
2939     MIB.addReg(src1Reg); //set src value
2940     MIB.addReg(src2Reg); //set src value
2941
2942     blk->insert(instrPos, newInstr);
2943     SHOWNEWINSTR(newInstr);
2944
2945   } //insertCompareInstrBefore
2946
2947   static void cloneSuccessorList(MachineBasicBlock *dstBlk,
2948                                  MachineBasicBlock *srcBlk) {
2949     for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
2950          iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
2951       dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of
2952     }
2953   } //cloneSuccessorList
2954
2955   static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
2956     MachineFunction *func = srcBlk->getParent();
2957     MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
2958     func->push_back(newBlk);  //insert to function
2959     for (MachineBasicBlock::iterator iter = srcBlk->begin(),
2960          iterEnd = srcBlk->end();
2961          iter != iterEnd; ++iter) {
2962       MachineInstr *instr = func->CloneMachineInstr(iter);
2963       newBlk->push_back(instr);
2964     }
2965     return newBlk;
2966   }
2967
2968   //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
2969   //the AMDGPU instruction is not recognized as terminator fix this and retire
2970   //this routine
2971   static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
2972                                          MachineBasicBlock *oldBlk,
2973                                          MachineBasicBlock *newBlk) {
2974     MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
2975     if (branchInstr && isCondBranch(branchInstr) &&
2976         getTrueBranch(branchInstr) == oldBlk) {
2977       setTrueBranch(branchInstr, newBlk);
2978     }
2979   }
2980
2981   static void wrapup(MachineBasicBlock *entryBlk) {
2982     assert((!entryBlk->getParent()->getJumpTableInfo()
2983             || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
2984            && "found a jump table");
2985
2986      //collect continue right before endloop
2987      SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
2988      MachineBasicBlock::iterator pre = entryBlk->begin();
2989      MachineBasicBlock::iterator iterEnd = entryBlk->end();
2990      MachineBasicBlock::iterator iter = pre;
2991      while (iter != iterEnd) {
2992        if (pre->getOpcode() == AMDGPU::CONTINUE
2993            && iter->getOpcode() == AMDGPU::ENDLOOP) {
2994          contInstr.push_back(pre);
2995        }
2996        pre = iter;
2997        ++iter;
2998      } //end while
2999
3000      //delete continue right before endloop
3001      for (unsigned i = 0; i < contInstr.size(); ++i) {
3002         contInstr[i]->eraseFromParent();
3003      }
3004
3005      // TODO to fix up jump table so later phase won't be confused.  if
3006      // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
3007      // there isn't such an interface yet.  alternatively, replace all the other
3008      // blocks in the jump table with the entryBlk //}
3009
3010   } //wrapup
3011
3012   static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
3013     return &pass.getAnalysis<MachineDominatorTree>();
3014   }
3015
3016   static MachinePostDominatorTree*
3017   getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
3018     return &pass.getAnalysis<MachinePostDominatorTree>();
3019   }
3020
3021   static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
3022     return &pass.getAnalysis<MachineLoopInfo>();
3023   }
3024 }; // template class CFGStructTraits
3025 } // end anonymous namespace
3026
3027 // createAMDGPUCFGPreparationPass- Returns a pass
3028 FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm) {
3029   return new AMDGPUCFGPrepare(tm);
3030 }
3031
3032 bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
3033   return CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func, *this,
3034                                                        getTargetRegisterInfo());
3035 }
3036
3037 // createAMDGPUCFGStructurizerPass- Returns a pass
3038 FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) {
3039   return new AMDGPUCFGPerform(tm);
3040 }
3041
3042 bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
3043   return CFGStructurizer<AMDGPUCFGStructurizer>().run(func, *this,
3044                                                       getTargetRegisterInfo());
3045 }