Relax an invariant that block placement was trying to assert a bit
[oota-llvm.git] / lib / CodeGen / MachineBlockPlacement.cpp
index ca17ad07e447c6377030ab4a7234ded87b1efa1a..45d5af293039c941d408dbd7793902003213d8f6 100644 (file)
@@ -214,11 +214,12 @@ class MachineBlockPlacement : public MachineFunctionPass {
   MachineBasicBlock *selectBestCandidateBlock(
       BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
       const BlockFilterSet *BlockFilter);
-  MachineBasicBlock *getFirstUnplacedBlock(const BlockChain &PlacedChain,
-                                           ArrayRef<MachineBasicBlock *> Blocks,
-                                           unsigned &PrevUnplacedBlockIdx);
+  MachineBasicBlock *getFirstUnplacedBlock(
+      MachineFunction &F,
+      const BlockChain &PlacedChain,
+      MachineFunction::iterator &PrevUnplacedBlockIt,
+      const BlockFilterSet *BlockFilter);
   void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
-                  ArrayRef<MachineBasicBlock *> Blocks,
                   SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
                   const BlockFilterSet *BlockFilter = 0);
   void buildLoopChains(MachineFunction &F, MachineLoop &L);
@@ -334,7 +335,15 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
   const BranchProbability HotProb(4, 5); // 80%
 
   MachineBasicBlock *BestSucc = 0;
-  BranchProbability BestProb = BranchProbability::getZero();
+  // FIXME: Due to the performance of the probability and weight routines in
+  // the MBPI analysis, we manually compute probabilities using the edge
+  // weights. This is suboptimal as it means that the somewhat subtle
+  // definition of edge weight semantics is encoded here as well. We should
+  // improve the MBPI interface to effeciently support query patterns such as
+  // this.
+  uint32_t BestWeight = 0;
+  uint32_t WeightScale = 0;
+  uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
   DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
   for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
                                         SE = BB->succ_end();
@@ -346,28 +355,76 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
       DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> Already merged!\n");
       continue;
     }
+    if (*SI != *SuccChain.begin()) {
+      DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> Mid chain!\n");
+      continue;
+    }
 
-    BranchProbability SuccProb = MBPI->getEdgeProbability(BB, *SI);
+    uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
+    BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
 
     // Only consider successors which are either "hot", or wouldn't violate
     // any CFG constraints.
-    if (SuccChain.LoopPredecessors != 0 && SuccProb < HotProb) {
-      DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> CFG conflict\n");
-      continue;
+    if (SuccChain.LoopPredecessors != 0) {
+      if (SuccProb < HotProb) {
+        DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> CFG conflict\n");
+        continue;
+      }
+
+      // Make sure that a hot successor doesn't have a globally more important
+      // predecessor.
+      BlockFrequency CandidateEdgeFreq
+        = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+      bool BadCFGConflict = false;
+      for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
+                                            PE = (*SI)->pred_end();
+           PI != PE; ++PI) {
+        if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
+            BlockToChain[*PI] == &Chain)
+          continue;
+        BlockFrequency PredEdgeFreq
+          = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
+        if (PredEdgeFreq >= CandidateEdgeFreq) {
+          BadCFGConflict = true;
+          break;
+        }
+      }
+      if (BadCFGConflict) {
+        DEBUG(dbgs() << "    " << getBlockName(*SI)
+                               << " -> non-cold CFG conflict\n");
+        continue;
+      }
     }
 
     DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> " << SuccProb
                  << " (prob)"
                  << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
                  << "\n");
-    if (BestSucc && BestProb >= SuccProb)
+    if (BestSucc && BestWeight >= SuccWeight)
       continue;
     BestSucc = *SI;
-    BestProb = SuccProb;
+    BestWeight = SuccWeight;
   }
   return BestSucc;
 }
 
+namespace {
+/// \brief Predicate struct to detect blocks already placed.
+class IsBlockPlaced {
+  const BlockChain &PlacedChain;
+  const BlockToChainMapType &BlockToChain;
+
+public:
+  IsBlockPlaced(const BlockChain &PlacedChain,
+                const BlockToChainMapType &BlockToChain)
+      : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
+
+  bool operator()(MachineBasicBlock *BB) const {
+    return BlockToChain.lookup(BB) == &PlacedChain;
+  }
+};
+}
+
 /// \brief Select the best block from a worklist.
 ///
 /// This looks through the provided worklist as a list of candidate basic
@@ -381,13 +438,20 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
 MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
     BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
     const BlockFilterSet *BlockFilter) {
+  // Once we need to walk the worklist looking for a candidate, cleanup the
+  // worklist of already placed entries.
+  // FIXME: If this shows up on profiles, it could be folded (at the cost of
+  // some code complexity) into the loop below.
+  WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
+                                IsBlockPlaced(Chain, BlockToChain)),
+                 WorkList.end());
+
   MachineBasicBlock *BestBlock = 0;
   BlockFrequency BestFreq;
   for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
                                                       WBE = WorkList.end();
        WBI != WBE; ++WBI) {
-    if (BlockFilter && !BlockFilter->count(*WBI))
-      continue;
+    assert(!BlockFilter || BlockFilter->count(*WBI));
     BlockChain &SuccChain = *BlockToChain[*WBI];
     if (&SuccChain == &Chain) {
       DEBUG(dbgs() << "    " << getBlockName(*WBI)
@@ -411,18 +475,23 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
 ///
 /// This routine is called when we are unable to use the CFG to walk through
 /// all of the basic blocks and form a chain due to unnatural loops in the CFG.
-/// We walk through the sequence of blocks, starting from the
-/// LastUnplacedBlockIdx. We update this index to avoid re-scanning the entire
-/// sequence on repeated calls to this routine.
+/// We walk through the function's blocks in order, starting from the
+/// LastUnplacedBlockIt. We update this iterator on each call to avoid
+/// re-scanning the entire sequence on repeated calls to this routine.
 MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
-    const BlockChain &PlacedChain,
-    ArrayRef<MachineBasicBlock *> Blocks,
-    unsigned &PrevUnplacedBlockIdx) {
-  for (unsigned i = PrevUnplacedBlockIdx, e = Blocks.size(); i != e; ++i) {
-    MachineBasicBlock *BB = Blocks[i];
-    if (BlockToChain[BB] != &PlacedChain) {
-      PrevUnplacedBlockIdx = i;
-      return BB;
+    MachineFunction &F, const BlockChain &PlacedChain,
+    MachineFunction::iterator &PrevUnplacedBlockIt,
+    const BlockFilterSet *BlockFilter) {
+  for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
+       ++I) {
+    if (BlockFilter && !BlockFilter->count(I))
+      continue;
+    if (BlockToChain[I] != &PlacedChain) {
+      PrevUnplacedBlockIt = I;
+      // Now select the head of the chain to which the unplaced block belongs
+      // as the block to place. This will force the entire chain to be placed,
+      // and satisfies the requirements of merging chains.
+      return *BlockToChain[I]->begin();
     }
   }
   return 0;
@@ -431,14 +500,12 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
 void MachineBlockPlacement::buildChain(
     MachineBasicBlock *BB,
     BlockChain &Chain,
-    ArrayRef<MachineBasicBlock *> Blocks,
     SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
     const BlockFilterSet *BlockFilter) {
   assert(BB);
   assert(BlockToChain[BB] == &Chain);
-  assert(*Chain.begin() == BB);
-  SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
-  unsigned PrevUnplacedBlockIdx = 0;
+  MachineFunction &F = *BB->getParent();
+  MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
 
   MachineBasicBlock *LoopHeaderBB = BB;
   markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
@@ -449,26 +516,9 @@ void MachineBlockPlacement::buildChain(
     assert(*llvm::prior(Chain.end()) == BB);
     MachineBasicBlock *BestSucc = 0;
 
-    // Check for unreasonable branches, and forcibly merge the existing layout
-    // successor for them. We can handle cases that AnalyzeBranch can't: jump
-    // tables etc are fine. The case we want to handle specially is when there
-    // is potential fallthrough, but the branch cannot be analyzed. This
-    // includes blocks without terminators as well as other cases.
-    Cond.clear();
-    MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
-    if (TII->AnalyzeBranch(*BB, TBB, FBB, Cond) && BB->canFallThrough()) {
-      MachineFunction::iterator I(BB), NextI(llvm::next(I));
-      // Ensure that the layout successor is a viable block, as we know that
-      // fallthrough is a possibility.
-      assert(NextI != BB->getParent()->end());
-      assert(!BlockFilter || BlockFilter->count(NextI));
-      BestSucc = NextI;
-    }
-
-    // Otherwise, look for the best viable successor if there is one to place
-    // immediately after this block.
-    if (!BestSucc)
-      BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+    // Look for the best viable successor if there is one to place immediately
+    // after this block.
+    BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
 
     // If an immediate successor isn't available, look for the best viable
     // block among those we've identified as not violating the loop's CFG at
@@ -477,7 +527,8 @@ void MachineBlockPlacement::buildChain(
       BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
 
     if (!BestSucc) {
-      BestSucc = getFirstUnplacedBlock(Chain, Blocks, PrevUnplacedBlockIdx);
+      BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
+                                       BlockFilter);
       if (!BestSucc)
         break;
 
@@ -546,8 +597,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
       BlockWorkList.push_back(*BI);
   }
 
-  buildChain(*L.block_begin(), LoopChain, L.getBlocks(), BlockWorkList,
-             &LoopBlockSet);
+  buildChain(*L.block_begin(), LoopChain, BlockWorkList, &LoopBlockSet);
 
   DEBUG({
     // Crash at the end so we get all of the debugging output first.
@@ -561,7 +611,9 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
     for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
          BCI != BCE; ++BCI)
       if (!LoopBlockSet.erase(*BCI)) {
-        BadLoop = true;
+        // We don't mark the loop as bad here because there are real situations
+        // where this can occur. For example, with an unanalyzable fallthrough
+        // from a loop block to a non-loop block or vice versa.
         dbgs() << "Loop chain contains a block not contained by the loop!\n"
                << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
                << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
@@ -585,26 +637,43 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
 void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
   // Ensure that every BB in the function has an associated chain to simplify
   // the assumptions of the remaining algorithm.
-  for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
-    BlockToChain[&*FI] =
-      new (ChainAllocator.Allocate()) BlockChain(BlockToChain, &*FI);
+  SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+  for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+    MachineBasicBlock *BB = FI;
+    BlockChain *&Chain = BlockToChain[BB];
+    Chain = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+    // Also, merge any blocks which we cannot reason about and must preserve
+    // the exact fallthrough behavior for.
+    for (;;) {
+      Cond.clear();
+      MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+      if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
+        break;
+
+      MachineFunction::iterator NextFI(llvm::next(FI));
+      MachineBasicBlock *NextBB = NextFI;
+      // Ensure that the layout successor is a viable block, as we know that
+      // fallthrough is a possibility.
+      assert(NextFI != FE && "Can't fallthrough past the last block.");
+      DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
+                   << getBlockName(BB) << " -> " << getBlockName(NextBB)
+                   << "\n");
+      Chain->merge(NextBB, 0);
+      FI = NextFI;
+      BB = NextBB;
+    }
+  }
 
   // Build any loop-based chains.
   for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
        ++LI)
     buildLoopChains(F, **LI);
 
-  // We need a vector of blocks so that buildChain can handle unnatural CFG
-  // constructs by searching for unplaced blocks and just concatenating them.
-  SmallVector<MachineBasicBlock *, 16> Blocks;
-  Blocks.reserve(F.size());
-
   SmallVector<MachineBasicBlock *, 16> BlockWorkList;
 
   SmallPtrSet<BlockChain *, 4> UpdatedPreds;
   for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
     MachineBasicBlock *BB = &*FI;
-    Blocks.push_back(BB);
     BlockChain &Chain = *BlockToChain[BB];
     if (!UpdatedPreds.insert(&Chain))
       continue;
@@ -627,7 +696,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
   }
 
   BlockChain &FunctionChain = *BlockToChain[&F.front()];
-  buildChain(&F.front(), FunctionChain, Blocks, BlockWorkList);
+  buildChain(&F.front(), FunctionChain, BlockWorkList);
 
   typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
   DEBUG({
@@ -659,7 +728,6 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
 
   // Splice the blocks into place.
   MachineFunction::iterator InsertPos = F.begin();
-  SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
   for (BlockChain::iterator BI = FunctionChain.begin(),
                             BE = FunctionChain.end();
        BI != BE; ++BI) {
@@ -730,6 +798,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
   AlignLoops(F);
 
   BlockToChain.clear();
+  ChainAllocator.DestroyAll();
 
   // We always return true as we have no way to track whether the final order
   // differs from the original order.