Relax an invariant that block placement was trying to assert a bit

[oota-llvm.git] / lib / CodeGen / MachineBlockPlacement.cpp
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp

index 6aa4268f4271f1a0c533cb71ef92dcca2b96f915..45d5af293039c941d408dbd7793902003213d8f6 100644 (file)
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -206,10 +206,21 @@ class MachineBlockPlacement : public MachineFunctionPass {
  
    void markChainSuccessors(BlockChain &Chain,
                             MachineBasicBlock *LoopHeaderBB,
-                           SmallVectorImpl<MachineBasicBlock *> &Blocks,
+                           SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
                             const BlockFilterSet *BlockFilter = 0);
+  MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
+                                         BlockChain &Chain,
+                                         const BlockFilterSet *BlockFilter);
+  MachineBasicBlock *selectBestCandidateBlock(
+      BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+      const BlockFilterSet *BlockFilter);
+  MachineBasicBlock *getFirstUnplacedBlock(
+      MachineFunction &F,
+      const BlockChain &PlacedChain,
+      MachineFunction::iterator &PrevUnplacedBlockIt,
+      const BlockFilterSet *BlockFilter);
    void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
-                  SmallVectorImpl<MachineBasicBlock *> &Blocks,
+                  SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
                    const BlockFilterSet *BlockFilter = 0);
    void buildLoopChains(MachineFunction &F, MachineLoop &L);
    void buildCFGChains(MachineFunction &F);
@@ -272,6 +283,12 @@ static std::string getBlockNum(MachineBasicBlock *BB) {
  }
  #endif
  
+/// \brief Mark a chain's successors as having one fewer preds.
+///
+/// When a chain is being merged into the "placed" chain, this routine will
+/// quickly walk the successors of each block in the chain and mark them as
+/// having one fewer active predecessor. It also adds any successors of this
+/// chain which reach the zero-predecessor state to the worklist passed in.
  void MachineBlockPlacement::markChainSuccessors(
      BlockChain &Chain,
      MachineBasicBlock *LoopHeaderBB,
@@ -303,15 +320,193 @@ void MachineBlockPlacement::markChainSuccessors(
    }
  }
  
+/// \brief Select the best successor for a block.
+///
+/// This looks across all successors of a particular block and attempts to
+/// select the "best" one to be the layout successor. It only considers direct
+/// successors which also pass the block filter. It will attempt to avoid
+/// breaking CFG structure, but cave and break such structures in the case of
+/// very hot successor edges.
+///
+/// \returns The best successor block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
+    MachineBasicBlock *BB, BlockChain &Chain,
+    const BlockFilterSet *BlockFilter) {
+  const BranchProbability HotProb(4, 5); // 80%
+
+  MachineBasicBlock *BestSucc = 0;
+  // FIXME: Due to the performance of the probability and weight routines in
+  // the MBPI analysis, we manually compute probabilities using the edge
+  // weights. This is suboptimal as it means that the somewhat subtle
+  // definition of edge weight semantics is encoded here as well. We should
+  // improve the MBPI interface to effeciently support query patterns such as
+  // this.
+  uint32_t BestWeight = 0;
+  uint32_t WeightScale = 0;
+  uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
+  DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+                                        SE = BB->succ_end();
+       SI != SE; ++SI) {
+    if (BlockFilter && !BlockFilter->count(*SI))
+      continue;
+    BlockChain &SuccChain = *BlockToChain[*SI];
+    if (&SuccChain == &Chain) {
+      DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> Already merged!\n");
+      continue;
+    }
+    if (*SI != *SuccChain.begin()) {
+      DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> Mid chain!\n");
+      continue;
+    }
+
+    uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
+    BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+
+    // Only consider successors which are either "hot", or wouldn't violate
+    // any CFG constraints.
+    if (SuccChain.LoopPredecessors != 0) {
+      if (SuccProb < HotProb) {
+        DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> CFG conflict\n");
+        continue;
+      }
+
+      // Make sure that a hot successor doesn't have a globally more important
+      // predecessor.
+      BlockFrequency CandidateEdgeFreq
+        = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+      bool BadCFGConflict = false;
+      for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
+                                            PE = (*SI)->pred_end();
+           PI != PE; ++PI) {
+        if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
+            BlockToChain[*PI] == &Chain)
+          continue;
+        BlockFrequency PredEdgeFreq
+          = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
+        if (PredEdgeFreq >= CandidateEdgeFreq) {
+          BadCFGConflict = true;
+          break;
+        }
+      }
+      if (BadCFGConflict) {
+        DEBUG(dbgs() << "    " << getBlockName(*SI)
+                               << " -> non-cold CFG conflict\n");
+        continue;
+      }
+    }
+
+    DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> " << SuccProb
+                 << " (prob)"
+                 << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
+                 << "\n");
+    if (BestSucc && BestWeight >= SuccWeight)
+      continue;
+    BestSucc = *SI;
+    BestWeight = SuccWeight;
+  }
+  return BestSucc;
+}
+
+namespace {
+/// \brief Predicate struct to detect blocks already placed.
+class IsBlockPlaced {
+  const BlockChain &PlacedChain;
+  const BlockToChainMapType &BlockToChain;
+
+public:
+  IsBlockPlaced(const BlockChain &PlacedChain,
+                const BlockToChainMapType &BlockToChain)
+      : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
+
+  bool operator()(MachineBasicBlock *BB) const {
+    return BlockToChain.lookup(BB) == &PlacedChain;
+  }
+};
+}
+
+/// \brief Select the best block from a worklist.
+///
+/// This looks through the provided worklist as a list of candidate basic
+/// blocks and select the most profitable one to place. The definition of
+/// profitable only really makes sense in the context of a loop. This returns
+/// the most frequently visited block in the worklist, which in the case of
+/// a loop, is the one most desirable to be physically close to the rest of the
+/// loop body in order to improve icache behavior.
+///
+/// \returns The best block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
+    BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+    const BlockFilterSet *BlockFilter) {
+  // Once we need to walk the worklist looking for a candidate, cleanup the
+  // worklist of already placed entries.
+  // FIXME: If this shows up on profiles, it could be folded (at the cost of
+  // some code complexity) into the loop below.
+  WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
+                                IsBlockPlaced(Chain, BlockToChain)),
+                 WorkList.end());
+
+  MachineBasicBlock *BestBlock = 0;
+  BlockFrequency BestFreq;
+  for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
+                                                      WBE = WorkList.end();
+       WBI != WBE; ++WBI) {
+    assert(!BlockFilter || BlockFilter->count(*WBI));
+    BlockChain &SuccChain = *BlockToChain[*WBI];
+    if (&SuccChain == &Chain) {
+      DEBUG(dbgs() << "    " << getBlockName(*WBI)
+                   << " -> Already merged!\n");
+      continue;
+    }
+    assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+
+    BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
+    DEBUG(dbgs() << "    " << getBlockName(*WBI) << " -> " << CandidateFreq
+                 << " (freq)\n");
+    if (BestBlock && BestFreq >= CandidateFreq)
+      continue;
+    BestBlock = *WBI;
+    BestFreq = CandidateFreq;
+  }
+  return BestBlock;
+}
+
+/// \brief Retrieve the first unplaced basic block.
+///
+/// This routine is called when we are unable to use the CFG to walk through
+/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
+/// We walk through the function's blocks in order, starting from the
+/// LastUnplacedBlockIt. We update this iterator on each call to avoid
+/// re-scanning the entire sequence on repeated calls to this routine.
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+    MachineFunction &F, const BlockChain &PlacedChain,
+    MachineFunction::iterator &PrevUnplacedBlockIt,
+    const BlockFilterSet *BlockFilter) {
+  for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
+       ++I) {
+    if (BlockFilter && !BlockFilter->count(I))
+      continue;
+    if (BlockToChain[I] != &PlacedChain) {
+      PrevUnplacedBlockIt = I;
+      // Now select the head of the chain to which the unplaced block belongs
+      // as the block to place. This will force the entire chain to be placed,
+      // and satisfies the requirements of merging chains.
+      return *BlockToChain[I]->begin();
+    }
+  }
+  return 0;
+}
+
  void MachineBlockPlacement::buildChain(
      MachineBasicBlock *BB,
      BlockChain &Chain,
      SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
      const BlockFilterSet *BlockFilter) {
-  const BranchProbability HotProb(4, 5); // 80%
    assert(BB);
    assert(BlockToChain[BB] == &Chain);
-  assert(*Chain.begin() == BB);
+  MachineFunction &F = *BB->getParent();
+  MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
+
    MachineBasicBlock *LoopHeaderBB = BB;
    markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
    BB = *llvm::prior(Chain.end());
@@ -319,83 +514,42 @@ void MachineBlockPlacement::buildChain(
      assert(BB);
      assert(BlockToChain[BB] == &Chain);
      assert(*llvm::prior(Chain.end()) == BB);
+    MachineBasicBlock *BestSucc = 0;
  
      // Look for the best viable successor if there is one to place immediately
      // after this block.
-    MachineBasicBlock *BestSucc = 0;
-    BranchProbability BestProb = BranchProbability::getZero();
-    DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
-    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
-                                          SE = BB->succ_end();
-         SI != SE; ++SI) {
-      if (BlockFilter && !BlockFilter->count(*SI))
-        continue;
-      BlockChain &SuccChain = *BlockToChain[*SI];
-      if (&SuccChain == &Chain) {
-        DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> Already merged!\n");
-        continue;
-      }
-
-      BranchProbability SuccProb = MBPI->getEdgeProbability(BB, *SI);
-
-      // Only consider successors which are either "hot", or wouldn't violate
-      // any CFG constraints.
-      if (SuccChain.LoopPredecessors != 0 && SuccProb < HotProb) {
-        DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> CFG conflict\n");
-        continue;
-      }
-
-      DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> " << SuccProb
-                   << " (prob)"
-                   << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
-                   << "\n");
-      if (BestSucc && BestProb >= SuccProb)
-        continue;
-      BestSucc = *SI;
-      BestProb = SuccProb;
-    }
+    BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
  
      // If an immediate successor isn't available, look for the best viable
      // block among those we've identified as not violating the loop's CFG at
      // this point. This won't be a fallthrough, but it will increase locality.
-    if (!BestSucc) {
-      BlockFrequency BestFreq;
-      for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = BlockWorkList.begin(),
-                                                          WBE = BlockWorkList.end();
-           WBI != WBE; ++WBI) {
-        if (BlockFilter && !BlockFilter->count(*WBI))
-          continue;
-        BlockChain &SuccChain = *BlockToChain[*WBI];
-        if (&SuccChain == &Chain) {
-          DEBUG(dbgs() << "    " << getBlockName(*WBI)
-                       << " -> Already merged!\n");
-          continue;
-        }
-        assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+    if (!BestSucc)
+      BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
  
-        BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
-        DEBUG(dbgs() << "    " << getBlockName(*WBI) << " -> " << CandidateFreq
-                     << " (freq)\n");
-        if (BestSucc && BestFreq >= CandidateFreq)
-          continue;
-        BestSucc = *WBI;
-        BestFreq = CandidateFreq;
-      }
-    }
      if (!BestSucc) {
-      DEBUG(dbgs() << "Finished forming chain for header block "
-                   << getBlockNum(*Chain.begin()) << "\n");
-      return;
+      BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
+                                       BlockFilter);
+      if (!BestSucc)
+        break;
+
+      DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
+                      "layout successor until the CFG reduces\n");
      }
  
      // Place this block, updating the datastructures to reflect its placement.
      BlockChain &SuccChain = *BlockToChain[BestSucc];
+    // Zero out LoopPredecessors for the successor we're about to merge in case
+    // we selected a successor that didn't fit naturally into the CFG.
+    SuccChain.LoopPredecessors = 0;
      DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
                   << " to " << getBlockNum(BestSucc) << "\n");
      markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
      Chain.merge(BestSucc, &SuccChain);
      BB = *llvm::prior(Chain.end());
-  }
+  };
+
+  DEBUG(dbgs() << "Finished forming chain for header block "
+               << getBlockNum(*Chain.begin()) << "\n");
  }
  
  /// \brief Forms basic block chains from the natural loop structures.
@@ -413,6 +567,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
  
    SmallVector<MachineBasicBlock *, 16> BlockWorkList;
    BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
+  BlockChain &LoopChain = *BlockToChain[L.getHeader()];
  
    // FIXME: This is a really lame way of walking the chains in the loop: we
    // walk the blocks, and use a set to prevent visiting a particular chain
@@ -442,38 +597,72 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
        BlockWorkList.push_back(*BI);
    }
  
-  BlockChain &LoopChain = *BlockToChain[L.getHeader()];
    buildChain(*L.block_begin(), LoopChain, BlockWorkList, &LoopBlockSet);
  
    DEBUG({
-    if (LoopChain.LoopPredecessors)
+    // Crash at the end so we get all of the debugging output first.
+    bool BadLoop = false;
+    if (LoopChain.LoopPredecessors) {
+      BadLoop = true;
        dbgs() << "Loop chain contains a block without its preds placed!\n"
               << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
               << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
+    }
      for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
           BCI != BCE; ++BCI)
-      if (!LoopBlockSet.erase(*BCI))
+      if (!LoopBlockSet.erase(*BCI)) {
+        // We don't mark the loop as bad here because there are real situations
+        // where this can occur. For example, with an unanalyzable fallthrough
+        // from a loop block to a non-loop block or vice versa.
          dbgs() << "Loop chain contains a block not contained by the loop!\n"
                 << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
                 << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
                 << "  Bad block:    " << getBlockName(*BCI) << "\n";
+      }
  
-    if (!LoopBlockSet.empty())
-      for (SmallPtrSet<MachineBasicBlock *, 16>::iterator LBI = LoopBlockSet.begin(), LBE = LoopBlockSet.end();
+    if (!LoopBlockSet.empty()) {
+      BadLoop = true;
+      for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
+                                    LBE = LoopBlockSet.end();
             LBI != LBE; ++LBI)
          dbgs() << "Loop contains blocks never placed into a chain!\n"
                 << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
                 << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
                 << "  Bad block:    " << getBlockName(*LBI) << "\n";
+    }
+    assert(!BadLoop && "Detected problems with the placement of this loop.");
    });
  }
  
  void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
    // Ensure that every BB in the function has an associated chain to simplify
    // the assumptions of the remaining algorithm.
-  for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
-    BlockToChain[&*FI] =
-      new (ChainAllocator.Allocate()) BlockChain(BlockToChain, &*FI);
+  SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+  for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+    MachineBasicBlock *BB = FI;
+    BlockChain *&Chain = BlockToChain[BB];
+    Chain = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+    // Also, merge any blocks which we cannot reason about and must preserve
+    // the exact fallthrough behavior for.
+    for (;;) {
+      Cond.clear();
+      MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+      if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
+        break;
+
+      MachineFunction::iterator NextFI(llvm::next(FI));
+      MachineBasicBlock *NextBB = NextFI;
+      // Ensure that the layout successor is a viable block, as we know that
+      // fallthrough is a possibility.
+      assert(NextFI != FE && "Can't fallthrough past the last block.");
+      DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
+                   << getBlockName(BB) << " -> " << getBlockName(NextBB)
+                   << "\n");
+      Chain->merge(NextBB, 0);
+      FI = NextFI;
+      BB = NextBB;
+    }
+  }
  
    // Build any loop-based chains.
    for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
@@ -511,27 +700,36 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
  
    typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
    DEBUG({
+    // Crash at the end so we get all of the debugging output first.
+    bool BadFunc = false;
      FunctionBlockSetType FunctionBlockSet;
      for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
        FunctionBlockSet.insert(FI);
  
-    for (BlockChain::iterator BCI = FunctionChain.begin(), BCE = FunctionChain.end();
+    for (BlockChain::iterator BCI = FunctionChain.begin(),
+                              BCE = FunctionChain.end();
           BCI != BCE; ++BCI)
-      if (!FunctionBlockSet.erase(*BCI))
+      if (!FunctionBlockSet.erase(*BCI)) {
+        BadFunc = true;
          dbgs() << "Function chain contains a block not in the function!\n"
                 << "  Bad block:    " << getBlockName(*BCI) << "\n";
+      }
  
-    if (!FunctionBlockSet.empty())
-      for (SmallPtrSet<MachineBasicBlock *, 16>::iterator FBI = FunctionBlockSet.begin(),
-           FBE = FunctionBlockSet.end(); FBI != FBE; ++FBI)
+    if (!FunctionBlockSet.empty()) {
+      BadFunc = true;
+      for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
+                                          FBE = FunctionBlockSet.end();
+           FBI != FBE; ++FBI)
          dbgs() << "Function contains blocks never placed into a chain!\n"
                 << "  Bad block:    " << getBlockName(*FBI) << "\n";
+    }
+    assert(!BadFunc && "Detected problems with the block placement.");
    });
  
    // Splice the blocks into place.
    MachineFunction::iterator InsertPos = F.begin();
-  SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
-  for (BlockChain::iterator BI = FunctionChain.begin(), BE = FunctionChain.end();
+  for (BlockChain::iterator BI = FunctionChain.begin(),
+                            BE = FunctionChain.end();
         BI != BE; ++BI) {
      DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
                                                    : "          ... ")
@@ -600,6 +798,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
    AlignLoops(F);
  
    BlockToChain.clear();
+  ChainAllocator.DestroyAll();
  
    // We always return true as we have no way to track whether the final order
    // differs from the original order.