#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <vector>
+#include <list>
#define DEBUG_TYPE "block-freq"
/// pseudo-node once it's packaged.
struct LoopData {
typedef SmallVector<std::pair<BlockNode, BlockMass>, 4> ExitMap;
- typedef SmallVector<BlockNode, 4> MemberList;
- BlockNode Header; ///< Header.
+ typedef SmallVector<BlockNode, 4> NodeList;
+ LoopData *Parent; ///< The parent loop.
bool IsPackaged; ///< Whether this has been packaged.
ExitMap Exits; ///< Successor edges (and weights).
- MemberList Members; ///< Members of the loop.
+ NodeList Nodes; ///< Header and the members of the loop.
BlockMass BackedgeMass; ///< Mass returned to loop header.
BlockMass Mass;
Float Scale;
- LoopData(const BlockNode &Header) : Header(Header), IsPackaged(false) {}
+ LoopData(LoopData *Parent, const BlockNode &Header)
+ : Parent(Parent), IsPackaged(false), Nodes(1, Header) {}
+ bool isHeader(const BlockNode &Node) const { return Node == Nodes[0]; }
+ BlockNode getHeader() const { return Nodes[0]; }
+
+ NodeList::const_iterator members_begin() const { return Nodes.begin() + 1; }
+ NodeList::const_iterator members_end() const { return Nodes.end(); }
+ iterator_range<NodeList::const_iterator> members() const {
+ return make_range(members_begin(), members_end());
+ }
};
/// \brief Index of loop information.
struct WorkingData {
- LoopData *Loop; ///< The loop this block is the header of.
- BlockNode ContainingLoop; ///< The block whose loop this block is inside.
- bool IsPackaged; ///< Has ContainingLoop been packaged up?
- BlockMass Mass; ///< Mass distribution from the entry block.
+ BlockNode Node; ///< This node.
+ LoopData *Loop; ///< The loop this block is inside.
+ BlockMass Mass; ///< Mass distribution from the entry block.
+
+ WorkingData(const BlockNode &Node) : Node(Node), Loop(nullptr) {}
+
+ bool isLoopHeader() const { return Loop && Loop->isHeader(Node); }
- WorkingData() : Loop(nullptr), IsPackaged(false) {}
+ LoopData *getContainingLoop() const {
+ return isLoopHeader() ? Loop->Parent : Loop;
+ }
- bool hasLoopHeader() const { return ContainingLoop.isValid(); }
- bool isLoopHeader() const { return Loop; }
+ /// \brief Resolve a node to its representative.
+ ///
+ /// Get the node currently representing Node, which could be a containing
+ /// loop.
+ ///
+ /// This function should only be called when distributing mass. As long as
+ /// there are no irreducilbe edges to Node, then it will have complexity
+ /// O(1) in this context.
+ ///
+ /// In general, the complexity is O(L), where L is the number of loop
+ /// headers Node has been packaged into. Since this method is called in
+ /// the context of distributing mass, L will be the number of loop headers
+ /// an early exit edge jumps out of.
+ BlockNode getResolvedNode() const {
+ auto L = getPackagedLoop();
+ return L ? L->getHeader() : Node;
+ }
+ LoopData *getPackagedLoop() const {
+ if (!Loop || !Loop->IsPackaged)
+ return nullptr;
+ auto L = Loop;
+ while (L->Parent && L->Parent->IsPackaged)
+ L = L->Parent;
+ return L;
+ }
- /// \brief Has this block's loop been packaged up?
- bool isAPackage() const { return Loop && Loop->IsPackaged; }
+ /// \brief Get the appropriate mass for a node.
+ ///
+ /// Get appropriate mass for Node. If Node is a loop-header (whose loop
+ /// has been packaged), returns the mass of its pseudo-node. If it's a
+ /// node inside a packaged loop, it returns the loop's mass.
+ BlockMass &getMass() { return isAPackage() ? Loop->Mass : Mass; }
+
+ /// \brief Has ContainingLoop been packaged up?
+ bool isPackaged() const { return getResolvedNode() != Node; }
+ /// \brief Has Loop been packaged up?
+ bool isAPackage() const { return isLoopHeader() && Loop->IsPackaged; }
};
/// \brief Unscaled probability weight.
/// This class collates the successor edge weights for later processing.
///
/// \a DidOverflow indicates whether \a Total did overflow while adding to
- /// the distribution. It should never overflow twice. There's no flag for
- /// whether \a ForwardTotal overflows, since when \a Total exceeds 32-bits
- /// they both get re-computed during \a normalize().
+ /// the distribution. It should never overflow twice.
struct Distribution {
typedef SmallVector<Weight, 4> WeightList;
WeightList Weights; ///< Individual successor weights.
uint64_t Total; ///< Sum of all weights.
bool DidOverflow; ///< Whether \a Total did overflow.
- uint32_t ForwardTotal; ///< Total excluding backedges.
- Distribution() : Total(0), DidOverflow(false), ForwardTotal(0) {}
+ Distribution() : Total(0), DidOverflow(false) {}
void addLocal(const BlockNode &Node, uint64_t Amount) {
add(Node, Amount, Weight::Local);
}
/// \brief Loop data: see initializeLoops().
std::vector<WorkingData> Working;
- /// \brief Indexed information about packaged loops.
- std::vector<std::unique_ptr<LoopData>> PackagedLoops;
+ /// \brief Indexed information about loops.
+ std::list<LoopData> Loops;
/// \brief Add all edges out of a packaged loop to the distribution.
///
/// Adds all edges from LocalLoopHead to Dist. Calls addToDist() to add each
/// successor edge.
- void addLoopSuccessorsToDist(const BlockNode &LoopHead,
- const BlockNode &LocalLoopHead,
+ void addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop,
Distribution &Dist);
/// \brief Add an edge to the distribution.
///
/// Adds an edge to Succ to Dist. If \c LoopHead.isValid(), then whether the
- /// edge is forward/exit/backedge is in the context of LoopHead. Otherwise,
- /// every edge should be a forward edge (since all the loops are packaged
- /// up).
- void addToDist(Distribution &Dist, const BlockNode &LoopHead,
+ /// edge is local/exit/backedge is in the context of LoopHead. Otherwise,
+ /// every edge should be a local edge (since all the loops are packaged up).
+ void addToDist(Distribution &Dist, const LoopData *OuterLoop,
const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight);
LoopData &getLoopPackage(const BlockNode &Head) {
assert(Head.Index < Working.size());
- assert(Working[Head.Index].Loop != nullptr);
+ assert(Working[Head.Index].isLoopHeader());
return *Working[Head.Index].Loop;
}
/// \brief Distribute mass according to a distribution.
///
/// Distributes the mass in Source according to Dist. If LoopHead.isValid(),
- /// backedges and exits are stored in its entry in PackagedLoops.
+ /// backedges and exits are stored in its entry in Loops.
///
/// Mass is distributed in parallel from two copies of the source mass.
- ///
- /// The first mass (forward) represents the distribution of mass through the
- /// local DAG. This distribution should lose mass at loop exits and ignore
- /// backedges.
- ///
- /// The second mass (general) represents the behavior of the loop in the
- /// global context. In a given distribution from the head, how much mass
- /// exits, and to where? How much mass returns to the loop head?
- ///
- /// The forward mass should be split up between local successors and exits,
- /// but only actually distributed to the local successors. The general mass
- /// should be split up between all three types of successors, but distributed
- /// only to exits and backedges.
- void distributeMass(const BlockNode &Source, const BlockNode &LoopHead,
+ void distributeMass(const BlockNode &Source, LoopData *OuterLoop,
Distribution &Dist);
/// \brief Compute the loop scale for a loop.
- void computeLoopScale(const BlockNode &LoopHead);
+ void computeLoopScale(LoopData &Loop);
/// \brief Package up a loop.
- void packageLoop(const BlockNode &LoopHead);
+ void packageLoop(LoopData &Loop);
+
+ /// \brief Unwrap loops.
+ void unwrapLoops();
/// \brief Finalize frequency metrics.
///
- /// Unwraps loop packages, calculates final frequencies, and cleans up
- /// no-longer-needed data structures.
+ /// Calculates final frequencies and cleans up no-longer-needed data
+ /// structures.
void finalizeMetrics();
/// \brief Clear all memory.
/// in \a LoopData::Exits. Otherwise, fetch it from
/// BranchProbabilityInfo.
///
-/// - Each successor is categorized as \a Weight::Local, a normal
-/// forward edge within the current loop, \a Weight::Backedge, a
-/// backedge to the loop header, or \a Weight::Exit, any successor
-/// outside the loop. The weight, the successor, and its category
-/// are stored in \a Distribution. There can be multiple edges to
-/// each successor.
+/// - Each successor is categorized as \a Weight::Local, a local edge
+/// within the current loop, \a Weight::Backedge, a backedge to the
+/// loop header, or \a Weight::Exit, any successor outside the loop.
+/// The weight, the successor, and its category are stored in \a
+/// Distribution. There can be multiple edges to each successor.
///
/// - Normalize the distribution: scale weights down so that their sum
/// is 32-bits, and coalesce multiple edges to the same node.
///
/// - Distribute the mass accordingly, dithering to minimize mass loss,
-/// as described in \a distributeMass(). Mass is distributed in
-/// parallel in two ways: forward, and general. Local successors
-/// take their mass from the forward mass, while exit and backedge
-/// successors take their mass from the general mass. Additionally,
-/// exit edges use up (ignored) mass from the forward mass, and local
-/// edges use up (ignored) mass from the general distribution.
+/// as described in \a distributeMass().
///
/// Finally, calculate the loop scale from the accumulated backedge mass.
///
return RPOT[Node.Index];
}
+ /// \brief Run (and save) a post-order traversal.
+ ///
+ /// Saves a reverse post-order traversal of all the nodes in \a F.
void initializeRPOT();
+
+ /// \brief Initialize loop data.
+ ///
+ /// Build up \a Loops using \a LoopInfo. \a LoopInfo gives us a mapping from
+ /// each block to the deepest loop it's in, but we need the inverse. For each
+ /// loop, we store in reverse post-order its "immediate" members, defined as
+ /// the header, the headers of immediate sub-loops, and all other blocks in
+ /// the loop that are not in sub-loops.
void initializeLoops();
- void runOnFunction(const FunctionT *F);
- void propagateMassToSuccessors(const BlockNode &LoopHead,
- const BlockNode &Node);
+ /// \brief Propagate to a block's successors.
+ ///
+ /// In the context of distributing mass through \c OuterLoop, divide the mass
+ /// currently assigned to \c Node between its successors.
+ void propagateMassToSuccessors(LoopData *OuterLoop, const BlockNode &Node);
+
+ /// \brief Compute mass in a particular loop.
+ ///
+ /// Assign mass to \c Loop's header, and then for each block in \c Loop in
+ /// reverse post-order, distribute mass to its successors. Only visits nodes
+ /// that have not been packaged into sub-loops.
+ ///
+ /// \pre \a computeMassInLoop() has been called for each subloop of \c Loop.
+ void computeMassInLoop(LoopData &Loop);
+
+ /// \brief Compute mass in all loops.
+ ///
+ /// For each loop bottom-up, call \a computeMassInLoop().
void computeMassInLoops();
- void computeMassInLoop(const BlockNode &LoopHead);
+
+ /// \brief Compute mass in the top-level function.
+ ///
+ /// Assign mass to the entry block, and then for each block in reverse
+ /// post-order, distribute mass to its successors. Skips nodes that have
+ /// been packaged into loops.
+ ///
+ /// \pre \a computeMassInLoops() has been called.
void computeMassInFunction();
std::string getBlockName(const BlockNode &Node) const override {
void doFunction(const FunctionT *F, const BranchProbabilityInfoT *BPI,
const LoopInfoT *LI);
- BlockFrequencyInfoImpl() : BPI(0), LI(0), F(0) {}
+ BlockFrequencyInfoImpl() : BPI(nullptr), LI(nullptr), F(nullptr) {}
using BlockFrequencyInfoImplBase::getEntryFreq;
BlockFrequency getBlockFreq(const BlockT *BB) const {
// the full function.
computeMassInLoops();
computeMassInFunction();
+ unwrapLoops();
finalizeMetrics();
}
Nodes[*I] = Node;
}
- Working.resize(RPOT.size());
+ Working.reserve(RPOT.size());
+ for (size_t Index = 0; Index < RPOT.size(); ++Index)
+ Working.emplace_back(Index);
Freqs.resize(RPOT.size());
}
return;
// Visit loops top down and assign them an index.
- std::deque<const LoopT *> Q;
- Q.insert(Q.end(), LI->begin(), LI->end());
+ std::deque<std::pair<const LoopT *, LoopData *>> Q;
+ for (const LoopT *L : *LI)
+ Q.emplace_back(L, nullptr);
while (!Q.empty()) {
- const LoopT *Loop = Q.front();
+ const LoopT *Loop = Q.front().first;
+ LoopData *Parent = Q.front().second;
Q.pop_front();
- Q.insert(Q.end(), Loop->begin(), Loop->end());
- // Save the order this loop was visited.
BlockNode Header = getNode(Loop->getHeader());
assert(Header.isValid());
- PackagedLoops.emplace_back(new LoopData(Header));
- Working[Header.Index].Loop = PackagedLoops.back().get();
+ Loops.emplace_back(Parent, Header);
+ Working[Header.Index].Loop = &Loops.back();
DEBUG(dbgs() << " - loop = " << getBlockName(Header) << "\n");
+
+ for (const LoopT *L : *Loop)
+ Q.emplace_back(L, &Loops.back());
}
// Visit nodes in reverse post-order and add them to their deepest containing
// loop.
for (size_t Index = 0; Index < RPOT.size(); ++Index) {
+ // Loop headers have already been mostly mapped.
+ if (Working[Index].isLoopHeader()) {
+ LoopData *ContainingLoop = Working[Index].getContainingLoop();
+ if (ContainingLoop)
+ ContainingLoop->Nodes.push_back(Index);
+ continue;
+ }
+
const LoopT *Loop = LI->getLoopFor(RPOT[Index]);
if (!Loop)
continue;
- // If this is a loop header, find its parent loop (if any).
- if (Working[Index].isLoopHeader())
- if (!(Loop = Loop->getParentLoop()))
- continue;
-
// Add this node to its containing loop's member list.
BlockNode Header = getNode(Loop->getHeader());
assert(Header.isValid());
const auto &HeaderData = Working[Header.Index];
assert(HeaderData.isLoopHeader());
- Working[Index].ContainingLoop = Header;
- HeaderData.Loop->Members.push_back(Index);
+ Working[Index].Loop = HeaderData.Loop;
+ HeaderData.Loop->Nodes.push_back(Index);
DEBUG(dbgs() << " - loop = " << getBlockName(Header)
<< ": member = " << getBlockName(Index) << "\n");
}
template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInLoops() {
// Visit loops with the deepest first, and the top-level loops last.
- for (const auto &L : make_range(PackagedLoops.rbegin(), PackagedLoops.rend()))
- computeMassInLoop(L->Header);
+ for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L)
+ computeMassInLoop(*L);
}
template <class BT>
-void BlockFrequencyInfoImpl<BT>::computeMassInLoop(const BlockNode &LoopHead) {
+void BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) {
// Compute mass in loop.
- DEBUG(dbgs() << "compute-mass-in-loop: " << getBlockName(LoopHead) << "\n");
+ DEBUG(dbgs() << "compute-mass-in-loop: " << getBlockName(Loop.getHeader())
+ << "\n");
- Working[LoopHead.Index].Mass = BlockMass::getFull();
- propagateMassToSuccessors(LoopHead, LoopHead);
+ Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
+ propagateMassToSuccessors(&Loop, Loop.getHeader());
- for (const BlockNode &M : getLoopPackage(LoopHead).Members)
- propagateMassToSuccessors(LoopHead, M);
+ for (const BlockNode &M : Loop.members())
+ propagateMassToSuccessors(&Loop, M);
- computeLoopScale(LoopHead);
- packageLoop(LoopHead);
+ computeLoopScale(Loop);
+ packageLoop(Loop);
}
template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() {
assert(!Working.empty() && "no blocks in function");
assert(!Working[0].isLoopHeader() && "entry block is a loop header");
- Working[0].Mass = BlockMass::getFull();
+ Working[0].getMass() = BlockMass::getFull();
for (rpot_iterator I = rpot_begin(), IE = rpot_end(); I != IE; ++I) {
// Check for nodes that have been packaged.
BlockNode Node = getNode(I);
- if (Working[Node.Index].hasLoopHeader())
+ if (Working[Node.Index].isPackaged())
continue;
- propagateMassToSuccessors(BlockNode(), Node);
+ propagateMassToSuccessors(nullptr, Node);
}
}
template <class BT>
void
-BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(const BlockNode &LoopHead,
+BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop,
const BlockNode &Node) {
DEBUG(dbgs() << " - node: " << getBlockName(Node) << "\n");
// Calculate probability for successors.
Distribution Dist;
- if (Node != LoopHead && Working[Node.Index].isLoopHeader())
- addLoopSuccessorsToDist(LoopHead, Node, Dist);
- else {
+ if (auto *Loop = Working[Node.Index].getPackagedLoop()) {
+ assert(Loop != OuterLoop && "Cannot propagate mass in a packaged loop");
+ addLoopSuccessorsToDist(OuterLoop, *Loop, Dist);
+ } else {
const BlockT *BB = getBlock(Node);
for (auto SI = Successor::child_begin(BB), SE = Successor::child_end(BB);
SI != SE; ++SI)
// Do not dereference SI, or getEdgeWeight() is linear in the number of
// successors.
- addToDist(Dist, LoopHead, Node, getNode(*SI), BPI->getEdgeWeight(BB, SI));
+ addToDist(Dist, OuterLoop, Node, getNode(*SI),
+ BPI->getEdgeWeight(BB, SI));
}
// Distribute mass to successors, saving exit and backedge data in the
// loop header.
- distributeMass(Node, LoopHead, Dist);
+ distributeMass(Node, OuterLoop, Dist);
}
template <class BT>