/// \brief Data about a loop.
///
- /// Contains the data necessary to represent represent a loop as a
- /// pseudo-node once it's packaged.
+ /// Contains the data necessary to represent a loop as a pseudo-node once it's
+ /// packaged.
struct LoopData {
typedef SmallVector<std::pair<BlockNode, BlockMass>, 4> ExitMap;
typedef SmallVector<BlockNode, 4> NodeList;
- LoopData *Parent; ///< The parent loop.
- bool IsPackaged; ///< Whether this has been packaged.
- uint32_t NumHeaders; ///< Number of headers.
- ExitMap Exits; ///< Successor edges (and weights).
- NodeList Nodes; ///< Header and the members of the loop.
- BlockMass BackedgeMass; ///< Mass returned to loop header.
+ typedef SmallVector<BlockMass, 1> HeaderMassList;
+ LoopData *Parent; ///< The parent loop.
+ bool IsPackaged; ///< Whether this has been packaged.
+ uint32_t NumHeaders; ///< Number of headers.
+ ExitMap Exits; ///< Successor edges (and weights).
+ NodeList Nodes; ///< Header and the members of the loop.
+ HeaderMassList BackedgeMass; ///< Mass returned to each loop header.
BlockMass Mass;
Scaled64 Scale;
LoopData(LoopData *Parent, const BlockNode &Header)
- : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header) {}
+ : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header),
+ BackedgeMass(1) {}
template <class It1, class It2>
LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther,
It2 LastOther)
: Parent(Parent), IsPackaged(false), Nodes(FirstHeader, LastHeader) {
NumHeaders = Nodes.size();
Nodes.insert(Nodes.end(), FirstOther, LastOther);
+ BackedgeMass.resize(NumHeaders);
}
bool isHeader(const BlockNode &Node) const {
if (isIrreducible())
BlockNode getHeader() const { return Nodes[0]; }
bool isIrreducible() const { return NumHeaders > 1; }
+ HeaderMassList::difference_type getHeaderIndex(const BlockNode &B) {
+ assert(isHeader(B) && "this is only valid on loop header blocks");
+ if (isIrreducible())
+ return std::lower_bound(Nodes.begin(), Nodes.begin() + NumHeaders, B) -
+ Nodes.begin();
+ return 0;
+ }
+
NodeList::const_iterator members_begin() const {
return Nodes.begin() + NumHeaders;
}
/// loop.
///
/// This function should only be called when distributing mass. As long as
- /// there are no irreducilbe edges to Node, then it will have complexity
+ /// there are no irreducible edges to Node, then it will have complexity
/// O(1) in this context.
///
/// In general, the complexity is O(L), where L is the number of loop
/// \brief Compute the loop scale for a loop.
void computeLoopScale(LoopData &Loop);
+ /// Adjust the mass of all headers in an irreducible loop.
+ ///
+ /// Initially, irreducible loops are assumed to distribute their mass
+ /// equally among its headers. This can lead to wrong frequency estimates
+ /// since some headers may be executed more frequently than others.
+ ///
+ /// This adjusts header mass distribution so it matches the weights of
+ /// the backedges going into each of the loop headers.
+ void adjustLoopHeaderMass(LoopData &Loop);
+
/// \brief Package up a loop.
void packageLoop(LoopData &Loop);
/// - Distribute the mass accordingly, dithering to minimize mass loss,
/// as described in \a distributeMass().
///
+/// In the case of irreducible loops, instead of a single loop header,
+/// there will be several. The computation of backedge masses is similar
+/// but instead of having a single backedge mass, there will be one
+/// backedge per loop header. In these cases, each backedge will carry
+/// a mass proportional to the edge weights along the corresponding
+/// path.
+///
+/// At the end of propagation, the full mass assigned to the loop will be
+/// distributed among the loop headers proportionally according to the
+/// mass flowing through their backedges.
+///
/// Finally, calculate the loop scale from the accumulated backedge mass.
///
/// 3. Distribute mass in the function (\a computeMassInFunction()).
///
/// It has some known flaws.
///
-/// - Loop scale is limited to 4096 per loop (2^12) to avoid exhausting
-/// BlockFrequency's 64-bit integer precision.
-///
/// - The model of irreducible control flow is a rough approximation.
///
/// Modelling irreducible control flow exactly involves setting up and
/// as sub-loops, rather than arbitrarily shoving the problematic
/// blocks into the headers of the main irreducible SCC.
///
-/// - Backedge frequencies are assumed to be evenly split between the
-/// headers of a given irreducible SCC. Instead, we could track the
-/// backedge mass separately for each header, and adjust their relative
-/// frequencies.
-///
/// - Entry frequencies are assumed to be evenly split between the
/// headers of a given irreducible SCC, which is the only option if we
/// need to compute mass in the SCC before its parent loop. Instead,
///
/// \pre \a computeMassInLoop() has been called for each subloop of \c
/// OuterLoop.
- /// \pre \c Insert points at the the last loop successfully processed by \a
+ /// \pre \c Insert points at the last loop successfully processed by \a
/// computeMassInLoop().
/// \pre \c OuterLoop has irreducible SCCs.
void computeIrreducibleMass(LoopData *OuterLoop,
public:
const FunctionT *getFunction() const { return F; }
- void doFunction(const FunctionT *F, const BranchProbabilityInfoT *BPI,
- const LoopInfoT *LI);
+ void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI,
+ const LoopInfoT &LI);
BlockFrequencyInfoImpl() : BPI(nullptr), LI(nullptr), F(nullptr) {}
using BlockFrequencyInfoImplBase::getEntryFreq;
};
template <class BT>
-void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F,
- const BranchProbabilityInfoT *BPI,
- const LoopInfoT *LI) {
+void BlockFrequencyInfoImpl<BT>::calculate(const FunctionT &F,
+ const BranchProbabilityInfoT &BPI,
+ const LoopInfoT &LI) {
// Save the parameters.
- this->BPI = BPI;
- this->LI = LI;
- this->F = F;
+ this->BPI = &BPI;
+ this->LI = &LI;
+ this->F = &F;
// Clean up left-over data structures.
BlockFrequencyInfoImplBase::clear();
Nodes.clear();
// Initialize.
- DEBUG(dbgs() << "\nblock-frequency: " << F->getName() << "\n================="
- << std::string(F->getName().size(), '=') << "\n");
+ DEBUG(dbgs() << "\nblock-frequency: " << F.getName() << "\n================="
+ << std::string(F.getName().size(), '=') << "\n");
initializeRPOT();
initializeLoops();
- // Visit loops in post-order to find thelocal mass distribution, and then do
+ // Visit loops in post-order to find the local mass distribution, and then do
// the full function.
computeMassInLoops();
computeMassInFunction();
for (const BlockNode &M : Loop.Nodes)
if (!propagateMassToSuccessors(&Loop, M))
llvm_unreachable("unhandled irreducible control flow");
+
+ adjustLoopHeaderMass(Loop);
} else {
Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
if (!propagateMassToSuccessors(&Loop, Loop.getHeader()))