/// 2. Calculate the portion's mass as \a RemMass times P.
/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
/// the current portion's weight and mass.
-///
-/// Mass is distributed in two ways: full distribution and forward
-/// distribution. The latter ignores backedges, and uses the parallel fields
-/// \a RemForwardWeight and \a RemForwardMass.
struct DitheringDistributer {
uint32_t RemWeight;
- uint32_t RemForwardWeight;
-
BlockMass RemMass;
- BlockMass RemForwardMass;
DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
- BlockMass takeLocalMass(uint32_t Weight) {
- (void)takeMass(Weight);
- return takeForwardMass(Weight);
- }
- BlockMass takeExitMass(uint32_t Weight) {
- (void)takeForwardMass(Weight);
- return takeMass(Weight);
- }
- BlockMass takeBackedgeMass(uint32_t Weight) { return takeMass(Weight); }
-
-private:
- BlockMass takeForwardMass(uint32_t Weight);
BlockMass takeMass(uint32_t Weight);
};
}
const BlockMass &Mass) {
Dist.normalize();
RemWeight = Dist.Total;
- RemForwardWeight = Dist.ForwardTotal;
RemMass = Mass;
- RemForwardMass = Dist.ForwardTotal ? Mass : BlockMass();
}
-BlockMass DitheringDistributer::takeForwardMass(uint32_t Weight) {
- // Compute the amount of mass to take.
- assert(Weight && "invalid weight");
- assert(Weight <= RemForwardWeight);
- BlockMass Mass = RemForwardMass * BranchProbability(Weight, RemForwardWeight);
-
- // Decrement totals (dither).
- RemForwardWeight -= Weight;
- RemForwardMass -= Mass;
- return Mass;
-}
BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
assert(Weight && "invalid weight");
assert(Weight <= RemWeight);
W.Amount = Amount;
W.Type = Type;
Weights.push_back(W);
-
- if (Type == Weight::Backedge)
- return;
-
- // Update forward total. Don't worry about overflow here, since then Total
- // will exceed 32-bits and they'll both be recomputed in normalize().
- ForwardTotal += Amount;
}
static void combineWeight(Weight &W, const Weight &OtherW) {
}
assert(W.Type == OtherW.Type);
assert(W.TargetNode == OtherW.TargetNode);
- assert(W.Amount < W.Amount + OtherW.Amount);
+ assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
W.Amount += OtherW.Amount;
}
static void combineWeightsBySorting(WeightList &Weights) {
// Early exit when combined into a single successor.
if (Weights.size() == 1) {
Total = 1;
- ForwardTotal = Weights.front().Type != Weight::Backedge;
Weights.front().Amount = 1;
return;
}
return;
// Recompute the total through accumulation (rather than shifting it) so that
- // it's accurate after shifting. ForwardTotal is dirty here anyway.
+ // it's accurate after shifting.
Total = 0;
- ForwardTotal = 0;
// Sum the weights to each node and shift right if necessary.
for (Weight &W : Weights) {
// Update the total.
Total += W.Amount;
- if (W.Type == Weight::Backedge)
- continue;
-
- // Update the forward total.
- ForwardTotal += W.Amount;
}
assert(Total <= UINT32_MAX);
}
// does not actually clear heap storage.
std::vector<FrequencyData>().swap(Freqs);
std::vector<WorkingData>().swap(Working);
- std::vector<std::unique_ptr<LoopData>>().swap(Loops);
+ Loops.clear();
}
/// \brief Clear all memory not needed downstream.
BFI.Freqs = std::move(SavedFreqs);
}
-/// \brief Get a possibly packaged node.
-///
-/// Get the node currently representing Node, which could be a containing
-/// loop.
-///
-/// This function should only be called when distributing mass. As long as
-/// there are no irreducible edges to Node, then it will have complexity O(1)
-/// in this context.
-///
-/// In general, the complexity is O(L), where L is the number of loop headers
-/// Node has been packaged into. Since this method is called in the context
-/// of distributing mass, L will be the number of loop headers an early exit
-/// edge jumps out of.
-static BlockNode getPackagedNode(const BlockFrequencyInfoImplBase &BFI,
- const BlockNode &Node) {
- assert(Node.isValid());
- if (!BFI.Working[Node.Index].isPackaged())
- return Node;
- if (!BFI.Working[Node.Index].isAPackage())
- return Node;
- return getPackagedNode(BFI, BFI.Working[Node.Index].getContainingHeader());
-}
-
/// \brief Get the appropriate mass for a possible pseudo-node loop package.
///
/// Get appropriate mass for Node. If Node is a loop-header (whose loop has
}
void BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
- const BlockNode &LoopHead,
+ const LoopData *OuterLoop,
const BlockNode &Pred,
const BlockNode &Succ,
uint64_t Weight) {
if (!Weight)
Weight = 1;
+ auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
+ return OuterLoop && OuterLoop->isHeader(Node);
+ };
+
#ifndef NDEBUG
auto debugSuccessor = [&](const char *Type, const BlockNode &Resolved) {
dbgs() << " =>"
<< " [" << Type << "] weight = " << Weight;
- if (Succ != LoopHead)
+ if (!isLoopHeader(Succ))
dbgs() << ", succ = " << getBlockName(Succ);
if (Resolved != Succ)
dbgs() << ", resolved = " << getBlockName(Resolved);
(void)debugSuccessor;
#endif
- if (Succ == LoopHead) {
+ if (isLoopHeader(Succ)) {
DEBUG(debugSuccessor("backedge", Succ));
- Dist.addBackedge(LoopHead, Weight);
+ Dist.addBackedge(OuterLoop->getHeader(), Weight);
return;
}
- BlockNode Resolved = getPackagedNode(*this, Succ);
- assert(Resolved != LoopHead);
+ BlockNode Resolved = getPackagedNode(Succ);
+ assert(!isLoopHeader(Resolved));
- if (Working[Resolved.Index].getContainingHeader() != LoopHead) {
+ if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
DEBUG(debugSuccessor(" exit ", Resolved));
Dist.addExit(Resolved, Weight);
return;
}
void BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
- const BlockNode &LoopHead, const BlockNode &LocalLoopHead,
- Distribution &Dist) {
- LoopData &LoopPackage = getLoopPackage(LocalLoopHead);
- const LoopData::ExitMap &Exits = LoopPackage.Exits;
-
+ const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
// Copy the exit map into Dist.
- for (const auto &I : Exits)
- addToDist(Dist, LoopHead, LocalLoopHead, I.first, I.second.getMass());
+ for (const auto &I : Loop.Exits)
+ addToDist(Dist, OuterLoop, Loop.getHeader(), I.first, I.second.getMass());
// We don't need this map any more. Clear it to prevent quadratic memory
// usage in deeply nested loops with irreducible control flow.
- LoopPackage.Exits.clear();
+ Loop.Exits.clear();
}
/// \brief Get the maximum allowed loop scale.
static Float getMaxLoopScale() { return Float(1, 12); }
/// \brief Compute the loop scale for a loop.
-void BlockFrequencyInfoImplBase::computeLoopScale(const BlockNode &LoopHead) {
+void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
// Compute loop scale.
- DEBUG(dbgs() << "compute-loop-scale: " << getBlockName(LoopHead) << "\n");
+ DEBUG(dbgs() << "compute-loop-scale: " << getBlockName(Loop.getHeader())
+ << "\n");
// LoopScale == 1 / ExitMass
// ExitMass == HeadMass - BackedgeMass
- LoopData &LoopPackage = getLoopPackage(LoopHead);
- BlockMass ExitMass = BlockMass::getFull() - LoopPackage.BackedgeMass;
+ BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
// Block scale stores the inverse of the scale.
- LoopPackage.Scale = ExitMass.toFloat().inverse();
+ Loop.Scale = ExitMass.toFloat().inverse();
DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
- << " - " << LoopPackage.BackedgeMass << ")\n"
- << " - scale = " << LoopPackage.Scale << "\n");
+ << " - " << Loop.BackedgeMass << ")\n"
+ << " - scale = " << Loop.Scale << "\n");
- if (LoopPackage.Scale > getMaxLoopScale()) {
- LoopPackage.Scale = getMaxLoopScale();
+ if (Loop.Scale > getMaxLoopScale()) {
+ Loop.Scale = getMaxLoopScale();
DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
}
}
/// \brief Package up a loop.
-void BlockFrequencyInfoImplBase::packageLoop(const BlockNode &LoopHead) {
- DEBUG(dbgs() << "packaging-loop: " << getBlockName(LoopHead) << "\n");
- auto &PackagedLoop = getLoopPackage(LoopHead);
- PackagedLoop.IsPackaged = true;
+void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
+ DEBUG(dbgs() << "packaging-loop: " << getBlockName(Loop.getHeader()) << "\n");
+ Loop.IsPackaged = true;
DEBUG(for (const BlockNode &M
- : PackagedLoop.Members) {
+ : Loop.members()) {
dbgs() << " - node: " << getBlockName(M.Index) << "\n";
});
}
void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
- const BlockNode &LoopHead,
+ LoopData *OuterLoop,
Distribution &Dist) {
BlockMass Mass = getPackageMass(*this, Source);
- DEBUG(dbgs() << " => mass: " << Mass
- << " ( general | forward )\n");
+ DEBUG(dbgs() << " => mass: " << Mass << "\n");
// Distribute mass to successors as laid out in Dist.
DitheringDistributer D(Dist, Mass);
#ifndef NDEBUG
auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
const char *Desc) {
- dbgs() << " => assign " << M << " (" << D.RemMass << "|"
- << D.RemForwardMass << ")";
+ dbgs() << " => assign " << M << " (" << D.RemMass << ")";
if (Desc)
dbgs() << " [" << Desc << "]";
if (T.isValid())
(void)debugAssign;
#endif
- LoopData *LoopPackage = 0;
- if (LoopHead.isValid())
- LoopPackage = &getLoopPackage(LoopHead);
for (const Weight &W : Dist.Weights) {
- // Check for a local edge (forward and non-exit).
+ // Check for a local edge (non-backedge and non-exit).
+ BlockMass Taken = D.takeMass(W.Amount);
if (W.Type == Weight::Local) {
- BlockMass Local = D.takeLocalMass(W.Amount);
- getPackageMass(*this, W.TargetNode) += Local;
- DEBUG(debugAssign(W.TargetNode, Local, nullptr));
+ getPackageMass(*this, W.TargetNode) += Taken;
+ DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
continue;
}
// Backedges and exits only make sense if we're processing a loop.
- assert(LoopPackage && "backedge or exit outside of loop");
+ assert(OuterLoop && "backedge or exit outside of loop");
// Check for a backedge.
if (W.Type == Weight::Backedge) {
- BlockMass Back = D.takeBackedgeMass(W.Amount);
- LoopPackage->BackedgeMass += Back;
- DEBUG(debugAssign(BlockNode(), Back, "back"));
+ OuterLoop->BackedgeMass += Taken;
+ DEBUG(debugAssign(BlockNode(), Taken, "back"));
continue;
}
// This must be an exit.
assert(W.Type == Weight::Exit);
- BlockMass Exit = D.takeExitMass(W.Amount);
- LoopPackage->Exits.push_back(std::make_pair(W.TargetNode, Exit));
- DEBUG(debugAssign(W.TargetNode, Exit, "exit"));
+ OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
+ DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
}
}
}
}
-static void scaleBlockData(BlockFrequencyInfoImplBase &BFI,
- const BlockNode &Node,
- const LoopData &Loop) {
- Float F = Loop.Mass.toFloat() * Loop.Scale;
-
- Float &Current = BFI.Freqs[Node.Index].Floating;
- Float Updated = Current * F;
-
- DEBUG(dbgs() << " - " << BFI.getBlockName(Node) << ": " << Current << " => "
- << Updated << "\n");
-
- Current = Updated;
-}
-
/// \brief Unwrap a loop package.
///
/// Visits all the members of a loop, adjusting their BlockData according to
/// the loop's pseudo-node.
-static void unwrapLoopPackage(BlockFrequencyInfoImplBase &BFI,
- const BlockNode &Head) {
- assert(Head.isValid());
-
- LoopData &LoopPackage = BFI.getLoopPackage(Head);
- DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getBlockName(Head)
- << ": mass = " << LoopPackage.Mass
- << ", scale = " << LoopPackage.Scale << "\n");
- scaleBlockData(BFI, Head, LoopPackage);
+static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
+ DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getBlockName(Loop.getHeader())
+ << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
+ << "\n");
+ Loop.Scale *= Loop.Mass.toFloat();
+ Loop.IsPackaged = false;
+ DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
  // Propagate the head scale through the loop.  Since members are visited in
  // RPO, the head scale will be updated by the loop scale first, and then the
  // final head scale will be used for updating the rest of the members.
- for (const BlockNode &M : LoopPackage.Members) {
- const FrequencyData &HeadData = BFI.Freqs[Head.Index];
- FrequencyData &Freqs = BFI.Freqs[M.Index];
- Float NewFreq = Freqs.Floating * HeadData.Floating;
- DEBUG(dbgs() << " - " << BFI.getBlockName(M) << ": " << Freqs.Floating
- << " => " << NewFreq << "\n");
- Freqs.Floating = NewFreq;
+ for (const BlockNode &N : Loop.Nodes) {
+ const auto &Working = BFI.Working[N.Index];
+ Float &F = Working.isAPackage() ? BFI.getLoopPackage(N).Scale
+ : BFI.Freqs[N.Index].Floating;
+ Float New = Loop.Scale * F;
+ DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
+ << "\n");
+ F = New;
}
}
-void BlockFrequencyInfoImplBase::finalizeMetrics() {
+void BlockFrequencyInfoImplBase::unwrapLoops() {
// Set initial frequencies from loop-local masses.
for (size_t Index = 0; Index < Working.size(); ++Index)
Freqs[Index].Floating = Working[Index].Mass.toFloat();
+ for (LoopData &Loop : Loops)
+ unwrapLoop(*this, Loop);
+}
+
+void BlockFrequencyInfoImplBase::finalizeMetrics() {
// Unwrap loop packages in reverse post-order, tracking min and max
// frequencies.
auto Min = Float::getLargest();
auto Max = Float::getZero();
for (size_t Index = 0; Index < Working.size(); ++Index) {
- if (Working[Index].isLoopHeader())
- unwrapLoopPackage(*this, BlockNode(Index));
-
- // Update max scale.
+ // Update min/max scale.
Min = std::min(Min, Freqs[Index].Floating);
Max = std::max(Max, Freqs[Index].Floating);
}