/// 2. Calculate the portion's mass as \a RemMass times P.
/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
/// the current portion's weight and mass.
-///
-/// Mass is distributed in two ways: full distribution and forward
-/// distribution. The latter ignores backedges, and uses the parallel fields
-/// \a RemForwardWeight and \a RemForwardMass.
struct DitheringDistributer {
uint32_t RemWeight;
- uint32_t RemForwardWeight;
-
BlockMass RemMass;
- BlockMass RemForwardMass;
DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
- BlockMass takeLocalMass(uint32_t Weight) {
- (void)takeMass(Weight);
- return takeForwardMass(Weight);
- }
- BlockMass takeExitMass(uint32_t Weight) {
- (void)takeForwardMass(Weight);
- return takeMass(Weight);
- }
- BlockMass takeBackedgeMass(uint32_t Weight) { return takeMass(Weight); }
-
-private:
- BlockMass takeForwardMass(uint32_t Weight);
BlockMass takeMass(uint32_t Weight);
};
}
const BlockMass &Mass) {
Dist.normalize();
RemWeight = Dist.Total;
- RemForwardWeight = Dist.ForwardTotal;
RemMass = Mass;
- RemForwardMass = Dist.ForwardTotal ? Mass : BlockMass();
}
-BlockMass DitheringDistributer::takeForwardMass(uint32_t Weight) {
- // Compute the amount of mass to take.
- assert(Weight && "invalid weight");
- assert(Weight <= RemForwardWeight);
- BlockMass Mass = RemForwardMass * BranchProbability(Weight, RemForwardWeight);
-
- // Decrement totals (dither).
- RemForwardWeight -= Weight;
- RemForwardMass -= Mass;
- return Mass;
-}
BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
assert(Weight && "invalid weight");
assert(Weight <= RemWeight);
W.Amount = Amount;
W.Type = Type;
Weights.push_back(W);
-
- if (Type == Weight::Backedge)
- return;
-
- // Update forward total. Don't worry about overflow here, since then Total
- // will exceed 32-bits and they'll both be recomputed in normalize().
- ForwardTotal += Amount;
}
static void combineWeight(Weight &W, const Weight &OtherW) {
}
assert(W.Type == OtherW.Type);
assert(W.TargetNode == OtherW.TargetNode);
- assert(W.Amount < W.Amount + OtherW.Amount);
+ assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
W.Amount += OtherW.Amount;
}
static void combineWeightsBySorting(WeightList &Weights) {
// Early exit when combined into a single successor.
if (Weights.size() == 1) {
Total = 1;
- ForwardTotal = Weights.front().Type != Weight::Backedge;
Weights.front().Amount = 1;
return;
}
return;
// Recompute the total through accumulation (rather than shifting it) so that
- // it's accurate after shifting. ForwardTotal is dirty here anyway.
+ // it's accurate after shifting.
Total = 0;
- ForwardTotal = 0;
// Sum the weights to each node and shift right if necessary.
for (Weight &W : Weights) {
// Update the total.
Total += W.Amount;
- if (W.Type == Weight::Backedge)
- continue;
-
- // Update the forward total.
- ForwardTotal += W.Amount;
}
assert(Total <= UINT32_MAX);
}
BFI.Freqs = std::move(SavedFreqs);
}
-/// \brief Get a possibly packaged node.
-///
-/// Get the node currently representing Node, which could be a containing
-/// loop.
-///
-/// This function should only be called when distributing mass. As long as
-/// there are no irreducilbe edges to Node, then it will have complexity O(1)
-/// in this context.
-///
-/// In general, the complexity is O(L), where L is the number of loop headers
-/// Node has been packaged into. Since this method is called in the context
-/// of distributing mass, L will be the number of loop headers an early exit
-/// edge jumps out of.
-static BlockNode getPackagedNode(const BlockFrequencyInfoImplBase &BFI,
- const BlockNode &Node) {
- assert(Node.isValid());
- if (!BFI.Working[Node.Index].isPackaged())
- return Node;
- if (!BFI.Working[Node.Index].isAPackage())
- return Node;
- return getPackagedNode(BFI, BFI.Working[Node.Index].getContainingHeader());
-}
-
/// \brief Get the appropriate mass for a possible pseudo-node loop package.
///
/// Get appropriate mass for Node. If Node is a loop-header (whose loop has
Dist.addBackedge(OuterLoop->getHeader(), Weight);
return;
}
- BlockNode Resolved = getPackagedNode(*this, Succ);
+ BlockNode Resolved = getPackagedNode(Succ);
assert(!isLoopHeader(Resolved));
if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
DEBUG(dbgs() << "packaging-loop: " << getBlockName(Loop.getHeader()) << "\n");
Loop.IsPackaged = true;
DEBUG(for (const BlockNode &M
- : Loop.Members) {
+ : Loop.members()) {
dbgs() << " - node: " << getBlockName(M.Index) << "\n";
});
}
LoopData *OuterLoop,
Distribution &Dist) {
BlockMass Mass = getPackageMass(*this, Source);
- DEBUG(dbgs() << " => mass: " << Mass
- << " ( general | forward )\n");
+ DEBUG(dbgs() << " => mass: " << Mass << "\n");
// Distribute mass to successors as laid out in Dist.
DitheringDistributer D(Dist, Mass);
#ifndef NDEBUG
auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
const char *Desc) {
- dbgs() << " => assign " << M << " (" << D.RemMass << "|"
- << D.RemForwardMass << ")";
+ dbgs() << " => assign " << M << " (" << D.RemMass << ")";
if (Desc)
dbgs() << " [" << Desc << "]";
if (T.isValid())
#endif
for (const Weight &W : Dist.Weights) {
- // Check for a local edge (forward and non-exit).
+ // Check for a local edge (non-backedge and non-exit).
+ BlockMass Taken = D.takeMass(W.Amount);
if (W.Type == Weight::Local) {
- BlockMass Local = D.takeLocalMass(W.Amount);
- getPackageMass(*this, W.TargetNode) += Local;
- DEBUG(debugAssign(W.TargetNode, Local, nullptr));
+ getPackageMass(*this, W.TargetNode) += Taken;
+ DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
continue;
}
// Check for a backedge.
if (W.Type == Weight::Backedge) {
- BlockMass Back = D.takeBackedgeMass(W.Amount);
- OuterLoop->BackedgeMass += Back;
- DEBUG(debugAssign(BlockNode(), Back, "back"));
+ OuterLoop->BackedgeMass += Taken;
+ DEBUG(debugAssign(BlockNode(), Taken, "back"));
continue;
}
// This must be an exit.
assert(W.Type == Weight::Exit);
- BlockMass Exit = D.takeExitMass(W.Amount);
- OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Exit));
- DEBUG(debugAssign(W.TargetNode, Exit, "exit"));
+ OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
+ DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
}
}
}
}
-static void scaleBlockData(BlockFrequencyInfoImplBase &BFI,
- const BlockNode &Node,
- const LoopData &Loop) {
- Float F = Loop.Mass.toFloat() * Loop.Scale;
-
- Float &Current = BFI.Freqs[Node.Index].Floating;
- Float Updated = Current * F;
-
- DEBUG(dbgs() << " - " << BFI.getBlockName(Node) << ": " << Current << " => "
- << Updated << "\n");
-
- Current = Updated;
-}
-
/// \brief Unwrap a loop package.
///
/// Visits all the members of a loop, adjusting their BlockData according to
/// the loop's pseudo-node.
-static void unwrapLoopPackage(BlockFrequencyInfoImplBase &BFI,
- const BlockNode &Head) {
- assert(Head.isValid());
-
- LoopData &LoopPackage = BFI.getLoopPackage(Head);
- DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getBlockName(Head)
- << ": mass = " << LoopPackage.Mass
- << ", scale = " << LoopPackage.Scale << "\n");
- scaleBlockData(BFI, Head, LoopPackage);
+static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
+ DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getBlockName(Loop.getHeader())
+ << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
+ << "\n");
+ Loop.Scale *= Loop.Mass.toFloat();
+ Loop.IsPackaged = false;
+ DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
// Propagate the head scale through the loop. Since members are visited in
// RPO, the head scale will be updated by the loop scale first, and then the
// final head scale will be used for updated the rest of the members.
- for (const BlockNode &M : LoopPackage.Members) {
- const FrequencyData &HeadData = BFI.Freqs[Head.Index];
- FrequencyData &Freqs = BFI.Freqs[M.Index];
- Float NewFreq = Freqs.Floating * HeadData.Floating;
- DEBUG(dbgs() << " - " << BFI.getBlockName(M) << ": " << Freqs.Floating
- << " => " << NewFreq << "\n");
- Freqs.Floating = NewFreq;
+ for (const BlockNode &N : Loop.Nodes) {
+ const auto &Working = BFI.Working[N.Index];
+ Float &F = Working.isAPackage() ? BFI.getLoopPackage(N).Scale
+ : BFI.Freqs[N.Index].Floating;
+ Float New = Loop.Scale * F;
+ DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
+ << "\n");
+ F = New;
}
}
-void BlockFrequencyInfoImplBase::finalizeMetrics() {
+void BlockFrequencyInfoImplBase::unwrapLoops() {
// Set initial frequencies from loop-local masses.
for (size_t Index = 0; Index < Working.size(); ++Index)
Freqs[Index].Floating = Working[Index].Mass.toFloat();
+ for (LoopData &Loop : Loops)
+ unwrapLoop(*this, Loop);
+}
+
+void BlockFrequencyInfoImplBase::finalizeMetrics() {
// Unwrap loop packages in reverse post-order, tracking min and max
// frequencies.
auto Min = Float::getLargest();
auto Max = Float::getZero();
for (size_t Index = 0; Index < Working.size(); ++Index) {
- if (Working[Index].isLoopHeader())
- unwrapLoopPackage(*this, BlockNode(Index));
-
- // Update max scale.
+ // Update min/max scale.
Min = std::min(Min, Freqs[Index].Floating);
Max = std::max(Max, Freqs[Index].Floating);
}