//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "block-freq"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "block-freq"
+
//===----------------------------------------------------------------------===//
//
-// PositiveFloat implementation.
+// UnsignedFloat implementation.
//
//===----------------------------------------------------------------------===//
-const int PositiveFloatBase::MaxExponent;
-const int PositiveFloatBase::MinExponent;
+#ifndef _MSC_VER
+const int32_t UnsignedFloatBase::MaxExponent;
+const int32_t UnsignedFloatBase::MinExponent;
+#endif
static void appendDigit(std::string &Str, unsigned D) {
assert(D < 10);
}
static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) {
- assert(E >= PositiveFloatBase::MinExponent);
- assert(E <= PositiveFloatBase::MaxExponent);
+ assert(E >= UnsignedFloatBase::MinExponent);
+ assert(E <= UnsignedFloatBase::MaxExponent);
// Find a new E, but don't let it increase past MaxExponent.
- int LeadingZeros = PositiveFloatBase::countLeadingZeros64(D);
- int NewE = std::min(PositiveFloatBase::MaxExponent, E + 63 - LeadingZeros);
+ int LeadingZeros = UnsignedFloatBase::countLeadingZeros64(D);
+ int NewE = std::min(UnsignedFloatBase::MaxExponent, E + 63 - LeadingZeros);
int Shift = 63 - (NewE - E);
assert(Shift <= LeadingZeros);
- assert(Shift == LeadingZeros || NewE == PositiveFloatBase::MaxExponent);
+ assert(Shift == LeadingZeros || NewE == UnsignedFloatBase::MaxExponent);
D <<= Shift;
E = NewE;
// Check for a denormal.
unsigned AdjustedE = E + 16383;
if (!(D >> 63)) {
- assert(E == PositiveFloatBase::MaxExponent);
+ assert(E == UnsignedFloatBase::MaxExponent);
AdjustedE = 0;
}
return std::string(Chars.begin(), Chars.end());
}
-static std::string stripTrailingZeros(std::string Float) {
+static std::string stripTrailingZeros(const std::string &Float) {
size_t NonZero = Float.find_last_not_of('0');
assert(NonZero != std::string::npos && "no . in floating point string");
return Float.substr(0, NonZero + 1);
}
-std::string PositiveFloatBase::toString(uint64_t D, int16_t E, int Width,
+std::string UnsignedFloatBase::toString(uint64_t D, int16_t E, int Width,
unsigned Precision) {
if (!D)
return "0.0";
return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate));
}
-raw_ostream &PositiveFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E,
+raw_ostream &UnsignedFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E,
int Width, unsigned Precision) {
return OS << toString(D, E, Width, Precision);
}
-void PositiveFloatBase::dump(uint64_t D, int16_t E, int Width) {
+void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) {
print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E
<< "]";
}
return std::make_pair(N, Shift);
}
-std::pair<uint64_t, int16_t> PositiveFloatBase::divide64(uint64_t Dividend,
+std::pair<uint64_t, int16_t> UnsignedFloatBase::divide64(uint64_t Dividend,
uint64_t Divisor) {
// Input should be sanitized.
assert(Divisor);
return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift);
}
-static void addWithCarry(uint64_t &Upper, uint64_t &Lower, uint64_t N) {
- uint64_t NewLower = Lower + (N << 32);
- Upper += (N >> 32) + (NewLower < Lower);
- Lower = NewLower;
-}
-
-std::pair<uint64_t, int16_t> PositiveFloatBase::multiply64(uint64_t L,
+std::pair<uint64_t, int16_t> UnsignedFloatBase::multiply64(uint64_t L,
uint64_t R) {
// Separate into two 32-bit digits (U.L).
uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX;
// Sum into two 64-bit digits.
uint64_t Upper = P1, Lower = P4;
- addWithCarry(Upper, Lower, P2);
- addWithCarry(Upper, Lower, P3);
+ auto addWithCarry = [&](uint64_t N) {
+ uint64_t NewLower = Lower + (N << 32);
+ Upper += (N >> 32) + (NewLower < Lower);
+ Lower = NewLower;
+ };
+ addWithCarry(P2);
+ addWithCarry(P3);
- // Check for the lower 32 bits.
+ // Check whether the upper digit is empty.
if (!Upper)
return std::make_pair(Lower, 0);
//===----------------------------------------------------------------------===//
BlockMass &BlockMass::operator*=(const BranchProbability &P) {
uint32_t N = P.getNumerator(), D = P.getDenominator();
- assert(D || "divide by 0");
- assert(N <= D || "fraction greater than 1");
+ assert(D && "divide by 0");
+ assert(N <= D && "fraction greater than 1");
// Fast path for multiplying by 1.0.
if (!Mass || N == D)
return *this;
}
-PositiveFloat<uint64_t> BlockMass::toFloat() const {
+UnsignedFloat<uint64_t> BlockMass::toFloat() const {
if (isFull())
- return PositiveFloat<uint64_t>(1, 0);
- return PositiveFloat<uint64_t>(getMass() + 1, -64);
+ return UnsignedFloat<uint64_t>(1, 0);
+ return UnsignedFloat<uint64_t>(getMass() + 1, -64);
}
void BlockMass::dump() const { print(dbgs()); }
typedef BlockFrequencyInfoImplBase::Distribution Distribution;
typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
typedef BlockFrequencyInfoImplBase::Float Float;
-typedef BlockFrequencyInfoImplBase::PackagedLoopData PackagedLoopData;
+typedef BlockFrequencyInfoImplBase::LoopData LoopData;
typedef BlockFrequencyInfoImplBase::Weight Weight;
typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
/// 2. Calculate the portion's mass as \a RemMass times P.
/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
/// the current portion's weight and mass.
-///
-/// Mass is distributed in two ways: full distribution and forward
-/// distribution. The latter ignores backedges, and uses the parallel fields
-/// \a RemForwardWeight and \a RemForwardMass.
struct DitheringDistributer {
uint32_t RemWeight;
- uint32_t RemForwardWeight;
-
BlockMass RemMass;
- BlockMass RemForwardMass;
DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
- BlockMass takeLocalMass(uint32_t Weight) {
- (void)takeMass(Weight);
- return takeForwardMass(Weight);
- }
- BlockMass takeExitMass(uint32_t Weight) {
- (void)takeForwardMass(Weight);
- return takeMass(Weight);
- }
- BlockMass takeBackedgeMass(uint32_t Weight) { return takeMass(Weight); }
-
-private:
- BlockMass takeForwardMass(uint32_t Weight);
BlockMass takeMass(uint32_t Weight);
};
}
const BlockMass &Mass) {
Dist.normalize();
RemWeight = Dist.Total;
- RemForwardWeight = Dist.ForwardTotal;
RemMass = Mass;
- RemForwardMass = Dist.ForwardTotal ? Mass : BlockMass();
}
-BlockMass DitheringDistributer::takeForwardMass(uint32_t Weight) {
- // Compute the amount of mass to take.
- assert(Weight && "invalid weight");
- assert(Weight <= RemForwardWeight);
- BlockMass Mass = RemForwardMass * BranchProbability(Weight, RemForwardWeight);
-
- // Decrement totals (dither).
- RemForwardWeight -= Weight;
- RemForwardMass -= Mass;
- return Mass;
-}
BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
assert(Weight && "invalid weight");
assert(Weight <= RemWeight);
W.Amount = Amount;
W.Type = Type;
Weights.push_back(W);
-
- if (Type == Weight::Backedge)
- return;
-
- // Update forward total. Don't worry about overflow here, since then Total
- // will exceed 32-bits and they'll both be recomputed in normalize().
- ForwardTotal += Amount;
}
static void combineWeight(Weight &W, const Weight &OtherW) {
}
assert(W.Type == OtherW.Type);
assert(W.TargetNode == OtherW.TargetNode);
- assert(W.Amount < W.Amount + OtherW.Amount);
+ assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
W.Amount += OtherW.Amount;
}
static void combineWeightsBySorting(WeightList &Weights) {
// Early exit when combined into a single successor.
if (Weights.size() == 1) {
Total = 1;
- ForwardTotal = Weights.front().Type != Weight::Backedge;
Weights.front().Amount = 1;
return;
}
return;
// Recompute the total through accumulation (rather than shifting it) so that
- // it's accurate after shifting. ForwardTotal is dirty here anyway.
+ // it's accurate after shifting.
Total = 0;
- ForwardTotal = 0;
// Sum the weights to each node and shift right if necessary.
for (Weight &W : Weights) {
// Update the total.
Total += W.Amount;
- if (W.Type == Weight::Backedge)
- continue;
-
- // Update the forward total.
- ForwardTotal += W.Amount;
}
assert(Total <= UINT32_MAX);
}
void BlockFrequencyInfoImplBase::clear() {
- *this = BlockFrequencyInfoImplBase();
+ // Swap with a default-constructed std::vector, since std::vector<>::clear()
+ // does not actually clear heap storage.
+ std::vector<FrequencyData>().swap(Freqs);
+ std::vector<WorkingData>().swap(Working);
+ Loops.clear();
}
/// \brief Clear all memory not needed downstream.
BFI.Freqs = std::move(SavedFreqs);
}
-/// \brief Get a possibly packaged node.
-///
-/// Get the node currently representing Node, which could be a containing
-/// loop.
-///
-/// This function should only be called when distributing mass. As long as
-/// there are no irreducilbe edges to Node, then it will have complexity O(1)
-/// in this context.
-///
-/// In general, the complexity is O(L), where L is the number of loop headers
-/// Node has been packaged into. Since this method is called in the context
-/// of distributing mass, L will be the number of loop headers an early exit
-/// edge jumps out of.
-static BlockNode getPackagedNode(const BlockFrequencyInfoImplBase &BFI,
- const BlockNode &Node) {
- assert(Node.isValid());
- if (!BFI.Working[Node.Index].IsPackaged)
- return Node;
- if (!BFI.Working[Node.Index].ContainingLoop.isValid())
- return Node;
- return getPackagedNode(BFI, BFI.Working[Node.Index].ContainingLoop);
-}
-
/// \brief Get the appropriate mass for a possible pseudo-node loop package.
///
/// Get appropriate mass for Node. If Node is a loop-header (whose loop has
static BlockMass &getPackageMass(BlockFrequencyInfoImplBase &BFI,
const BlockNode &Node) {
assert(Node.isValid());
- assert(!BFI.Working[Node.Index].IsPackaged);
- if (!BFI.Working[Node.Index].IsAPackage)
+ assert(!BFI.Working[Node.Index].isPackaged());
+ if (!BFI.Working[Node.Index].isAPackage())
return BFI.Working[Node.Index].Mass;
return BFI.getLoopPackage(Node).Mass;
}
void BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
- const BlockNode &LoopHead,
+ const LoopData *OuterLoop,
const BlockNode &Pred,
const BlockNode &Succ,
uint64_t Weight) {
if (!Weight)
Weight = 1;
+ auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
+ return OuterLoop && OuterLoop->isHeader(Node);
+ };
+
#ifndef NDEBUG
auto debugSuccessor = [&](const char *Type, const BlockNode &Resolved) {
dbgs() << " =>"
<< " [" << Type << "] weight = " << Weight;
- if (Succ != LoopHead)
+ if (!isLoopHeader(Succ))
dbgs() << ", succ = " << getBlockName(Succ);
if (Resolved != Succ)
dbgs() << ", resolved = " << getBlockName(Resolved);
(void)debugSuccessor;
#endif
- if (Succ == LoopHead) {
+ if (isLoopHeader(Succ)) {
DEBUG(debugSuccessor("backedge", Succ));
- Dist.addBackedge(LoopHead, Weight);
+ Dist.addBackedge(OuterLoop->getHeader(), Weight);
return;
}
- BlockNode Resolved = getPackagedNode(*this, Succ);
- assert(Resolved != LoopHead);
+ BlockNode Resolved = getPackagedNode(Succ);
+ assert(!isLoopHeader(Resolved));
- if (Working[Resolved.Index].ContainingLoop != LoopHead) {
+ if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
DEBUG(debugSuccessor(" exit ", Resolved));
Dist.addExit(Resolved, Weight);
return;
}
- if (!LoopHead.isValid() && Resolved < Pred) {
+ if (Resolved < Pred) {
// Irreducible backedge. Skip this edge in the distribution.
DEBUG(debugSuccessor("skipped ", Resolved));
return;
}
void BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
- const BlockNode &LoopHead, const BlockNode &LocalLoopHead,
- Distribution &Dist) {
- PackagedLoopData &LoopPackage = getLoopPackage(LocalLoopHead);
- const PackagedLoopData::ExitMap &Exits = LoopPackage.Exits;
-
+ const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
// Copy the exit map into Dist.
- for (const auto &I : Exits)
- addToDist(Dist, LoopHead, LocalLoopHead, I.first, I.second.getMass());
+ for (const auto &I : Loop.Exits)
+ addToDist(Dist, OuterLoop, Loop.getHeader(), I.first, I.second.getMass());
// We don't need this map any more. Clear it to prevent quadratic memory
// usage in deeply nested loops with irreducible control flow.
- LoopPackage.Exits.clear();
+ Loop.Exits.clear();
}
/// \brief Get the maximum allowed loop scale.
///
-/// Gives the maximum number of estimated iterations allowed for a loop.
-/// Downstream users have trouble with very large numbers (even within
-/// 64-bits). Perhaps they can be changed to use PositiveFloat.
-///
-/// TODO: change downstream users so that this can be increased or removed.
+/// Gives the maximum number of estimated iterations allowed for a loop. Very
+/// large numbers cause problems downstream (even within 64-bits).
static Float getMaxLoopScale() { return Float(1, 12); }
/// \brief Compute the loop scale for a loop.
-void BlockFrequencyInfoImplBase::computeLoopScale(const BlockNode &LoopHead) {
+void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
// Compute loop scale.
- DEBUG(dbgs() << "compute-loop-scale: " << getBlockName(LoopHead) << "\n");
+ DEBUG(dbgs() << "compute-loop-scale: " << getBlockName(Loop.getHeader())
+ << "\n");
// LoopScale == 1 / ExitMass
// ExitMass == HeadMass - BackedgeMass
- PackagedLoopData &LoopPackage = getLoopPackage(LoopHead);
- BlockMass ExitMass = BlockMass::getFull() - LoopPackage.BackedgeMass;
+ BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
// Block scale stores the inverse of the scale.
- LoopPackage.Scale = ExitMass.toFloat().inverse();
+ Loop.Scale = ExitMass.toFloat().inverse();
DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
- << " - " << LoopPackage.BackedgeMass << ")\n"
- << " - scale = " << LoopPackage.Scale << "\n");
+ << " - " << Loop.BackedgeMass << ")\n"
+ << " - scale = " << Loop.Scale << "\n");
- if (LoopPackage.Scale > getMaxLoopScale()) {
- LoopPackage.Scale = getMaxLoopScale();
+ if (Loop.Scale > getMaxLoopScale()) {
+ Loop.Scale = getMaxLoopScale();
DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
}
}
/// \brief Package up a loop.
-void BlockFrequencyInfoImplBase::packageLoop(const BlockNode &LoopHead) {
- DEBUG(dbgs() << "packaging-loop: " << getBlockName(LoopHead) << "\n");
- Working[LoopHead.Index].IsAPackage = true;
- for (const BlockNode &M : getLoopPackage(LoopHead).Members) {
- DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
- Working[M.Index].IsPackaged = true;
- }
+void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
+ DEBUG(dbgs() << "packaging-loop: " << getBlockName(Loop.getHeader()) << "\n");
+ Loop.IsPackaged = true;
+ DEBUG(for (const BlockNode &M
+ : Loop.members()) {
+ dbgs() << " - node: " << getBlockName(M.Index) << "\n";
+ });
}
void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
- const BlockNode &LoopHead,
+ LoopData *OuterLoop,
Distribution &Dist) {
BlockMass Mass = getPackageMass(*this, Source);
- DEBUG(dbgs() << " => mass: " << Mass
- << " ( general | forward )\n");
+ DEBUG(dbgs() << " => mass: " << Mass << "\n");
// Distribute mass to successors as laid out in Dist.
DitheringDistributer D(Dist, Mass);
#ifndef NDEBUG
auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
const char *Desc) {
- dbgs() << " => assign " << M << " (" << D.RemMass << "|"
- << D.RemForwardMass << ")";
+ dbgs() << " => assign " << M << " (" << D.RemMass << ")";
if (Desc)
dbgs() << " [" << Desc << "]";
if (T.isValid())
(void)debugAssign;
#endif
- PackagedLoopData *LoopPackage = 0;
- if (LoopHead.isValid())
- LoopPackage = &getLoopPackage(LoopHead);
for (const Weight &W : Dist.Weights) {
- // Check for a local edge (forward and non-exit).
+ // Check for a local edge (non-backedge and non-exit).
+ BlockMass Taken = D.takeMass(W.Amount);
if (W.Type == Weight::Local) {
- BlockMass Local = D.takeLocalMass(W.Amount);
- getPackageMass(*this, W.TargetNode) += Local;
- DEBUG(debugAssign(W.TargetNode, Local, nullptr));
+ getPackageMass(*this, W.TargetNode) += Taken;
+ DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
continue;
}
// Backedges and exits only make sense if we're processing a loop.
- assert(LoopPackage && "backedge or exit outside of loop");
+ assert(OuterLoop && "backedge or exit outside of loop");
// Check for a backedge.
if (W.Type == Weight::Backedge) {
- BlockMass Back = D.takeBackedgeMass(W.Amount);
- LoopPackage->BackedgeMass += Back;
- DEBUG(debugAssign(BlockNode(), Back, "back"));
+ OuterLoop->BackedgeMass += Taken;
+ DEBUG(debugAssign(BlockNode(), Taken, "back"));
continue;
}
// This must be an exit.
assert(W.Type == Weight::Exit);
- BlockMass Exit = D.takeExitMass(W.Amount);
- LoopPackage->Exits.push_back(std::make_pair(W.TargetNode, Exit));
- DEBUG(debugAssign(W.TargetNode, Exit, "exit"));
+ OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
+ DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
}
}
}
}
-static void scaleBlockData(BlockFrequencyInfoImplBase &BFI,
- const BlockNode &Node,
- const PackagedLoopData &Loop) {
- Float F = Loop.Mass.toFloat() * Loop.Scale;
-
- Float &Current = BFI.Freqs[Node.Index].Floating;
- Float Updated = Current * F;
-
- DEBUG(dbgs() << " - " << BFI.getBlockName(Node) << ": " << Current << " => "
- << Updated << "\n");
-
- Current = Updated;
-}
-
/// \brief Unwrap a loop package.
///
/// Visits all the members of a loop, adjusting their BlockData according to
/// the loop's pseudo-node.
-static void unwrapLoopPackage(BlockFrequencyInfoImplBase &BFI,
- const BlockNode &Head) {
- assert(Head.isValid());
-
- PackagedLoopData &LoopPackage = BFI.getLoopPackage(Head);
- DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getBlockName(Head)
- << ": mass = " << LoopPackage.Mass
- << ", scale = " << LoopPackage.Scale << "\n");
- scaleBlockData(BFI, Head, LoopPackage);
+static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
+ DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getBlockName(Loop.getHeader())
+ << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
+ << "\n");
+ Loop.Scale *= Loop.Mass.toFloat();
+ Loop.IsPackaged = false;
+ DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
// Propagate the head scale through the loop. Since members are visited in
// RPO, the head scale will be updated by the loop scale first, and then the
// final head scale will be used for updated the rest of the members.
- for (const BlockNode &M : LoopPackage.Members) {
- const FrequencyData &HeadData = BFI.Freqs[Head.Index];
- FrequencyData &Freqs = BFI.Freqs[M.Index];
- Float NewFreq = Freqs.Floating * HeadData.Floating;
- DEBUG(dbgs() << " - " << BFI.getBlockName(M) << ": " << Freqs.Floating
- << " => " << NewFreq << "\n");
- Freqs.Floating = NewFreq;
+ for (const BlockNode &N : Loop.Nodes) {
+ const auto &Working = BFI.Working[N.Index];
+ Float &F = Working.isAPackage() ? BFI.getLoopPackage(N).Scale
+ : BFI.Freqs[N.Index].Floating;
+ Float New = Loop.Scale * F;
+ DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
+ << "\n");
+ F = New;
}
}
-void BlockFrequencyInfoImplBase::finalizeMetrics() {
+void BlockFrequencyInfoImplBase::unwrapLoops() {
// Set initial frequencies from loop-local masses.
for (size_t Index = 0; Index < Working.size(); ++Index)
Freqs[Index].Floating = Working[Index].Mass.toFloat();
+ for (LoopData &Loop : Loops)
+ unwrapLoop(*this, Loop);
+}
+
+void BlockFrequencyInfoImplBase::finalizeMetrics() {
// Unwrap loop packages in reverse post-order, tracking min and max
// frequencies.
auto Min = Float::getLargest();
auto Max = Float::getZero();
for (size_t Index = 0; Index < Working.size(); ++Index) {
- if (Working[Index].isLoopHeader())
- unwrapLoopPackage(*this, BlockNode(Index));
-
- // Update max scale.
+ // Update min/max scale.
Min = std::min(Min, Freqs[Index].Floating);
Max = std::max(Max, Freqs[Index].Floating);
}