#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <vector>
+#include <list>
+
+#define DEBUG_TYPE "block-freq"
//===----------------------------------------------------------------------===//
//
-// PositiveFloat definition.
+// UnsignedFloat definition.
//
// TODO: Make this private to BlockFrequencyInfoImpl or delete.
//
//===----------------------------------------------------------------------===//
namespace llvm {
-class PositiveFloatBase {
+class UnsignedFloatBase {
public:
static const int32_t MaxExponent = 16383;
static const int32_t MinExponent = -16382;
}
};
-/// \brief Simple representation of a positive floating point.
+/// \brief Simple representation of an unsigned floating point.
///
-/// PositiveFloat is a positive floating point number. It uses simple
+/// UnsignedFloat is an unsigned floating point number. It uses simple
/// saturation arithmetic, and every operation is well-defined for every value.
///
/// The number is split into a signed exponent and unsigned digits. The number
/// form, so the same number can be represented by many bit representations
/// (it's always in "denormal" mode).
///
-/// PositiveFloat is templated on the underlying integer type for digits, which
+/// UnsignedFloat is templated on the underlying integer type for digits, which
/// is expected to be one of uint64_t, uint32_t, uint16_t or uint8_t.
///
-/// Unlike builtin floating point types, PositiveFloat is portable.
+/// Unlike builtin floating point types, UnsignedFloat is portable.
///
-/// Unlike APFloat, PositiveFloat does not model architecture floating point
+/// Unlike APFloat, UnsignedFloat does not model architecture floating point
/// behaviour (this should make it a little faster), and implements most
/// operators (this makes it usable).
///
-/// PositiveFloat is totally ordered. However, there is no canonical form, so
+/// UnsignedFloat is totally ordered. However, there is no canonical form, so
/// there are multiple representations of most scalars. E.g.:
///
-/// PositiveFloat(8u, 0) == PositiveFloat(4u, 1)
-/// PositiveFloat(4u, 1) == PositiveFloat(2u, 2)
-/// PositiveFloat(2u, 2) == PositiveFloat(1u, 3)
+/// UnsignedFloat(8u, 0) == UnsignedFloat(4u, 1)
+/// UnsignedFloat(4u, 1) == UnsignedFloat(2u, 2)
+/// UnsignedFloat(2u, 2) == UnsignedFloat(1u, 3)
///
-/// PositiveFloat implements most arithmetic operations. Precision is kept
+/// UnsignedFloat implements most arithmetic operations. Precision is kept
/// where possible. Uses simple saturation arithmetic, so that operations
/// saturate to 0.0 or getLargest() rather than under or overflowing. It has
/// some extra arithmetic for unit inversion. 0.0/0.0 is defined to be 0.0.
/// both implemented, and both interpret negative shifts as positive shifts in
/// the opposite direction.
///
-/// Future work might extract most of the implementation into a base class
-/// (e.g., \c Float) that has an \c IsSigned template parameter. The initial
-/// use case for this only needed positive semantics, but it wouldn't take much
-/// work to extend.
-///
/// Exponents are limited to the range accepted by x87 long double. This makes
/// it trivial to add functionality to convert to APFloat (this is already
/// relied on for the implementation of printing).
-template <class DigitsT> class PositiveFloat : PositiveFloatBase {
+///
+/// The current plan is to gut this and make the necessary parts of it (even
+/// more) private to BlockFrequencyInfo.
+template <class DigitsT> class UnsignedFloat : UnsignedFloatBase {
public:
static_assert(!std::numeric_limits<DigitsT>::is_signed,
"only unsigned floats supported");
int16_t Exponent;
public:
- PositiveFloat() : Digits(0), Exponent(0) {}
+ UnsignedFloat() : Digits(0), Exponent(0) {}
- PositiveFloat(DigitsType Digits, int16_t Exponent)
+ UnsignedFloat(DigitsType Digits, int16_t Exponent)
: Digits(Digits), Exponent(Exponent) {}
private:
- PositiveFloat(const std::pair<uint64_t, int16_t> &X)
+ UnsignedFloat(const std::pair<uint64_t, int16_t> &X)
: Digits(X.first), Exponent(X.second) {}
public:
- static PositiveFloat getZero() { return PositiveFloat(0, 0); }
- static PositiveFloat getOne() { return PositiveFloat(1, 0); }
- static PositiveFloat getLargest() {
- return PositiveFloat(DigitsLimits::max(), MaxExponent);
+ static UnsignedFloat getZero() { return UnsignedFloat(0, 0); }
+ static UnsignedFloat getOne() { return UnsignedFloat(1, 0); }
+ static UnsignedFloat getLargest() {
+ return UnsignedFloat(DigitsLimits::max(), MaxExponent);
}
- static PositiveFloat getFloat(uint64_t N) { return adjustToWidth(N, 0); }
- static PositiveFloat getInverseFloat(uint64_t N) {
+ static UnsignedFloat getFloat(uint64_t N) { return adjustToWidth(N, 0); }
+ static UnsignedFloat getInverseFloat(uint64_t N) {
return getFloat(N).invert();
}
- static PositiveFloat getFraction(DigitsType N, DigitsType D) {
+ static UnsignedFloat getFraction(DigitsType N, DigitsType D) {
return getQuotient(N, D);
}
/// Get the lg ceiling. lg 0 is defined to be INT32_MIN.
int32_t lgCeiling() const { return extractLgCeiling(lgImpl()); }
- bool operator==(const PositiveFloat &X) const { return compare(X) == 0; }
- bool operator<(const PositiveFloat &X) const { return compare(X) < 0; }
- bool operator!=(const PositiveFloat &X) const { return compare(X) != 0; }
- bool operator>(const PositiveFloat &X) const { return compare(X) > 0; }
- bool operator<=(const PositiveFloat &X) const { return compare(X) <= 0; }
- bool operator>=(const PositiveFloat &X) const { return compare(X) >= 0; }
+ bool operator==(const UnsignedFloat &X) const { return compare(X) == 0; }
+ bool operator<(const UnsignedFloat &X) const { return compare(X) < 0; }
+ bool operator!=(const UnsignedFloat &X) const { return compare(X) != 0; }
+ bool operator>(const UnsignedFloat &X) const { return compare(X) > 0; }
+ bool operator<=(const UnsignedFloat &X) const { return compare(X) <= 0; }
+ bool operator>=(const UnsignedFloat &X) const { return compare(X) >= 0; }
bool operator!() const { return isZero(); }
/// 65432198.7654... => 65432198.77
/// 5432198.7654... => 5432198.765
std::string toString(unsigned Precision = DefaultPrecision) {
- return PositiveFloatBase::toString(Digits, Exponent, Width, Precision);
+ return UnsignedFloatBase::toString(Digits, Exponent, Width, Precision);
}
/// \brief Print a decimal representation.
/// Print a string. See toString for documentation.
raw_ostream &print(raw_ostream &OS,
unsigned Precision = DefaultPrecision) const {
- return PositiveFloatBase::print(OS, Digits, Exponent, Width, Precision);
+ return UnsignedFloatBase::print(OS, Digits, Exponent, Width, Precision);
}
- void dump() const { return PositiveFloatBase::dump(Digits, Exponent, Width); }
+ void dump() const { return UnsignedFloatBase::dump(Digits, Exponent, Width); }
- PositiveFloat &operator+=(const PositiveFloat &X);
- PositiveFloat &operator-=(const PositiveFloat &X);
- PositiveFloat &operator*=(const PositiveFloat &X);
- PositiveFloat &operator/=(const PositiveFloat &X);
- PositiveFloat &operator<<=(int16_t Shift) { shiftLeft(Shift); return *this; }
- PositiveFloat &operator>>=(int16_t Shift) { shiftRight(Shift); return *this; }
+ UnsignedFloat &operator+=(const UnsignedFloat &X);
+ UnsignedFloat &operator-=(const UnsignedFloat &X);
+ UnsignedFloat &operator*=(const UnsignedFloat &X);
+ UnsignedFloat &operator/=(const UnsignedFloat &X);
+ UnsignedFloat &operator<<=(int16_t Shift) { shiftLeft(Shift); return *this; }
+ UnsignedFloat &operator>>=(int16_t Shift) { shiftRight(Shift); return *this; }
private:
void shiftLeft(int32_t Shift);
///
/// The value that compares smaller will lose precision, and possibly become
/// \a isZero().
- PositiveFloat matchExponents(PositiveFloat X);
+ UnsignedFloat matchExponents(UnsignedFloat X);
/// \brief Increase exponent to match another float.
///
/// Increases \c this to have an exponent matching \c X. May decrease the
/// exponent of \c X in the process, and \c this may possibly become \a
/// isZero().
- void increaseExponentToMatch(PositiveFloat &X, int32_t ExponentDiff);
+ void increaseExponentToMatch(UnsignedFloat &X, int32_t ExponentDiff);
public:
/// \brief Scale a large number accurately.
return joinSigned(scaleByInverse(Unsigned.first), Unsigned.second);
}
- int compare(const PositiveFloat &X) const;
+ int compare(const UnsignedFloat &X) const;
int compareTo(uint64_t N) const {
- PositiveFloat Float = getFloat(N);
+ UnsignedFloat Float = getFloat(N);
int Compare = compare(Float);
if (Width == 64 || Compare != 0)
return Compare;
}
int compareTo(int64_t N) const { return N < 0 ? 1 : compareTo(uint64_t(N)); }
- PositiveFloat &invert() { return *this = PositiveFloat::getFloat(1) / *this; }
- PositiveFloat inverse() const { return PositiveFloat(*this).invert(); }
+ UnsignedFloat &invert() { return *this = UnsignedFloat::getFloat(1) / *this; }
+ UnsignedFloat inverse() const { return UnsignedFloat(*this).invert(); }
private:
- static PositiveFloat getProduct(DigitsType L, DigitsType R);
- static PositiveFloat getQuotient(DigitsType Dividend, DigitsType Divisor);
+ static UnsignedFloat getProduct(DigitsType L, DigitsType R);
+ static UnsignedFloat getQuotient(DigitsType Dividend, DigitsType Divisor);
std::pair<int32_t, int> lgImpl() const;
static int countLeadingZerosWidth(DigitsType Digits) {
return countLeadingZeros32(Digits) + Width - 32;
}
- static PositiveFloat adjustToWidth(uint64_t N, int32_t S) {
+ static UnsignedFloat adjustToWidth(uint64_t N, int32_t S) {
assert(S >= MinExponent);
assert(S <= MaxExponent);
if (Width == 64 || N <= DigitsLimits::max())
- return PositiveFloat(N, S);
+ return UnsignedFloat(N, S);
// Shift right.
int Shift = 64 - Width - countLeadingZeros64(N);
// Round.
assert(S + Shift <= MaxExponent);
- return getRounded(PositiveFloat(Shifted, S + Shift),
+ return getRounded(UnsignedFloat(Shifted, S + Shift),
N & UINT64_C(1) << (Shift - 1));
}
- static PositiveFloat getRounded(PositiveFloat P, bool Round) {
+ static UnsignedFloat getRounded(UnsignedFloat P, bool Round) {
if (!Round)
return P;
if (P.Digits == DigitsLimits::max())
// Careful of overflow in the exponent.
- return PositiveFloat(1, P.Exponent) <<= Width;
- return PositiveFloat(P.Digits + 1, P.Exponent);
+ return UnsignedFloat(1, P.Exponent) <<= Width;
+ return UnsignedFloat(P.Digits + 1, P.Exponent);
}
};
-#define POSITIVE_FLOAT_BOP(op, base) \
+#define UNSIGNED_FLOAT_BOP(op, base) \
template <class DigitsT> \
- PositiveFloat<DigitsT> operator op(const PositiveFloat<DigitsT> &L, \
- const PositiveFloat<DigitsT> &R) { \
- return PositiveFloat<DigitsT>(L) base R; \
+ UnsignedFloat<DigitsT> operator op(const UnsignedFloat<DigitsT> &L, \
+ const UnsignedFloat<DigitsT> &R) { \
+ return UnsignedFloat<DigitsT>(L) base R; \
}
-POSITIVE_FLOAT_BOP(+, += )
-POSITIVE_FLOAT_BOP(-, -= )
-POSITIVE_FLOAT_BOP(*, *= )
-POSITIVE_FLOAT_BOP(/, /= )
-POSITIVE_FLOAT_BOP(<<, <<= )
-POSITIVE_FLOAT_BOP(>>, >>= )
-#undef POSITIVE_FLOAT_BOP
+UNSIGNED_FLOAT_BOP(+, += )
+UNSIGNED_FLOAT_BOP(-, -= )
+UNSIGNED_FLOAT_BOP(*, *= )
+UNSIGNED_FLOAT_BOP(/, /= )
+UNSIGNED_FLOAT_BOP(<<, <<= )
+UNSIGNED_FLOAT_BOP(>>, >>= )
+#undef UNSIGNED_FLOAT_BOP
template <class DigitsT>
-raw_ostream &operator<<(raw_ostream &OS, const PositiveFloat<DigitsT> &X) {
+raw_ostream &operator<<(raw_ostream &OS, const UnsignedFloat<DigitsT> &X) {
return X.print(OS, 10);
}
-#define POSITIVE_FLOAT_COMPARE_TO_TYPE(op, T1, T2) \
+#define UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, T1, T2) \
template <class DigitsT> \
- bool operator op(const PositiveFloat<DigitsT> &L, T1 R) { \
+ bool operator op(const UnsignedFloat<DigitsT> &L, T1 R) { \
return L.compareTo(T2(R)) op 0; \
} \
template <class DigitsT> \
- bool operator op(T1 L, const PositiveFloat<DigitsT> &R) { \
+ bool operator op(T1 L, const UnsignedFloat<DigitsT> &R) { \
return 0 op R.compareTo(T2(L)); \
}
-#define POSITIVE_FLOAT_COMPARE_TO(op) \
- POSITIVE_FLOAT_COMPARE_TO_TYPE(op, uint64_t, uint64_t) \
- POSITIVE_FLOAT_COMPARE_TO_TYPE(op, uint32_t, uint64_t) \
- POSITIVE_FLOAT_COMPARE_TO_TYPE(op, int64_t, int64_t) \
- POSITIVE_FLOAT_COMPARE_TO_TYPE(op, int32_t, int64_t)
-POSITIVE_FLOAT_COMPARE_TO(< )
-POSITIVE_FLOAT_COMPARE_TO(> )
-POSITIVE_FLOAT_COMPARE_TO(== )
-POSITIVE_FLOAT_COMPARE_TO(!= )
-POSITIVE_FLOAT_COMPARE_TO(<= )
-POSITIVE_FLOAT_COMPARE_TO(>= )
-#undef POSITIVE_FLOAT_COMPARE_TO
-#undef POSITIVE_FLOAT_COMPARE_TO_TYPE
+#define UNSIGNED_FLOAT_COMPARE_TO(op) \
+ UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, uint64_t, uint64_t) \
+ UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, uint32_t, uint64_t) \
+ UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, int64_t, int64_t) \
+ UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, int32_t, int64_t)
+UNSIGNED_FLOAT_COMPARE_TO(< )
+UNSIGNED_FLOAT_COMPARE_TO(> )
+UNSIGNED_FLOAT_COMPARE_TO(== )
+UNSIGNED_FLOAT_COMPARE_TO(!= )
+UNSIGNED_FLOAT_COMPARE_TO(<= )
+UNSIGNED_FLOAT_COMPARE_TO(>= )
+#undef UNSIGNED_FLOAT_COMPARE_TO
+#undef UNSIGNED_FLOAT_COMPARE_TO_TYPE
template <class DigitsT>
-uint64_t PositiveFloat<DigitsT>::scale(uint64_t N) const {
+uint64_t UnsignedFloat<DigitsT>::scale(uint64_t N) const {
if (Width == 64 || N <= DigitsLimits::max())
return (getFloat(N) * *this).template toInt<uint64_t>();
// Defer to the 64-bit version.
- return PositiveFloat<uint64_t>(Digits, Exponent).scale(N);
+ return UnsignedFloat<uint64_t>(Digits, Exponent).scale(N);
}
template <class DigitsT>
-PositiveFloat<DigitsT> PositiveFloat<DigitsT>::getProduct(DigitsType L,
+UnsignedFloat<DigitsT> UnsignedFloat<DigitsT>::getProduct(DigitsType L,
DigitsType R) {
// Check for zero.
if (!L || !R)
return adjustToWidth(uint64_t(L) * uint64_t(R), 0);
// Do the full thing.
- return PositiveFloat(multiply64(L, R));
+ return UnsignedFloat(multiply64(L, R));
}
template <class DigitsT>
-PositiveFloat<DigitsT> PositiveFloat<DigitsT>::getQuotient(DigitsType Dividend,
+UnsignedFloat<DigitsT> UnsignedFloat<DigitsT>::getQuotient(DigitsType Dividend,
DigitsType Divisor) {
// Check for zero.
if (!Dividend)
return getLargest();
if (Width == 64)
- return PositiveFloat(divide64(Dividend, Divisor));
+ return UnsignedFloat(divide64(Dividend, Divisor));
// We can compute this with 64-bit math.
int Shift = countLeadingZeros64(Dividend);
return adjustToWidth(Quotient, -Shift);
// Round based on the value of the next bit.
- return getRounded(PositiveFloat(Quotient, -Shift),
+ return getRounded(UnsignedFloat(Quotient, -Shift),
Shifted % Divisor >= getHalf(Divisor));
}
template <class DigitsT>
template <class IntT>
-IntT PositiveFloat<DigitsT>::toInt() const {
+IntT UnsignedFloat<DigitsT>::toInt() const {
typedef std::numeric_limits<IntT> Limits;
if (*this < 1)
return 0;
}
template <class DigitsT>
-std::pair<int32_t, int> PositiveFloat<DigitsT>::lgImpl() const {
+std::pair<int32_t, int> UnsignedFloat<DigitsT>::lgImpl() const {
if (isZero())
return std::make_pair(INT32_MIN, 0);
}
template <class DigitsT>
-PositiveFloat<DigitsT> PositiveFloat<DigitsT>::matchExponents(PositiveFloat X) {
+UnsignedFloat<DigitsT> UnsignedFloat<DigitsT>::matchExponents(UnsignedFloat X) {
if (isZero() || X.isZero() || Exponent == X.Exponent)
return X;
return X;
}
template <class DigitsT>
-void PositiveFloat<DigitsT>::increaseExponentToMatch(PositiveFloat &X,
+void UnsignedFloat<DigitsT>::increaseExponentToMatch(UnsignedFloat &X,
int32_t ExponentDiff) {
assert(ExponentDiff > 0);
if (ExponentDiff >= 2 * Width) {
}
template <class DigitsT>
-PositiveFloat<DigitsT> &PositiveFloat<DigitsT>::
-operator+=(const PositiveFloat &X) {
+UnsignedFloat<DigitsT> &UnsignedFloat<DigitsT>::
+operator+=(const UnsignedFloat &X) {
if (isLargest() || X.isZero())
return *this;
if (isZero() || X.isLargest())
return *this = X;
// Normalize exponents.
- PositiveFloat Scaled = matchExponents(X);
+ UnsignedFloat Scaled = matchExponents(X);
// Check for zero again.
if (isZero())
return *this;
}
template <class DigitsT>
-PositiveFloat<DigitsT> &PositiveFloat<DigitsT>::
-operator-=(const PositiveFloat &X) {
+UnsignedFloat<DigitsT> &UnsignedFloat<DigitsT>::
+operator-=(const UnsignedFloat &X) {
if (X.isZero())
return *this;
if (*this <= X)
return *this = getZero();
// Normalize exponents.
- PositiveFloat Scaled = matchExponents(X);
+ UnsignedFloat Scaled = matchExponents(X);
assert(Digits >= Scaled.Digits);
// Compute difference.
// Check if X just barely lost its last bit. E.g., for 32-bit:
//
// 1*2^32 - 1*2^0 == 0xffffffff != 1*2^32
- if (*this == PositiveFloat(1, X.lgFloor() + Width)) {
+ if (*this == UnsignedFloat(1, X.lgFloor() + Width)) {
Digits = DigitsType(0) - 1;
--Exponent;
}
return *this;
}
template <class DigitsT>
-PositiveFloat<DigitsT> &PositiveFloat<DigitsT>::
-operator*=(const PositiveFloat &X) {
+UnsignedFloat<DigitsT> &UnsignedFloat<DigitsT>::
+operator*=(const UnsignedFloat &X) {
if (isZero())
return *this;
if (X.isZero())
return *this <<= Exponents;
}
template <class DigitsT>
-PositiveFloat<DigitsT> &PositiveFloat<DigitsT>::
-operator/=(const PositiveFloat &X) {
+UnsignedFloat<DigitsT> &UnsignedFloat<DigitsT>::
+operator/=(const UnsignedFloat &X) {
if (isZero())
return *this;
if (X.isZero())
return *this <<= Exponents;
}
template <class DigitsT>
-void PositiveFloat<DigitsT>::shiftLeft(int32_t Shift) {
+void UnsignedFloat<DigitsT>::shiftLeft(int32_t Shift) {
if (!Shift || isZero())
return;
assert(Shift != INT32_MIN);
}
template <class DigitsT>
-void PositiveFloat<DigitsT>::shiftRight(int32_t Shift) {
+void UnsignedFloat<DigitsT>::shiftRight(int32_t Shift) {
if (!Shift || isZero())
return;
assert(Shift != INT32_MIN);
}
template <class DigitsT>
-int PositiveFloat<DigitsT>::compare(const PositiveFloat &X) const {
+int UnsignedFloat<DigitsT>::compare(const UnsignedFloat &X) const {
// Check for zero.
if (isZero())
return X.isZero() ? 0 : -1;
// Compare digits.
if (Exponent < X.Exponent)
- return PositiveFloatBase::compare(Digits, X.Digits, X.Exponent - Exponent);
+ return UnsignedFloatBase::compare(Digits, X.Digits, X.Exponent - Exponent);
- return -PositiveFloatBase::compare(X.Digits, Digits, Exponent - X.Exponent);
+ return -UnsignedFloatBase::compare(X.Digits, Digits, Exponent - X.Exponent);
}
-template <class T> struct isPodLike<PositiveFloat<T>> {
+template <class T> struct isPodLike<UnsignedFloat<T>> {
static const bool value = true;
};
}
///
/// Convert to a float. \a isFull() gives 1.0, while \a isEmpty() gives
/// slightly above 0.0.
- PositiveFloat<uint64_t> toFloat() const;
+ UnsignedFloat<uint64_t> toFloat() const;
void dump() const;
raw_ostream &print(raw_ostream &OS) const;
/// BlockFrequencyInfoImpl. See there for details.
class BlockFrequencyInfoImplBase {
public:
- typedef PositiveFloat<uint64_t> Float;
+ typedef UnsignedFloat<uint64_t> Float;
/// \brief Representative of a block.
///
uint64_t Integer;
};
+ /// \brief Data about a loop.
+ ///
+ /// Contains the data necessary to represent a loop as a
+ /// pseudo-node once it's packaged.
+ struct LoopData {
+ typedef SmallVector<std::pair<BlockNode, BlockMass>, 4> ExitMap;
+ typedef SmallVector<BlockNode, 4> NodeList;
+ LoopData *Parent; ///< The parent loop.
+ bool IsPackaged; ///< Whether this has been packaged.
+ ExitMap Exits; ///< Successor edges (and weights).
+ NodeList Nodes; ///< Header and the members of the loop.
+ BlockMass BackedgeMass; ///< Mass returned to loop header.
+ BlockMass Mass;
+ Float Scale;
+
+ LoopData(LoopData *Parent, const BlockNode &Header)
+ : Parent(Parent), IsPackaged(false), Nodes(1, Header) {}
+ bool isHeader(const BlockNode &Node) const { return Node == Nodes[0]; }
+ BlockNode getHeader() const { return Nodes[0]; }
+
+ NodeList::const_iterator members_begin() const { return Nodes.begin() + 1; }
+ NodeList::const_iterator members_end() const { return Nodes.end(); }
+ iterator_range<NodeList::const_iterator> members() const {
+ return make_range(members_begin(), members_end());
+ }
+ };
+
/// \brief Index of loop information.
struct WorkingData {
- BlockNode ContainingLoop; ///< The block whose loop this block is inside.
- uint32_t LoopIndex; ///< Index into PackagedLoops.
- bool IsPackaged; ///< Has ContainingLoop been packaged up?
- bool IsAPackage; ///< Has this block's loop been packaged up?
- BlockMass Mass; ///< Mass distribution from the entry block.
+ BlockNode Node; ///< This node.
+ LoopData *Loop; ///< The loop this block is inside.
+ BlockMass Mass; ///< Mass distribution from the entry block.
+
+ WorkingData(const BlockNode &Node) : Node(Node), Loop(nullptr) {}
- WorkingData()
- : LoopIndex(UINT32_MAX), IsPackaged(false), IsAPackage(false) {}
+ bool isLoopHeader() const { return Loop && Loop->isHeader(Node); }
- bool hasLoopHeader() const { return ContainingLoop.isValid(); }
- bool isLoopHeader() const { return LoopIndex != UINT32_MAX; }
+ LoopData *getContainingLoop() const {
+ return isLoopHeader() ? Loop->Parent : Loop;
+ }
+
+ /// \brief Resolve a node to its representative.
+ ///
+ /// Get the node currently representing Node, which could be a containing
+ /// loop.
+ ///
+ /// This function should only be called when distributing mass. As long as
+ /// there are no irreducilbe edges to Node, then it will have complexity
+ /// O(1) in this context.
+ ///
+ /// In general, the complexity is O(L), where L is the number of loop
+ /// headers Node has been packaged into. Since this method is called in
+ /// the context of distributing mass, L will be the number of loop headers
+ /// an early exit edge jumps out of.
+ BlockNode getResolvedNode() const {
+ auto L = getPackagedLoop();
+ return L ? L->getHeader() : Node;
+ }
+ LoopData *getPackagedLoop() const {
+ if (!Loop || !Loop->IsPackaged)
+ return nullptr;
+ auto L = Loop;
+ while (L->Parent && L->Parent->IsPackaged)
+ L = L->Parent;
+ return L;
+ }
+
+ /// \brief Get the appropriate mass for a node.
+ ///
+ /// Get appropriate mass for Node. If Node is a loop-header (whose loop
+ /// has been packaged), returns the mass of its pseudo-node. If it's a
+ /// node inside a packaged loop, it returns the loop's mass.
+ BlockMass &getMass() { return isAPackage() ? Loop->Mass : Mass; }
+
+ /// \brief Has ContainingLoop been packaged up?
+ bool isPackaged() const { return getResolvedNode() != Node; }
+ /// \brief Has Loop been packaged up?
+ bool isAPackage() const { return isLoopHeader() && Loop->IsPackaged; }
};
/// \brief Unscaled probability weight.
/// This class collates the successor edge weights for later processing.
///
/// \a DidOverflow indicates whether \a Total did overflow while adding to
- /// the distribution. It should never overflow twice. There's no flag for
- /// whether \a ForwardTotal overflows, since when \a Total exceeds 32-bits
- /// they both get re-computed during \a normalize().
+ /// the distribution. It should never overflow twice.
struct Distribution {
typedef SmallVector<Weight, 4> WeightList;
WeightList Weights; ///< Individual successor weights.
uint64_t Total; ///< Sum of all weights.
bool DidOverflow; ///< Whether \a Total did overflow.
- uint32_t ForwardTotal; ///< Total excluding backedges.
- Distribution() : Total(0), DidOverflow(false), ForwardTotal(0) {}
+ Distribution() : Total(0), DidOverflow(false) {}
void addLocal(const BlockNode &Node, uint64_t Amount) {
add(Node, Amount, Weight::Local);
}
void add(const BlockNode &Node, uint64_t Amount, Weight::DistType Type);
};
- /// \brief Data for a packaged loop.
- ///
- /// Contains the data necessary to represent represent a loop as a node once
- /// it's packaged.
- ///
- /// PackagedLoopData inherits from BlockData to give the node the necessary
- /// stats. Further, it has a list of successors, list of members, and stores
- /// the backedge mass assigned to this loop.
- struct PackagedLoopData {
- typedef SmallVector<std::pair<BlockNode, BlockMass>, 4> ExitMap;
- typedef SmallVector<BlockNode, 4> MemberList;
- BlockNode Header; ///< Header.
- ExitMap Exits; ///< Successor edges (and weights).
- MemberList Members; ///< Members of the loop.
- BlockMass BackedgeMass; ///< Mass returned to loop header.
- BlockMass Mass;
- Float Scale;
-
- PackagedLoopData(const BlockNode &Header) : Header(Header) {}
- };
-
/// \brief Data about each block. This is used downstream.
std::vector<FrequencyData> Freqs;
/// \brief Loop data: see initializeLoops().
std::vector<WorkingData> Working;
- /// \brief Indexed information about packaged loops.
- std::vector<PackagedLoopData> PackagedLoops;
-
- /// \brief Create the initial loop packages.
- ///
- /// Initializes PackagedLoops using the data in Working about backedges
- /// and containing loops. Called by initializeLoops().
- ///
- /// \post WorkingData::LoopIndex has been initialized for every loop header
- /// and PackagedLoopData::Members has been initialized.
+ /// \brief Indexed information about loops.
+ std::list<LoopData> Loops;
/// \brief Add all edges out of a packaged loop to the distribution.
///
/// Adds all edges from LocalLoopHead to Dist. Calls addToDist() to add each
/// successor edge.
- void addLoopSuccessorsToDist(const BlockNode &LoopHead,
- const BlockNode &LocalLoopHead,
+ void addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop,
Distribution &Dist);
/// \brief Add an edge to the distribution.
///
/// Adds an edge to Succ to Dist. If \c LoopHead.isValid(), then whether the
- /// edge is forward/exit/backedge is in the context of LoopHead. Otherwise,
- /// every edge should be a forward edge (since all the loops are packaged
- /// up).
- void addToDist(Distribution &Dist, const BlockNode &LoopHead,
+ /// edge is local/exit/backedge is in the context of LoopHead. Otherwise,
+ /// every edge should be a local edge (since all the loops are packaged up).
+ void addToDist(Distribution &Dist, const LoopData *OuterLoop,
const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight);
- PackagedLoopData &getLoopPackage(const BlockNode &Head) {
+ LoopData &getLoopPackage(const BlockNode &Head) {
assert(Head.Index < Working.size());
- size_t Index = Working[Head.Index].LoopIndex;
- assert(Index < PackagedLoops.size());
- return PackagedLoops[Index];
+ assert(Working[Head.Index].isLoopHeader());
+ return *Working[Head.Index].Loop;
}
/// \brief Distribute mass according to a distribution.
///
/// Distributes the mass in Source according to Dist. If LoopHead.isValid(),
- /// backedges and exits are stored in its entry in PackagedLoops.
+ /// backedges and exits are stored in its entry in Loops.
///
/// Mass is distributed in parallel from two copies of the source mass.
- ///
- /// The first mass (forward) represents the distribution of mass through the
- /// local DAG. This distribution should lose mass at loop exits and ignore
- /// backedges.
- ///
- /// The second mass (general) represents the behavior of the loop in the
- /// global context. In a given distribution from the head, how much mass
- /// exits, and to where? How much mass returns to the loop head?
- ///
- /// The forward mass should be split up between local successors and exits,
- /// but only actually distributed to the local successors. The general mass
- /// should be split up between all three types of successors, but distributed
- /// only to exits and backedges.
- void distributeMass(const BlockNode &Source, const BlockNode &LoopHead,
+ void distributeMass(const BlockNode &Source, LoopData *OuterLoop,
Distribution &Dist);
/// \brief Compute the loop scale for a loop.
- void computeLoopScale(const BlockNode &LoopHead);
+ void computeLoopScale(LoopData &Loop);
/// \brief Package up a loop.
- void packageLoop(const BlockNode &LoopHead);
+ void packageLoop(LoopData &Loop);
+
+ /// \brief Unwrap loops.
+ void unwrapLoops();
/// \brief Finalize frequency metrics.
///
- /// Unwraps loop packages, calculates final frequencies, and cleans up
- /// no-longer-needed data structures.
+ /// Calculates final frequencies and cleans up no-longer-needed data
+ /// structures.
void finalizeMetrics();
/// \brief Clear all memory.
/// MachineBlockFrequencyInfo, and calculates the relative frequencies of
/// blocks.
///
-/// This algorithm leverages BlockMass and PositiveFloat to maintain precision,
+/// This algorithm leverages BlockMass and UnsignedFloat to maintain precision,
/// separates mass distribution from loop scaling, and dithers to eliminate
/// probability mass loss.
///
///
/// - Fetch and categorize the weight distribution for its successors.
/// If this is a packaged-subloop, the weight distribution is stored
-/// in \a PackagedLoopData::Exits. Otherwise, fetch it from
+/// in \a LoopData::Exits. Otherwise, fetch it from
/// BranchProbabilityInfo.
///
-/// - Each successor is categorized as \a Weight::Local, a normal
-/// forward edge within the current loop, \a Weight::Backedge, a
-/// backedge to the loop header, or \a Weight::Exit, any successor
-/// outside the loop. The weight, the successor, and its category
-/// are stored in \a Distribution. There can be multiple edges to
-/// each successor.
+/// - Each successor is categorized as \a Weight::Local, a local edge
+/// within the current loop, \a Weight::Backedge, a backedge to the
+/// loop header, or \a Weight::Exit, any successor outside the loop.
+/// The weight, the successor, and its category are stored in \a
+/// Distribution. There can be multiple edges to each successor.
///
/// - Normalize the distribution: scale weights down so that their sum
/// is 32-bits, and coalesce multiple edges to the same node.
///
/// - Distribute the mass accordingly, dithering to minimize mass loss,
-/// as described in \a distributeMass(). Mass is distributed in
-/// parallel in two ways: forward, and general. Local successors
-/// take their mass from the forward mass, while exit and backedge
-/// successors take their mass from the general mass. Additionally,
-/// exit edges use up (ignored) mass from the forward mass, and local
-/// edges use up (ignored) mass from the general distribution.
+/// as described in \a distributeMass().
///
/// Finally, calculate the loop scale from the accumulated backedge mass.
///
return RPOT[Node.Index];
}
+ /// \brief Run (and save) a post-order traversal.
+ ///
+ /// Saves a reverse post-order traversal of all the nodes in \a F.
void initializeRPOT();
+
+ /// \brief Initialize loop data.
+ ///
+ /// Build up \a Loops using \a LoopInfo. \a LoopInfo gives us a mapping from
+ /// each block to the deepest loop it's in, but we need the inverse. For each
+ /// loop, we store in reverse post-order its "immediate" members, defined as
+ /// the header, the headers of immediate sub-loops, and all other blocks in
+ /// the loop that are not in sub-loops.
void initializeLoops();
- void runOnFunction(const FunctionT *F);
- void propagateMassToSuccessors(const BlockNode &LoopHead,
- const BlockNode &Node);
+ /// \brief Propagate to a block's successors.
+ ///
+ /// In the context of distributing mass through \c OuterLoop, divide the mass
+ /// currently assigned to \c Node between its successors.
+ void propagateMassToSuccessors(LoopData *OuterLoop, const BlockNode &Node);
+
+ /// \brief Compute mass in a particular loop.
+ ///
+ /// Assign mass to \c Loop's header, and then for each block in \c Loop in
+ /// reverse post-order, distribute mass to its successors. Only visits nodes
+ /// that have not been packaged into sub-loops.
+ ///
+ /// \pre \a computeMassInLoop() has been called for each subloop of \c Loop.
+ void computeMassInLoop(LoopData &Loop);
+
+ /// \brief Compute mass in all loops.
+ ///
+ /// For each loop bottom-up, call \a computeMassInLoop().
void computeMassInLoops();
- void computeMassInLoop(const BlockNode &LoopHead);
+
+ /// \brief Compute mass in the top-level function.
+ ///
+ /// Assign mass to the entry block, and then for each block in reverse
+ /// post-order, distribute mass to its successors. Skips nodes that have
+ /// been packaged into loops.
+ ///
+ /// \pre \a computeMassInLoops() has been called.
void computeMassInFunction();
std::string getBlockName(const BlockNode &Node) const override {
void doFunction(const FunctionT *F, const BranchProbabilityInfoT *BPI,
const LoopInfoT *LI);
- BlockFrequencyInfoImpl() : BPI(0), LI(0), F(0) {}
+ BlockFrequencyInfoImpl() : BPI(nullptr), LI(nullptr), F(nullptr) {}
using BlockFrequencyInfoImplBase::getEntryFreq;
BlockFrequency getBlockFreq(const BlockT *BB) const {
// the full function.
computeMassInLoops();
computeMassInFunction();
+ unwrapLoops();
finalizeMetrics();
}
Nodes[*I] = Node;
}
- Working.resize(RPOT.size());
+ Working.reserve(RPOT.size());
+ for (size_t Index = 0; Index < RPOT.size(); ++Index)
+ Working.emplace_back(Index);
Freqs.resize(RPOT.size());
}
return;
// Visit loops top down and assign them an index.
- std::deque<const LoopT *> Q;
- Q.insert(Q.end(), LI->begin(), LI->end());
+ std::deque<std::pair<const LoopT *, LoopData *>> Q;
+ for (const LoopT *L : *LI)
+ Q.emplace_back(L, nullptr);
while (!Q.empty()) {
- const LoopT *Loop = Q.front();
+ const LoopT *Loop = Q.front().first;
+ LoopData *Parent = Q.front().second;
Q.pop_front();
- Q.insert(Q.end(), Loop->begin(), Loop->end());
- // Save the order this loop was visited.
BlockNode Header = getNode(Loop->getHeader());
assert(Header.isValid());
- Working[Header.Index].LoopIndex = PackagedLoops.size();
- PackagedLoops.emplace_back(Header);
+ Loops.emplace_back(Parent, Header);
+ Working[Header.Index].Loop = &Loops.back();
DEBUG(dbgs() << " - loop = " << getBlockName(Header) << "\n");
+
+ for (const LoopT *L : *Loop)
+ Q.emplace_back(L, &Loops.back());
}
// Visit nodes in reverse post-order and add them to their deepest containing
// loop.
for (size_t Index = 0; Index < RPOT.size(); ++Index) {
+    // Loop headers were already mapped to their LoopData above; all that
+    // remains is to add them to the parent loop's member list, if any.
+ if (Working[Index].isLoopHeader()) {
+ LoopData *ContainingLoop = Working[Index].getContainingLoop();
+ if (ContainingLoop)
+ ContainingLoop->Nodes.push_back(Index);
+ continue;
+ }
+
const LoopT *Loop = LI->getLoopFor(RPOT[Index]);
if (!Loop)
continue;
- // If this is a loop header, find its parent loop (if any).
- if (Working[Index].isLoopHeader())
- if (!(Loop = Loop->getParentLoop()))
- continue;
-
// Add this node to its containing loop's member list.
BlockNode Header = getNode(Loop->getHeader());
assert(Header.isValid());
const auto &HeaderData = Working[Header.Index];
assert(HeaderData.isLoopHeader());
- Working[Index].ContainingLoop = Header;
- PackagedLoops[HeaderData.LoopIndex].Members.push_back(Index);
+ Working[Index].Loop = HeaderData.Loop;
+ HeaderData.Loop->Nodes.push_back(Index);
DEBUG(dbgs() << " - loop = " << getBlockName(Header)
<< ": member = " << getBlockName(Index) << "\n");
}
template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInLoops() {
// Visit loops with the deepest first, and the top-level loops last.
- for (auto L = PackagedLoops.rbegin(), LE = PackagedLoops.rend(); L != LE; ++L)
- computeMassInLoop(L->Header);
+ for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L)
+ computeMassInLoop(*L);
}
template <class BT>
-void BlockFrequencyInfoImpl<BT>::computeMassInLoop(const BlockNode &LoopHead) {
+void BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) {
// Compute mass in loop.
- DEBUG(dbgs() << "compute-mass-in-loop: " << getBlockName(LoopHead) << "\n");
+ DEBUG(dbgs() << "compute-mass-in-loop: " << getBlockName(Loop.getHeader())
+ << "\n");
- Working[LoopHead.Index].Mass = BlockMass::getFull();
- propagateMassToSuccessors(LoopHead, LoopHead);
+ Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
+ propagateMassToSuccessors(&Loop, Loop.getHeader());
- for (const BlockNode &M : getLoopPackage(LoopHead).Members)
- propagateMassToSuccessors(LoopHead, M);
+ for (const BlockNode &M : Loop.members())
+ propagateMassToSuccessors(&Loop, M);
- computeLoopScale(LoopHead);
- packageLoop(LoopHead);
+ computeLoopScale(Loop);
+ packageLoop(Loop);
}
template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() {
assert(!Working.empty() && "no blocks in function");
assert(!Working[0].isLoopHeader() && "entry block is a loop header");
- Working[0].Mass = BlockMass::getFull();
+ Working[0].getMass() = BlockMass::getFull();
for (rpot_iterator I = rpot_begin(), IE = rpot_end(); I != IE; ++I) {
// Check for nodes that have been packaged.
BlockNode Node = getNode(I);
- if (Working[Node.Index].hasLoopHeader())
+ if (Working[Node.Index].isPackaged())
continue;
- propagateMassToSuccessors(BlockNode(), Node);
+ propagateMassToSuccessors(nullptr, Node);
}
}
template <class BT>
void
-BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(const BlockNode &LoopHead,
+BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop,
const BlockNode &Node) {
DEBUG(dbgs() << " - node: " << getBlockName(Node) << "\n");
// Calculate probability for successors.
Distribution Dist;
- if (Node != LoopHead && Working[Node.Index].isLoopHeader())
- addLoopSuccessorsToDist(LoopHead, Node, Dist);
- else {
+ if (auto *Loop = Working[Node.Index].getPackagedLoop()) {
+ assert(Loop != OuterLoop && "Cannot propagate mass in a packaged loop");
+ addLoopSuccessorsToDist(OuterLoop, *Loop, Dist);
+ } else {
const BlockT *BB = getBlock(Node);
for (auto SI = Successor::child_begin(BB), SE = Successor::child_end(BB);
SI != SE; ++SI)
// Do not dereference SI, or getEdgeWeight() is linear in the number of
// successors.
- addToDist(Dist, LoopHead, Node, getNode(*SI), BPI->getEdgeWeight(BB, SI));
+ addToDist(Dist, OuterLoop, Node, getNode(*SI),
+ BPI->getEdgeWeight(BB, SI));
}
// Distribute mass to successors, saving exit and backedge data in the
// loop header.
- distributeMass(Node, LoopHead, Dist);
+ distributeMass(Node, OuterLoop, Dist);
}
template <class BT>
}
}
+#undef DEBUG_TYPE
+
#endif