X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=include%2Fllvm%2FAnalysis%2FBlockFrequencyInfoImpl.h;h=d7379b8fbeaa8cf14e55bec82593a01086af3efe;hp=264aff372581768e5321091ed3ccf2853b1ff1e0;hb=f066140cf7af087a198ed5825245fc1c3c06e923;hpb=5cf39383da201784a40eb47cfd289cbe98972145 diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 264aff37258..d7379b8fbea 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -31,679 +31,25 @@ #define DEBUG_TYPE "block-freq" -//===----------------------------------------------------------------------===// -// -// UnsignedFloat definition. -// -// TODO: Make this private to BlockFrequencyInfoImpl or delete. -// -//===----------------------------------------------------------------------===// namespace llvm { -class UnsignedFloatBase { -public: - static const int32_t MaxExponent = 16383; - static const int32_t MinExponent = -16382; - static const int DefaultPrecision = 10; - - static void dump(uint64_t D, int16_t E, int Width); - static raw_ostream &print(raw_ostream &OS, uint64_t D, int16_t E, int Width, - unsigned Precision); - static std::string toString(uint64_t D, int16_t E, int Width, - unsigned Precision); - static int countLeadingZeros32(uint32_t N) { return countLeadingZeros(N); } - static int countLeadingZeros64(uint64_t N) { return countLeadingZeros(N); } - static uint64_t getHalf(uint64_t N) { return (N >> 1) + (N & 1); } - - static std::pair splitSigned(int64_t N) { - if (N >= 0) - return std::make_pair(N, false); - uint64_t Unsigned = N == INT64_MIN ? UINT64_C(1) << 63 : uint64_t(-N); - return std::make_pair(Unsigned, true); - } - static int64_t joinSigned(uint64_t U, bool IsNeg) { - if (U > uint64_t(INT64_MAX)) - return IsNeg ? INT64_MIN : INT64_MAX; - return IsNeg ? -int64_t(U) : int64_t(U); - } - - static int32_t extractLg(const std::pair &Lg) { - return Lg.first; - } - static int32_t extractLgFloor(const std::pair &Lg) { - return Lg.first - (Lg.second > 0); - } - static int32_t extractLgCeiling(const std::pair &Lg) { - return Lg.first + (Lg.second < 0); - } - - static std::pair divide64(uint64_t L, uint64_t R); - static std::pair multiply64(uint64_t L, uint64_t R); - - static int compare(uint64_t L, uint64_t R, int Shift) { - assert(Shift >= 0); - assert(Shift < 64); - - uint64_t L_adjusted = L >> Shift; - if (L_adjusted < R) - return -1; - if (L_adjusted > R) - return 1; - - return L > L_adjusted << Shift ? 1 : 0; - } -}; - -/// \brief Simple representation of an unsigned floating point. -/// -/// UnsignedFloat is a unsigned floating point number. It uses simple -/// saturation arithmetic, and every operation is well-defined for every value. -/// -/// The number is split into a signed exponent and unsigned digits. The number -/// represented is \c getDigits()*2^getExponent(). In this way, the digits are -/// much like the mantissa in the x87 long double, but there is no canonical -/// form, so the same number can be represented by many bit representations -/// (it's always in "denormal" mode). -/// -/// UnsignedFloat is templated on the underlying integer type for digits, which -/// is expected to be one of uint64_t, uint32_t, uint16_t or uint8_t. -/// -/// Unlike builtin floating point types, UnsignedFloat is portable. -/// -/// Unlike APFloat, UnsignedFloat does not model architecture floating point -/// behaviour (this should make it a little faster), and implements most -/// operators (this makes it usable). -/// -/// UnsignedFloat is totally ordered. However, there is no canonical form, so -/// there are multiple representations of most scalars. E.g.: -/// -/// UnsignedFloat(8u, 0) == UnsignedFloat(4u, 1) -/// UnsignedFloat(4u, 1) == UnsignedFloat(2u, 2) -/// UnsignedFloat(2u, 2) == UnsignedFloat(1u, 3) -/// -/// UnsignedFloat implements most arithmetic operations. Precision is kept -/// where possible. Uses simple saturation arithmetic, so that operations -/// saturate to 0.0 or getLargest() rather than under or overflowing. It has -/// some extra arithmetic for unit inversion. 0.0/0.0 is defined to be 0.0. -/// Any other division by 0.0 is defined to be getLargest(). -/// -/// As a convenience for modifying the exponent, left and right shifting are -/// both implemented, and both interpret negative shifts as positive shifts in -/// the opposite direction. -/// -/// Exponents are limited to the range accepted by x87 long double. This makes -/// it trivial to add functionality to convert to APFloat (this is already -/// relied on for the implementation of printing). -/// -/// The current plan is to gut this and make the necessary parts of it (even -/// more) private to BlockFrequencyInfo. -template class UnsignedFloat : UnsignedFloatBase { -public: - static_assert(!std::numeric_limits::is_signed, - "only unsigned floats supported"); - - typedef DigitsT DigitsType; - -private: - typedef std::numeric_limits DigitsLimits; - - static const int Width = sizeof(DigitsType) * 8; - static_assert(Width <= 64, "invalid integer width for digits"); - -private: - DigitsType Digits; - int16_t Exponent; - -public: - UnsignedFloat() : Digits(0), Exponent(0) {} - - UnsignedFloat(DigitsType Digits, int16_t Exponent) - : Digits(Digits), Exponent(Exponent) {} - -private: - UnsignedFloat(const std::pair &X) - : Digits(X.first), Exponent(X.second) {} - -public: - static UnsignedFloat getZero() { return UnsignedFloat(0, 0); } - static UnsignedFloat getOne() { return UnsignedFloat(1, 0); } - static UnsignedFloat getLargest() { - return UnsignedFloat(DigitsLimits::max(), MaxExponent); - } - static UnsignedFloat getFloat(uint64_t N) { return adjustToWidth(N, 0); } - static UnsignedFloat getInverseFloat(uint64_t N) { - return getFloat(N).invert(); - } - static UnsignedFloat getFraction(DigitsType N, DigitsType D) { - return getQuotient(N, D); - } - - int16_t getExponent() const { return Exponent; } - DigitsType getDigits() const { return Digits; } - - /// \brief Convert to the given integer type. - /// - /// Convert to \c IntT using simple saturating arithmetic, truncating if - /// necessary. - template IntT toInt() const; - - bool isZero() const { return !Digits; } - bool isLargest() const { return *this == getLargest(); } - bool isOne() const { - if (Exponent > 0 || Exponent <= -Width) - return false; - return Digits == DigitsType(1) << -Exponent; - } - - /// \brief The log base 2, rounded. - /// - /// Get the lg of the scalar. lg 0 is defined to be INT32_MIN. - int32_t lg() const { return extractLg(lgImpl()); } - - /// \brief The log base 2, rounded towards INT32_MIN. - /// - /// Get the lg floor. lg 0 is defined to be INT32_MIN. - int32_t lgFloor() const { return extractLgFloor(lgImpl()); } - - /// \brief The log base 2, rounded towards INT32_MAX. - /// - /// Get the lg ceiling. lg 0 is defined to be INT32_MIN. - int32_t lgCeiling() const { return extractLgCeiling(lgImpl()); } - - bool operator==(const UnsignedFloat &X) const { return compare(X) == 0; } - bool operator<(const UnsignedFloat &X) const { return compare(X) < 0; } - bool operator!=(const UnsignedFloat &X) const { return compare(X) != 0; } - bool operator>(const UnsignedFloat &X) const { return compare(X) > 0; } - bool operator<=(const UnsignedFloat &X) const { return compare(X) <= 0; } - bool operator>=(const UnsignedFloat &X) const { return compare(X) >= 0; } - - bool operator!() const { return isZero(); } - - /// \brief Convert to a decimal representation in a string. - /// - /// Convert to a string. Uses scientific notation for very large/small - /// numbers. Scientific notation is used roughly for numbers outside of the - /// range 2^-64 through 2^64. - /// - /// \c Precision indicates the number of decimal digits of precision to use; - /// 0 requests the maximum available. - /// - /// As a special case to make debugging easier, if the number is small enough - /// to convert without scientific notation and has more than \c Precision - /// digits before the decimal place, it's printed accurately to the first - /// digit past zero. E.g., assuming 10 digits of precision: - /// - /// 98765432198.7654... => 98765432198.8 - /// 8765432198.7654... => 8765432198.8 - /// 765432198.7654... => 765432198.8 - /// 65432198.7654... => 65432198.77 - /// 5432198.7654... => 5432198.765 - std::string toString(unsigned Precision = DefaultPrecision) { - return UnsignedFloatBase::toString(Digits, Exponent, Width, Precision); - } - - /// \brief Print a decimal representation. - /// - /// Print a string. See toString for documentation. - raw_ostream &print(raw_ostream &OS, - unsigned Precision = DefaultPrecision) const { - return UnsignedFloatBase::print(OS, Digits, Exponent, Width, Precision); - } - void dump() const { return UnsignedFloatBase::dump(Digits, Exponent, Width); } - - UnsignedFloat &operator+=(const UnsignedFloat &X); - UnsignedFloat &operator-=(const UnsignedFloat &X); - UnsignedFloat &operator*=(const UnsignedFloat &X); - UnsignedFloat &operator/=(const UnsignedFloat &X); - UnsignedFloat &operator<<=(int16_t Shift) { shiftLeft(Shift); return *this; } - UnsignedFloat &operator>>=(int16_t Shift) { shiftRight(Shift); return *this; } - -private: - void shiftLeft(int32_t Shift); - void shiftRight(int32_t Shift); - - /// \brief Adjust two floats to have matching exponents. - /// - /// Adjust \c this and \c X to have matching exponents. Returns the new \c X - /// by value. Does nothing if \a isZero() for either. - /// - /// The value that compares smaller will lose precision, and possibly become - /// \a isZero(). - UnsignedFloat matchExponents(UnsignedFloat X); - - /// \brief Increase exponent to match another float. - /// - /// Increases \c this to have an exponent matching \c X. May decrease the - /// exponent of \c X in the process, and \c this may possibly become \a - /// isZero(). - void increaseExponentToMatch(UnsignedFloat &X, int32_t ExponentDiff); - -public: - /// \brief Scale a large number accurately. - /// - /// Scale N (multiply it by this). Uses full precision multiplication, even - /// if Width is smaller than 64, so information is not lost. - uint64_t scale(uint64_t N) const; - uint64_t scaleByInverse(uint64_t N) const { - // TODO: implement directly, rather than relying on inverse. Inverse is - // expensive. - return inverse().scale(N); - } - int64_t scale(int64_t N) const { - std::pair Unsigned = splitSigned(N); - return joinSigned(scale(Unsigned.first), Unsigned.second); - } - int64_t scaleByInverse(int64_t N) const { - std::pair Unsigned = splitSigned(N); - return joinSigned(scaleByInverse(Unsigned.first), Unsigned.second); - } - - int compare(const UnsignedFloat &X) const; - int compareTo(uint64_t N) const { - UnsignedFloat Float = getFloat(N); - int Compare = compare(Float); - if (Width == 64 || Compare != 0) - return Compare; - - // Check for precision loss. We know *this == RoundTrip. - uint64_t RoundTrip = Float.template toInt(); - return N == RoundTrip ? 0 : RoundTrip < N ? -1 : 1; - } - int compareTo(int64_t N) const { return N < 0 ? 1 : compareTo(uint64_t(N)); } - - UnsignedFloat &invert() { return *this = UnsignedFloat::getFloat(1) / *this; } - UnsignedFloat inverse() const { return UnsignedFloat(*this).invert(); } - -private: - static UnsignedFloat getProduct(DigitsType L, DigitsType R); - static UnsignedFloat getQuotient(DigitsType Dividend, DigitsType Divisor); - - std::pair lgImpl() const; - static int countLeadingZerosWidth(DigitsType Digits) { - if (Width == 64) - return countLeadingZeros64(Digits); - if (Width == 32) - return countLeadingZeros32(Digits); - return countLeadingZeros32(Digits) + Width - 32; - } - - /// \brief Adjust a number to width, rounding up if necessary. - /// - /// Should only be called for \c Shift close to zero. - /// - /// \pre Shift >= MinExponent && Shift + 64 <= MaxExponent. - static UnsignedFloat adjustToWidth(uint64_t N, int32_t Shift) { - assert(Shift >= MinExponent && "Shift should be close to 0"); - assert(Shift <= MaxExponent - 64 && "Shift should be close to 0"); - auto Adjusted = ScaledNumbers::getAdjusted(N, Shift); - return Adjusted; - } - - static UnsignedFloat getRounded(UnsignedFloat P, bool Round) { - // Saturate. - if (P.isLargest()) - return P; - - return ScaledNumbers::getRounded(P.Digits, P.Exponent, Round); - } -}; - -#define UNSIGNED_FLOAT_BOP(op, base) \ - template \ - UnsignedFloat operator op(const UnsignedFloat &L, \ - const UnsignedFloat &R) { \ - return UnsignedFloat(L) base R; \ - } -UNSIGNED_FLOAT_BOP(+, += ) -UNSIGNED_FLOAT_BOP(-, -= ) -UNSIGNED_FLOAT_BOP(*, *= ) -UNSIGNED_FLOAT_BOP(/, /= ) -UNSIGNED_FLOAT_BOP(<<, <<= ) -UNSIGNED_FLOAT_BOP(>>, >>= ) -#undef UNSIGNED_FLOAT_BOP - -template -raw_ostream &operator<<(raw_ostream &OS, const UnsignedFloat &X) { - return X.print(OS, 10); -} - -#define UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, T1, T2) \ - template \ - bool operator op(const UnsignedFloat &L, T1 R) { \ - return L.compareTo(T2(R)) op 0; \ - } \ - template \ - bool operator op(T1 L, const UnsignedFloat &R) { \ - return 0 op R.compareTo(T2(L)); \ - } -#define UNSIGNED_FLOAT_COMPARE_TO(op) \ - UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, uint64_t, uint64_t) \ - UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, uint32_t, uint64_t) \ - UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, int64_t, int64_t) \ - UNSIGNED_FLOAT_COMPARE_TO_TYPE(op, int32_t, int64_t) -UNSIGNED_FLOAT_COMPARE_TO(< ) -UNSIGNED_FLOAT_COMPARE_TO(> ) -UNSIGNED_FLOAT_COMPARE_TO(== ) -UNSIGNED_FLOAT_COMPARE_TO(!= ) -UNSIGNED_FLOAT_COMPARE_TO(<= ) -UNSIGNED_FLOAT_COMPARE_TO(>= ) -#undef UNSIGNED_FLOAT_COMPARE_TO -#undef UNSIGNED_FLOAT_COMPARE_TO_TYPE - -template -uint64_t UnsignedFloat::scale(uint64_t N) const { - if (Width == 64 || N <= DigitsLimits::max()) - return (getFloat(N) * *this).template toInt(); - - // Defer to the 64-bit version. - return UnsignedFloat(Digits, Exponent).scale(N); -} - -template -UnsignedFloat UnsignedFloat::getProduct(DigitsType L, - DigitsType R) { - // Check for zero. - if (!L || !R) - return getZero(); - - // Check for numbers that we can compute with 64-bit math. - if (Width <= 32 || (L <= UINT32_MAX && R <= UINT32_MAX)) - return adjustToWidth(uint64_t(L) * uint64_t(R), 0); - - // Do the full thing. - return UnsignedFloat(multiply64(L, R)); -} -template -UnsignedFloat UnsignedFloat::getQuotient(DigitsType Dividend, - DigitsType Divisor) { - // Check for zero. - if (!Dividend) - return getZero(); - if (!Divisor) - return getLargest(); - - if (Width == 64) - return UnsignedFloat(divide64(Dividend, Divisor)); - - // We can compute this with 64-bit math. - int Shift = countLeadingZeros64(Dividend); - uint64_t Shifted = uint64_t(Dividend) << Shift; - uint64_t Quotient = Shifted / Divisor; - - // If Quotient needs to be shifted, then adjustToWidth will round. - if (Quotient > DigitsLimits::max()) - return adjustToWidth(Quotient, -Shift); - - // Round based on the value of the next bit. - return getRounded(UnsignedFloat(Quotient, -Shift), - Shifted % Divisor >= getHalf(Divisor)); -} - -template -template -IntT UnsignedFloat::toInt() const { - typedef std::numeric_limits Limits; - if (*this < 1) - return 0; - if (*this >= Limits::max()) - return Limits::max(); - - IntT N = Digits; - if (Exponent > 0) { - assert(size_t(Exponent) < sizeof(IntT) * 8); - return N << Exponent; - } - if (Exponent < 0) { - assert(size_t(-Exponent) < sizeof(IntT) * 8); - return N >> -Exponent; - } - return N; -} - -template -std::pair UnsignedFloat::lgImpl() const { - if (isZero()) - return std::make_pair(INT32_MIN, 0); - - // Get the floor of the lg of Digits. - int32_t LocalFloor = Width - countLeadingZerosWidth(Digits) - 1; - - // Get the floor of the lg of this. - int32_t Floor = Exponent + LocalFloor; - if (Digits == UINT64_C(1) << LocalFloor) - return std::make_pair(Floor, 0); - - // Round based on the next digit. - assert(LocalFloor >= 1); - bool Round = Digits & UINT64_C(1) << (LocalFloor - 1); - return std::make_pair(Floor + Round, Round ? 1 : -1); -} - -template -UnsignedFloat UnsignedFloat::matchExponents(UnsignedFloat X) { - if (isZero() || X.isZero() || Exponent == X.Exponent) - return X; - - int32_t Diff = int32_t(X.Exponent) - int32_t(Exponent); - if (Diff > 0) - increaseExponentToMatch(X, Diff); - else - X.increaseExponentToMatch(*this, -Diff); - return X; -} -template -void UnsignedFloat::increaseExponentToMatch(UnsignedFloat &X, - int32_t ExponentDiff) { - assert(ExponentDiff > 0); - if (ExponentDiff >= 2 * Width) { - *this = getZero(); - return; - } - - // Use up any leading zeros on X, and then shift this. - int32_t ShiftX = std::min(countLeadingZerosWidth(X.Digits), ExponentDiff); - assert(ShiftX < Width); - - int32_t ShiftThis = ExponentDiff - ShiftX; - if (ShiftThis >= Width) { - *this = getZero(); - return; - } - - X.Digits <<= ShiftX; - X.Exponent -= ShiftX; - Digits >>= ShiftThis; - Exponent += ShiftThis; - return; -} - -template -UnsignedFloat &UnsignedFloat:: -operator+=(const UnsignedFloat &X) { - if (isLargest() || X.isZero()) - return *this; - if (isZero() || X.isLargest()) - return *this = X; - - // Normalize exponents. - UnsignedFloat Scaled = matchExponents(X); - - // Check for zero again. - if (isZero()) - return *this = Scaled; - if (Scaled.isZero()) - return *this; - - // Compute sum. - DigitsType Sum = Digits + Scaled.Digits; - bool DidOverflow = Sum < Digits; - Digits = Sum; - if (!DidOverflow) - return *this; - - if (Exponent == MaxExponent) - return *this = getLargest(); - - ++Exponent; - Digits = UINT64_C(1) << (Width - 1) | Digits >> 1; - - return *this; -} -template -UnsignedFloat &UnsignedFloat:: -operator-=(const UnsignedFloat &X) { - if (X.isZero()) - return *this; - if (*this <= X) - return *this = getZero(); - - // Normalize exponents. - UnsignedFloat Scaled = matchExponents(X); - assert(Digits >= Scaled.Digits); - - // Compute difference. - if (!Scaled.isZero()) { - Digits -= Scaled.Digits; - return *this; - } - - // Check if X just barely lost its last bit. E.g., for 32-bit: - // - // 1*2^32 - 1*2^0 == 0xffffffff != 1*2^32 - if (*this == UnsignedFloat(1, X.lgFloor() + Width)) { - Digits = DigitsType(0) - 1; - --Exponent; - } - return *this; -} -template -UnsignedFloat &UnsignedFloat:: -operator*=(const UnsignedFloat &X) { - if (isZero()) - return *this; - if (X.isZero()) - return *this = X; - - // Save the exponents. - int32_t Exponents = int32_t(Exponent) + int32_t(X.Exponent); - - // Get the raw product. - *this = getProduct(Digits, X.Digits); - - // Combine with exponents. - return *this <<= Exponents; -} -template -UnsignedFloat &UnsignedFloat:: -operator/=(const UnsignedFloat &X) { - if (isZero()) - return *this; - if (X.isZero()) - return *this = getLargest(); - - // Save the exponents. - int32_t Exponents = int32_t(Exponent) - int32_t(X.Exponent); - - // Get the raw quotient. - *this = getQuotient(Digits, X.Digits); - - // Combine with exponents. - return *this <<= Exponents; -} -template -void UnsignedFloat::shiftLeft(int32_t Shift) { - if (!Shift || isZero()) - return; - assert(Shift != INT32_MIN); - if (Shift < 0) { - shiftRight(-Shift); - return; - } - - // Shift as much as we can in the exponent. - int32_t ExponentShift = std::min(Shift, MaxExponent - Exponent); - Exponent += ExponentShift; - if (ExponentShift == Shift) - return; - - // Check this late, since it's rare. - if (isLargest()) - return; - - // Shift the digits themselves. - Shift -= ExponentShift; - if (Shift > countLeadingZerosWidth(Digits)) { - // Saturate. - *this = getLargest(); - return; - } - - Digits <<= Shift; - return; -} - -template -void UnsignedFloat::shiftRight(int32_t Shift) { - if (!Shift || isZero()) - return; - assert(Shift != INT32_MIN); - if (Shift < 0) { - shiftLeft(-Shift); - return; - } - - // Shift as much as we can in the exponent. - int32_t ExponentShift = std::min(Shift, Exponent - MinExponent); - Exponent -= ExponentShift; - if (ExponentShift == Shift) - return; - - // Shift the digits themselves. - Shift -= ExponentShift; - if (Shift >= Width) { - // Saturate. - *this = getZero(); - return; - } - - Digits >>= Shift; - return; -} +class BasicBlock; +class BranchProbabilityInfo; +class Function; +class Loop; +class LoopInfo; +class MachineBasicBlock; +class MachineBranchProbabilityInfo; +class MachineFunction; +class MachineLoop; +class MachineLoopInfo; -template -int UnsignedFloat::compare(const UnsignedFloat &X) const { - // Check for zero. - if (isZero()) - return X.isZero() ? 0 : -1; - if (X.isZero()) - return 1; - - // Check for the scale. Use lgFloor to be sure that the exponent difference - // is always lower than 64. - int32_t lgL = lgFloor(), lgR = X.lgFloor(); - if (lgL != lgR) - return lgL < lgR ? -1 : 1; - - // Compare digits. - if (Exponent < X.Exponent) - return UnsignedFloatBase::compare(Digits, X.Digits, X.Exponent - Exponent); - - return -UnsignedFloatBase::compare(X.Digits, Digits, Exponent - X.Exponent); -} +namespace bfi_detail { -template struct isPodLike> { - static const bool value = true; -}; -} +struct IrreducibleGraph; -//===----------------------------------------------------------------------===// -// -// BlockMass definition. -// -// TODO: Make this private to BlockFrequencyInfoImpl or delete. -// -//===----------------------------------------------------------------------===// -namespace llvm { +// This is part of a workaround for a GCC 4.7 crash on lambdas. +template struct BlockEdgesAdder; /// \brief Mass of a block. /// @@ -738,7 +84,7 @@ public: /// \brief Add another mass. /// /// Adds another mass, saturating at \a isFull() rather than overflowing. - BlockMass &operator+=(const BlockMass &X) { + BlockMass &operator+=(BlockMass X) { uint64_t Sum = Mass + X.Mass; Mass = Sum < Mass ? UINT64_MAX : Sum; return *this; @@ -748,80 +94,56 @@ public: /// /// Subtracts another mass, saturating at \a isEmpty() rather than /// undeflowing. - BlockMass &operator-=(const BlockMass &X) { + BlockMass &operator-=(BlockMass X) { uint64_t Diff = Mass - X.Mass; Mass = Diff > Mass ? 0 : Diff; return *this; } - BlockMass &operator*=(const BranchProbability &P) { + BlockMass &operator*=(BranchProbability P) { Mass = P.scale(Mass); return *this; } - bool operator==(const BlockMass &X) const { return Mass == X.Mass; } - bool operator!=(const BlockMass &X) const { return Mass != X.Mass; } - bool operator<=(const BlockMass &X) const { return Mass <= X.Mass; } - bool operator>=(const BlockMass &X) const { return Mass >= X.Mass; } - bool operator<(const BlockMass &X) const { return Mass < X.Mass; } - bool operator>(const BlockMass &X) const { return Mass > X.Mass; } + bool operator==(BlockMass X) const { return Mass == X.Mass; } + bool operator!=(BlockMass X) const { return Mass != X.Mass; } + bool operator<=(BlockMass X) const { return Mass <= X.Mass; } + bool operator>=(BlockMass X) const { return Mass >= X.Mass; } + bool operator<(BlockMass X) const { return Mass < X.Mass; } + bool operator>(BlockMass X) const { return Mass > X.Mass; } - /// \brief Convert to floating point. + /// \brief Convert to scaled number. /// - /// Convert to a float. \a isFull() gives 1.0, while \a isEmpty() gives - /// slightly above 0.0. - UnsignedFloat toFloat() const; + /// Convert to \a ScaledNumber. \a isFull() gives 1.0, while \a isEmpty() + /// gives slightly above 0.0. + ScaledNumber toScaled() const; void dump() const; raw_ostream &print(raw_ostream &OS) const; }; -inline BlockMass operator+(const BlockMass &L, const BlockMass &R) { +inline BlockMass operator+(BlockMass L, BlockMass R) { return BlockMass(L) += R; } -inline BlockMass operator-(const BlockMass &L, const BlockMass &R) { +inline BlockMass operator-(BlockMass L, BlockMass R) { return BlockMass(L) -= R; } -inline BlockMass operator*(const BlockMass &L, const BranchProbability &R) { +inline BlockMass operator*(BlockMass L, BranchProbability R) { return BlockMass(L) *= R; } -inline BlockMass operator*(const BranchProbability &L, const BlockMass &R) { +inline BlockMass operator*(BranchProbability L, BlockMass R) { return BlockMass(R) *= L; } -inline raw_ostream &operator<<(raw_ostream &OS, const BlockMass &X) { +inline raw_ostream &operator<<(raw_ostream &OS, BlockMass X) { return X.print(OS); } -template <> struct isPodLike { +} // end namespace bfi_detail + +template <> struct isPodLike { static const bool value = true; }; -} - -//===----------------------------------------------------------------------===// -// -// BlockFrequencyInfoImpl definition. -// -//===----------------------------------------------------------------------===// -namespace llvm { - -class BasicBlock; -class BranchProbabilityInfo; -class Function; -class Loop; -class LoopInfo; -class MachineBasicBlock; -class MachineBranchProbabilityInfo; -class MachineFunction; -class MachineLoop; -class MachineLoopInfo; - -namespace bfi_detail { -struct IrreducibleGraph; - -// This is part of a workaround for a GCC 4.7 crash on lambdas. -template struct BlockEdgesAdder; -} /// \brief Base class for BlockFrequencyInfoImpl /// @@ -833,7 +155,8 @@ template struct BlockEdgesAdder; /// BlockFrequencyInfoImpl. See there for details. class BlockFrequencyInfoImplBase { public: - typedef UnsignedFloat Float; + typedef ScaledNumber Scaled64; + typedef bfi_detail::BlockMass BlockMass; /// \brief Representative of a block. /// @@ -862,34 +185,37 @@ public: /// \brief Stats about a block itself. struct FrequencyData { - Float Floating; + Scaled64 Scaled; uint64_t Integer; }; /// \brief Data about a loop. /// - /// Contains the data necessary to represent represent a loop as a - /// pseudo-node once it's packaged. + /// Contains the data necessary to represent a loop as a pseudo-node once it's + /// packaged. struct LoopData { typedef SmallVector, 4> ExitMap; typedef SmallVector NodeList; - LoopData *Parent; ///< The parent loop. - bool IsPackaged; ///< Whether this has been packaged. - uint32_t NumHeaders; ///< Number of headers. - ExitMap Exits; ///< Successor edges (and weights). - NodeList Nodes; ///< Header and the members of the loop. - BlockMass BackedgeMass; ///< Mass returned to loop header. + typedef SmallVector HeaderMassList; + LoopData *Parent; ///< The parent loop. + bool IsPackaged; ///< Whether this has been packaged. + uint32_t NumHeaders; ///< Number of headers. + ExitMap Exits; ///< Successor edges (and weights). + NodeList Nodes; ///< Header and the members of the loop. + HeaderMassList BackedgeMass; ///< Mass returned to each loop header. BlockMass Mass; - Float Scale; + Scaled64 Scale; LoopData(LoopData *Parent, const BlockNode &Header) - : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header) {} + : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header), + BackedgeMass(1) {} template LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther, It2 LastOther) : Parent(Parent), IsPackaged(false), Nodes(FirstHeader, LastHeader) { NumHeaders = Nodes.size(); Nodes.insert(Nodes.end(), FirstOther, LastOther); + BackedgeMass.resize(NumHeaders); } bool isHeader(const BlockNode &Node) const { if (isIrreducible()) @@ -900,6 +226,14 @@ public: BlockNode getHeader() const { return Nodes[0]; } bool isIrreducible() const { return NumHeaders > 1; } + HeaderMassList::difference_type getHeaderIndex(const BlockNode &B) { + assert(isHeader(B) && "this is only valid on loop header blocks"); + if (isIrreducible()) + return std::lower_bound(Nodes.begin(), Nodes.begin() + NumHeaders, B) - + Nodes.begin(); + return 0; + } + NodeList::const_iterator members_begin() const { return Nodes.begin() + NumHeaders; } @@ -937,7 +271,7 @@ public: /// loop. /// /// This function should only be called when distributing mass. As long as - /// there are no irreducilbe edges to Node, then it will have complexity + /// there are no irreducible edges to Node, then it will have complexity /// O(1) in this context. /// /// In general, the complexity is O(L), where L is the number of loop @@ -999,6 +333,8 @@ public: BlockNode TargetNode; uint64_t Amount; Weight() : Type(Local), Amount(0) {} + Weight(DistType Type, BlockNode TargetNode, uint64_t Amount) + : Type(Type), TargetNode(TargetNode), Amount(Amount) {} }; /// \brief Distribution of unscaled probability weight. @@ -1106,6 +442,16 @@ public: /// \brief Compute the loop scale for a loop. void computeLoopScale(LoopData &Loop); + /// Adjust the mass of all headers in an irreducible loop. + /// + /// Initially, irreducible loops are assumed to distribute their mass + /// equally among its headers. This can lead to wrong frequency estimates + /// since some headers may be executed more frequently than others. + /// + /// This adjusts header mass distribution so it matches the weights of + /// the backedges going into each of the loop headers. + void adjustLoopHeaderMass(LoopData &Loop); + /// \brief Package up a loop. void packageLoop(LoopData &Loop); @@ -1127,10 +473,12 @@ public: virtual raw_ostream &print(raw_ostream &OS) const { return OS; } void dump() const { print(dbgs()); } - Float getFloatingBlockFreq(const BlockNode &Node) const; + Scaled64 getFloatingBlockFreq(const BlockNode &Node) const; BlockFrequency getBlockFreq(const BlockNode &Node) const; + void setBlockFreq(const BlockNode &Node, uint64_t Freq); + raw_ostream &printBlockFreq(raw_ostream &OS, const BlockNode &Node) const; raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency &Freq) const; @@ -1306,7 +654,7 @@ void IrreducibleGraph::addEdges(const BlockNode &Node, /// entries point to this block. Its successors are the headers, which split /// the frequency evenly. /// -/// This algorithm leverages BlockMass and UnsignedFloat to maintain precision, +/// This algorithm leverages BlockMass and ScaledNumber to maintain precision, /// separates mass distribution from loop scaling, and dithers to eliminate /// probability mass loss. /// @@ -1370,6 +718,17 @@ void IrreducibleGraph::addEdges(const BlockNode &Node, /// - Distribute the mass accordingly, dithering to minimize mass loss, /// as described in \a distributeMass(). /// +/// In the case of irreducible loops, instead of a single loop header, +/// there will be several. The computation of backedge masses is similar +/// but instead of having a single backedge mass, there will be one +/// backedge per loop header. In these cases, each backedge will carry +/// a mass proportional to the edge weights along the corresponding +/// path. +/// +/// At the end of propagation, the full mass assigned to the loop will be +/// distributed among the loop headers proportionally according to the +/// mass flowing through their backedges. +/// /// Finally, calculate the loop scale from the accumulated backedge mass. /// /// 3. Distribute mass in the function (\a computeMassInFunction()). @@ -1393,9 +752,6 @@ void IrreducibleGraph::addEdges(const BlockNode &Node, /// /// It has some known flaws. /// -/// - Loop scale is limited to 4096 per loop (2^12) to avoid exhausting -/// BlockFrequency's 64-bit integer precision. -/// /// - The model of irreducible control flow is a rough approximation. /// /// Modelling irreducible control flow exactly involves setting up and @@ -1413,11 +769,6 @@ void IrreducibleGraph::addEdges(const BlockNode &Node, /// as sub-loops, rather than arbitrarily shoving the problematic /// blocks into the headers of the main irreducible SCC. /// -/// - Backedge frequencies are assumed to be evenly split between the -/// headers of a given irreducible SCC. Instead, we could track the -/// backedge mass separately for each header, and adjust their relative -/// frequencies. -/// /// - Entry frequencies are assumed to be evenly split between the /// headers of a given irreducible SCC, which is the only option if we /// need to compute mass in the SCC before its parent loop. Instead, @@ -1524,7 +875,7 @@ template class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { /// /// \pre \a computeMassInLoop() has been called for each subloop of \c /// OuterLoop. - /// \pre \c Insert points at the the last loop successfully processed by \a + /// \pre \c Insert points at the last loop successfully processed by \a /// computeMassInLoop(). /// \pre \c OuterLoop has irreducible SCCs. void computeIrreducibleMass(LoopData *OuterLoop, @@ -1556,15 +907,16 @@ template class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { public: const FunctionT *getFunction() const { return F; } - void doFunction(const FunctionT *F, const BranchProbabilityInfoT *BPI, - const LoopInfoT *LI); + void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI, + const LoopInfoT &LI); BlockFrequencyInfoImpl() : BPI(nullptr), LI(nullptr), F(nullptr) {} using BlockFrequencyInfoImplBase::getEntryFreq; BlockFrequency getBlockFreq(const BlockT *BB) const { return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB)); } - Float getFloatingBlockFreq(const BlockT *BB) const { + void setBlockFreq(const BlockT *BB, uint64_t Freq); + Scaled64 getFloatingBlockFreq(const BlockT *BB) const { return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB)); } @@ -1589,13 +941,13 @@ public: }; template -void BlockFrequencyInfoImpl::doFunction(const FunctionT *F, - const BranchProbabilityInfoT *BPI, - const LoopInfoT *LI) { +void BlockFrequencyInfoImpl::calculate(const FunctionT &F, + const BranchProbabilityInfoT &BPI, + const LoopInfoT &LI) { // Save the parameters. - this->BPI = BPI; - this->LI = LI; - this->F = F; + this->BPI = &BPI; + this->LI = &LI; + this->F = &F; // Clean up left-over data structures. BlockFrequencyInfoImplBase::clear(); @@ -1603,12 +955,12 @@ void BlockFrequencyInfoImpl::doFunction(const FunctionT *F, Nodes.clear(); // Initialize. - DEBUG(dbgs() << "\nblock-frequency: " << F->getName() << "\n=================" - << std::string(F->getName().size(), '=') << "\n"); + DEBUG(dbgs() << "\nblock-frequency: " << F.getName() << "\n=================" + << std::string(F.getName().size(), '=') << "\n"); initializeRPOT(); initializeLoops(); - // Visit loops in post-order to find thelocal mass distribution, and then do + // Visit loops in post-order to find the local mass distribution, and then do // the full function. computeMassInLoops(); computeMassInFunction(); @@ -1616,8 +968,23 @@ void BlockFrequencyInfoImpl::doFunction(const FunctionT *F, finalizeMetrics(); } +template +void BlockFrequencyInfoImpl::setBlockFreq(const BlockT *BB, uint64_t Freq) { + if (Nodes.count(BB)) + BlockFrequencyInfoImplBase::setBlockFreq(getNode(BB), Freq); + else { + // If BB is a newly added block after BFI is done, we need to create a new + // BlockNode for it assigned with a new index. The index can be determined + // by the size of Freqs. + BlockNode NewNode(Freqs.size()); + Nodes[BB] = NewNode; + Freqs.emplace_back(); + BlockFrequencyInfoImplBase::setBlockFreq(NewNode, Freq); + } +} + template void BlockFrequencyInfoImpl::initializeRPOT() { - const BlockT *Entry = F->begin(); + const BlockT *Entry = &F->front(); RPOT.reserve(F->size()); std::copy(po_begin(Entry), po_end(Entry), std::back_inserter(RPOT)); std::reverse(RPOT.begin(), RPOT.end()); @@ -1720,6 +1087,8 @@ bool BlockFrequencyInfoImpl::computeMassInLoop(LoopData &Loop) { for (const BlockNode &M : Loop.Nodes) if (!propagateMassToSuccessors(&Loop, M)) llvm_unreachable("unhandled irreducible control flow"); + + adjustLoopHeaderMass(Loop); } else { Working[Loop.getHeader().Index].getMass() = BlockMass::getFull(); if (!propagateMassToSuccessors(&Loop, Loop.getHeader())) @@ -1839,16 +1208,18 @@ raw_ostream &BlockFrequencyInfoImpl::print(raw_ostream &OS) const { if (!F) return OS; OS << "block-frequency-info: " << F->getName() << "\n"; - for (const BlockT &BB : *F) - OS << " - " << bfi_detail::getBlockName(&BB) - << ": float = " << getFloatingBlockFreq(&BB) - << ", int = " << getBlockFreq(&BB).getFrequency() << "\n"; + for (const BlockT &BB : *F) { + OS << " - " << bfi_detail::getBlockName(&BB) << ": float = "; + getFloatingBlockFreq(&BB).print(OS, 5) + << ", int = " << getBlockFreq(&BB).getFrequency() << "\n"; + } // Add an extra newline for readability. OS << "\n"; return OS; } -} + +} // end namespace llvm #undef DEBUG_TYPE