X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FAnalysis%2FBlockFrequencyInfoImpl.cpp;h=48e23af2690a729f5e862a5d5caf1d0a78b5e5fc;hb=3386f43f2a50c67f9d22351b4e1ab6fa0980e99b;hp=9da93424a8b05cff5d2be3f720cc218b0069f4ca;hpb=b85e7ae9abd02462530a9c097d016f587538eae6;p=oota-llvm.git diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp index 9da93424a8b..48e23af2690 100644 --- a/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -13,8 +13,8 @@ #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/ADT/SCCIterator.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; using namespace llvm::bfi_detail; @@ -123,8 +123,12 @@ static void combineWeight(Weight &W, const Weight &OtherW) { } assert(W.Type == OtherW.Type); assert(W.TargetNode == OtherW.TargetNode); - assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow"); - W.Amount += OtherW.Amount; + assert(OtherW.Amount && "Expected non-zero weight"); + if (W.Amount > W.Amount + OtherW.Amount) + // Saturate on overflow. + W.Amount = UINT64_MAX; + else + W.Amount += OtherW.Amount; } static void combineWeightsBySorting(WeightList &Weights) { // Sort so edges to the same node are adjacent. @@ -207,11 +211,19 @@ void Distribution::normalize() { Shift = 33 - countLeadingZeros(Total); // Early exit if nothing needs to be scaled. - if (!Shift) + if (!Shift) { + // If we didn't overflow then combineWeights() shouldn't have changed the + // sum of the weights, but let's double-check. + assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0), + [](uint64_t Sum, const Weight &W) { + return Sum + W.Amount; + }) && + "Expected total to be correct"); return; + } // Recompute the total through accumulation (rather than shifting it) so that - // it's accurate after shifting. + // it's accurate after shifting and any changes combineWeights() made above. Total = 0; // Sum the weights to each node and shift right if necessary. @@ -274,7 +286,7 @@ bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, if (isLoopHeader(Resolved)) { DEBUG(debugSuccessor("backedge")); - Dist.addBackedge(OuterLoop->getHeader(), Weight); + Dist.addBackedge(Resolved, Weight); return true; } @@ -319,32 +331,38 @@ bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist( return true; } -/// \brief Get the maximum allowed loop scale. -/// -/// Gives the maximum number of estimated iterations allowed for a loop. Very -/// large numbers cause problems downstream (even within 64-bits). -static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); } - /// \brief Compute the loop scale for a loop. void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) { // Compute loop scale. DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n"); + // Infinite loops need special handling. If we give the back edge an infinite + // mass, they may saturate all the other scales in the function down to 1, + // making all the other region temperatures look exactly the same. Choose an + // arbitrary scale to avoid these issues. + // + // FIXME: An alternate way would be to select a symbolic scale which is later + // replaced to be the maximum of all computed scales plus 1. This would + // appropriately describe the loop as having a large scale, without skewing + // the final frequency computation. + const Scaled64 InifiniteLoopScale(1, 12); + // LoopScale == 1 / ExitMass // ExitMass == HeadMass - BackedgeMass - BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass; + BlockMass TotalBackedgeMass; + for (auto &Mass : Loop.BackedgeMass) + TotalBackedgeMass += Mass; + BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass; - // Block scale stores the inverse of the scale. - Loop.Scale = ExitMass.toScaled().inverse(); + // Block scale stores the inverse of the scale. If this is an infinite loop, + // its exit mass will be zero. In this case, use an arbitrary scale for the + // loop scale. + Loop.Scale = + ExitMass.isEmpty() ? InifiniteLoopScale : ExitMass.toScaled().inverse(); DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull() - << " - " << Loop.BackedgeMass << ")\n" + << " - " << TotalBackedgeMass << ")\n" << " - scale = " << Loop.Scale << "\n"); - - if (Loop.Scale > getMaxLoopScale()) { - Loop.Scale = getMaxLoopScale(); - DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n"); - } } /// \brief Package up a loop. @@ -360,6 +378,19 @@ void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) { Loop.IsPackaged = true; } +#ifndef NDEBUG +static void debugAssign(const BlockFrequencyInfoImplBase &BFI, + const DitheringDistributer &D, const BlockNode &T, + const BlockMass &M, const char *Desc) { + dbgs() << " => assign " << M << " (" << D.RemMass << ")"; + if (Desc) + dbgs() << " [" << Desc << "]"; + if (T.isValid()) + dbgs() << " to " << BFI.getBlockName(T); + dbgs() << "\n"; +} +#endif + void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source, LoopData *OuterLoop, Distribution &Dist) { @@ -369,25 +400,12 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source, // Distribute mass to successors as laid out in Dist. DitheringDistributer D(Dist, Mass); -#ifndef NDEBUG - auto debugAssign = [&](const BlockNode &T, const BlockMass &M, - const char *Desc) { - dbgs() << " => assign " << M << " (" << D.RemMass << ")"; - if (Desc) - dbgs() << " [" << Desc << "]"; - if (T.isValid()) - dbgs() << " to " << getBlockName(T); - dbgs() << "\n"; - }; - (void)debugAssign; -#endif - for (const Weight &W : Dist.Weights) { // Check for a local edge (non-backedge and non-exit). BlockMass Taken = D.takeMass(W.Amount); if (W.Type == Weight::Local) { Working[W.TargetNode.Index].getMass() += Taken; - DEBUG(debugAssign(W.TargetNode, Taken, nullptr)); + DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); continue; } @@ -396,15 +414,15 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source, // Check for a backedge. if (W.Type == Weight::Backedge) { - OuterLoop->BackedgeMass += Taken; - DEBUG(debugAssign(BlockNode(), Taken, "back")); + OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken; + DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back")); continue; } // This must be an exit. assert(W.Type == Weight::Exit); OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken)); - DEBUG(debugAssign(W.TargetNode, Taken, "exit")); + DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit")); } } @@ -412,15 +430,24 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max) { // Scale the Factor to a size that creates integers. Ideally, integers would // be scaled so that Max == UINT64_MAX so that they can be best - // differentiated. However, the register allocator currently deals poorly - // with large numbers. Instead, push Min up a little from 1 to give some - // room to differentiate small, unequal numbers. - // - // TODO: fix issues downstream so that ScalingFactor can be - // Scaled64(1,64)/Max. - Scaled64 ScalingFactor = Min.inverse(); - if ((Max / Min).lg() < 60) + // differentiated. However, in the presence of large frequency values, small + // frequencies are scaled down to 1, making it impossible to differentiate + // small, unequal numbers. When the spread between Min and Max frequencies + // fits well within MaxBits, we make the scale be at least 8. + const unsigned MaxBits = 64; + const unsigned SpreadBits = (Max / Min).lg(); + Scaled64 ScalingFactor; + if (SpreadBits <= MaxBits - 3) { + // If the values are small enough, make the scaling factor at least 8 to + // allow distinguishing small values. + ScalingFactor = Min.inverse(); ScalingFactor <<= 3; + } else { + // If the values need more than MaxBits to be represented, saturate small + // frequency values down to 1 by using a scaling factor that benefits large + // frequency values. + ScalingFactor = Scaled64(1, MaxBits) / Max; + } // Translate the floats to integers. DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max @@ -503,6 +530,13 @@ BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const { return Freqs[Node.Index].Scaled; } +void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node, + uint64_t Freq) { + assert(Node.isValid() && "Expected valid node"); + assert(Node.Index < Freqs.size() && "Expected legal index"); + Freqs[Node.Index].Integer = Freq; +} + std::string BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const { return std::string(); @@ -689,10 +723,47 @@ BlockFrequencyInfoImplBase::analyzeIrreducible( void BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) { OuterLoop.Exits.clear(); - OuterLoop.BackedgeMass = BlockMass::getEmpty(); + for (auto &Mass : OuterLoop.BackedgeMass) + Mass = BlockMass::getEmpty(); auto O = OuterLoop.Nodes.begin() + 1; for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I) if (!Working[I->Index].isPackaged()) *O++ = *I; OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end()); } + +void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) { + assert(Loop.isIrreducible() && "this only makes sense on irreducible loops"); + + // Since the loop has more than one header block, the mass flowing back into + // each header will be different. Adjust the mass in each header loop to + // reflect the masses flowing through back edges. + // + // To do this, we distribute the initial mass using the backedge masses + // as weights for the distribution. + BlockMass LoopMass = BlockMass::getFull(); + Distribution Dist; + + DEBUG(dbgs() << "adjust-loop-header-mass:\n"); + for (uint32_t H = 0; H < Loop.NumHeaders; ++H) { + auto &HeaderNode = Loop.Nodes[H]; + auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)]; + DEBUG(dbgs() << " - Add back edge mass for node " + << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n"); + if (BackedgeMass.getMass() > 0) + Dist.addLocal(HeaderNode, BackedgeMass.getMass()); + else + DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n"); + } + + DitheringDistributer D(Dist, LoopMass); + + DEBUG(dbgs() << " Distribute loop mass " << LoopMass + << " to headers using above weights\n"); + for (const Weight &W : Dist.Weights) { + BlockMass Taken = D.takeMass(W.Amount); + assert(W.Type == Weight::Local && "all weights should be local"); + Working[W.TargetNode.Index].getMass() = Taken; + DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); + } +}