X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FCodeGen%2FRegAllocPBQP.cpp;h=af6d16eefd7988fb054f324b04202bb2d415c1ac;hp=15a88e224faa9906e10945bb1613813f0e095792;hb=52e57900a311eac02f6cfec7572e53175fdb1311;hpb=604b3573f955d61ba87215c25ba2f52477c71ecc diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 15a88e224fa..af6d16eefd7 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -29,51 +29,50 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" - #include "llvm/CodeGen/RegAllocPBQP.h" #include "RegisterCoalescer.h" #include "Spiller.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PBQP/Graph.h" -#include "llvm/CodeGen/PBQP/HeuristicSolver.h" -#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include #include +#include #include #include #include using namespace llvm; +#define DEBUG_TYPE "regalloc" + static RegisterRegAlloc -registerPBQPRepAlloc("pbqp", "PBQP register allocator", +RegisterPBQPRepAlloc("pbqp", "PBQP register allocator", createDefaultPBQPRegisterAllocator); static cl::opt -pbqpCoalescing("pbqp-coalescing", +PBQPCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden); #ifndef NDEBUG static cl::opt -pbqpDumpGraphs("pbqp-dump-graphs", +PBQPDumpGraphs("pbqp-dump-graphs", cl::desc("Dump graphs for each function/round in the compilation unit."), cl::init(false), cl::Hidden); #endif @@ -90,26 +89,24 @@ public: static char ID; /// Construct a PBQP register allocator. - RegAllocPBQP(OwningPtr &b, char *cPassID=0) - : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) { + RegAllocPBQP(char *cPassID = nullptr) + : MachineFunctionPass(ID), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); } /// Return the pass name. - virtual const char* getPassName() const { + const char* getPassName() const override { return "PBQP Register Allocator"; } /// PBQP analysis usage. - virtual void getAnalysisUsage(AnalysisUsage &au) const; + void getAnalysisUsage(AnalysisUsage &au) const override; /// Perform register allocation - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: @@ -121,305 +118,323 @@ private: typedef std::map CoalesceMap; typedef std::set RegSet; - - OwningPtr builder; - char *customPassID; - MachineFunction *mf; - const TargetMachine *tm; - const TargetRegisterInfo *tri; - const TargetInstrInfo *tii; - const MachineLoopInfo *loopInfo; - MachineRegisterInfo *mri; + RegSet VRegsToAlloc, EmptyIntervalVRegs; - OwningPtr spiller; - LiveIntervals *lis; - LiveStacks *lss; - VirtRegMap *vrm; + /// \brief Finds the initial set of vreg intervals to allocate. + void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS); - RegSet vregsToAlloc, emptyIntervalVRegs; + /// \brief Constructs an initial graph. + void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller); - /// \brief Finds the initial set of vreg intervals to allocate. - void findVRegIntervalsToAlloc(); + /// \brief Spill the given VReg. + void spillVReg(unsigned VReg, SmallVectorImpl &NewIntervals, + MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, + Spiller &VRegSpiller); /// \brief Given a solved PBQP problem maps this solution back to a register /// assignment. - bool mapPBQPToRegAlloc(const PBQPRAProblem &problem, - const PBQP::Solution &solution); + bool mapPBQPToRegAlloc(const PBQPRAGraph &G, + const PBQP::Solution &Solution, + VirtRegMap &VRM, + Spiller &VRegSpiller); /// \brief Postprocessing before final spilling. Sets basic block "live in" /// variables. - void finalizeAlloc() const; + void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS, + VirtRegMap &VRM) const; }; char RegAllocPBQP::ID = 0; -} // End anonymous namespace. - -unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const { - Node2VReg::const_iterator vregItr = node2VReg.find(node); - assert(vregItr != node2VReg.end() && "No vreg for node."); - return vregItr->second; -} - -PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const { - VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg); - assert(nodeItr != vreg2Node.end() && "No node for vreg."); - return nodeItr->second; - -} - -const PBQPRAProblem::AllowedSet& - PBQPRAProblem::getAllowedSet(unsigned vreg) const { - AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg); - assert(allowedSetItr != allowedSets.end() && "No pregs for vreg."); - const AllowedSet &allowedSet = allowedSetItr->second; - return allowedSet; -} +/// @brief Set spill costs for each node in the PBQP reg-alloc graph. +class SpillCosts : public PBQPRAConstraint { +public: + void apply(PBQPRAGraph &G) override { + LiveIntervals &LIS = G.getMetadata().LIS; + + // A minimum spill costs, so that register constraints can can be set + // without normalization in the [0.0:MinSpillCost( interval. + const PBQP::PBQPNum MinSpillCost = 10.0; + + for (auto NId : G.nodeIds()) { + PBQP::PBQPNum SpillCost = + LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight; + if (SpillCost == 0.0) + SpillCost = std::numeric_limits::min(); + else + SpillCost += MinSpillCost; + PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId)); + NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost; + G.setNodeCosts(NId, std::move(NodeCosts)); + } + } +}; -unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const { - assert(isPRegOption(vreg, option) && "Not a preg option."); +/// @brief Add interference edges between overlapping vregs. +class Interference : public PBQPRAConstraint { +private: - const AllowedSet& allowedSet = getAllowedSet(vreg); - assert(option <= allowedSet.size() && "Option outside allowed set."); - return allowedSet[option - 1]; -} + typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr; + typedef std::pair IMatrixKey; + typedef DenseMap IMatrixCache; -PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, - const MachineLoopInfo *loopInfo, - const RegSet &vregs) { + // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required + // for the fast interference graph construction algorithm. The last is there + // to save us from looking up node ids via the VRegToNode map in the graph + // metadata. + typedef std::tuple + IntervalInfo; - LiveIntervals *LIS = const_cast(lis); - MachineRegisterInfo *mri = &mf->getRegInfo(); - const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); + static SlotIndex getStartPoint(const IntervalInfo &I) { + return std::get<0>(I)->segments[std::get<1>(I)].start; + } - OwningPtr p(new PBQPRAProblem()); - PBQP::Graph &g = p->getGraph(); - RegSet pregs; + static SlotIndex getEndPoint(const IntervalInfo &I) { + return std::get<0>(I)->segments[std::get<1>(I)].end; + } - // Collect the set of preg intervals, record that they're used in the MF. - for (unsigned Reg = 1, e = tri->getNumRegs(); Reg != e; ++Reg) { - if (mri->def_empty(Reg)) - continue; - pregs.insert(Reg); - mri->setPhysRegUsed(Reg); + static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) { + return std::get<2>(I); } - // Iterate over vregs. - for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end(); - vregItr != vregEnd; ++vregItr) { - unsigned vreg = *vregItr; - const TargetRegisterClass *trc = mri->getRegClass(vreg); - LiveInterval *vregLI = &LIS->getInterval(vreg); + static bool lowestStartPoint(const IntervalInfo &I1, + const IntervalInfo &I2) { + // Condition reversed because priority queue has the *highest* element at + // the front, rather than the lowest. + return getStartPoint(I1) > getStartPoint(I2); + } - // Record any overlaps with regmask operands. - BitVector regMaskOverlaps; - LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps); + static bool lowestEndPoint(const IntervalInfo &I1, + const IntervalInfo &I2) { + SlotIndex E1 = getEndPoint(I1); + SlotIndex E2 = getEndPoint(I2); - // Compute an initial allowed set for the current vreg. - typedef std::vector VRAllowed; - VRAllowed vrAllowed; - ArrayRef rawOrder = trc->getRawAllocationOrder(*mf); - for (unsigned i = 0; i != rawOrder.size(); ++i) { - unsigned preg = rawOrder[i]; - if (mri->isReserved(preg)) - continue; + if (E1 < E2) + return true; - // vregLI crosses a regmask operand that clobbers preg. - if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg)) - continue; + if (E1 > E2) + return false; - // vregLI overlaps fixed regunit interference. - bool Interference = false; - for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units) { - if (vregLI->overlaps(LIS->getRegUnit(*Units))) { - Interference = true; - break; - } - } - if (Interference) - continue; + // If two intervals end at the same point, we need a way to break the tie or + // the set will assume they're actually equal and refuse to insert a + // "duplicate". Just compare the vregs - fast and guaranteed unique. + return std::get<0>(I1)->reg < std::get<0>(I2)->reg; + } - // preg is usable for this virtual register. - vrAllowed.push_back(preg); - } + static bool isAtLastSegment(const IntervalInfo &I) { + return std::get<1>(I) == std::get<0>(I)->size() - 1; + } - // Construct the node. - PBQP::Graph::NodeItr node = - g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0)); + static IntervalInfo nextSegment(const IntervalInfo &I) { + return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I)); + } - // Record the mapping and allowed set in the problem. - p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end()); +public: - PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ? - vregLI->weight : std::numeric_limits::min(); + void apply(PBQPRAGraph &G) override { + // The following is loosely based on the linear scan algorithm introduced in + // "Linear Scan Register Allocation" by Poletto and Sarkar. This version + // isn't linear, because the size of the active set isn't bound by the + // number of registers, but rather the size of the largest clique in the + // graph. Still, we expect this to be better than N^2. + LiveIntervals &LIS = G.getMetadata().LIS; + + // Interferenc matrices are incredibly regular - they're only a function of + // the allowed sets, so we cache them to avoid the overhead of constructing + // and uniquing them. + IMatrixCache C; + + typedef std::set IntervalSet; + typedef std::priority_queue, + decltype(&lowestStartPoint)> IntervalQueue; + IntervalSet Active(lowestEndPoint); + IntervalQueue Inactive(lowestStartPoint); + + // Start by building the inactive set. + for (auto NId : G.nodeIds()) { + unsigned VReg = G.getNodeMetadata(NId).getVReg(); + LiveInterval &LI = LIS.getInterval(VReg); + assert(!LI.empty() && "PBQP graph contains node for empty interval"); + Inactive.push(std::make_tuple(&LI, 0, NId)); + } - addSpillCosts(g.getNodeCosts(node), spillCost); - } + while (!Inactive.empty()) { + // Tentatively grab the "next" interval - this choice may be overriden + // below. + IntervalInfo Cur = Inactive.top(); + + // Retire any active intervals that end before Cur starts. + IntervalSet::iterator RetireItr = Active.begin(); + while (RetireItr != Active.end() && + (getEndPoint(*RetireItr) <= getStartPoint(Cur))) { + // If this interval has subsequent segments, add the next one to the + // inactive list. + if (!isAtLastSegment(*RetireItr)) + Inactive.push(nextSegment(*RetireItr)); + + ++RetireItr; + } + Active.erase(Active.begin(), RetireItr); + + // One of the newly retired segments may actually start before the + // Cur segment, so re-grab the front of the inactive list. + Cur = Inactive.top(); + Inactive.pop(); + + // At this point we know that Cur overlaps all active intervals. Add the + // interference edges. + PBQP::GraphBase::NodeId NId = getNodeId(Cur); + for (const auto &A : Active) { + PBQP::GraphBase::NodeId MId = getNodeId(A); + + // Check that we haven't already added this edge + // FIXME: findEdge is expensive in the worst case (O(max_clique(G))). + // It might be better to replace this with a local bit-matrix. + if (G.findEdge(NId, MId) != PBQPRAGraph::invalidEdgeId()) + continue; - for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end(); - vr1Itr != vrEnd; ++vr1Itr) { - unsigned vr1 = *vr1Itr; - const LiveInterval &l1 = lis->getInterval(vr1); - const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1); - - for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr); - vr2Itr != vrEnd; ++vr2Itr) { - unsigned vr2 = *vr2Itr; - const LiveInterval &l2 = lis->getInterval(vr2); - const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2); - - assert(!l2.empty() && "Empty interval in vreg set?"); - if (l1.overlaps(l2)) { - PBQP::Graph::EdgeItr edge = - g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2), - PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0)); - - addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri); + // This is a new edge - add it to the graph. + createInterferenceEdge(G, NId, MId, C); } + + // Finally, add Cur to the Active set. + Active.insert(Cur); } } - return p.take(); -} +private: -void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec, - PBQP::PBQPNum spillCost) { - costVec[0] = spillCost; -} + void createInterferenceEdge(PBQPRAGraph &G, PBQPRAGraph::NodeId NId, + PBQPRAGraph::NodeId MId, IMatrixCache &C) { -void PBQPBuilder::addInterferenceCosts( - PBQP::Matrix &costMat, - const PBQPRAProblem::AllowedSet &vr1Allowed, - const PBQPRAProblem::AllowedSet &vr2Allowed, - const TargetRegisterInfo *tri) { - assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch."); - assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch."); + const TargetRegisterInfo &TRI = + *G.getMetadata().MF.getSubtarget().getRegisterInfo(); - for (unsigned i = 0; i != vr1Allowed.size(); ++i) { - unsigned preg1 = vr1Allowed[i]; + const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs(); + const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs(); - for (unsigned j = 0; j != vr2Allowed.size(); ++j) { - unsigned preg2 = vr2Allowed[j]; + // Try looking the edge costs up in the IMatrixCache first. + IMatrixKey K(&NRegs, &MRegs); + IMatrixCache::iterator I = C.find(K); + if (I != C.end()) { + G.addEdgeBypassingCostAllocator(NId, MId, I->second); + return; + } - if (tri->regsOverlap(preg1, preg2)) { - costMat[i + 1][j + 1] = std::numeric_limits::infinity(); + PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0); + for (unsigned I = 0; I != NRegs.size(); ++I) { + unsigned PRegN = NRegs[I]; + for (unsigned J = 0; J != MRegs.size(); ++J) { + unsigned PRegM = MRegs[J]; + if (TRI.regsOverlap(PRegN, PRegM)) + M[I + 1][J + 1] = std::numeric_limits::infinity(); } } - } -} - -PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, - const LiveIntervals *lis, - const MachineLoopInfo *loopInfo, - const RegSet &vregs) { - OwningPtr p(PBQPBuilder::build(mf, lis, loopInfo, vregs)); - PBQP::Graph &g = p->getGraph(); - - const TargetMachine &tm = mf->getTarget(); - CoalescerPair cp(*tm.getRegisterInfo()); + PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M)); + C[K] = G.getEdgeCostsPtr(EId); + } +}; - // Scan the machine function and add a coalescing cost whenever CoalescerPair - // gives the Ok. - for (MachineFunction::const_iterator mbbItr = mf->begin(), - mbbEnd = mf->end(); - mbbItr != mbbEnd; ++mbbItr) { - const MachineBasicBlock *mbb = &*mbbItr; - for (MachineBasicBlock::const_iterator miItr = mbb->begin(), - miEnd = mbb->end(); - miItr != miEnd; ++miItr) { - const MachineInstr *mi = &*miItr; +class Coalescing : public PBQPRAConstraint { +public: + void apply(PBQPRAGraph &G) override { + MachineFunction &MF = G.getMetadata().MF; + MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI; + CoalescerPair CP(*MF.getSubtarget().getRegisterInfo()); + + // Scan the machine function and add a coalescing cost whenever CoalescerPair + // gives the Ok. + for (const auto &MBB : MF) { + for (const auto &MI : MBB) { + + // Skip not-coalescable or already coalesced copies. + if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg()) + continue; - if (!cp.setRegisters(mi)) { - continue; // Not coalescable. - } + unsigned DstReg = CP.getDstReg(); + unsigned SrcReg = CP.getSrcReg(); - if (cp.getSrcReg() == cp.getDstReg()) { - continue; // Already coalesced. - } + const float Scale = 1.0f / MBFI.getEntryFreq(); + PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale; - unsigned dst = cp.getDstReg(), - src = cp.getSrcReg(); + if (CP.isPhys()) { + if (!MF.getRegInfo().isAllocatable(DstReg)) + continue; - const float copyFactor = 0.5; // Cost of copy relative to load. Current - // value plucked randomly out of the air. + PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg); - PBQP::PBQPNum cBenefit = - copyFactor * LiveIntervals::getSpillWeight(false, true, - loopInfo->getLoopDepth(mbb)); + const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed = + G.getNodeMetadata(NId).getAllowedRegs(); - if (cp.isPhys()) { - if (!mf->getRegInfo().isAllocatable(dst)) { - continue; - } + unsigned PRegOpt = 0; + while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg) + ++PRegOpt; - const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src); - unsigned pregOpt = 0; - while (pregOpt < allowed.size() && allowed[pregOpt] != dst) { - ++pregOpt; - } - if (pregOpt < allowed.size()) { - ++pregOpt; // +1 to account for spill option. - PBQP::Graph::NodeItr node = p->getNodeForVReg(src); - addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit); - } - } else { - const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst); - const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src); - PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst); - PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src); - PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2); - if (edge == g.edgesEnd()) { - edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1, - allowed2->size() + 1, - 0)); + if (PRegOpt < Allowed.size()) { + PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId)); + NewCosts[PRegOpt + 1] -= CBenefit; + G.setNodeCosts(NId, std::move(NewCosts)); + } } else { - if (g.getEdgeNode1(edge) == node2) { - std::swap(node1, node2); - std::swap(allowed1, allowed2); + PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg); + PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg); + const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 = + &G.getNodeMetadata(N1Id).getAllowedRegs(); + const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 = + &G.getNodeMetadata(N2Id).getAllowedRegs(); + + PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id); + if (EId == G.invalidEdgeId()) { + PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1, + Allowed2->size() + 1, 0); + addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit); + G.addEdge(N1Id, N2Id, std::move(Costs)); + } else { + if (G.getEdgeNode1Id(EId) == N2Id) { + std::swap(N1Id, N2Id); + std::swap(Allowed1, Allowed2); + } + PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId)); + addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit); + G.setEdgeCosts(EId, std::move(Costs)); } } - - addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2, - cBenefit); } } } - return p.take(); -} - -void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec, - unsigned pregOption, - PBQP::PBQPNum benefit) { - costVec[pregOption] += -benefit; -} - -void PBQPBuilderWithCoalescing::addVirtRegCoalesce( - PBQP::Matrix &costMat, - const PBQPRAProblem::AllowedSet &vr1Allowed, - const PBQPRAProblem::AllowedSet &vr2Allowed, - PBQP::PBQPNum benefit) { - - assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch."); - assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch."); - - for (unsigned i = 0; i != vr1Allowed.size(); ++i) { - unsigned preg1 = vr1Allowed[i]; - for (unsigned j = 0; j != vr2Allowed.size(); ++j) { - unsigned preg2 = vr2Allowed[j]; +private: - if (preg1 == preg2) { - costMat[i + 1][j + 1] += -benefit; + void addVirtRegCoalesce( + PBQPRAGraph::RawMatrix &CostMat, + const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1, + const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2, + PBQP::PBQPNum Benefit) { + assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch."); + assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch."); + for (unsigned I = 0; I != Allowed1.size(); ++I) { + unsigned PReg1 = Allowed1[I]; + for (unsigned J = 0; J != Allowed2.size(); ++J) { + unsigned PReg2 = Allowed2[J]; + if (PReg1 == PReg2) + CostMat[I + 1][J + 1] -= Benefit; } } } -} +}; + +} // End anonymous namespace. + +// Out-of-line destructor/anchor for PBQPRAConstraint. +PBQPRAConstraint::~PBQPRAConstraint() {} +void PBQPRAConstraint::anchor() {} +void PBQPRAConstraintList::anchor() {} void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.setPreservesCFG(); @@ -432,128 +447,235 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { //au.addRequiredID(SplitCriticalEdgesID); if (customPassID) au.addRequiredID(*customPassID); - au.addRequired(); au.addRequired(); au.addPreserved(); - au.addRequired(); - au.addPreserved(); + au.addRequired(); + au.addPreserved(); au.addRequired(); au.addPreserved(); + au.addRequired(); + au.addPreserved(); au.addRequired(); au.addPreserved(); MachineFunctionPass::getAnalysisUsage(au); } -void RegAllocPBQP::findVRegIntervalsToAlloc() { +void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF, + LiveIntervals &LIS) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); // Iterate over all live ranges. - for (unsigned i = 0, e = mri->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - if (mri->reg_nodbg_empty(Reg)) + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + if (MRI.reg_nodbg_empty(Reg)) continue; - LiveInterval *li = &lis->getInterval(Reg); + LiveInterval &LI = LIS.getInterval(Reg); // If this live interval is non-empty we will use pbqp to allocate it. // Empty intervals we allocate in a simple post-processing stage in // finalizeAlloc. - if (!li->empty()) { - vregsToAlloc.insert(li->reg); + if (!LI.empty()) { + VRegsToAlloc.insert(LI.reg); } else { - emptyIntervalVRegs.insert(li->reg); + EmptyIntervalVRegs.insert(LI.reg); + } + } +} + +static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI, + const MachineFunction &MF) { + const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); + for (unsigned i = 0; CSR[i] != 0; ++i) + if (TRI.regsOverlap(reg, CSR[i])) + return true; + return false; +} + +void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, + Spiller &VRegSpiller) { + MachineFunction &MF = G.getMetadata().MF; + + LiveIntervals &LIS = G.getMetadata().LIS; + const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); + const TargetRegisterInfo &TRI = + *G.getMetadata().MF.getSubtarget().getRegisterInfo(); + + std::vector Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end()); + + while (!Worklist.empty()) { + unsigned VReg = Worklist.back(); + Worklist.pop_back(); + + const TargetRegisterClass *TRC = MRI.getRegClass(VReg); + LiveInterval &VRegLI = LIS.getInterval(VReg); + + // Record any overlaps with regmask operands. + BitVector RegMaskOverlaps; + LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps); + + // Compute an initial allowed set for the current vreg. + std::vector VRegAllowed; + ArrayRef RawPRegOrder = TRC->getRawAllocationOrder(MF); + for (unsigned I = 0; I != RawPRegOrder.size(); ++I) { + unsigned PReg = RawPRegOrder[I]; + if (MRI.isReserved(PReg)) + continue; + + // vregLI crosses a regmask operand that clobbers preg. + if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg)) + continue; + + // vregLI overlaps fixed regunit interference. + bool Interference = false; + for (MCRegUnitIterator Units(PReg, &TRI); Units.isValid(); ++Units) { + if (VRegLI.overlaps(LIS.getRegUnit(*Units))) { + Interference = true; + break; + } + } + if (Interference) + continue; + + // preg is usable for this virtual register. + VRegAllowed.push_back(PReg); + } + + // Check for vregs that have no allowed registers. These should be + // pre-spilled and the new vregs added to the worklist. + if (VRegAllowed.empty()) { + SmallVector NewVRegs; + spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); + for (auto NewVReg : NewVRegs) + Worklist.push_back(NewVReg); + continue; } + + PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0); + + // Tweak cost of callee saved registers, as using then force spilling and + // restoring them. This would only happen in the prologue / epilogue though. + for (unsigned i = 0; i != VRegAllowed.size(); ++i) + if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF)) + NodeCosts[1 + i] += 1.0; + + PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts)); + G.getNodeMetadata(NId).setVReg(VReg); + G.getNodeMetadata(NId).setAllowedRegs( + G.getMetadata().getAllowedRegs(std::move(VRegAllowed))); + G.getMetadata().setNodeIdForVReg(VReg, NId); + } +} + +void RegAllocPBQP::spillVReg(unsigned VReg, + SmallVectorImpl &NewIntervals, + MachineFunction &MF, LiveIntervals &LIS, + VirtRegMap &VRM, Spiller &VRegSpiller) { + + VRegsToAlloc.erase(VReg); + LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM); + VRegSpiller.spill(LRE); + + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + (void)TRI; + DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: " + << LRE.getParent().weight << ", New vregs: "); + + // Copy any newly inserted live intervals into the list of regs to + // allocate. + for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); + I != E; ++I) { + const LiveInterval &LI = LIS.getInterval(*I); + assert(!LI.empty() && "Empty spill range."); + DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " "); + VRegsToAlloc.insert(LI.reg); } + + DEBUG(dbgs() << ")\n"); } -bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, - const PBQP::Solution &solution) { +bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, + const PBQP::Solution &Solution, + VirtRegMap &VRM, + Spiller &VRegSpiller) { + MachineFunction &MF = G.getMetadata().MF; + LiveIntervals &LIS = G.getMetadata().LIS; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + (void)TRI; + // Set to true if we have any spills - bool anotherRoundNeeded = false; + bool AnotherRoundNeeded = false; // Clear the existing allocation. - vrm->clearAllVirt(); + VRM.clearAllVirt(); - const PBQP::Graph &g = problem.getGraph(); // Iterate over the nodes mapping the PBQP solution to a register // assignment. - for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(), - nodeEnd = g.nodesEnd(); - node != nodeEnd; ++node) { - unsigned vreg = problem.getVRegForNode(node); - unsigned alloc = solution.getSelection(node); - - if (problem.isPRegOption(vreg, alloc)) { - unsigned preg = problem.getPRegForOption(vreg, alloc); - DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> " - << tri->getName(preg) << "\n"); - assert(preg != 0 && "Invalid preg selected."); - vrm->assignVirt2Phys(vreg, preg); - } else if (problem.isSpillOption(vreg, alloc)) { - vregsToAlloc.erase(vreg); - SmallVector newSpills; - LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm); - spiller->spill(LRE); - - DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> SPILLED (Cost: " - << LRE.getParent().weight << ", New vregs: "); - - // Copy any newly inserted live intervals into the list of regs to - // allocate. - for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); - itr != end; ++itr) { - assert(!(*itr)->empty() && "Empty spill range."); - DEBUG(dbgs() << PrintReg((*itr)->reg, tri) << " "); - vregsToAlloc.insert((*itr)->reg); - } - - DEBUG(dbgs() << ")\n"); - - // We need another round if spill intervals were added. - anotherRoundNeeded |= !LRE.empty(); + for (auto NId : G.nodeIds()) { + unsigned VReg = G.getNodeMetadata(NId).getVReg(); + unsigned AllocOption = Solution.getSelection(NId); + + if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) { + unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1]; + DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> " + << TRI.getName(PReg) << "\n"); + assert(PReg != 0 && "Invalid preg selected."); + VRM.assignVirt2Phys(VReg, PReg); } else { - llvm_unreachable("Unknown allocation option."); + // Spill VReg. If this introduces new intervals we'll need another round + // of allocation. + SmallVector NewVRegs; + spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); + AnotherRoundNeeded |= !NewVRegs.empty(); } } - return !anotherRoundNeeded; + return !AnotherRoundNeeded; } +void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, + LiveIntervals &LIS, + VirtRegMap &VRM) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); -void RegAllocPBQP::finalizeAlloc() const { // First allocate registers for the empty intervals. for (RegSet::const_iterator - itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end(); - itr != end; ++itr) { - LiveInterval *li = &lis->getInterval(*itr); + I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end(); + I != E; ++I) { + LiveInterval &LI = LIS.getInterval(*I); - unsigned physReg = mri->getSimpleHint(li->reg); + unsigned PReg = MRI.getSimpleHint(LI.reg); - if (physReg == 0) { - const TargetRegisterClass *liRC = mri->getRegClass(li->reg); - physReg = liRC->getRawAllocationOrder(*mf).front(); + if (PReg == 0) { + const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg); + PReg = RC.getRawAllocationOrder(MF).front(); } - vrm->assignVirt2Phys(li->reg, physReg); + VRM.assignVirt2Phys(LI.reg, PReg); } } +static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size, + unsigned NumInstr) { + // All intervals have a spill weight that is mostly proportional to the number + // of uses, with uses in loops having a bigger weight. + return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1); +} + bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { + LiveIntervals &LIS = getAnalysis(); + MachineBlockFrequencyInfo &MBFI = + getAnalysis(); - mf = &MF; - tm = &mf->getTarget(); - tri = tm->getRegisterInfo(); - tii = tm->getInstrInfo(); - mri = &mf->getRegInfo(); + calculateSpillWeightsAndHints(LIS, MF, getAnalysis(), MBFI, + normalizePBQPSpillWeight); - lis = &getAnalysis(); - lss = &getAnalysis(); - loopInfo = &getAnalysis(); + VirtRegMap &VRM = getAnalysis(); - vrm = &getAnalysis(); - spiller.reset(createInlineSpiller(*this, MF, *vrm)); + std::unique_ptr VRegSpiller(createInlineSpiller(*this, MF, VRM)); - mri->freezeReservedRegs(MF); + MF.getRegInfo().freezeReservedRegs(MF); - DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n"); + DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n"); // Allocator main loop: // @@ -565,73 +687,145 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // This process is continued till no more spills are generated. // Find the vreg intervals in need of allocation. - findVRegIntervalsToAlloc(); + findVRegIntervalsToAlloc(MF, LIS); #ifndef NDEBUG - const Function* func = mf->getFunction(); - std::string fqn = - func->getParent()->getModuleIdentifier() + "." + - func->getName().str(); + const Function &F = *MF.getFunction(); + std::string FullyQualifiedName = + F.getParent()->getModuleIdentifier() + "." + F.getName().str(); #endif // If there are non-empty intervals allocate them using pbqp. - if (!vregsToAlloc.empty()) { + if (!VRegsToAlloc.empty()) { + + const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); + std::unique_ptr ConstraintsRoot = + llvm::make_unique(); + ConstraintsRoot->addConstraint(llvm::make_unique()); + ConstraintsRoot->addConstraint(llvm::make_unique()); + if (PBQPCoalescing) + ConstraintsRoot->addConstraint(llvm::make_unique()); + ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints()); - bool pbqpAllocComplete = false; - unsigned round = 0; + bool PBQPAllocComplete = false; + unsigned Round = 0; - while (!pbqpAllocComplete) { - DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); + while (!PBQPAllocComplete) { + DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); - OwningPtr problem( - builder->build(mf, lis, loopInfo, vregsToAlloc)); + PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI)); + initializeGraph(G, VRM, *VRegSpiller); + ConstraintsRoot->apply(G); #ifndef NDEBUG - if (pbqpDumpGraphs) { - std::ostringstream rs; - rs << round; - std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph"); - std::string tmp; - raw_fd_ostream os(graphFileName.c_str(), tmp); - DEBUG(dbgs() << "Dumping graph for round " << round << " to \"" - << graphFileName << "\"\n"); - problem->getGraph().dump(os); + if (PBQPDumpGraphs) { + std::ostringstream RS; + RS << Round; + std::string GraphFileName = FullyQualifiedName + "." + RS.str() + + ".pbqpgraph"; + std::error_code EC; + raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); + DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" + << GraphFileName << "\"\n"); + G.dump(OS); } #endif - PBQP::Solution solution = - PBQP::HeuristicSolver::solve( - problem->getGraph()); - - pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution); - - ++round; + PBQP::Solution Solution = PBQP::RegAlloc::solve(G); + PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller); + ++Round; } } // Finalise allocation, allocate empty ranges. - finalizeAlloc(); - vregsToAlloc.clear(); - emptyIntervalVRegs.clear(); + finalizeAlloc(MF, LIS, VRM); + VRegsToAlloc.clear(); + EmptyIntervalVRegs.clear(); - DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); + DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n"); return true; } -FunctionPass* llvm::createPBQPRegisterAllocator( - OwningPtr &builder, - char *customPassID) { - return new RegAllocPBQP(builder, customPassID); +namespace { +// A helper class for printing node and register info in a consistent way +class PrintNodeInfo { +public: + typedef PBQP::RegAlloc::PBQPRAGraph Graph; + typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId; + + PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {} + + void print(raw_ostream &OS) const { + const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + unsigned VReg = G.getNodeMetadata(NId).getVReg(); + const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); + OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; + } + +private: + const Graph &G; + NodeId NId; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) { + PR.print(OS); + return OS; +} +} // anonymous namespace + +void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { + for (auto NId : nodeIds()) { + const Vector &Costs = getNodeCosts(NId); + assert(Costs.getLength() != 0 && "Empty vector in graph."); + OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n'; + } + OS << '\n'; + + for (auto EId : edgeIds()) { + NodeId N1Id = getEdgeNode1Id(EId); + NodeId N2Id = getEdgeNode2Id(EId); + assert(N1Id != N2Id && "PBQP graphs should not have self-edges."); + const Matrix &M = getEdgeCosts(EId); + assert(M.getRows() != 0 && "No rows in matrix."); + assert(M.getCols() != 0 && "No cols in matrix."); + OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / "; + OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n"; + OS << M << '\n'; + } +} + +void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); } + +void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const { + OS << "graph {\n"; + for (auto NId : nodeIds()) { + OS << " node" << NId << " [ label=\"" + << PrintNodeInfo(NId, *this) << "\\n" + << getNodeCosts(NId) << "\" ]\n"; + } + + OS << " edge [ len=" << nodeIds().size() << " ]\n"; + for (auto EId : edgeIds()) { + OS << " node" << getEdgeNode1Id(EId) + << " -- node" << getEdgeNode2Id(EId) + << " [ label=\""; + const Matrix &EdgeCosts = getEdgeCosts(EId); + for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) { + OS << EdgeCosts.getRowAsVector(i) << "\\n"; + } + OS << "\" ]\n"; + } + OS << "}\n"; +} + +FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) { + return new RegAllocPBQP(customPassID); } FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { - OwningPtr Builder; - if (pbqpCoalescing) - Builder.reset(new PBQPBuilderWithCoalescing()); - else - Builder.reset(new PBQPBuilder()); - return createPBQPRegisterAllocator(Builder); + return createPBQPRegisterAllocator(); } #undef DEBUG_TYPE