X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FRegAllocGreedy.cpp;h=26f42c93323ad1190dbb0a15e46f78a8acb10a5e;hb=d3c29ac587e8d4a1590a0b3c2efa5f1ce35e5c90;hp=3c3f622759f1715b78ebe7e735205f9fad4f7f20;hpb=126099ea231574040b73673c2a94d91bcff19556;p=oota-llvm.git diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 3c3f622759f..26f42c93323 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/Passes.h" #include "AllocationOrder.h" #include "InterferenceCache.h" @@ -37,16 +36,21 @@ #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/PassAnalysisSupport.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumGlobalSplits, "Number of split global live ranges"); STATISTIC(NumLocalSplits, "Number of split local live ranges"); STATISTIC(NumEvicted, "Number of interferences evicted"); @@ -71,6 +75,23 @@ static cl::opt LastChanceRecoloringMaxInterference( " interference at a time"), cl::init(8)); +static cl::opt +ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, + cl::desc("Exhaustive Search for registers bypassing the depth " + "and interference cutoffs of last chance recoloring")); + +static cl::opt EnableLocalReassignment( + "enable-local-reassign", cl::Hidden, + cl::desc("Local reassignment can yield better allocation decisions, but " + "may be compile time intensive"), + cl::init(false)); + +// FIXME: Find a good default for this flag and remove the flag. +static cl::opt +CSRFirstTimeCost("regalloc-csr-first-time-cost", + cl::desc("Cost for first time use of callee-saved register."), + cl::init(0), cl::Hidden); + static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -101,7 +122,7 @@ class RAGreedy : public MachineFunctionPass, LiveDebugVariables *DebugVars; // state - OwningPtr SpillerInstance; + std::unique_ptr SpillerInstance; PQueue Queue; unsigned NextCascade; @@ -141,6 +162,22 @@ class RAGreedy : public MachineFunctionPass, RS_Done }; + // Enum CutOffStage to keep a track whether the register allocation failed + // because of the cutoffs encountered in last chance recoloring. + // Note: This is used as bitmask. New value should be next power of 2. + enum CutOffStage { + // No cutoffs encountered + CO_None = 0, + + // lcr-max-depth cutoff encountered + CO_Depth = 1, + + // lcr-max-interf cutoff encountered + CO_Interf = 2 + }; + + uint8_t CutOffInfo; + #ifndef NDEBUG static const char *const StageName[]; #endif @@ -190,15 +227,14 @@ class RAGreedy : public MachineFunctionPass, void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } bool operator<(const EvictionCost &O) const { - if (BrokenHints != O.BrokenHints) - return BrokenHints < O.BrokenHints; - return MaxWeight < O.MaxWeight; + return std::tie(BrokenHints, MaxWeight) < + std::tie(O.BrokenHints, O.MaxWeight); } }; // splitting state. - OwningPtr SA; - OwningPtr SE; + std::unique_ptr SA; + std::unique_ptr SE; /// Cached per-block interference maps InterferenceCache IntfCache; @@ -247,31 +283,41 @@ class RAGreedy : public MachineFunctionPass, /// class. SmallVector GlobalCand; - enum LLVM_ENUM_INT_TYPE(unsigned) { NoCand = ~0u }; + enum : unsigned { NoCand = ~0u }; /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to /// NoCand which indicates the stack interval. SmallVector BundleCand; + /// Callee-save register cost, calculated once per machine function. + BlockFrequency CSRCost; + + /// Run or not the local reassignment heuristic. This information is + /// obtained from the TargetSubtargetInfo. + bool EnableLocalReassign; + + /// Set of broken hints that may be reconciled later because of eviction. + SmallSetVector SetOfBrokenHints; + public: RAGreedy(); /// Return the pass name. - virtual const char* getPassName() const { + const char* getPassName() const override { return "Greedy Register Allocator"; } /// RAGreedy analysis usage. - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual void releaseMemory(); - virtual Spiller &spiller() { return *SpillerInstance; } - virtual void enqueue(LiveInterval *LI); - virtual LiveInterval *dequeue(); - virtual unsigned selectOrSplit(LiveInterval&, - SmallVectorImpl&); + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + Spiller &spiller() override { return *SpillerInstance; } + void enqueue(LiveInterval *LI) override; + LiveInterval *dequeue() override; + unsigned selectOrSplit(LiveInterval&, SmallVectorImpl&) override; + void aboutToRemoveInterval(LiveInterval &) override; /// Perform register allocation. - virtual bool runOnMachineFunction(MachineFunction &mf); + bool runOnMachineFunction(MachineFunction &mf) override; static char ID; @@ -279,9 +325,9 @@ private: unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl &, SmallVirtRegSet &, unsigned = 0); - bool LRE_CanEraseVirtReg(unsigned); - void LRE_WillShrinkVirtReg(unsigned); - void LRE_DidCloneVirtReg(unsigned, unsigned); + bool LRE_CanEraseVirtReg(unsigned) override; + void LRE_WillShrinkVirtReg(unsigned) override; + void LRE_DidCloneVirtReg(unsigned, unsigned) override; void enqueue(PQueue &CurQueue, LiveInterval *LI); LiveInterval *dequeue(PQueue &CurQueue); @@ -308,6 +354,21 @@ private: SmallVectorImpl&, unsigned = ~0u); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); + /// Calculate cost of region splitting. + unsigned calculateRegionSplitCost(LiveInterval &VirtReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, bool IgnoreCSR); + /// Perform region splitting. + unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + bool HasCompact, + SmallVectorImpl &NewVRegs); + /// Check other options before using a callee-saved register for the first + /// time. + unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, + unsigned PhysReg, unsigned &CostPerUseLimit, + SmallVectorImpl &NewVRegs); + void initializeCSRCost(); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, @@ -321,6 +382,24 @@ private: SmallVirtRegSet &, unsigned); bool tryRecoloringCandidates(PQueue &, SmallVectorImpl &, SmallVirtRegSet &, unsigned); + void tryHintRecoloring(LiveInterval &); + void tryHintsRecoloring(); + + /// Model the information carried by one end of a copy. + struct HintInfo { + /// The frequency of the copy. + BlockFrequency Freq; + /// The virtual register or physical register. + unsigned Reg; + /// Its currently assigned register. + /// In case of a physical register Reg == PhysReg. + unsigned PhysReg; + HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg) + : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} + }; + typedef SmallVector HintsInfo; + BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); + void collectHintInfo(unsigned, HintsInfo &); }; } // end anonymous namespace @@ -396,7 +475,9 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { if (VRM->hasPhys(VirtReg)) { - Matrix->unassign(LIS->getInterval(VirtReg)); + LiveInterval &LI = LIS->getInterval(VirtReg); + Matrix->unassign(LI); + aboutToRemoveInterval(LI); return true; } // Unassigned virtreg is probably in the priority queue. @@ -429,7 +510,7 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { } void RAGreedy::releaseMemory() { - SpillerInstance.reset(0); + SpillerInstance.reset(); ExtraRegInfo.clear(); GlobalCand.clear(); } @@ -454,21 +535,28 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // everything else has been allocated. Prio = Size; } else { - if (ExtraRegInfo[Reg].Stage == RS_Assign && !LI->empty() && + // Giant live ranges fall back to the global assignment heuristic, which + // prevents excessive spilling in pathological cases. + bool ReverseLocal = TRI->reverseLocalAssignment(); + const TargetRegisterClass &RC = *MRI->getRegClass(Reg); + bool ForceGlobal = !ReverseLocal && + (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs()); + + if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { // Allocate original local ranges in linear instruction order. Since they // are singly defined, this produces optimal coloring in the absence of // global interference and other constraints. - if (!TRI->reverseLocalAssignment()) + if (!ReverseLocal) Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); else { // Allocating bottom up may allow many short LRGs to be assigned first // to one of the cheap registers. This could be much faster for very // large blocks on targets with many physical registers. - Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex()); + Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex()); } - } - else { + Prio |= RC.AllocationPriority << 24; + } else { // Allocate global and split ranges in long->short order. Long ranges that // don't fit should be spilled (or split) ASAP so they don't create // interference. Mark a bit to prioritize global above local ranges. @@ -490,7 +578,7 @@ LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { if (CurQueue.empty()) - return 0; + return nullptr; LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); CurQueue.pop(); return LI; @@ -601,7 +689,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, } /// canEvictInterference - Return true if all interferences between VirtReg and -/// PhysReg can be evicted. When OnlyCheap is set, don't do anything +/// PhysReg can be evicted. /// /// @param VirtReg Live range that is about to be assigned. /// @param PhysReg Desired register for assignment. @@ -679,7 +767,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Evicting another local live range in this case could lead to suboptimal // coloring. if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && - !canReassign(*Intf, PhysReg)) { + (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) { return false; } } @@ -754,7 +842,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); unsigned MinCost = RegClassInfo.getMinCost(RC); if (MinCost >= CostPerUseLimit) { - DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost + DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost << ", no cheaper registers to be found.\n"); return 0; } @@ -904,14 +992,12 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, BCS[B].Exit = SpillPlacement::PrefSpill; if (++B == GroupSize) { - ArrayRef Array(BCS, B); - SpillPlacer->addConstraints(Array); + SpillPlacer->addConstraints(makeArrayRef(BCS, B)); B = 0; } } - ArrayRef Array(BCS, B); - SpillPlacer->addConstraints(Array); + SpillPlacer->addConstraints(makeArrayRef(BCS, B)); SpillPlacer->addLinks(makeArrayRef(TBS, T)); } @@ -950,7 +1036,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Compute through constraints from the interference, or assume that all // through blocks prefer spilling when forming compact regions. - ArrayRef NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); + auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); if (Cand.PhysReg) addThroughConstraints(Cand.Intf, NewBlocks); else @@ -1227,9 +1313,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { unsigned NumCands = 0; - unsigned BestCand = NoCand; BlockFrequency BestCost; - SmallVector UsedCands; // Check if we can split this live range around a compact region. bool HasCompact = calcCompactRegion(GlobalCand.front()); @@ -1245,8 +1329,29 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); } + unsigned BestCand = + calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, + false/*IgnoreCSR*/); + + // No solutions found, fall back to single block splitting. + if (!HasCompact && BestCand == NoCand) + return 0; + + return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); +} + +unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, + bool IgnoreCSR) { + unsigned BestCand = NoCand; Order.rewind(); while (unsigned PhysReg = Order.next()) { + if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) + if (IgnoreCSR && !MRI->isPhysRegUsed(CSR)) + continue; + // Discard bad candidates before we run out of interference cache cursors. // This will only affect register classes with a lot of registers (>32). if (NumCands == IntfCache.getMaxCursors()) { @@ -1313,11 +1418,13 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, } ++NumCands; } + return BestCand; +} - // No solutions found, fall back to single block splitting. - if (!HasCompact && BestCand == NoCand) - return 0; - +unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + bool HasCompact, + SmallVectorImpl &NewVRegs) { + SmallVector UsedCands; // Prepare split editor. LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); @@ -1448,7 +1555,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); - const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC); + const TargetRegisterClass *SuperRC = + TRI->getLargestLegalSuperClass(CurRC, *MF); unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); // Split around every non-copy instruction if this split will relax // the constraints on the virtual register. @@ -1707,9 +1815,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // instructions. // // Try to guess the size of the new interval. - const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1), - Uses[SplitBefore].distance(Uses[SplitAfter]) + - (LiveBefore + LiveAfter)*SlotIndex::InstrDist); + const float EstWeight = normalizeSpillWeight( + blockFreq * (NewGaps + 1), + Uses[SplitBefore].distance(Uses[SplitAfter]) + + (LiveBefore + LiveAfter) * SlotIndex::InstrDist, + 1); // Would this split be possible to allocate? // Never allocate all gaps, we wouldn't be making progress. DEBUG(dbgs() << " w=" << EstWeight); @@ -1865,8 +1975,9 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, // If there is LastChanceRecoloringMaxInterference or more interferences, // chances are one would not be recolorable. if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= - LastChanceRecoloringMaxInterference) { + LastChanceRecoloringMaxInterference && !ExhaustiveSearch) { DEBUG(dbgs() << "Early abort: too many interferences.\n"); + CutOffInfo |= CO_Interf; return false; } for (unsigned i = Q.interferingVRegs().size(); i; --i) { @@ -1916,7 +2027,7 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, /// R3 is available. /// Recoloring => vC = R1, vA = R2, vB = R3 /// -/// \p Order defines the prefered allocation order for \p VirtReg. +/// \p Order defines the preferred allocation order for \p VirtReg. /// \p NewRegs will contain any new virtual register that have been created /// (split, spill) during the process and that must be assigned. /// \p FixedRegisters contains all the virtual registers that cannot be @@ -1937,8 +2048,9 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // We may want to reconsider that if we end up with a too large search space // for target with hundreds of registers. // Indeed, in that case we may want to cut the search space earlier. - if (Depth >= LastChanceRecoloringMaxDepth) { + if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) { DEBUG(dbgs() << "Abort because max depth has been reached.\n"); + CutOffInfo |= CO_Depth; return ~0u; } @@ -2063,18 +2175,291 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl &NewVRegs) { + CutOffInfo = CO_None; + LLVMContext &Ctx = MF->getFunction()->getContext(); SmallVirtRegSet FixedRegisters; - return selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); + unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); + if (Reg == ~0U && (CutOffInfo != CO_None)) { + uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); + if (CutOffEncountered == CO_Depth) + Ctx.emitError("register allocation failed: maximum depth for recoloring " + "reached. Use -fexhaustive-register-search to skip " + "cutoffs"); + else if (CutOffEncountered == CO_Interf) + Ctx.emitError("register allocation failed: maximum interference for " + "recoloring reached. Use -fexhaustive-register-search " + "to skip cutoffs"); + else if (CutOffEncountered == (CO_Depth | CO_Interf)) + Ctx.emitError("register allocation failed: maximum interference and " + "depth for recoloring reached. Use " + "-fexhaustive-register-search to skip cutoffs"); + } + return Reg; +} + +/// Using a CSR for the first time has a cost because it causes push|pop +/// to be added to prologue|epilogue. Splitting a cold section of the live +/// range can have lower cost than using the CSR for the first time; +/// Spilling a live range in the cold path can have lower cost than using +/// the CSR for the first time. Returns the physical register if we decide +/// to use the CSR; otherwise return 0. +unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, + AllocationOrder &Order, + unsigned PhysReg, + unsigned &CostPerUseLimit, + SmallVectorImpl &NewVRegs) { + if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { + // We choose spill over using the CSR for the first time if the spill cost + // is lower than CSRCost. + SA->analyze(&VirtReg); + if (calcSpillCost() >= CSRCost) + return PhysReg; + + // We are going to spill, set CostPerUseLimit to 1 to make sure that + // we will not use a callee-saved register in tryEvict. + CostPerUseLimit = 1; + return 0; + } + if (getStage(VirtReg) < RS_Split) { + // We choose pre-splitting over using the CSR for the first time if + // the cost of splitting is lower than CSRCost. + SA->analyze(&VirtReg); + unsigned NumCands = 0; + BlockFrequency BestCost = CSRCost; // Don't modify CSRCost. + unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, + NumCands, true /*IgnoreCSR*/); + if (BestCand == NoCand) + // Use the CSR if we can't find a region split below CSRCost. + return PhysReg; + + // Perform the actual pre-splitting. + doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs); + return 0; + } + return PhysReg; +} + +void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) { + // Do not keep invalid information around. + SetOfBrokenHints.remove(&LI); +} + +void RAGreedy::initializeCSRCost() { + // We use the larger one out of the command-line option and the value report + // by TRI. + CSRCost = BlockFrequency( + std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); + if (!CSRCost.getFrequency()) + return; + + // Raw cost is relative to Entry == 2^14; scale it appropriately. + uint64_t ActualEntry = MBFI->getEntryFreq(); + if (!ActualEntry) { + CSRCost = 0; + return; + } + uint64_t FixedEntry = 1 << 14; + if (ActualEntry < FixedEntry) + CSRCost *= BranchProbability(ActualEntry, FixedEntry); + else if (ActualEntry <= UINT32_MAX) + // Invert the fraction and divide. + CSRCost /= BranchProbability(FixedEntry, ActualEntry); + else + // Can't use BranchProbability in general, since it takes 32-bit numbers. + CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); +} + +/// \brief Collect the hint info for \p Reg. +/// The results are stored into \p Out. +/// \p Out is not cleared before being populated. +void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { + for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) { + if (!Instr.isFullCopy()) + continue; + // Look for the other end of the copy. + unsigned OtherReg = Instr.getOperand(0).getReg(); + if (OtherReg == Reg) { + OtherReg = Instr.getOperand(1).getReg(); + if (OtherReg == Reg) + continue; + } + // Get the current assignment. + unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg) + ? OtherReg + : VRM->getPhys(OtherReg); + // Push the collected information. + Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg, + OtherPhysReg)); + } +} + +/// \brief Using the given \p List, compute the cost of the broken hints if +/// \p PhysReg was used. +/// \return The cost of \p List for \p PhysReg. +BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, + unsigned PhysReg) { + BlockFrequency Cost = 0; + for (const HintInfo &Info : List) { + if (Info.PhysReg != PhysReg) + Cost += Info.Freq; + } + return Cost; +} + +/// \brief Using the register assigned to \p VirtReg, try to recolor +/// all the live ranges that are copy-related with \p VirtReg. +/// The recoloring is then propagated to all the live-ranges that have +/// been recolored and so on, until no more copies can be coalesced or +/// it is not profitable. +/// For a given live range, profitability is determined by the sum of the +/// frequencies of the non-identity copies it would introduce with the old +/// and new register. +void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { + // We have a broken hint, check if it is possible to fix it by + // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted + // some register and PhysReg may be available for the other live-ranges. + SmallSet Visited; + SmallVector RecoloringCandidates; + HintsInfo Info; + unsigned Reg = VirtReg.reg; + unsigned PhysReg = VRM->getPhys(Reg); + // Start the recoloring algorithm from the input live-interval, then + // it will propagate to the ones that are copy-related with it. + Visited.insert(Reg); + RecoloringCandidates.push_back(Reg); + + DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '(' + << PrintReg(PhysReg, TRI) << ")\n"); + + do { + Reg = RecoloringCandidates.pop_back_val(); + + // We cannot recolor physcal register. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + + assert(VRM->hasPhys(Reg) && "We have unallocated variable!!"); + + // Get the live interval mapped with this virtual register to be able + // to check for the interference with the new color. + LiveInterval &LI = LIS->getInterval(Reg); + unsigned CurrPhys = VRM->getPhys(Reg); + // Check that the new color matches the register class constraints and + // that it is free for this live range. + if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) || + Matrix->checkInterference(LI, PhysReg))) + continue; + + DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI) + << ") is recolorable.\n"); + + // Gather the hint info. + Info.clear(); + collectHintInfo(Reg, Info); + // Check if recoloring the live-range will increase the cost of the + // non-identity copies. + if (CurrPhys != PhysReg) { + DEBUG(dbgs() << "Checking profitability:\n"); + BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys); + BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg); + DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency() + << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n'); + if (OldCopiesCost < NewCopiesCost) { + DEBUG(dbgs() << "=> Not profitable.\n"); + continue; + } + // At this point, the cost is either cheaper or equal. If it is + // equal, we consider this is profitable because it may expose + // more recoloring opportunities. + DEBUG(dbgs() << "=> Profitable.\n"); + // Recolor the live-range. + Matrix->unassign(LI); + Matrix->assign(LI, PhysReg); + } + // Push all copy-related live-ranges to keep reconciling the broken + // hints. + for (const HintInfo &HI : Info) { + if (Visited.insert(HI.Reg).second) + RecoloringCandidates.push_back(HI.Reg); + } + } while (!RecoloringCandidates.empty()); +} + +/// \brief Try to recolor broken hints. +/// Broken hints may be repaired by recoloring when an evicted variable +/// freed up a register for a larger live-range. +/// Consider the following example: +/// BB1: +/// a = +/// b = +/// BB2: +/// ... +/// = b +/// = a +/// Let us assume b gets split: +/// BB1: +/// a = +/// b = +/// BB2: +/// c = b +/// ... +/// d = c +/// = d +/// = a +/// Because of how the allocation work, b, c, and d may be assigned different +/// colors. Now, if a gets evicted later: +/// BB1: +/// a = +/// st a, SpillSlot +/// b = +/// BB2: +/// c = b +/// ... +/// d = c +/// = d +/// e = ld SpillSlot +/// = e +/// This is likely that we can assign the same register for b, c, and d, +/// getting rid of 2 copies. +void RAGreedy::tryHintsRecoloring() { + for (LiveInterval *LI : SetOfBrokenHints) { + assert(TargetRegisterInfo::isVirtualRegister(LI->reg) && + "Recoloring is possible only for virtual registers"); + // Some dead defs may be around (e.g., because of debug uses). + // Ignore those. + if (!VRM->hasPhys(LI->reg)) + continue; + tryHintRecoloring(*LI); + } } unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, SmallVectorImpl &NewVRegs, SmallVirtRegSet &FixedRegisters, unsigned Depth) { + unsigned CostPerUseLimit = ~0u; // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); - if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) - return PhysReg; + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { + // We check other options if we are using a CSR for the first time. + bool CSRFirstUse = false; + if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) + if (!MRI->isPhysRegUsed(CSR)) + CSRFirstUse = true; + + // When NewVRegs is not empty, we may have made decisions such as evicting + // a virtual register, go with the earlier decisions and use the physical + // register. + if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) { + unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, + CostPerUseLimit, NewVRegs); + if (CSRReg || !NewVRegs.empty()) + // Return now if we decide to use a CSR or create new vregs due to + // pre-splitting. + return CSRReg; + } else + return PhysReg; + } LiveRangeStage Stage = getStage(VirtReg); DEBUG(dbgs() << StageName[Stage] @@ -2084,8 +2469,18 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // queue. The RS_Split ranges already failed to do this, and they should not // get a second chance until they have been split. if (Stage != RS_Split) - if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) + if (unsigned PhysReg = + tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) { + unsigned Hint = MRI->getSimpleHint(VirtReg.reg); + // If VirtReg has a hint and that hint is broken record this + // virtual register as a recoloring candidate for broken hint. + // Indeed, since we evicted a variable in its neighborhood it is + // likely we can at least partially recolor some of the + // copy-related live-ranges. + if (Hint && Hint != PhysReg) + SetOfBrokenHints.insert(&VirtReg); return PhysReg; + } assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); @@ -2129,9 +2524,14 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { << "********** Function: " << mf.getName() << '\n'); MF = &mf; - TRI = MF->getTarget().getRegisterInfo(); - TII = MF->getTarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); RCI.runOnMachineFunction(mf); + + EnableLocalReassign = EnableLocalReassignment || + MF->getSubtarget().enableRALocalReassignment( + MF->getTarget().getOptLevel()); + if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); @@ -2147,6 +2547,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SpillPlacer = &getAnalysis(); DebugVars = &getAnalysis(); + initializeCSRCost(); + calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI); DEBUG(LIS->dump()); @@ -2158,8 +2560,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { NextCascade = 1; IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. + SetOfBrokenHints.clear(); allocatePhysRegs(); + tryHintsRecoloring(); releaseMemory(); return true; }