X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FRegAllocGreedy.cpp;h=945cb9e2c993c05bd193912fd671e95727538a10;hb=eecbba2d64a093f61469dcc37b0fab5188f50012;hp=901b993eea80a93f6e06dfb47c4eea0e97ad9f87;hpb=5599fde88e00ef850190c59e050fa78e6643b913;p=oota-llvm.git diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 901b993eea8..945cb9e2c99 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -86,6 +86,14 @@ static cl::opt EnableLocalReassignment( "may be compile time intensive"), cl::init(false)); +static cl::opt EnableDeferredSpilling( + "enable-deferred-spilling", cl::Hidden, + cl::desc("Instead of spilling a variable right away, defer the actual " + "code insertion to the end of the allocation. That way the " + "allocator might still find a suitable coloring for this " + "variable because of other evicted variables."), + cl::init(false)); + // FIXME: Find a good default for this flag and remove the flag. static cl::opt CSRFirstTimeCost("regalloc-csr-first-time-cost", @@ -157,6 +165,11 @@ class RAGreedy : public MachineFunctionPass, /// Live range will be spilled. No more splitting will be attempted. RS_Spill, + + /// Live range is in memory. Because of other evictions, it might get moved + /// in a register in the end. + RS_Memory, + /// There is nothing more we can do to this live range. Abort compilation /// if it can't be assigned. RS_Done @@ -296,6 +309,9 @@ class RAGreedy : public MachineFunctionPass, /// obtained from the TargetSubtargetInfo. bool EnableLocalReassign; + /// Set of broken hints that may be reconciled later because of eviction. + SmallSetVector SetOfBrokenHints; + public: RAGreedy(); @@ -311,6 +327,7 @@ public: void enqueue(LiveInterval *LI) override; LiveInterval *dequeue() override; unsigned selectOrSplit(LiveInterval&, SmallVectorImpl&) override; + void aboutToRemoveInterval(LiveInterval &) override; /// Perform register allocation. bool runOnMachineFunction(MachineFunction &mf) override; @@ -378,6 +395,26 @@ private: SmallVirtRegSet &, unsigned); bool tryRecoloringCandidates(PQueue &, SmallVectorImpl &, SmallVirtRegSet &, unsigned); + void tryHintRecoloring(LiveInterval &); + void tryHintsRecoloring(); + + /// Model the information carried by one end of a copy. + struct HintInfo { + /// The frequency of the copy. + BlockFrequency Freq; + /// The virtual register or physical register. + unsigned Reg; + /// Its currently assigned register. + /// In case of a physical register Reg == PhysReg. + unsigned PhysReg; + HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg) + : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} + }; + typedef SmallVector HintsInfo; + BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); + void collectHintInfo(unsigned, HintsInfo &); + + bool isUnusedCalleeSavedReg(unsigned PhysReg) const; }; } // end anonymous namespace @@ -390,6 +427,7 @@ const char *const RAGreedy::StageName[] = { "RS_Split", "RS_Split2", "RS_Spill", + "RS_Memory", "RS_Done" }; #endif @@ -423,8 +461,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -453,7 +491,9 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { if (VRM->hasPhys(VirtReg)) { - Matrix->unassign(LIS->getInterval(VirtReg)); + LiveInterval &LI = LIS->getInterval(VirtReg); + Matrix->unassign(LI); + aboutToRemoveInterval(LI); return true; } // Unassigned virtreg is probably in the priority queue. @@ -486,7 +526,7 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { } void RAGreedy::releaseMemory() { - SpillerInstance.reset(nullptr); + SpillerInstance.reset(); ExtraRegInfo.clear(); GlobalCand.clear(); } @@ -510,12 +550,20 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Unsplit ranges that couldn't be allocated immediately are deferred until // everything else has been allocated. Prio = Size; + } else if (ExtraRegInfo[Reg].Stage == RS_Memory) { + // Memory operand should be considered last. + // Change the priority such that Memory operand are assigned in + // the reverse order that they came in. + // TODO: Make this a member variable and probably do something about hints. + static unsigned MemOp = 0; + Prio = MemOp++; } else { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. bool ReverseLocal = TRI->reverseLocalAssignment(); - bool ForceGlobal = !ReverseLocal && TRI->mayOverrideLocalAssignment() && - (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs()); + const TargetRegisterClass &RC = *MRI->getRegClass(Reg); + bool ForceGlobal = !ReverseLocal && + (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs()); if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { @@ -528,10 +576,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Allocating bottom up may allow many short LRGs to be assigned first // to one of the cheap registers. This could be much faster for very // large blocks on targets with many physical registers. - Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex()); + Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex()); } - } - else { + Prio |= RC.AllocationPriority << 24; + } else { // Allocate global and split ranges in long->short order. Long ranges that // don't fit should be spilled (or split) ASAP so they don't create // interference. Mark a bit to prioritize global above local ranges. @@ -610,7 +658,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, //===----------------------------------------------------------------------===// unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); unsigned PhysReg; while ((PhysReg = Order.next())) { if (PhysReg == PrevReg) @@ -791,6 +839,16 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, } } +/// Returns true if the given \p PhysReg is a callee saved register and has not +/// been used for allocation yet. +bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const { + unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg); + if (CSR == 0) + return false; + + return !Matrix->isPhysRegUsed(PhysReg); +} + /// tryEvict - Try to evict all interferences for a physreg. /// @param VirtReg Currently unassigned virtual register. /// @param Order Physregs to try. @@ -817,7 +875,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); unsigned MinCost = RegClassInfo.getMinCost(RC); if (MinCost >= CostPerUseLimit) { - DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost + DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost << ", no cheaper registers to be found.\n"); return 0; } @@ -836,13 +894,12 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, continue; // The first use of a callee-saved register in a function has cost 1. // Don't start using a CSR when the CostPerUseLimit is low. - if (CostPerUseLimit == 1) - if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) - if (!MRI->isPhysRegUsed(CSR)) { - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " - << PrintReg(CSR, TRI) << '\n'); - continue; - } + if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " + << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) + << '\n'); + continue; + } if (!canEvictInterference(VirtReg, PhysReg, false, BestCost)) continue; @@ -967,14 +1024,12 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, BCS[B].Exit = SpillPlacement::PrefSpill; if (++B == GroupSize) { - ArrayRef Array(BCS, B); - SpillPlacer->addConstraints(Array); + SpillPlacer->addConstraints(makeArrayRef(BCS, B)); B = 0; } } - ArrayRef Array(BCS, B); - SpillPlacer->addConstraints(Array); + SpillPlacer->addConstraints(makeArrayRef(BCS, B)); SpillPlacer->addLinks(makeArrayRef(TBS, T)); } @@ -1013,7 +1068,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Compute through constraints from the interference, or assume that all // through blocks prefer spilling when forming compact regions. - ArrayRef NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); + auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); if (Cand.PhysReg) addThroughConstraints(Cand.Intf, NewBlocks); else @@ -1325,9 +1380,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, unsigned BestCand = NoCand; Order.rewind(); while (unsigned PhysReg = Order.next()) { - if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) - if (IgnoreCSR && !MRI->isPhysRegUsed(CSR)) - continue; + if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg)) + continue; // Discard bad candidates before we run out of interference cache cursors. // This will only affect register classes with a lot of registers (>32). @@ -1532,7 +1586,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); - const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC); + const TargetRegisterClass *SuperRC = + TRI->getLargestLegalSuperClass(CurRC, *MF); unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); // Split around every non-copy instruction if this split will relax // the constraints on the virtual register. @@ -1791,9 +1846,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // instructions. // // Try to guess the size of the new interval. - const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1), - Uses[SplitBefore].distance(Uses[SplitAfter]) + - (LiveBefore + LiveAfter)*SlotIndex::InstrDist); + const float EstWeight = normalizeSpillWeight( + blockFreq * (NewGaps + 1), + Uses[SplitBefore].distance(Uses[SplitAfter]) + + (LiveBefore + LiveAfter) * SlotIndex::InstrDist, + 1); // Would this split be possible to allocate? // Never allocate all gaps, we wouldn't be making progress. DEBUG(dbgs() << " w=" << EstWeight); @@ -2108,7 +2165,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, unsigned ItVirtReg = (*It)->reg; if (VRM->hasPhys(ItVirtReg)) Matrix->unassign(**It); - Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]); + unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg]; + Matrix->assign(**It, ItPhysReg); } } @@ -2213,6 +2271,11 @@ unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, return PhysReg; } +void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) { + // Do not keep invalid information around. + SetOfBrokenHints.remove(&LI); +} + void RAGreedy::initializeCSRCost() { // We use the larger one out of the command-line option and the value report // by TRI. @@ -2238,24 +2301,183 @@ void RAGreedy::initializeCSRCost() { CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); } +/// \brief Collect the hint info for \p Reg. +/// The results are stored into \p Out. +/// \p Out is not cleared before being populated. +void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { + for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) { + if (!Instr.isFullCopy()) + continue; + // Look for the other end of the copy. + unsigned OtherReg = Instr.getOperand(0).getReg(); + if (OtherReg == Reg) { + OtherReg = Instr.getOperand(1).getReg(); + if (OtherReg == Reg) + continue; + } + // Get the current assignment. + unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg) + ? OtherReg + : VRM->getPhys(OtherReg); + // Push the collected information. + Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg, + OtherPhysReg)); + } +} + +/// \brief Using the given \p List, compute the cost of the broken hints if +/// \p PhysReg was used. +/// \return The cost of \p List for \p PhysReg. +BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, + unsigned PhysReg) { + BlockFrequency Cost = 0; + for (const HintInfo &Info : List) { + if (Info.PhysReg != PhysReg) + Cost += Info.Freq; + } + return Cost; +} + +/// \brief Using the register assigned to \p VirtReg, try to recolor +/// all the live ranges that are copy-related with \p VirtReg. +/// The recoloring is then propagated to all the live-ranges that have +/// been recolored and so on, until no more copies can be coalesced or +/// it is not profitable. +/// For a given live range, profitability is determined by the sum of the +/// frequencies of the non-identity copies it would introduce with the old +/// and new register. +void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { + // We have a broken hint, check if it is possible to fix it by + // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted + // some register and PhysReg may be available for the other live-ranges. + SmallSet Visited; + SmallVector RecoloringCandidates; + HintsInfo Info; + unsigned Reg = VirtReg.reg; + unsigned PhysReg = VRM->getPhys(Reg); + // Start the recoloring algorithm from the input live-interval, then + // it will propagate to the ones that are copy-related with it. + Visited.insert(Reg); + RecoloringCandidates.push_back(Reg); + + DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '(' + << PrintReg(PhysReg, TRI) << ")\n"); + + do { + Reg = RecoloringCandidates.pop_back_val(); + + // We cannot recolor physcal register. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + + assert(VRM->hasPhys(Reg) && "We have unallocated variable!!"); + + // Get the live interval mapped with this virtual register to be able + // to check for the interference with the new color. + LiveInterval &LI = LIS->getInterval(Reg); + unsigned CurrPhys = VRM->getPhys(Reg); + // Check that the new color matches the register class constraints and + // that it is free for this live range. + if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) || + Matrix->checkInterference(LI, PhysReg))) + continue; + + DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI) + << ") is recolorable.\n"); + + // Gather the hint info. + Info.clear(); + collectHintInfo(Reg, Info); + // Check if recoloring the live-range will increase the cost of the + // non-identity copies. + if (CurrPhys != PhysReg) { + DEBUG(dbgs() << "Checking profitability:\n"); + BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys); + BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg); + DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency() + << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n'); + if (OldCopiesCost < NewCopiesCost) { + DEBUG(dbgs() << "=> Not profitable.\n"); + continue; + } + // At this point, the cost is either cheaper or equal. If it is + // equal, we consider this is profitable because it may expose + // more recoloring opportunities. + DEBUG(dbgs() << "=> Profitable.\n"); + // Recolor the live-range. + Matrix->unassign(LI); + Matrix->assign(LI, PhysReg); + } + // Push all copy-related live-ranges to keep reconciling the broken + // hints. + for (const HintInfo &HI : Info) { + if (Visited.insert(HI.Reg).second) + RecoloringCandidates.push_back(HI.Reg); + } + } while (!RecoloringCandidates.empty()); +} + +/// \brief Try to recolor broken hints. +/// Broken hints may be repaired by recoloring when an evicted variable +/// freed up a register for a larger live-range. +/// Consider the following example: +/// BB1: +/// a = +/// b = +/// BB2: +/// ... +/// = b +/// = a +/// Let us assume b gets split: +/// BB1: +/// a = +/// b = +/// BB2: +/// c = b +/// ... +/// d = c +/// = d +/// = a +/// Because of how the allocation work, b, c, and d may be assigned different +/// colors. Now, if a gets evicted later: +/// BB1: +/// a = +/// st a, SpillSlot +/// b = +/// BB2: +/// c = b +/// ... +/// d = c +/// = d +/// e = ld SpillSlot +/// = e +/// This is likely that we can assign the same register for b, c, and d, +/// getting rid of 2 copies. +void RAGreedy::tryHintsRecoloring() { + for (LiveInterval *LI : SetOfBrokenHints) { + assert(TargetRegisterInfo::isVirtualRegister(LI->reg) && + "Recoloring is possible only for virtual registers"); + // Some dead defs may be around (e.g., because of debug uses). + // Ignore those. + if (!VRM->hasPhys(LI->reg)) + continue; + tryHintRecoloring(*LI); + } +} + unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, SmallVectorImpl &NewVRegs, SmallVirtRegSet &FixedRegisters, unsigned Depth) { unsigned CostPerUseLimit = ~0u; // First try assigning a free register. - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { - // We check other options if we are using a CSR for the first time. - bool CSRFirstUse = false; - if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) - if (!MRI->isPhysRegUsed(CSR)) - CSRFirstUse = true; - // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. - if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) { + if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) && + NewVRegs.empty()) { unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, CostPerUseLimit, NewVRegs); if (CSRReg || !NewVRegs.empty()) @@ -2274,8 +2496,18 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // queue. The RS_Split ranges already failed to do this, and they should not // get a second chance until they have been split. if (Stage != RS_Split) - if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) + if (unsigned PhysReg = + tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) { + unsigned Hint = MRI->getSimpleHint(VirtReg.reg); + // If VirtReg has a hint and that hint is broken record this + // virtual register as a recoloring candidate for broken hint. + // Indeed, since we evicted a variable in its neighborhood it is + // likely we can at least partially recolor some of the + // copy-related live-ranges. + if (Hint && Hint != PhysReg) + SetOfBrokenHints.insert(&VirtReg); return PhysReg; + } assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); @@ -2301,13 +2533,23 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, return PhysReg; // Finally spill VirtReg itself. - NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); - spiller().spill(LRE); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); + if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) { + // TODO: This is experimental and in particular, we do not model + // the live range splitting done by spilling correctly. + // We would need a deep integration with the spiller to do the + // right thing here. Anyway, that is still good for early testing. + setStage(VirtReg, RS_Memory); + DEBUG(dbgs() << "Do as if this register is in memory\n"); + NewVRegs.push_back(VirtReg.reg); + } else { + NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); + LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + spiller().spill(LRE); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); - if (VerifyEnabled) - MF->verify(this, "After spilling"); + if (VerifyEnabled) + MF->verify(this, "After spilling"); + } // The live virtual register requesting allocation was spilled, so tell // the caller not to allocate anything during this round. @@ -2319,13 +2561,13 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { << "********** Function: " << mf.getName() << '\n'); MF = &mf; - const TargetMachine &TM = MF->getTarget(); - TRI = TM.getRegisterInfo(); - TII = TM.getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); RCI.runOnMachineFunction(mf); EnableLocalReassign = EnableLocalReassignment || - TM.getSubtargetImpl()->enableRALocalReassignment(TM.getOptLevel()); + MF->getSubtarget().enableRALocalReassignment( + MF->getTarget().getOptLevel()); if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); @@ -2344,7 +2586,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { initializeCSRCost(); - calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI); + calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); DEBUG(LIS->dump()); @@ -2355,8 +2597,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { NextCascade = 1; IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. + SetOfBrokenHints.clear(); allocatePhysRegs(); + tryHintsRecoloring(); releaseMemory(); return true; }