/// obtained from the TargetSubtargetInfo.
bool EnableLocalReassign;
+ /// Set of broken hints that may be reconciled later because of eviction.
+ SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;
+
public:
RAGreedy();
void enqueue(LiveInterval *LI) override;
LiveInterval *dequeue() override;
unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override;
+ void aboutToRemoveInterval(LiveInterval &) override;
/// Perform register allocation.
bool runOnMachineFunction(MachineFunction &mf) override;
SmallVirtRegSet &, unsigned);
bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &,
SmallVirtRegSet &, unsigned);
+ void tryHintRecoloring(LiveInterval &);
+ void tryHintsRecoloring();
+
+ /// Model the information carried by one end of a copy.
+ struct HintInfo {
+ /// The frequency of the copy.
+ BlockFrequency Freq;
+ /// The virtual register or physical register.
+ unsigned Reg;
+ /// Its currently assigned register.
+ /// In case of a physical register Reg == PhysReg.
+ unsigned PhysReg;
+ HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg)
+ : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
+ };
+ typedef SmallVector<HintInfo, 4> HintsInfo;
+ BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
+ void collectHintInfo(unsigned, HintsInfo &);
};
} // end anonymous namespace
bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
if (VRM->hasPhys(VirtReg)) {
- Matrix->unassign(LIS->getInterval(VirtReg));
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ aboutToRemoveInterval(LI);
return true;
}
// Unassigned virtreg is probably in the priority queue.
return PhysReg;
}
+void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) {
+ // Do not keep invalid information around.
+ SetOfBrokenHints.remove(&LI);
+}
+
void RAGreedy::initializeCSRCost() {
// We use the larger one out of the command-line option and the value report
// by TRI.
CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
}
+/// \brief Collect the hint info for \p Reg.
+/// The results are stored into \p Out.
+/// \p Out is not cleared before being populated.
+void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
+ for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
+ if (!Instr.isFullCopy())
+ continue;
+ // Look for the other end of the copy.
+ unsigned OtherReg = Instr.getOperand(0).getReg();
+ if (OtherReg == Reg) {
+ OtherReg = Instr.getOperand(1).getReg();
+ if (OtherReg == Reg)
+ continue;
+ }
+ // Get the current assignment.
+ unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
+ ? OtherReg
+ : VRM->getPhys(OtherReg);
+ // Push the collected information.
+ Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
+ OtherPhysReg));
+ }
+}
+
+/// \brief Using the given \p List, compute the cost of the broken hints if
+/// \p PhysReg was used.
+/// \return The cost of \p List for \p PhysReg.
+BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
+ unsigned PhysReg) {
+ BlockFrequency Cost = 0;
+ for (const HintInfo &Info : List) {
+ if (Info.PhysReg != PhysReg)
+ Cost += Info.Freq;
+ }
+ return Cost;
+}
+
+/// \brief Using the register assigned to \p VirtReg, try to recolor
+/// all the live ranges that are copy-related with \p VirtReg.
+/// The recoloring is then propagated to all the live-ranges that have
+/// been recolored and so on, until no more copies can be coalesced or
+/// it is not profitable.
+/// For a given live range, profitability is determined by the sum of the
+/// frequencies of the non-identity copies it would introduce with the old
+/// and new register.
+void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
+ // We have a broken hint, check if it is possible to fix it by
+ // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
+ // some register and PhysReg may be available for the other live-ranges.
+ SmallSet<unsigned, 4> Visited;
+ SmallVector<unsigned, 2> RecoloringCandidates;
+ HintsInfo Info;
+ unsigned Reg = VirtReg.reg;
+ unsigned PhysReg = VRM->getPhys(Reg);
+ // Start the recoloring algorithm from the input live-interval, then
+ // it will propagate to the ones that are copy-related with it.
+ Visited.insert(Reg);
+ RecoloringCandidates.push_back(Reg);
+
+ DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '('
+ << PrintReg(PhysReg, TRI) << ")\n");
+
+ do {
+ Reg = RecoloringCandidates.pop_back_val();
+
+ // We cannot recolor physcal register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+
+ assert(VRM->hasPhys(Reg) && "We have unallocated variable!!");
+
+ // Get the live interval mapped with this virtual register to be able
+ // to check for the interference with the new color.
+ LiveInterval &LI = LIS->getInterval(Reg);
+ unsigned CurrPhys = VRM->getPhys(Reg);
+ // Check that the new color matches the register class constraints and
+ // that it is free for this live range.
+ if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
+ Matrix->checkInterference(LI, PhysReg)))
+ continue;
+
+ DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI)
+ << ") is recolorable.\n");
+
+ // Gather the hint info.
+ Info.clear();
+ collectHintInfo(Reg, Info);
+ // Check if recoloring the live-range will increase the cost of the
+ // non-identity copies.
+ if (CurrPhys != PhysReg) {
+ DEBUG(dbgs() << "Checking profitability:\n");
+ BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
+ BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
+ DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
+ << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n');
+ if (OldCopiesCost < NewCopiesCost) {
+ DEBUG(dbgs() << "=> Not profitable.\n");
+ continue;
+ }
+ // At this point, the cost is either cheaper or equal. If it is
+ // equal, we consider this is profitable because it may expose
+ // more recoloring opportunities.
+ DEBUG(dbgs() << "=> Profitable.\n");
+ // Recolor the live-range.
+ Matrix->unassign(LI);
+ Matrix->assign(LI, PhysReg);
+ }
+ // Push all copy-related live-ranges to keep reconciling the broken
+ // hints.
+ for (const HintInfo &HI : Info) {
+ if (Visited.insert(HI.Reg).second)
+ RecoloringCandidates.push_back(HI.Reg);
+ }
+ } while (!RecoloringCandidates.empty());
+}
+
+/// \brief Try to recolor broken hints.
+/// Broken hints may be repaired by recoloring when an evicted variable
+/// freed up a register for a larger live-range.
+/// Consider the following example:
+/// BB1:
+/// a =
+/// b =
+/// BB2:
+/// ...
+/// = b
+/// = a
+/// Let us assume b gets split:
+/// BB1:
+/// a =
+/// b =
+/// BB2:
+/// c = b
+/// ...
+/// d = c
+/// = d
+/// = a
+/// Because of how the allocation work, b, c, and d may be assigned different
+/// colors. Now, if a gets evicted later:
+/// BB1:
+/// a =
+/// st a, SpillSlot
+/// b =
+/// BB2:
+/// c = b
+/// ...
+/// d = c
+/// = d
+/// e = ld SpillSlot
+/// = e
+/// This is likely that we can assign the same register for b, c, and d,
+/// getting rid of 2 copies.
+void RAGreedy::tryHintsRecoloring() {
+ for (LiveInterval *LI : SetOfBrokenHints) {
+ assert(TargetRegisterInfo::isVirtualRegister(LI->reg) &&
+ "Recoloring is possible only for virtual registers");
+ // Some dead defs may be around (e.g., because of debug uses).
+ // Ignore those.
+ if (!VRM->hasPhys(LI->reg))
+ continue;
+ tryHintRecoloring(*LI);
+ }
+}
+
unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
SmallVectorImpl<unsigned> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
// queue. The RS_Split ranges already failed to do this, and they should not
// get a second chance until they have been split.
if (Stage != RS_Split)
- if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit))
+ if (unsigned PhysReg =
+ tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) {
+ unsigned Hint = MRI->getSimpleHint(VirtReg.reg);
+ // If VirtReg has a hint and that hint is broken record this
+ // virtual register as a recoloring candidate for broken hint.
+ // Indeed, since we evicted a variable in its neighborhood it is
+ // likely we can at least partially recolor some of the
+ // copy-related live-ranges.
+ if (Hint && Hint != PhysReg)
+ SetOfBrokenHints.insert(&VirtReg);
return PhysReg;
+ }
assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
NextCascade = 1;
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
+ SetOfBrokenHints.clear();
allocatePhysRegs();
+ tryHintsRecoloring();
releaseMemory();
return true;
}
--- /dev/null
+; RUN: llc < %s -o - -mtriple=x86_64-apple-macosx | FileCheck %s
+; Test case for the recoloring of broken hints.
+; This is tricky to have something reasonably small to kick this optimization since
+; it requires that spliting and spilling occur.
+; The bottom line is that this test case is fragile.
+; This was reduced from the make_list function from the llvm-testsuite:
+; SingleSource/Benchmarks/McGill/chomp.c
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct._list = type { i32*, %struct._list* }
+
+@ncol = external global i32, align 4
+@nrow = external global i32, align 4
+
+declare noalias i32* @copy_data()
+
+declare noalias i8* @malloc(i64)
+
+declare i32 @get_value()
+
+declare i32 @in_wanted(i32* nocapture readonly)
+
+declare noalias i32* @make_data()
+
+; CHECK-LABEL: make_list:
+; Function prologue.
+; CHECK: pushq
+; CHECK: subq ${{[0-9]+}}, %rsp
+; Move the first argument (%data) into a temporary register.
+; It will not survive the call to malloc otherwise.
+; CHECK: movq %rdi, [[ARG1:%r[0-9a-z]+]]
+; CHECK: callq _malloc
+; Compute %data - 1 as used for load in land.rhs.i (via the variable %indvars.iv.next.i).
+; CHECK: addq $-4, [[ARG1]]
+; We use to produce a useless copy here and move %data in another temporary register.
+; CHECK-NOT: movq [[ARG1]]
+; End of the first basic block.
+; CHECK: .align
+; Now check that %data is used in an address computation.
+; CHECK: leaq ([[ARG1]]
+define %struct._list* @make_list(i32* nocapture readonly %data, i32* nocapture %value, i32* nocapture %all) {
+entry:
+ %call = tail call i8* @malloc(i64 16)
+ %next = getelementptr inbounds i8* %call, i64 8
+ %tmp = bitcast i8* %next to %struct._list**
+ %tmp2 = bitcast i8* %call to %struct._list*
+ %.pre78 = load i32* @ncol, align 4
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc32, %entry
+ %tmp4 = phi i32 [ %.pre78, %entry ], [ 0, %for.inc32 ]
+ %current.077 = phi %struct._list* [ %tmp2, %entry ], [ %current.1.lcssa, %for.inc32 ]
+ %cmp270 = icmp eq i32 %tmp4, 0
+ br i1 %cmp270, label %for.inc32, label %for.body3
+
+for.body3: ; preds = %if.end31, %for.cond1.preheader
+ %current.173 = phi %struct._list* [ %current.2, %if.end31 ], [ %current.077, %for.cond1.preheader ]
+ %row.172 = phi i32 [ %row.3, %if.end31 ], [ 0, %for.cond1.preheader ]
+ %col.071 = phi i32 [ %inc, %if.end31 ], [ 0, %for.cond1.preheader ]
+ %call4 = tail call i32* @make_data()
+ %tmp5 = load i32* @ncol, align 4
+ %tobool14.i = icmp eq i32 %tmp5, 0
+ br i1 %tobool14.i, label %while.cond.i, label %while.body.lr.ph.i
+
+while.body.lr.ph.i: ; preds = %for.body3
+ %tmp6 = sext i32 %tmp5 to i64
+ br label %while.body.i
+
+while.body.i: ; preds = %while.body.i, %while.body.lr.ph.i
+ %indvars.iv.i = phi i64 [ %tmp6, %while.body.lr.ph.i ], [ %indvars.iv.next.i, %while.body.i ]
+ %indvars.iv.next.i = add nsw i64 %indvars.iv.i, -1
+ %tmp9 = trunc i64 %indvars.iv.next.i to i32
+ %tobool.i = icmp eq i32 %tmp9, 0
+ br i1 %tobool.i, label %while.cond.i, label %while.body.i
+
+while.cond.i: ; preds = %land.rhs.i, %while.body.i, %for.body3
+ %indvars.iv.i64 = phi i64 [ %indvars.iv.next.i65, %land.rhs.i ], [ 0, %for.body3 ], [ %tmp6, %while.body.i ]
+ %indvars.iv.next.i65 = add nsw i64 %indvars.iv.i64, -1
+ %tmp10 = trunc i64 %indvars.iv.i64 to i32
+ %tobool.i66 = icmp eq i32 %tmp10, 0
+ br i1 %tobool.i66, label %if.else, label %land.rhs.i
+
+land.rhs.i: ; preds = %while.cond.i
+ %arrayidx.i67 = getelementptr inbounds i32* %call4, i64 %indvars.iv.next.i65
+ %tmp11 = load i32* %arrayidx.i67, align 4
+ %arrayidx2.i68 = getelementptr inbounds i32* %data, i64 %indvars.iv.next.i65
+ %tmp12 = load i32* %arrayidx2.i68, align 4
+ %cmp.i69 = icmp eq i32 %tmp11, %tmp12
+ br i1 %cmp.i69, label %while.cond.i, label %equal_data.exit
+
+equal_data.exit: ; preds = %land.rhs.i
+ %cmp3.i = icmp slt i32 %tmp10, 1
+ br i1 %cmp3.i, label %if.else, label %if.then
+
+if.then: ; preds = %equal_data.exit
+ %next7 = getelementptr inbounds %struct._list* %current.173, i64 0, i32 1
+ %tmp14 = load %struct._list** %next7, align 8
+ %next12 = getelementptr inbounds %struct._list* %tmp14, i64 0, i32 1
+ store %struct._list* null, %struct._list** %next12, align 8
+ %tmp15 = load %struct._list** %next7, align 8
+ %tmp16 = load i32* %value, align 4
+ %cmp14 = icmp eq i32 %tmp16, 1
+ %.tmp16 = select i1 %cmp14, i32 0, i32 %tmp16
+ %tmp18 = load i32* %all, align 4
+ %tmp19 = or i32 %tmp18, %.tmp16
+ %tmp20 = icmp eq i32 %tmp19, 0
+ br i1 %tmp20, label %if.then19, label %if.end31
+
+if.then19: ; preds = %if.then
+ %call21 = tail call i32 @in_wanted(i32* %call4)
+ br label %if.end31
+
+if.else: ; preds = %equal_data.exit, %while.cond.i
+ %cmp26 = icmp eq i32 %col.071, 0
+ %.row.172 = select i1 %cmp26, i32 0, i32 %row.172
+ %sub30 = add nsw i32 %tmp5, -1
+ br label %if.end31
+
+if.end31: ; preds = %if.else, %if.then19, %if.then
+ %col.1 = phi i32 [ %sub30, %if.else ], [ 0, %if.then ], [ 0, %if.then19 ]
+ %row.3 = phi i32 [ %.row.172, %if.else ], [ %row.172, %if.then ], [ 0, %if.then19 ]
+ %current.2 = phi %struct._list* [ %current.173, %if.else ], [ %tmp15, %if.then ], [ %tmp15, %if.then19 ]
+ %inc = add nsw i32 %col.1, 1
+ %tmp25 = load i32* @ncol, align 4
+ %cmp2 = icmp eq i32 %inc, %tmp25
+ br i1 %cmp2, label %for.cond1.for.inc32_crit_edge, label %for.body3
+
+for.cond1.for.inc32_crit_edge: ; preds = %if.end31
+ %.pre79 = load i32* @nrow, align 4
+ br label %for.inc32
+
+for.inc32: ; preds = %for.cond1.for.inc32_crit_edge, %for.cond1.preheader
+ %tmp26 = phi i32 [ %.pre79, %for.cond1.for.inc32_crit_edge ], [ 0, %for.cond1.preheader ]
+ %current.1.lcssa = phi %struct._list* [ %current.2, %for.cond1.for.inc32_crit_edge ], [ %current.077, %for.cond1.preheader ]
+ %row.1.lcssa = phi i32 [ %row.3, %for.cond1.for.inc32_crit_edge ], [ 0, %for.cond1.preheader ]
+ %inc33 = add nsw i32 %row.1.lcssa, 1
+ %cmp = icmp eq i32 %inc33, %tmp26
+ br i1 %cmp, label %for.end34, label %for.cond1.preheader
+
+for.end34: ; preds = %for.inc32
+ %.pre = load %struct._list** %tmp, align 8
+ ret %struct._list* %.pre
+}