#define DEBUG_TYPE "regalloc"
#include "RegisterCoalescer.h"
-#include "LiveDebugVariables.h"
-#include "VirtRegMap.h"
-
-#include "llvm/Pass.h"
-#include "llvm/Value.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
cl::init(true));
// Temporary flag to test critical edge unsplitting.
-static cl::opt<cl::boolOrDefault>
+static cl::opt<bool>
EnableJoinSplits("join-splitedges",
- cl::desc("Coalesce copies on split edges (default=subtarget)"),
- cl::init(cl::BOU_UNSET), cl::Hidden);
+ cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
// Temporary flag to test global copy optimization.
static cl::opt<cl::boolOrDefault>
const TargetRegisterInfo* TRI;
const TargetInstrInfo* TII;
LiveIntervals *LIS;
- LiveDebugVariables *LDV;
const MachineLoopInfo* Loops;
AliasAnalysis *AA;
RegisterClassInfo RegClassInfo;
/// reMaterializeTrivialDef - If the source of a copy is defined by a
/// trivial computation, replace the copy by rematerialize the definition.
- bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
- MachineInstr *CopyMI);
+ bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI);
/// canJoinPhys - Return true if a physreg copy should be joined.
- bool canJoinPhys(CoalescerPair &CP);
+ bool canJoinPhys(const CoalescerPair &CP);
/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
AU.addRequired<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
- AU.addRequired<LiveDebugVariables>();
- AU.addPreserved<LiveDebugVariables>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerialize the definition.
-bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt,
- unsigned DstReg,
+bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
MachineInstr *CopyMI) {
+ unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
+ unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ LiveInterval &SrcInt = LIS->getInterval(SrcReg);
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
assert(SrcLR != SrcInt.end() && "Live range not found!");
const MCInstrDesc &MCID = DefMI->getDesc();
if (MCID.getNumDefs() != 1)
return false;
+ // Only support subregister destinations when the def is read-undef.
+ MachineOperand &DstOperand = CopyMI->getOperand(0);
+ if (DstOperand.getSubReg() && !DstOperand.isUndef())
+ return false;
if (!DefMI->isImplicitDef()) {
// Make sure the copy destination register class fits the instruction
// definition register class. The mismatch can happen as a result of earlier
// extract_subreg, insert_subreg, subreg_to_reg coalescing.
const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF);
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
- if (MRI->getRegClass(DstReg) != RC)
+ if (!MRI->constrainRegClass(DstReg, RC))
return false;
} else if (!RC->contains(DstReg))
return false;
TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI);
MachineInstr *NewMI = prior(MII);
+ // The original DefMI may have been a subregister def, but the full register
+ // class of its destination matches the destination of CopyMI, and CopyMI is
+ // either a full register def or is read-undef. Therefore we can clear the
+ // subregister index on the rematerialized instruction.
+ NewMI->getOperand(0).setSubReg(0);
+
// NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
// We need to remember these so we can add intervals once we insert
// NewMI into SlotIndexes.
bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg);
- // Update LiveDebugVariables.
- LDV->renameRegister(SrcReg, DstReg, SubIdx);
-
+ SmallPtrSet<MachineInstr*, 8> Visited;
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
MachineInstr *UseMI = I.skipInstruction();) {
+ // Each instruction can only be rewritten once because sub-register
+ // composition is not always idempotent. When SrcReg != DstReg, rewriting
+ // the UseMI operands removes them from the SrcReg use-def chain, but when
+ // SrcReg is DstReg we could encounter UseMI twice if it has multiple
+ // operands mentioning the virtual register.
+ if (SrcReg == DstReg && !Visited.insert(UseMI))
+ continue;
+
SmallVector<unsigned,8> Ops;
bool Reads, Writes;
tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
}
/// canJoinPhys - Return true if a copy involving a physreg should be joined.
-bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
+bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
/// Always join simple intervals that are defined by a single copy from a
/// reserved register. This doesn't increase register pressure, so it is
/// always beneficial.
if (!canJoinPhys(CP)) {
// Before giving up coalescing, if definition of source is defined by
// trivial computation, try rematerializing it.
- if (!CP.isFlipped() &&
- reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
- CP.getDstReg(), CopyMI))
+ if (reMaterializeTrivialDef(CP, CopyMI))
return true;
return false;
}
// If definition of source is defined by trivial computation, try
// rematerializing it.
- if (!CP.isFlipped() &&
- reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
- CP.getDstReg(), CopyMI))
+ if (reMaterializeTrivialDef(CP, CopyMI))
return true;
// If we can eliminate the copy without merging the live ranges, do so now.
// Value in the other live range that overlaps this def, if any.
VNInfo *OtherVNI;
- // Is this value an IMPLICIT_DEF?
- bool IsImplicitDef;
+ // Is this value an IMPLICIT_DEF that can be erased?
+ //
+ // IMPLICIT_DEF values should only exist at the end of a basic block that
+ // is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
+ // safely erased if they are overlapping a live value in the other live
+ // interval.
+ //
+ // Weird control flow graphs and incomplete PHI handling in
+ // ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
+ // longer live ranges. Such IMPLICIT_DEF values should be treated like
+ // normal values.
+ bool ErasableImplicitDef;
// True when the live range of this value will be pruned because of an
// overlapping CR_Replace value in the other live range.
bool PrunedComputed;
Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
- RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false),
- PrunedComputed(false) {}
+ RedefVNI(0), OtherVNI(0), ErasableImplicitDef(false),
+ Pruned(false), PrunedComputed(false) {}
bool isAnalyzed() const { return WriteLanes != 0; }
};
// An IMPLICIT_DEF writes undef values.
if (DefMI->isImplicitDef()) {
- V.IsImplicitDef = true;
+ // We normally expect IMPLICIT_DEF values to be live only until the end
+ // of their block. If the value is really live longer and gets pruned in
+ // another block, this flag is cleared again.
+ V.ErasableImplicitDef = true;
V.ValidLanes &= ~V.WriteLanes;
}
}
// We have overlapping values, or possibly a kill of Other.
// Recursively compute assignments up the dominator tree.
Other.computeAssignment(V.OtherVNI->id, *this);
- const Val &OtherV = Other.Vals[V.OtherVNI->id];
+ Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
+ // This shouldn't normally happen, but ProcessImplicitDefs can leave such
+ // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
+ // technically.
+ //
+ // WHen it happens, treat that IMPLICIT_DEF as a normal value, and don't try
+ // to erase the IMPLICIT_DEF instruction.
+ if (OtherV.ErasableImplicitDef && DefMI &&
+ DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
+ DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " extends into BB#" << DefMI->getParent()->getNumber()
+ << ", keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
+ }
// Allow overlapping PHI values. Any real interference would show up in a
// predecessor, the PHI itself can't introduce any conflicts.
// predecessors, so the instruction should simply go away once its value
// has been replaced.
Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
- bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep;
+ bool EraseImpDef = OtherV.ErasableImplicitDef &&
+ OtherV.Resolution == CR_Keep;
if (!Def.isBlock()) {
// Remove <def,read-undef> flags. This def is now a partial redef.
// Also remove <def,dead> flags since the joined live range will
// If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
// longer. The IMPLICIT_DEF instructions are only inserted by
// PHIElimination to guarantee that all PHI predecessors have a value.
- if (!Vals[i].IsImplicitDef || !Vals[i].Pruned)
+ if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
break;
// Remove value number i from LI. Note that this VNInfo is still present
// in NewVNInfo, so it will appear as an unused value number in the final
}
namespace {
- // Information concerning MBB coalescing priority.
- struct MBBPriorityInfo {
- MachineBasicBlock *MBB;
- unsigned Depth;
- bool IsSplit;
-
- MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
- : MBB(mbb), Depth(depth), IsSplit(issplit) {}
- };
-
- // MBBPriorityCompare - Comparison predicate that sorts first based on the
- // loop depth of the basic block (the unsigned), and then on the MBB number.
- //
- // EnableGlobalCopies assumes that the primary sort key is loop depth.
- struct MBBPriorityCompare {
- bool JoinSplitEdges;
-
- MBBPriorityCompare(bool joinsplits): JoinSplitEdges(joinsplits) {}
-
- bool operator()(const MBBPriorityInfo &LHS,
- const MBBPriorityInfo &RHS) const {
- // Deeper loops first
- if (LHS.Depth != RHS.Depth)
- return LHS.Depth > RHS.Depth;
-
- // Try to unsplit critical edges next.
- if (JoinSplitEdges && LHS.IsSplit != RHS.IsSplit)
- return LHS.IsSplit;
-
- // Prefer blocks that are more connected in the CFG. This takes care of
- // the most difficult copies first while intervals are short.
- unsigned cl = LHS.MBB->pred_size() + LHS.MBB->succ_size();
- unsigned cr = RHS.MBB->pred_size() + RHS.MBB->succ_size();
- if (cl != cr)
- return cl > cr;
+// Information concerning MBB coalescing priority.
+struct MBBPriorityInfo {
+ MachineBasicBlock *MBB;
+ unsigned Depth;
+ bool IsSplit;
+
+ MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
+ : MBB(mbb), Depth(depth), IsSplit(issplit) {}
+};
+}
- // As a last resort, sort by block number.
- return LHS.MBB->getNumber() < RHS.MBB->getNumber();
- }
- };
+// C-style comparator that sorts first based on the loop depth of the basic
+// block (the unsigned), and then on the MBB number.
+//
+// EnableGlobalCopies assumes that the primary sort key is loop depth.
+static int compareMBBPriority(const void *L, const void *R) {
+ const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L);
+ const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R);
+ // Deeper loops first
+ if (LHS->Depth != RHS->Depth)
+ return LHS->Depth > RHS->Depth ? -1 : 1;
+
+ // Try to unsplit critical edges next.
+ if (LHS->IsSplit != RHS->IsSplit)
+ return LHS->IsSplit ? -1 : 1;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
+ unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
+ if (cl != cr)
+ return cl > cr ? -1 : 1;
+
+ // As a last resort, sort by block number.
+ return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
}
/// \returns true if the given copy uses or defines a local live range.
assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
std::vector<MBBPriorityInfo> MBBs;
+ MBBs.reserve(MF->size());
for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
MachineBasicBlock *MBB = I;
MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
- isSplitEdge(MBB)));
+ JoinSplitEdges && isSplitEdge(MBB)));
}
- std::sort(MBBs.begin(), MBBs.end(), MBBPriorityCompare(JoinSplitEdges));
+ array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
// Coalesce intervals in MBB priority order.
unsigned CurrDepth = UINT_MAX;
for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
// Try coalescing the collected local copies for deeper loops.
- if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth)
+ if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
coalesceLocals();
+ CurrDepth = MBBs[i].Depth;
+ }
copyCoalesceInMBB(MBBs[i].MBB);
}
coalesceLocals();
TRI = TM->getRegisterInfo();
TII = TM->getInstrInfo();
LIS = &getAnalysis<LiveIntervals>();
- LDV = &getAnalysis<LiveDebugVariables>();
AA = &getAnalysis<AliasAnalysis>();
Loops = &getAnalysis<MachineLoopInfo>();
else
JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
- if (EnableJoinSplits == cl::BOU_UNSET)
- JoinSplitEdges = ST.enableMachineScheduler();
- else
- JoinSplitEdges = (EnableJoinSplits == cl::BOU_TRUE);
+ // The MachineScheduler does not currently require JoinSplitEdges. This will
+ // either be enabled unconditionally or replaced by a more general live range
+ // splitting optimization.
+ JoinSplitEdges = EnableJoinSplits;
DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
<< "********** Function: " << MF->getName() << '\n');
}
DEBUG(dump());
- DEBUG(LDV->dump());
if (VerifyCoalescing)
MF->verify(this, "After register coalescing");
return true;