#define DEBUG_TYPE "regalloc"
#include "RegisterCoalescer.h"
#include "LiveDebugVariables.h"
-#include "VirtRegMap.h"
-
-#include "llvm/Pass.h"
-#include "llvm/Value.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Value.h"
#include <algorithm>
#include <cmath>
using namespace llvm;
// Temporary flag to test critical edge unsplitting.
static cl::opt<bool>
EnableJoinSplits("join-splitedges",
- cl::desc("Coalesce copies on split edges (default=false)"),
- cl::init(false), cl::Hidden);
+ cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
// Temporary flag to test global copy optimization.
-static cl::opt<bool>
+static cl::opt<cl::boolOrDefault>
EnableGlobalCopies("join-globalcopies",
- cl::desc("Coalesce copies that don't locally define an lrg"),
- cl::init(false));
+ cl::desc("Coalesce copies that span blocks (default=subtarget)"),
+ cl::init(cl::BOU_UNSET), cl::Hidden);
static cl::opt<bool>
VerifyCoalescing("verify-coalescing",
AliasAnalysis *AA;
RegisterClassInfo RegClassInfo;
+ /// \brief True if the coalescer should aggressively coalesce global copies
+ /// in favor of keeping local copies.
+ bool JoinGlobalCopies;
+
+ /// \brief True if the coalescer should aggressively coalesce fall-thru
+ /// blocks exclusively containing copies.
+ bool JoinSplitEdges;
+
/// WorkList - Copy instructions yet to be coalesced.
SmallVector<MachineInstr*, 8> WorkList;
SmallVector<MachineInstr*, 8> LocalWorkList;
MachineInstr *CopyMI);
/// canJoinPhys - Return true if a physreg copy should be joined.
- bool canJoinPhys(CoalescerPair &CP);
+ bool canJoinPhys(const CoalescerPair &CP);
/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
for (MachineBasicBlock::const_iterator MII = MBB->begin(), E = MBB->end();
MII != E; ++MII) {
- if (!MII->isCopyLike() || !MII->isUnconditionalBranch())
+ if (!MII->isCopyLike() && !MII->isUnconditionalBranch())
return false;
}
return true;
// Update LiveDebugVariables.
LDV->renameRegister(SrcReg, DstReg, SubIdx);
+ SmallPtrSet<MachineInstr*, 8> Visited;
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
MachineInstr *UseMI = I.skipInstruction();) {
+ // Each instruction can only be rewritten once because sub-register
+ // composition is not always idempotent. When SrcReg != DstReg, rewriting
+ // the UseMI operands removes them from the SrcReg use-def chain, but when
+ // SrcReg is DstReg we could encounter UseMI twice if it has multiple
+ // operands mentioning the virtual register.
+ if (SrcReg == DstReg && !Visited.insert(UseMI))
+ continue;
+
SmallVector<unsigned,8> Ops;
bool Reads, Writes;
tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
}
/// canJoinPhys - Return true if a copy involving a physreg should be joined.
-bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
+bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
/// Always join simple intervals that are defined by a single copy from a
/// reserved register. This doesn't increase register pressure, so it is
/// always beneficial.
}
namespace {
- // Information concerning MBB coalescing priority.
- struct MBBPriorityInfo {
- MachineBasicBlock *MBB;
- unsigned Depth;
- bool IsSplit;
-
- MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
- : MBB(mbb), Depth(depth), IsSplit(issplit) {}
- };
+// Information concerning MBB coalescing priority.
+struct MBBPriorityInfo {
+ MachineBasicBlock *MBB;
+ unsigned Depth;
+ bool IsSplit;
+
+ MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
+ : MBB(mbb), Depth(depth), IsSplit(issplit) {}
+};
+}
- // MBBPriorityCompare - Comparison predicate that sorts first based on the
- // loop depth of the basic block (the unsigned), and then on the MBB number.
- //
- // EnableGlobalCopies assumes that the primary sort key is loop depth.
- struct MBBPriorityCompare {
- bool operator()(const MBBPriorityInfo &LHS,
- const MBBPriorityInfo &RHS) const {
- // Deeper loops first
- if (LHS.Depth != RHS.Depth)
- return LHS.Depth > RHS.Depth;
-
- // Try to unsplit critical edges next.
- if (EnableJoinSplits && LHS.IsSplit != RHS.IsSplit)
- return LHS.IsSplit;
-
- // Prefer blocks that are more connected in the CFG. This takes care of
- // the most difficult copies first while intervals are short.
- unsigned cl = LHS.MBB->pred_size() + LHS.MBB->succ_size();
- unsigned cr = RHS.MBB->pred_size() + RHS.MBB->succ_size();
- if (cl != cr)
- return cl > cr;
-
- // As a last resort, sort by block number.
- return LHS.MBB->getNumber() < RHS.MBB->getNumber();
- }
- };
+// C-style comparator that sorts first based on the loop depth of the basic
+// block (the unsigned), and then on the MBB number.
+//
+// EnableGlobalCopies assumes that the primary sort key is loop depth.
+static int compareMBBPriority(const void *L, const void *R) {
+ const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L);
+ const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R);
+ // Deeper loops first
+ if (LHS->Depth != RHS->Depth)
+ return LHS->Depth > RHS->Depth ? -1 : 1;
+
+ // Try to unsplit critical edges next.
+ if (LHS->IsSplit != RHS->IsSplit)
+ return LHS->IsSplit ? -1 : 1;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
+ unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
+ if (cl != cr)
+ return cl > cr ? -1 : 1;
+
+ // As a last resort, sort by block number.
+ return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
}
/// \returns true if the given copy uses or defines a local live range.
// Collect all copy-like instructions in MBB. Don't start coalescing anything
// yet, it might invalidate the iterator.
const unsigned PrevSize = WorkList.size();
- if (EnableGlobalCopies) {
+ if (JoinGlobalCopies) {
// Coalesce copies bottom-up to coalesce local defs before local uses. They
// are not inherently easier to resolve, but slightly preferable until we
// have local live range splitting. In particular this is required by
assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
std::vector<MBBPriorityInfo> MBBs;
+ MBBs.reserve(MF->size());
for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
MachineBasicBlock *MBB = I;
MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
- isSplitEdge(MBB)));
+ JoinSplitEdges && isSplitEdge(MBB)));
}
- std::sort(MBBs.begin(), MBBs.end(), MBBPriorityCompare());
+ array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
// Coalesce intervals in MBB priority order.
unsigned CurrDepth = UINT_MAX;
for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
// Try coalescing the collected local copies for deeper loops.
- if (EnableGlobalCopies && MBBs[i].Depth < CurrDepth)
+ if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
coalesceLocals();
+ CurrDepth = MBBs[i].Depth;
+ }
copyCoalesceInMBB(MBBs[i].MBB);
}
coalesceLocals();
AA = &getAnalysis<AliasAnalysis>();
Loops = &getAnalysis<MachineLoopInfo>();
+ const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+ if (EnableGlobalCopies == cl::BOU_UNSET)
+ JoinGlobalCopies = ST.enableMachineScheduler();
+ else
+ JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
+
+ // The MachineScheduler does not currently require JoinSplitEdges. This will
+ // either be enabled unconditionally or replaced by a more general live range
+ // splitting optimization.
+ JoinSplitEdges = EnableJoinSplits;
+
DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
<< "********** Function: " << MF->getName() << '\n');