X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FMachineCSE.cpp;h=aad376c4702b6549d33302e64e6548827073e2f2;hb=f4e4e5eca8a849bfebcc7d6eb33848f48b2bf27a;hp=5d79e96097e31bfb1ebd810e1e348898dbc08364;hpb=baf717a08a0bc8cb0a7931ea3ce51d063a8fe6f0;p=oota-llvm.git diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 5d79e96097e..aad376c4702 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -13,28 +13,30 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine-cse" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" - +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +#define DEBUG_TYPE "machine-cse" + STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCSEs, "Number of common subexpression eliminated"); STATISTIC(NumPhysCSEs, "Number of physreg referencing common subexpr eliminated"); -STATISTIC(NumCrossBlockPhysCSEs, - "Number of physreg common subexprs cross-block eliminated"); +STATISTIC(NumCrossBBCSEs, + "Number of cross-MBB physreg referencing CS eliminated"); STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); namespace { @@ -46,28 +48,28 @@ namespace { MachineRegisterInfo *MRI; public: static char ID; // Pass identification - MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) { + MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(0), CurrVN(0) { initializeMachineCSEPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired(); + AU.addRequired(); AU.addPreservedID(MachineLoopInfoID); AU.addRequired(); AU.addPreserved(); } - virtual void releaseMemory() { + void releaseMemory() override { ScopeMap.clear(); Exps.clear(); } private: - const unsigned LookAheadLimit; + unsigned LookAheadLimit; typedef RecyclingAllocator > AllocatorTy; typedef ScopedHashTable Exps; unsigned CurrVN; - bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); + bool PerformTrivialCopyPropagation(MachineInstr *MI, + MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, MachineBasicBlock::const_iterator I, - MachineBasicBlock::const_iterator E) const ; + MachineBasicBlock::const_iterator E) const; bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet &PhysRefs, - SmallVector &PhysDefs) const; + SmallVectorImpl &PhysDefs, + bool &PhysUseDef) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet &PhysRefs) const; + SmallSet &PhysRefs, + SmallVectorImpl &PhysDefs, + bool &NonLocal) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI); @@ -95,54 +101,69 @@ namespace { void ExitScope(MachineBasicBlock *MBB); bool ProcessBlock(MachineBasicBlock *MBB); void ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap &OpenChildren, - DenseMap &ParentMap); + DenseMap &OpenChildren); bool PerformCSE(MachineDomTreeNode *Node); }; } // end anonymous namespace char MachineCSE::ID = 0; +char &llvm::MachineCSEID = MachineCSE::ID; INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) -FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } - -bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, - MachineBasicBlock *MBB) { +/// The source register of a COPY machine instruction can be propagated to all +/// its users, and this propagation could increase the probability of finding +/// common subexpressions. If the COPY has only one user, the COPY itself can +/// be removed. +bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, + MachineBasicBlock *MBB) { bool Changed = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - if (!MRI->hasOneNonDBGUse(Reg)) - // Only coalesce single use copies. This ensure the copy will be - // deleted. - continue; + bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg); MachineInstr *DefMI = MRI->getVRegDef(Reg); - if (DefMI->getParent() != MBB) - continue; if (!DefMI->isCopy()) continue; unsigned SrcReg = DefMI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) continue; - if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) + if (DefMI->getOperand(0).getSubReg()) + continue; + // FIXME: We should trivially coalesce subregister copies to expose CSE + // opportunities on instructions with truncated operands (see + // cse-add-with-overflow.ll). This can be done here as follows: + // if (SrcSubReg) + // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC, + // SrcSubReg); + // MO.substVirtReg(SrcReg, SrcSubReg, *TRI); + // + // The 2-addr pass has been updated to handle coalesced subregs. However, + // some machine-specific code still can't handle it. + // To handle it properly we also need a way find a constrained subregister + // class given a super-reg class and subreg index. + if (DefMI->getOperand(1).getSubReg()) continue; - if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg))) + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (!MRI->constrainRegClass(SrcReg, RC)) continue; DEBUG(dbgs() << "Coalescing: " << *DefMI); DEBUG(dbgs() << "*** to: " << *MI); + // Propagate SrcReg of copies to MI. MO.setReg(SrcReg); MRI->clearKillFlags(SrcReg); - DefMI->eraseFromParent(); - ++NumCoalesces; + // Coalesce single use copies. + if (OnlyOneUse) { + DefMI->eraseFromParent(); + ++NumCoalesces; + } Changed = true; } @@ -164,8 +185,9 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, return true; bool SeenDef = false; - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = I->getOperand(i); + for (const MachineOperand &MO : I->operands()) { + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) + SeenDef = true; if (!MO.isReg() || !MO.getReg()) continue; if (!TRI->regsOverlap(MO.getReg(), Reg)) @@ -176,7 +198,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, SeenDef = true; } if (SeenDef) - // See a def of Reg (or an alias) before encountering any use, it's + // See a def of Reg (or an alias) before encountering any use, it's // trivially dead. return true; @@ -193,79 +215,122 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet &PhysRefs, - SmallVector &PhysDefs) const{ - MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) + SmallVectorImpl &PhysDefs, + bool &PhysUseDef) const{ + // First, add all uses to PhysRefs. + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + // Reading constant physregs is ok. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); + } + + // Next, collect all defs into PhysDefs. If any is already in PhysRefs + // (which currently contains only uses), set the PhysUseDef flag. + PhysUseDef = false; + MachineBasicBlock::const_iterator I = MI; I = std::next(I); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; + // Check against PhysRefs even if the def is "dead". + if (PhysRefs.count(Reg)) + PhysUseDef = true; // If the def is dead, it's ok. But the def may not marked "dead". That's // common since this pass is run before livevariables. We can scan // forward a few instructions and check if it is obviously dead. - if (MO.isDef() && - (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) - continue; - PhysDefs.push_back(Reg); - PhysRefs.insert(Reg); - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) - PhysRefs.insert(*Alias); + if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end())) + PhysDefs.push_back(Reg); } + // Finally, add all defs to PhysRefs as well. + for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) + for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); + return !PhysRefs.empty(); } bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet &PhysRefs) const { - // Look backward from MI to find CSMI. + SmallSet &PhysRefs, + SmallVectorImpl &PhysDefs, + bool &NonLocal) const { + // For now conservatively returns false if the common subexpression is + // not in the same basic block as the given instruction. The only exception + // is if the common subexpression is in the sole predecessor block. + const MachineBasicBlock *MBB = MI->getParent(); + const MachineBasicBlock *CSMBB = CSMI->getParent(); + + bool CrossMBB = false; + if (CSMBB != MBB) { + if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB) + return false; + + for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { + if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i])) + // Avoid extending live range of physical registers if they are + //allocatable or reserved. + return false; + } + CrossMBB = true; + } + MachineBasicBlock::const_iterator I = CSMI; I = std::next(I); + MachineBasicBlock::const_iterator E = MI; + MachineBasicBlock::const_iterator EE = CSMBB->end(); unsigned LookAheadLeft = LookAheadLimit; - MachineBasicBlock *CurBB = MI->getParent(); - MachineBasicBlock::const_reverse_iterator I(MI); - MachineBasicBlock::const_reverse_iterator E(CurBB->rend()); while (LookAheadLeft) { - while (LookAheadLeft && I != E) { - // Skip over dbg_value's. - while (I != E && I->isDebugValue()) - ++I; - - if (I == E) break; + // Skip over dbg_value's. + while (I != E && I != EE && I->isDebugValue()) + ++I; - if (&*I == CSMI) - return true; + if (I == EE) { + assert(CrossMBB && "Reaching end-of-MBB without finding MI?"); + (void)CrossMBB; + CrossMBB = false; + NonLocal = true; + I = MBB->begin(); + EE = MBB->end(); + continue; + } - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = I->getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - unsigned MOReg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(MOReg)) - continue; - if (PhysRefs.count(MOReg)) - return false; - } + if (I == E) + return true; - --LookAheadLeft; - ++I; + for (const MachineOperand &MO : I->operands()) { + // RegMasks go on instructions like calls that clobber lots of physregs. + // Don't attempt to CSE across such an instruction. + if (MO.isRegMask()) + return false; + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned MOReg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + continue; + if (PhysRefs.count(MOReg)) + return false; } - // Go back another BB; for now, only go back at most one BB. - MachineBasicBlock *CSBB = CSMI->getParent(); - if (!CSBB->isSuccessor(CurBB) || CurBB->pred_size() != 1) - return false; - CurBB = CSBB; - I = CSBB->rbegin(); - E = CSBB->rend(); + + --LookAheadLeft; + ++I; } return false; } bool MachineCSE::isCSECandidate(MachineInstr *MI) { - if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || - MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) + if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || + MI->isInlineAsm() || MI->isDebugValue()) return false; // Ignore copies. @@ -273,12 +338,11 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { return false; // Ignore stuff that we obviously can't move. - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayStore() || TID.isCall() || TID.isTerminator() || + if (MI->mayStore() || MI->isCall() || MI->isTerminator() || MI->hasUnmodeledSideEffects()) return false; - if (TID.mayLoad()) { + if (MI->mayLoad()) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. @@ -297,10 +361,29 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. + // If CSReg is used at all uses of Reg, CSE should not increase register + // pressure of CSReg. + bool MayIncreasePressure = true; + if (TargetRegisterInfo::isVirtualRegister(CSReg) && + TargetRegisterInfo::isVirtualRegister(Reg)) { + MayIncreasePressure = false; + SmallPtrSet CSUses; + for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { + CSUses.insert(&MI); + } + for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { + if (!CSUses.count(&MI)) { + MayIncreasePressure = true; + break; + } + } + } + if (!MayIncreasePressure) return true; + // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. - if (MI->getDesc().isAsCheapAsAMove()) { + if (TII->isAsCheapAsAMove(MI)) { MachineBasicBlock *CSBB = CSMI->getParent(); MachineBasicBlock *BB = MI->getParent(); if (CSBB != BB && !CSBB->isSuccessor(BB)) @@ -310,8 +393,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // Heuristics #2: If the expression doesn't not use a vr and the only use // of the redundant computation are copies, do not cse. bool HasVRegUse = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.isUse() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { HasVRegUse = true; @@ -320,11 +402,9 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, } if (!HasVRegUse) { bool HasNonCopyUse = false; - for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; + for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { // Ignore copies. - if (!Use->isCopyLike()) { + if (!MI.isCopyLike()) { HasNonCopyUse = true; break; } @@ -337,11 +417,9 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // it unless the defined value is already used in the BB of the new use. bool HasPHI = false; SmallPtrSet CSBBs; - for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; - HasPHI |= Use->isPHI(); - CSBBs.insert(Use->getParent()); + for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { + HasPHI |= MI.isPHI(); + CSBBs.insert(MI.getParent()); } if (!HasPHI) @@ -359,14 +437,16 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); DenseMap::iterator SI = ScopeMap.find(MBB); assert(SI != ScopeMap.end()); - ScopeMap.erase(SI); delete SI->second; + ScopeMap.erase(SI); } bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool Changed = false; SmallVector, 8> CSEPairs; + SmallVector ImplicitDefsToUpdate; + SmallVector ImplicitDefs; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -376,20 +456,22 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool FoundCSE = VNT.count(MI); if (!FoundCSE) { - // Look for trivial copy coalescing opportunities. - if (PerformTrivialCoalescing(MI, MBB)) { + // Using trivial copy propagation to find more CSE opportunities. + if (PerformTrivialCopyPropagation(MI, MBB)) { Changed = true; // After coalescing MI itself may become a copy. if (MI->isCopyLike()) continue; + + // Try again to see if CSE is possible. FoundCSE = VNT.count(MI); } } // Commute commutable instructions. bool Commuted = false; - if (!FoundCSE && MI->getDesc().isCommutable()) { + if (!FoundCSE && MI->isCommutable()) { MachineInstr *NewMI = TII->commuteInstruction(MI); if (NewMI) { Commuted = true; @@ -407,18 +489,25 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // If the instruction defines physical registers and the values *may* be // used, then it's not safe to replace it with a common subexpression. // It's also not safe if the instruction uses physical registers. - SmallSet PhysRefs; - SmallVector DirectPhysRefs; - if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, DirectPhysRefs)) { + bool CrossMBBPhysDef = false; + SmallSet PhysRefs; + SmallVector PhysDefs; + bool PhysUseDef = false; + if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, + PhysDefs, PhysUseDef)) { FoundCSE = false; - // ... Unless the CS is local and it also defines the physical register - // which is not clobbered in between and the physical register uses - // were not clobbered. - unsigned CSVN = VNT.lookup(MI); - MachineInstr *CSMI = Exps[CSVN]; - if (PhysRegDefsReach(CSMI, MI, PhysRefs)) - FoundCSE = true; + // ... Unless the CS is local or is in the sole predecessor block + // and it also defines the physical register which is not clobbered + // in between and the physical register uses were not clobbered. + // This can never be the case if the instruction both uses and + // defines the same physical register, which was detected above. + if (!PhysUseDef) { + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) + FoundCSE = true; + } } if (!FoundCSE) { @@ -435,41 +524,108 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Check if it's profitable to perform this CSE. bool DoCSE = true; - unsigned NumDefs = MI->getDesc().getNumDefs(); + unsigned NumDefs = MI->getDesc().getNumDefs() + + MI->getDesc().getNumImplicitDefs(); + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned OldReg = MO.getReg(); unsigned NewReg = CSMI->getOperand(i).getReg(); - if (OldReg == NewReg) + + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead()) + ImplicitDefsToUpdate.push_back(i); + + // Keep track of implicit defs of CSMI and MI, to clear possibly + // made-redundant kill flags. + if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg) + ImplicitDefs.push_back(OldReg); + + if (OldReg == NewReg) { + --NumDefs; continue; + } + assert(TargetRegisterInfo::isVirtualRegister(OldReg) && TargetRegisterInfo::isVirtualRegister(NewReg) && "Do not CSE physical register defs!"); + if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { + DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); + DoCSE = false; + break; + } + + // Don't perform CSE if the result of the old instruction cannot exist + // within the register class of the new instruction. + const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg); + if (!MRI->constrainRegClass(NewReg, OldRC)) { + DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n"); DoCSE = false; break; } + CSEPairs.push_back(std::make_pair(OldReg, NewReg)); --NumDefs; } // Actually perform the elimination. if (DoCSE) { - for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { - MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); - MRI->clearKillFlags(CSEPairs[i].second); + for (std::pair &CSEPair : CSEPairs) { + unsigned OldReg = CSEPair.first; + unsigned NewReg = CSEPair.second; + // OldReg may have been unused but is used now, clear the Dead flag + MachineInstr *Def = MRI->getUniqueVRegDef(NewReg); + assert(Def != nullptr && "CSEd register has no unique definition?"); + Def->clearRegisterDeads(NewReg); + // Replace with NewReg and clear kill flags which may be wrong now. + MRI->replaceRegWith(OldReg, NewReg); + MRI->clearKillFlags(NewReg); } - MI->eraseFromParent(); - if (!DirectPhysRefs.empty() && CSMI->getParent() != MBB) { - assert(CSMI->getParent()->isSuccessor(MBB)); - ++NumCrossBlockPhysCSEs; - SmallVector::iterator PI = DirectPhysRefs.begin(), - PE = DirectPhysRefs.end(); - for (; PI != PE; ++PI) - MBB->addLiveIn(*PI); + + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate) + CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false); + + // Go through implicit defs of CSMI and MI, and clear the kill flags on + // their uses in all the instructions between CSMI and MI. + // We might have made some of the kill flags redundant, consider: + // subs ... %NZCV <- CSMI + // csinc ... %NZCV <- this kill flag isn't valid anymore + // subs ... %NZCV <- MI, to be eliminated + // csinc ... %NZCV + // Since we eliminated MI, and reused a register imp-def'd by CSMI + // (here %NZCV), that register, if it was killed before MI, should have + // that kill flag removed, because it's lifetime was extended. + if (CSMI->getParent() == MI->getParent()) { + for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II) + for (auto ImplicitDef : ImplicitDefs) + if (MachineOperand *MO = II->findRegisterUseOperand( + ImplicitDef, /*isKill=*/true, TRI)) + MO->setIsKill(false); + } else { + // If the instructions aren't in the same BB, bail out and clear the + // kill flag on all uses of the imp-def'd register. + for (auto ImplicitDef : ImplicitDefs) + MRI->clearKillFlags(ImplicitDef); + } + + if (CrossMBBPhysDef) { + // Add physical register defs now coming in from a predecessor to MBB + // livein list. + while (!PhysDefs.empty()) { + unsigned LiveIn = PhysDefs.pop_back_val(); + if (!MBB->isLiveIn(LiveIn)) + MBB->addLiveIn(LiveIn); + } + ++NumCrossBBCSEs; } + + MI->eraseFromParent(); ++NumCSEs; if (!PhysRefs.empty()) ++NumPhysCSEs; @@ -477,11 +633,12 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { ++NumCommutes; Changed = true; } else { - DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); Exps.push_back(MI); } CSEPairs.clear(); + ImplicitDefsToUpdate.clear(); + ImplicitDefs.clear(); } return Changed; @@ -492,8 +649,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { /// up the dominator tree to destroy ancestors which are now done. void MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap &OpenChildren, - DenseMap &ParentMap) { + DenseMap &OpenChildren) { if (OpenChildren[Node]) return; @@ -501,7 +657,7 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, ExitScope(Node->getBlock()); // Now traverse upwards to pop ancestors whose offsprings are all done. - while (MachineDomTreeNode *Parent = ParentMap[Node]) { + while (MachineDomTreeNode *Parent = Node->getIDom()) { unsigned Left = --OpenChildren[Parent]; if (Left != 0) break; @@ -513,7 +669,6 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { SmallVector Scopes; SmallVector WorkList; - DenseMap ParentMap; DenseMap OpenChildren; CurrVN = 0; @@ -524,34 +679,33 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { Node = WorkList.pop_back_val(); Scopes.push_back(Node); const std::vector &Children = Node->getChildren(); - unsigned NumChildren = Children.size(); - OpenChildren[Node] = NumChildren; - for (unsigned i = 0; i != NumChildren; ++i) { - MachineDomTreeNode *Child = Children[i]; - ParentMap[Child] = Node; + OpenChildren[Node] = Children.size(); + for (MachineDomTreeNode *Child : Children) WorkList.push_back(Child); - } } while (!WorkList.empty()); // Now perform CSE. bool Changed = false; - for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { - MachineDomTreeNode *Node = Scopes[i]; + for (MachineDomTreeNode *Node : Scopes) { MachineBasicBlock *MBB = Node->getBlock(); EnterScope(MBB); Changed |= ProcessBlock(MBB); // If it's a leaf node, it's done. Traverse upwards to pop ancestors. - ExitScopeIfDone(Node, OpenChildren, ParentMap); + ExitScopeIfDone(Node, OpenChildren); } return Changed; } bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + if (skipOptnoneFunction(*MF.getFunction())) + return false; + + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); DT = &getAnalysis(); + LookAheadLimit = TII->getMachineCSELookAheadLimit(); return PerformCSE(DT->getRootNode()); }