X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FMachineCSE.cpp;h=ae26967b235e8fab483674c6a0d7604ea5fea42e;hb=33e07581b22ed472103c91a246dab5b8ce08a1c7;hp=0a76f8db0b3baf60d36f05d2e138a8319728accb;hpb=ba86b13ad9cd6a9707a954598863da1e2a9f773b;p=oota-llvm.git diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 0a76f8db0b3..ae26967b235 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -13,21 +13,23 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine-cse" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +#define DEBUG_TYPE "machine-cse" + STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCSEs, "Number of common subexpression eliminated"); STATISTIC(NumPhysCSEs, @@ -49,9 +51,9 @@ namespace { initializeMachineCSEPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired(); @@ -60,11 +62,9 @@ namespace { AU.addPreserved(); } - virtual void releaseMemory() { + void releaseMemory() override { ScopeMap.clear(); Exps.clear(); - AllocatableRegs.clear(); - ReservedRegs.clear(); } private: @@ -78,20 +78,20 @@ namespace { ScopedHTType VNT; SmallVector Exps; unsigned CurrVN; - BitVector AllocatableRegs; - BitVector ReservedRegs; - bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); + bool PerformTrivialCopyPropagation(MachineInstr *MI, + MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator E) const; bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet &PhysRefs, - SmallVector &PhysDefs) const; + SmallVectorImpl &PhysDefs, + bool &PhysUseDef) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet &PhysRefs, - SmallVector &PhysDefs, + SmallVectorImpl &PhysDefs, bool &NonLocal) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, @@ -114,8 +114,12 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) -bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, - MachineBasicBlock *MBB) { +/// The source register of a COPY machine instruction can be propagated to all +/// its users, and this propagation could increase the probability of finding +/// common subexpressions. If the COPY has only one user, the COPY itself can +/// be removed. +bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, + MachineBasicBlock *MBB) { bool Changed = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -124,28 +128,42 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - if (!MRI->hasOneNonDBGUse(Reg)) - // Only coalesce single use copies. This ensure the copy will be - // deleted. - continue; + bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg); MachineInstr *DefMI = MRI->getVRegDef(Reg); - if (DefMI->getParent() != MBB) - continue; if (!DefMI->isCopy()) continue; unsigned SrcReg = DefMI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) continue; - if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) + if (DefMI->getOperand(0).getSubReg()) + continue; + // FIXME: We should trivially coalesce subregister copies to expose CSE + // opportunities on instructions with truncated operands (see + // cse-add-with-overflow.ll). This can be done here as follows: + // if (SrcSubReg) + // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC, + // SrcSubReg); + // MO.substVirtReg(SrcReg, SrcSubReg, *TRI); + // + // The 2-addr pass has been updated to handle coalesced subregs. However, + // some machine-specific code still can't handle it. + // To handle it properly we also need a way find a constrained subregister + // class given a super-reg class and subreg index. + if (DefMI->getOperand(1).getSubReg()) continue; - if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg))) + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (!MRI->constrainRegClass(SrcReg, RC)) continue; DEBUG(dbgs() << "Coalescing: " << *DefMI); DEBUG(dbgs() << "*** to: " << *MI); + // Propagate SrcReg of copies to MI. MO.setReg(SrcReg); MRI->clearKillFlags(SrcReg); - DefMI->eraseFromParent(); - ++NumCoalesces; + // Coalesce single use copies. + if (OnlyOneUse) { + DefMI->eraseFromParent(); + ++NumCoalesces; + } Changed = true; } @@ -198,35 +216,58 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet &PhysRefs, - SmallVector &PhysDefs) const{ - MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); + SmallVectorImpl &PhysDefs, + bool &PhysUseDef) const{ + // First, add all uses to PhysRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) + if (!MO.isReg() || MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; + // Reading constant physregs is ok. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); + } + + // Next, collect all defs into PhysDefs. If any is already in PhysRefs + // (which currently contains only uses), set the PhysUseDef flag. + PhysUseDef = false; + MachineBasicBlock::const_iterator I = MI; I = std::next(I); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + // Check against PhysRefs even if the def is "dead". + if (PhysRefs.count(Reg)) + PhysUseDef = true; // If the def is dead, it's ok. But the def may not marked "dead". That's // common since this pass is run before livevariables. We can scan // forward a few instructions and check if it is obviously dead. - if (MO.isDef() && - (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) - continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - PhysRefs.insert(*AI); - if (MO.isDef()) + if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end())) PhysDefs.push_back(Reg); } + // Finally, add all defs to PhysRefs as well. + for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) + for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); + return !PhysRefs.empty(); } bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet &PhysRefs, - SmallVector &PhysDefs, + SmallVectorImpl &PhysDefs, bool &NonLocal) const { // For now conservatively returns false if the common subexpression is // not in the same basic block as the given instruction. The only exception @@ -240,14 +281,14 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, return false; for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { - if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i])) + if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i])) // Avoid extending live range of physical registers if they are //allocatable or reserved. return false; } CrossMBB = true; } - MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); + MachineBasicBlock::const_iterator I = CSMI; I = std::next(I); MachineBasicBlock::const_iterator E = MI; MachineBasicBlock::const_iterator EE = CSMBB->end(); unsigned LookAheadLeft = LookAheadLimit; @@ -292,8 +333,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, } bool MachineCSE::isCSECandidate(MachineInstr *MI) { - if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || - MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) + if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || + MI->isInlineAsm() || MI->isDebugValue()) return false; // Ignore copies. @@ -331,15 +372,11 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, TargetRegisterInfo::isVirtualRegister(Reg)) { MayIncreasePressure = false; SmallPtrSet CSUses; - for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; - CSUses.insert(Use); + for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { + CSUses.insert(&MI); } - for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; - if (!CSUses.count(Use)) { + for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { + if (!CSUses.count(&MI)) { MayIncreasePressure = true; break; } @@ -350,7 +387,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. - if (MI->isAsCheapAsAMove()) { + if (TII->isAsCheapAsAMove(MI)) { MachineBasicBlock *CSBB = CSMI->getParent(); MachineBasicBlock *BB = MI->getParent(); if (CSBB != BB && !CSBB->isSuccessor(BB)) @@ -370,11 +407,9 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, } if (!HasVRegUse) { bool HasNonCopyUse = false; - for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; + for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { // Ignore copies. - if (!Use->isCopyLike()) { + if (!MI.isCopyLike()) { HasNonCopyUse = true; break; } @@ -387,11 +422,9 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // it unless the defined value is already used in the BB of the new use. bool HasPHI = false; SmallPtrSet CSBBs; - for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; - HasPHI |= Use->isPHI(); - CSBBs.insert(Use->getParent()); + for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { + HasPHI |= MI.isPHI(); + CSBBs.insert(MI.getParent()); } if (!HasPHI) @@ -409,14 +442,15 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); DenseMap::iterator SI = ScopeMap.find(MBB); assert(SI != ScopeMap.end()); - ScopeMap.erase(SI); delete SI->second; + ScopeMap.erase(SI); } bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool Changed = false; SmallVector, 8> CSEPairs; + SmallVector ImplicitDefsToUpdate; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -426,13 +460,15 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool FoundCSE = VNT.count(MI); if (!FoundCSE) { - // Look for trivial copy coalescing opportunities. - if (PerformTrivialCoalescing(MI, MBB)) { + // Using trivial copy propagation to find more CSE opportunities. + if (PerformTrivialCopyPropagation(MI, MBB)) { Changed = true; // After coalescing MI itself may become a copy. if (MI->isCopyLike()) continue; + + // Try again to see if CSE is possible. FoundCSE = VNT.count(MI); } } @@ -460,16 +496,22 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool CrossMBBPhysDef = false; SmallSet PhysRefs; SmallVector PhysDefs; - if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) { + bool PhysUseDef = false; + if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, + PhysDefs, PhysUseDef)) { FoundCSE = false; // ... Unless the CS is local or is in the sole predecessor block // and it also defines the physical register which is not clobbered // in between and the physical register uses were not clobbered. - unsigned CSVN = VNT.lookup(MI); - MachineInstr *CSMI = Exps[CSVN]; - if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) - FoundCSE = true; + // This can never be the case if the instruction both uses and + // defines the same physical register, which was detected above. + if (!PhysUseDef) { + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) + FoundCSE = true; + } } if (!FoundCSE) { @@ -486,15 +528,24 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Check if it's profitable to perform this CSE. bool DoCSE = true; - unsigned NumDefs = MI->getDesc().getNumDefs(); + unsigned NumDefs = MI->getDesc().getNumDefs() + + MI->getDesc().getNumImplicitDefs(); + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned OldReg = MO.getReg(); unsigned NewReg = CSMI->getOperand(i).getReg(); - if (OldReg == NewReg) + + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead()) + ImplicitDefsToUpdate.push_back(i); + if (OldReg == NewReg) { + --NumDefs; continue; + } assert(TargetRegisterInfo::isVirtualRegister(OldReg) && TargetRegisterInfo::isVirtualRegister(NewReg) && @@ -526,6 +577,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { MRI->clearKillFlags(CSEPairs[i].second); } + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i) + CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false); + if (CrossMBBPhysDef) { // Add physical register defs now coming in from a predecessor to MBB // livein list. @@ -549,6 +605,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { Exps.push_back(MI); } CSEPairs.clear(); + ImplicitDefsToUpdate.clear(); } return Changed; @@ -612,12 +669,13 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { } bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + if (skipOptnoneFunction(*MF.getFunction())) + return false; + + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); AA = &getAnalysis(); DT = &getAnalysis(); - AllocatableRegs = TRI->getAllocatableSet(MF); - ReservedRegs = TRI->getReservedRegs(MF); return PerformCSE(DT->getRootNode()); }