// - Optimize Loads:
//
// Loads that can be folded into a later instruction. A load is foldable
-// if it loads to virtual registers and the virtual register defined has
+// if it loads to virtual registers and the virtual register defined has
// a single use.
//
// - Optimize Copies and Bitcast (more generally, target specific copies):
bool optimizeSelect(MachineInstr *MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
bool optimizeCondBranch(MachineInstr *MI);
- bool optimizeCopyOrBitcast(MachineInstr *MI);
bool optimizeCoalescableCopy(MachineInstr *MI);
bool optimizeUncoalescableCopy(MachineInstr *MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+
+ /// \brief If copy instruction \p MI is a virtual register copy, track its
+ /// source register in \p CopiedFromRegs and \p CopyMIs. If that source
+ /// register was previously seen as the source of another copy, replace the
+ /// uses of this copy with the previously seen copy's destination register.
+ bool foldRedundantCopy(MachineInstr *MI,
+ SmallSet<unsigned, 4> &CopiedFromRegs,
+ DenseMap<unsigned, MachineInstr*> &CopyMIs);
+
bool isLoadFoldable(MachineInstr *MI,
SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
/// \brief Following the use-def chain, get the next available source
/// for the tracked value.
- /// \return A ValueTrackerResult containing the a set of registers
+ /// \return A ValueTrackerResult containing a set of registers
/// and sub registers with tracked values. A ValueTrackerResult with
/// an empty set of registers means no source was found.
ValueTrackerResult getNextSource();
return TII->optimizeCondBranch(MI);
}
-/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
-/// share the same register file.
-static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
- const TargetRegisterClass *DefRC,
- unsigned DefSubReg,
- const TargetRegisterClass *SrcRC,
- unsigned SrcSubReg) {
- // Same register class.
- if (DefRC == SrcRC)
- return true;
-
- // Both operands are sub registers. Check if they share a register class.
- unsigned SrcIdx, DefIdx;
- if (SrcSubReg && DefSubReg)
- return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
- SrcIdx, DefIdx) != nullptr;
- // At most one of the register is a sub register, make it Src to avoid
- // duplicating the test.
- if (!SrcSubReg) {
- std::swap(DefSubReg, SrcSubReg);
- std::swap(DefRC, SrcRC);
- }
-
- // One of the register is a sub register, check if we can get a superclass.
- if (SrcSubReg)
- return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
- // Plain copy.
- return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
-}
-
/// \brief Try to find the next source that share the same register file
/// for the value defined by \p Reg and \p SubReg.
/// When true is returned, the \p RewriteMap can be used by the client to
SmallVector<TargetInstrInfo::RegSubRegPair, 4> SrcToLook;
TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg);
SrcToLook.push_back(CurSrcPair);
- bool ShouldRewrite = false;
- unsigned PHILimit = RewritePHILimit;
- while (!SrcToLook.empty() && PHILimit) {
+ unsigned PHICount = 0;
+ while (!SrcToLook.empty() && PHICount < RewritePHILimit) {
TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val();
// As explained above, do not handle physical registers
if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg))
ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI,
!DisableAdvCopyOpt, TII);
ValueTrackerResult Res;
+ bool ShouldRewrite = false;
do {
// Follow the chain of copies until we reach the top of the use-def chain
// a PHI instruction. Add the found PHI edges to be looked up further.
unsigned NumSrcs = Res.getNumSources();
if (NumSrcs > 1) {
- PHILimit--;
+ PHICount++;
for (unsigned i = 0; i < NumSrcs; ++i)
SrcToLook.push_back(TargetInstrInfo::RegSubRegPair(
Res.getSrcReg(i), Res.getSrcSubReg(i)));
return false;
const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
-
- // If this source does not incur a cross register bank copy, use it.
- ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, SubReg, SrcRC,
- CurSrcPair.SubReg);
+ ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC,
+ CurSrcPair.SubReg);
} while (!ShouldRewrite);
// Continue looking for new sources...
if (Res.isValid())
continue;
- if (!PHILimit) {
- DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
- return false;
- }
-
// Do not continue searching for a new source if the there's at least
// one use-def which cannot be rewritten.
if (!ShouldRewrite)
return false;
}
- // If we did not find a more suitable source, there is nothing to optimize.
- if (CurSrcPair.Reg == Reg)
+ if (PHICount >= RewritePHILimit) {
+ DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
return false;
+ }
- return true;
+ // If we did not find a more suitable source, there is nothing to optimize.
+ return CurSrcPair.Reg != Reg;
}
/// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are
/// successfully traverse a PHI instruction and find suitable sources coming
/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
/// suitable to be used in a new COPY instruction.
-MachineInstr *
+static MachineInstr *
insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
const SmallVectorImpl<TargetInstrInfo::RegSubRegPair> &SrcRegs,
MachineInstr *OrigPHI) {
/// This source defines the whole definition, i.e.,
/// (TrackReg, TrackSubReg) = (dst, dstSubIdx).
///
- /// The second and subsequent calls will return false, has there is only one
+ /// The second and subsequent calls will return false, as there is only one
/// rewritable source.
///
/// \return True if a rewritable source has been found, false otherwise.
virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
unsigned &TrackReg,
unsigned &TrackSubReg) {
- // If CurrentSrcIdx == 1, this means this function has already been
- // called once. CopyLike has one defintiion and one argument, thus,
- // there is nothing else to rewrite.
+ // If CurrentSrcIdx == 1, this means this function has already been called
+ // once. CopyLike has one definition and one argument, thus, there is
+ // nothing else to rewrite.
if (!CopyLike.isCopy() || CurrentSrcIdx == 1)
return false;
// This is the first call to getNextRewritableSource.
continue;
}
+ // TODO: Remove once multiple srcs w/ coalescable copies are supported.
if (!HandleMultipleSources)
break;
// Build the new PHI node and return its def register as the new source.
MachineInstr *OrigPHI = const_cast<MachineInstr *>(Res.getInst());
MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI);
+ DEBUG(dbgs() << "-- getNewSource\n");
+ DEBUG(dbgs() << " Replacing: " << *OrigPHI);
+ DEBUG(dbgs() << " With: " << *NewPHI);
const MachineOperand &MODef = NewPHI->getOperand(0);
return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg());
if (Def.SubReg)
NewCopy->getOperand(0).setIsUndef();
+ DEBUG(dbgs() << "-- RewriteSource\n");
+ DEBUG(dbgs() << " Replacing: " << CopyLike);
+ DEBUG(dbgs() << " With: " << *NewCopy);
MRI.replaceRegWith(Def.Reg, NewVR);
MRI.clearKillFlags(NewVR);
// partial definition.
TrackReg = MODef.getReg();
if (MODef.getSubReg())
- // Bails if we have to compose sub-register indices.
+ // Bail if we have to compose sub-register indices.
return false;
TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
return true;
CurrentSrcIdx = 1;
const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
SrcReg = MOExtractedReg.getReg();
- // If we have to compose sub-register indices, bails out.
+ // If we have to compose sub-register indices, bail out.
if (MOExtractedReg.getSubReg())
return false;
}
const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
SrcReg = MOInsertedReg.getReg();
- // If we have to compose sub-register indices, bails out.
+ // If we have to compose sub-register indices, bail out.
if ((SrcSubReg = MOInsertedReg.getSubReg()))
return false;
const MachineOperand &MODef = CopyLike.getOperand(0);
TrackReg = MODef.getReg();
- // If we have to compose sub-registers, bails.
+ // If we have to compose sub-registers, bail.
return MODef.getSubReg() == 0;
}
/// the same register bank.
/// New copies issued by this optimization are register allocator
/// friendly. This optimization does not remove any copy as it may
-/// overconstraint the register allocator, but replaces some operands
+/// overconstrain the register allocator, but replaces some operands
/// when possible.
/// \pre isCoalescableCopy(*MI) is true.
/// \return True, when \p MI has been rewritten. False otherwise.
bool Changed = false;
// Get the right rewriter for the current copy.
std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
- // If none exists, bails out.
+ // If none exists, bail out.
if (!CpyRewriter)
return false;
// Rewrite each rewritable source.
TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg);
TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource(
MRI, TII, TrackPair, RewriteMap, false /* multiple sources */);
- if (SrcReg == NewSrc.Reg)
+ if (SrcReg == NewSrc.Reg || NewSrc.Reg == 0)
continue;
// Rewrite source.
SmallVector<TargetInstrInfo::RegSubRegPair, 4> RewritePairs;
// Get the right rewriter for the current copy.
std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
- // If none exists, bails out.
+ // If none exists, bail out.
if (!CpyRewriter)
return false;
return false;
}
+// Fold a redundant virtual-register COPY. The first copy seen from each source
+// register is recorded in CopySrcRegs/CopyMIs; a later copy of the same source
+// (same source subregister, same destination register class) has the uses of
+// its destination replaced with the earlier copy's destination (returns true).
+//
+// FIXME: This is very simple and misses some cases which should be handled when
+// motivating examples are found.
+//
+// The copy rewriting logic should look at uses as well as defs and be able to
+// eliminate copies across blocks. Later copies that are subregister extracts
+// will also not be eliminated since only the first copy is considered, e.g.
+// %vreg1 = COPY %vreg0
+// %vreg2 = COPY %vreg0:sub1
+// Should replace %vreg2 uses with %vreg1:sub1
+bool PeepholeOptimizer::foldRedundantCopy(
+ MachineInstr *MI,
+ SmallSet<unsigned, 4> &CopySrcRegs,
+ DenseMap<unsigned, MachineInstr *> &CopyMIs) {
+ assert(MI->isCopy());
+
+ unsigned SrcReg = MI->getOperand(1).getReg(); // Operand 1 is the copy source.
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ return false;
+
+ unsigned DstReg = MI->getOperand(0).getReg(); // Operand 0 is the copy def.
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ return false;
+
+ if (CopySrcRegs.insert(SrcReg).second) {
+ // First copy from this source register: remember it, nothing to fold yet.
+ CopyMIs.insert(std::make_pair(SrcReg, MI));
+ return false;
+ }
+
+ MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second;
+
+ unsigned SrcSubReg = MI->getOperand(1).getSubReg();
+ unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg();
+
+ // The copies read different subregisters of SrcReg; they are not replaceable.
+ if (SrcSubReg != PrevSrcSubReg)
+ return false;
+
+ unsigned PrevDstReg = PrevCopy->getOperand(0).getReg();
+
+ // Only replace if both copies define registers of the same register class.
+ //
+ // TODO: If we have multiple copies to different register classes, we may want
+ // to track multiple copies of the same source register.
+ if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
+ return false;
+
+ MRI->replaceRegWith(DstReg, PrevDstReg);
+
+ // PrevDstReg now has additional uses, so existing kill flags may be stale.
+ MRI->clearKillFlags(PrevDstReg);
+ return true; // MI itself is not erased here.
+}
+
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (skipOptnoneFunction(*MF.getFunction()))
return false;
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
+ // Set of virtual registers that are copied from.
+ SmallSet<unsigned, 4> CopySrcRegs;
+ DenseMap<unsigned, MachineInstr *> CopySrcMIs;
+
for (MachineBasicBlock::iterator
MII = I->begin(), MIE = I->end(); MII != MIE; ) {
MachineInstr *MI = &*MII;
if (MI->isDebugValue())
continue;
- // If there exists an instruction which belongs to the following
- // categories, we will discard the load candidates.
+ // If we run into an instruction we can't fold across, discard
+ // the load candidates.
+ if (MI->isLoadFoldBarrier())
+ FoldAsLoadDefCandidates.clear();
+
if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
MI->isKill() || MI->isInlineAsm() ||
- MI->hasUnmodeledSideEffects()) {
- FoldAsLoadDefCandidates.clear();
+ MI->hasUnmodeledSideEffects())
continue;
- }
- if (MI->mayStore() || MI->isCall())
- FoldAsLoadDefCandidates.clear();
if ((isUncoalescableCopy(*MI) &&
optimizeUncoalescableCopy(MI, LocalMIs)) ||
continue;
}
+ if (MI->isCopy() && foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs)) {
+ LocalMIs.erase(MI);
+ MI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
SeenMoveImm = true;
} else {
if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
// If we look for a different subreg, it means we want a subreg of src.
- // Bails as we do not support composing subreg yet.
+ // Bail as we do not support composing subregs yet.
return ValueTrackerResult();
// Otherwise, we want the whole source.
const MachineOperand &Src = Def->getOperand(1);
return ValueTrackerResult();
if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
// If we look for a different subreg, it means we want a subreg of the src.
- // Bails as we do not support composing subreg yet.
+ // Bail as we do not support composing subregs yet.
return ValueTrackerResult();
unsigned SrcIdx = Def->getNumOperands();
"Invalid definition");
if (Def->getOperand(DefIdx).getSubReg())
- // If we are composing subreg, bails out.
+ // If we are composing subregs, bail out.
// The case we are checking is Def.<subreg> = REG_SEQUENCE.
// This should almost never happen as the SSA property is tracked at
// the register level (as opposed to the subreg level).
for (auto &RegSeqInput : RegSeqInputRegs) {
if (RegSeqInput.SubIdx == DefSubReg) {
if (RegSeqInput.SubReg)
- // Bails if we have to compose sub registers.
+ // Bail if we have to compose sub registers.
return ValueTrackerResult();
return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
"Invalid definition");
if (Def->getOperand(DefIdx).getSubReg())
- // If we are composing subreg, bails out.
+ // If we are composing subregs, bail out.
// Same remark as getNextSourceFromRegSequence.
// I.e., this may be turned into an assert.
return ValueTrackerResult();
const MachineOperand &MODef = Def->getOperand(DefIdx);
// If the result register (Def) and the base register (v0) do not
// have the same register class or if we have to compose
- // subregisters, bails out.
+ // subregisters, bail out.
if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
BaseReg.SubReg)
return ValueTrackerResult();
// We are looking at:
// Def = EXTRACT_SUBREG v0, sub0
- // Bails if we have to compose sub registers.
+ // Bail if we have to compose sub registers.
// Indeed, if DefSubReg != 0, we would have to compose it with sub0.
if (DefSubReg)
return ValueTrackerResult();
if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
return ValueTrackerResult();
- // Bails if we have to compose sub registers.
+ // Bail if we have to compose sub registers.
// Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
if (ExtractSubregInputReg.SubReg)
return ValueTrackerResult();
// We are looking at:
// Def = SUBREG_TO_REG Imm, v0, sub0
- // Bails if we have to compose sub registers.
+ // Bail if we have to compose sub registers.
// If DefSubReg != sub0, we would have to check that all the bits
// we track are included in sub0 and if yes, we would have to
// determine the right subreg in v0.
if (DefSubReg != Def->getOperand(3).getImm())
return ValueTrackerResult();
- // Bails if we have to compose sub registers.
+ // Bail if we have to compose sub registers.
// Likewise, if v0.subreg != 0, we would have to compose it with sub0.
if (Def->getOperand(2).getSubReg())
return ValueTrackerResult();
assert(Def->isPHI() && "Invalid definition");
ValueTrackerResult Res;
- // If we look for a different subreg, bails as we do not
- // support composing subreg yet.
+ // If we look for a different subreg, bail as we do not support composing
+ // subregs yet.
if (Def->getOperand(0).getSubReg() != DefSubReg)
return ValueTrackerResult();
if (Def->isBitcast())
return getNextSourceFromBitcast();
// All the remaining cases involve "complex" instructions.
- // Bails if we did not ask for the advanced tracking.
+ // Bail if we did not ask for the advanced tracking.
if (!UseAdvancedTracking)
return ValueTrackerResult();
if (Def->isRegSequence() || Def->isRegSequenceLike())
Res.setInst(Def);
// If we can still move up in the use-def chain, move to the next
- // defintion.
+ // definition.
if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) {
Def = MRI.getVRegDef(Reg);
DefIdx = MRI.def_begin(Reg).getOperandNo();