X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FBranchFolding.cpp;h=2e978b333e98ec33f86b75d7ac8e21eba2883065;hb=dc54f8ebe288c7313e22902d4bdfe030e479db48;hp=6fc077bc1287fa5b04fec73c5ac76c1a501882f5;hpb=30adced41b17cc9a5c301386e80567faccaff04f;p=oota-llvm.git diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 6fc077bc128..2e978b333e9 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -61,29 +62,33 @@ TailMergeSize("tail-merge-size", namespace { /// BranchFolderPass - Wrap branch folder in a machine function pass. - class BranchFolderPass : public MachineFunctionPass, - public BranchFolder { + class BranchFolderPass : public MachineFunctionPass { public: static char ID; - explicit BranchFolderPass(bool defaultEnableTailMerge) - : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {} + explicit BranchFolderPass(): MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Control Flow Optimizer"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; } char BranchFolderPass::ID = 0; +char &llvm::BranchFolderPassID = BranchFolderPass::ID; -FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { - return new BranchFolderPass(DefaultEnableTailMerge); -} +INITIALIZE_PASS(BranchFolderPass, "branch-folder", + "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { - return OptimizeFunction(MF, - MF.getTarget().getInstrInfo(), - MF.getTarget().getRegisterInfo(), - getAnalysisIfAvailable()); + TargetPassConfig *PassConfig = &getAnalysis(); + BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true); + return Folder.OptimizeFunction(MF, + MF.getTarget().getInstrInfo(), + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable()); } @@ -108,6 +113,9 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { while (!MBB->succ_empty()) MBB->removeSuccessor(MBB->succ_end()-1); + // Avoid matching if this pointer gets reused. + TriedMerging.erase(MBB); + // Remove the block. MF->erase(MBB); } @@ -129,7 +137,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { break; unsigned Reg = I->getOperand(0).getReg(); ImpDefRegs.insert(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) ImpDefRegs.insert(SubReg); ++I; @@ -171,11 +179,19 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MachineModuleInfo *mmi) { if (!tii) return false; + TriedMerging.clear(); + TII = tii; TRI = tri; MMI = mmi; + RS = NULL; - RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL; + // Use a RegScavenger to help update liveness when required. + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) + RS = new RegScavenger(); + else + MRI.invalidateLiveness(); // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; @@ -203,7 +219,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, delete RS; return MadeChange; } - + // Walk the function to find jump tables that are live. BitVector JTIsLive(JTI->getJumpTables().size()); for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); @@ -361,11 +377,31 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, return TailLen; } +void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB, + MachineBasicBlock *NewMBB) { + if (RS) { + RS->enterBasicBlock(CurMBB); + if (!CurMBB->empty()) + RS->forward(prior(CurMBB->end())); + BitVector RegsLiveAtExit(TRI->getNumRegs()); + RS->getRegsUsed(RegsLiveAtExit, false); + for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) + if (RegsLiveAtExit[i]) + NewMBB->addLiveIn(i); + } +} + /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything /// after it, replacing it with an unconditional branch to NewDest. void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { + MachineBasicBlock *CurMBB = OldInst->getParent(); + TII->ReplaceTailWithBranchTo(OldInst, NewDest); + + // For targets that use the register scavenger, we must maintain LiveIns. + MaintainLiveIns(CurMBB, NewDest); + ++NumTailMerge; } @@ -394,16 +430,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); // For targets that use the register scavenger, we must maintain LiveIns. - if (RS) { - RS->enterBasicBlock(&CurMBB); - if (!CurMBB.empty()) - RS->forward(prior(CurMBB.end())); - BitVector RegsLiveAtExit(TRI->getNumRegs()); - RS->getRegsUsed(RegsLiveAtExit, false); - for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) - if (RegsLiveAtExit[i]) - NewMBB->addLiveIn(i); - } + MaintainLiveIns(&CurMBB, NewMBB); return NewMBB; } @@ -416,10 +443,9 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, for (; I != E; ++I) { if (I->isDebugValue()) continue; - const TargetInstrDesc &TID = I->getDesc(); - if (TID.isCall()) + if (I->isCall()) Time += 10; - else if (TID.mayLoad() || TID.mayStore()) + else if (I->mayLoad() || I->mayStore()) Time += 2; else ++Time; @@ -468,8 +494,9 @@ BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { // an object with itself. #ifndef _GLIBCXX_DEBUG llvm_unreachable("Predecessor appears twice"); -#endif +#else return false; +#endif } } @@ -486,7 +513,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, break; } --I; - if (!I->getDesc().isTerminator()) break; + if (!I->isTerminator()) break; ++NumTerms; } return NumTerms; @@ -534,8 +561,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // heuristics. unsigned EffectiveTailLen = CommonTailLen; if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && - !MBB1->back().getDesc().isBarrier() && - !MBB2->back().getDesc().isBarrier()) + !MBB1->back().isBarrier() && + !MBB2->back().isBarrier()) ++EffectiveTailLen; // Check if the common tail is long enough to be worthwhile. @@ -792,21 +819,27 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, } bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { - - if (!EnableTailMerge) return false; - bool MadeChange = false; + if (!EnableTailMerge) return MadeChange; // First find blocks with no successors. MergePotentials.clear(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E && MergePotentials.size() < TailMergeThreshold; ++I) { + if (TriedMerging.count(I)) + continue; if (I->succ_empty()) MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); } + // If this is a large problem, avoid visiting the same basic blocks + // multiple times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); + // See if we can do any tail merging on those. - if (MergePotentials.size() < TailMergeThreshold && - MergePotentials.size() >= 2) + if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(NULL, NULL); // Look at blocks (IBB) with multiple predecessors (PBB). @@ -830,77 +863,97 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) { - if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { - SmallPtrSet UniquePreds; - MachineBasicBlock *IBB = I; - MachineBasicBlock *PredBB = prior(I); - MergePotentials.clear(); - for (MachineBasicBlock::pred_iterator P = I->pred_begin(), - E2 = I->pred_end(); - P != E2; ++P) { - MachineBasicBlock *PBB = *P; - // Skip blocks that loop to themselves, can't tail merge these. - if (PBB == IBB) - continue; - // Visit each predecessor only once. - if (!UniquePreds.insert(PBB)) - continue; - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector Cond; - if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { - // Failing case: IBB is the target of a cbr, and - // we cannot reverse the branch. - SmallVector NewCond(Cond); - if (!Cond.empty() && TBB == IBB) { - if (TII->ReverseBranchCondition(NewCond)) + if (I->pred_size() < 2) continue; + SmallPtrSet UniquePreds; + MachineBasicBlock *IBB = I; + MachineBasicBlock *PredBB = prior(I); + MergePotentials.clear(); + for (MachineBasicBlock::pred_iterator P = I->pred_begin(), + E2 = I->pred_end(); + P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) { + MachineBasicBlock *PBB = *P; + if (TriedMerging.count(PBB)) + continue; + + // Skip blocks that loop to themselves, can't tail merge these. + if (PBB == IBB) + continue; + + // Visit each predecessor only once. + if (!UniquePreds.insert(PBB)) + continue; + + // Skip blocks which may jump to a landing pad. Can't tail merge these. + if (PBB->getLandingPadSuccessor()) + continue; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { + // Failing case: IBB is the target of a cbr, and we cannot reverse the + // branch. + SmallVector NewCond(Cond); + if (!Cond.empty() && TBB == IBB) { + if (TII->ReverseBranchCondition(NewCond)) + continue; + // This is the QBB case described above + if (!FBB) + FBB = llvm::next(MachineFunction::iterator(PBB)); + } + + // Failing case: the only way IBB can be reached from PBB is via + // exception handling. Happens for landing pads. Would be nice to have + // a bit in the edge so we didn't have to do all this. + if (IBB->isLandingPad()) { + MachineFunction::iterator IP = PBB; IP++; + MachineBasicBlock *PredNextBB = NULL; + if (IP != MF.end()) + PredNextBB = IP; + if (TBB == NULL) { + if (IBB != PredNextBB) // fallthrough + continue; + } else if (FBB) { + if (TBB != IBB && FBB != IBB) // cbr then ubr + continue; + } else if (Cond.empty()) { + if (TBB != IBB) // ubr + continue; + } else { + if (TBB != IBB && IBB != PredNextBB) // cbr continue; - // This is the QBB case described above - if (!FBB) - FBB = llvm::next(MachineFunction::iterator(PBB)); - } - // Failing case: the only way IBB can be reached from PBB is via - // exception handling. Happens for landing pads. Would be nice - // to have a bit in the edge so we didn't have to do all this. - if (IBB->isLandingPad()) { - MachineFunction::iterator IP = PBB; IP++; - MachineBasicBlock *PredNextBB = NULL; - if (IP != MF.end()) - PredNextBB = IP; - if (TBB == NULL) { - if (IBB != PredNextBB) // fallthrough - continue; - } else if (FBB) { - if (TBB != IBB && FBB != IBB) // cbr then ubr - continue; - } else if (Cond.empty()) { - if (TBB != IBB) // ubr - continue; - } else { - if (TBB != IBB && IBB != PredNextBB) // cbr - continue; - } - } - // Remove the unconditional branch at the end, if any. - if (TBB && (Cond.empty() || FBB)) { - DebugLoc dl; // FIXME: this is nowhere - TII->RemoveBranch(*PBB); - if (!Cond.empty()) - // reinsert conditional branch only, for now - TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); } - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } + + // Remove the unconditional branch at the end, if any. + if (TBB && (Cond.empty() || FBB)) { + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PBB); + if (!Cond.empty()) + // reinsert conditional branch only, for now + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); + } + + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } - if (MergePotentials.size() >= 2) - MadeChange |= TryTailMergeBlocks(IBB, PredBB); - // Reinsert an unconditional branch if needed. - // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks. - PredBB = prior(I); // this may have been changed in TryTailMergeBlocks - if (MergePotentials.size() == 1 && - MergePotentials.begin()->getBlock() != PredBB) - FixTail(MergePotentials.begin()->getBlock(), IBB, TII); } + + // If this is a large problem, avoid visiting the same basic blocks multiple + // times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); + + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(IBB, PredBB); + + // Reinsert an unconditional branch if needed. The 1 below can occur as a + // result of removing blocks in TryTailMergeBlocks. + PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + if (MergePotentials.size() == 1 && + MergePotentials.begin()->getBlock() != PredBB) + FixTail(MergePotentials.begin()->getBlock(), IBB, TII); } + return MadeChange; } @@ -950,7 +1003,7 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { if (!MBBI->isDebugValue()) break; } - return (MBBI->getDesc().isBranch()); + return (MBBI->isBranch()); } /// IsBetterFallthrough - Return true if it would be clearly better to @@ -978,7 +1031,23 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, MachineBasicBlock::iterator MBB2I = --MBB2->end(); while (MBB2I->isDebugValue()) --MBB2I; - return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall(); + return MBB2I->isCall() && !MBB1I->isCall(); +} + +/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch +/// instructions on the block. Always use the DebugLoc of the first +/// branching instruction found unless its absent, in which case use the +/// DebugLoc of the second if present. +static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return DebugLoc(); + --I; + while (I->isDebugValue() && I != MBB.begin()) + --I; + if (I->isBranch()) + return I->getDebugLoc(); + return DebugLoc(); } /// OptimizeBlock - Analyze and optimize control flow related to the specified @@ -986,7 +1055,6 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool MadeChange = false; MachineFunction &MF = *MBB->getParent(); - DebugLoc dl; // FIXME: this is nowhere ReoptimizeBlock: MachineFunction::iterator FallThrough = MBB; @@ -1035,6 +1103,7 @@ ReoptimizeBlock: // destination, remove the branch, replacing it with an unconditional one or // a fall-through. if (PriorTBB && PriorTBB == PriorFBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) @@ -1056,8 +1125,24 @@ ReoptimizeBlock: !MBB->hasAddressTaken() && !MBB->isLandingPad()) { DEBUG(dbgs() << "\nMerging into block: " << PrevBB << "From MBB: " << *MBB); + // Remove redundant DBG_VALUEs first. + if (PrevBB.begin() != PrevBB.end()) { + MachineBasicBlock::iterator PrevBBIter = PrevBB.end(); + --PrevBBIter; + MachineBasicBlock::iterator MBBIter = MBB->begin(); + // Check if DBG_VALUE at the end of PrevBB is identical to the + // DBG_VALUE at the beginning of MBB. + while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end() + && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) { + if (!MBBIter->isIdenticalTo(PrevBBIter)) + break; + MachineInstr *DuplicateDbg = MBBIter; + ++MBBIter; -- PrevBBIter; + DuplicateDbg->eraseFromParent(); + } + } PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); - PrevBB.removeSuccessor(PrevBB.succ_begin());; + PrevBB.removeSuccessor(PrevBB.succ_begin()); assert(PrevBB.succ_empty()); PrevBB.transferSuccessors(MBB); MadeChange = true; @@ -1076,6 +1161,7 @@ ReoptimizeBlock: // If the prior block branches somewhere else on the condition and here if // the condition is false, remove the uncond second branch. if (PriorFBB == MBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; @@ -1089,6 +1175,7 @@ ReoptimizeBlock: if (PriorTBB == MBB) { SmallVector NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); MadeChange = true; @@ -1126,6 +1213,7 @@ ReoptimizeBlock: DEBUG(dbgs() << "\nMoving MBB: " << *MBB << "To make fallthrough to: " << *PriorTBB << "\n"); + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); @@ -1155,6 +1243,7 @@ ReoptimizeBlock: if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { SmallVector NewCond(CurCond); if (!TII->ReverseBranchCondition(NewCond)) { + DebugLoc dl = getBranchDebugLoc(*MBB); TII->RemoveBranch(*MBB); TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); MadeChange = true; @@ -1168,6 +1257,7 @@ ReoptimizeBlock: if (CurTBB && CurCond.empty() && CurFBB == 0 && IsBranchOnlyBlock(MBB) && CurTBB != MBB && !MBB->hasAddressTaken()) { + DebugLoc dl = getBranchDebugLoc(*MBB); // This block may contain just an unconditional branch. Because there can // be 'non-branch terminators' in the block, try removing the branch and // then seeing if the block is empty. @@ -1210,8 +1300,9 @@ ReoptimizeBlock: assert(PriorFBB == 0 && "Machine CFG out of date!"); PriorFBB = MBB; } + DebugLoc pdl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); } // Iterate through all the predecessors, revectoring each in-turn. @@ -1235,9 +1326,10 @@ ReoptimizeBlock: bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { + DebugLoc pdl = getBranchDebugLoc(*PMBB); TII->RemoveBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl); + TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl); MadeChange = true; ++NumBranchOpts; PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); @@ -1297,7 +1389,7 @@ ReoptimizeBlock: if (CurFallsThru) { MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl); + TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc()); } MBB->moveAfter(PredBB); MadeChange = true; @@ -1351,13 +1443,7 @@ ReoptimizeBlock: /// HoistCommonCode - Hoist common instruction sequences at the start of basic /// blocks to their common predecessor. -/// NOTE: This optimization does not update live-in information so it must be -/// run after all passes that require correct liveness information. bool BranchFolder::HoistCommonCode(MachineFunction &MF) { -#if 1 - // FIXME: Temporarily disabled. - return false; -#endif bool MadeChange = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; @@ -1406,7 +1492,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, continue; if (MO.isUse()) { Uses.insert(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) Uses.insert(*AS); } else if (!MO.isDead()) // Don't try to hoist code in the rare case the terminator defines a @@ -1429,6 +1515,9 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, bool IsDef = false; for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) { const MachineOperand &MO = PI->getOperand(i); + // If PI has a regmask operand, it is probably a call. Separate away. + if (MO.isRegMask()) + return Loc; if (!MO.isReg() || MO.isUse()) continue; unsigned Reg = MO.getReg(); @@ -1465,17 +1554,17 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, continue; if (MO.isUse()) { Uses.insert(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) Uses.insert(*AS); } else { if (Uses.count(Reg)) { Uses.erase(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) Uses.erase(*SR); // Use getSubRegisters to be conservative - Defs.insert(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Defs.insert(*AS); } + Defs.insert(Reg); + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) + Defs.insert(*AS); } } @@ -1511,7 +1600,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { return false; bool HasDups = false; - SmallSet LocalDefs; + SmallVector LocalDefs; + SmallSet LocalDefsSet; MachineBasicBlock::iterator TIB = TBB->begin(); MachineBasicBlock::iterator FIB = FBB->begin(); MachineBasicBlock::iterator TIE = TBB->end(); @@ -1540,6 +1630,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { bool IsSafe = true; for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { MachineOperand &MO = TIB->getOperand(i); + // Don't attempt to hoist instructions with register masks. + if (MO.isRegMask()) { + IsSafe = false; + break; + } if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -1568,16 +1663,17 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { IsSafe = false; break; } - - LocalDefs.insert(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) - LocalDefs.insert(*SR); - } else if (!LocalDefs.count(Reg)) { + } else if (!LocalDefsSet.count(Reg)) { if (Defs.count(Reg)) { // Use is defined by the instruction at the point of insertion. IsSafe = false; break; } + + if (MO.isKill() && Uses.count(Reg)) + // Kills a register that's read by the instruction at the point of + // insertion. Remove the kill marker. + MO.setIsKill(false); } } if (!IsSafe) @@ -1587,7 +1683,32 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore)) break; - HasDups = true;; + // Remove kills from LocalDefsSet, these registers had short live ranges. + for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { + MachineOperand &MO = TIB->getOperand(i); + if (!MO.isReg() || !MO.isUse() || !MO.isKill()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || !LocalDefsSet.count(Reg)) + continue; + for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR) + LocalDefsSet.erase(*OR); + } + + // Track local defs so we can update liveins. + for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { + MachineOperand &MO = TIB->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.isDead()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + LocalDefs.push_back(Reg); + for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR) + LocalDefsSet.insert(*OR); + } + + HasDups = true; ++TIB; ++FIB; } @@ -1597,6 +1718,16 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { MBB->splice(Loc, TBB, TBB->begin(), TIB); FBB->erase(FBB->begin(), FIB); + + // Update livein's. + for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Def = LocalDefs[i]; + if (LocalDefsSet.count(Def)) { + TBB->addLiveIn(Def); + FBB->addLiveIn(Def); + } + } + ++NumHoist; return true; }