#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB);
bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
- BasicBlock *PredBB);
+ const SmallVectorImpl<BasicBlock *> &PredBBs);
typedef SmallVectorImpl<std::pair<ConstantInt*,
BasicBlock*> > PredValueInfo;
bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
PredValueInfo &Result);
- bool ProcessThreadableEdges(Instruction *CondInst, BasicBlock *BB);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB);
bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
- bool ProcessJumpOnPHI(PHINode *PN);
+ bool ProcessBranchOnPHI(PHINode *PN);
+ bool ProcessBranchOnXOR(BinaryOperator *BO);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
};
/// runOnFunction - Top level algorithm.
///
bool JumpThreading::runOnFunction(Function &F) {
- DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n");
+ DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
FindLoopHeaders(F);
- bool AnotherIteration = true, EverChanged = false;
- while (AnotherIteration) {
- AnotherIteration = false;
- bool Changed = false;
+ bool Changed, EverChanged = false;
+ do {
+ Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
BasicBlock *BB = I;
// Thread all of the branches we can over this block.
// edges which simplifies the CFG.
if (pred_begin(BB) == pred_end(BB) &&
BB != &BB->getParent()->getEntryBlock()) {
- DEBUG(errs() << " JT: Deleting dead block '" << BB->getName()
+ DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
<< "' with terminator: " << *BB->getTerminator() << '\n');
LoopHeaders.erase(BB);
DeleteDeadBlock(BB);
if (BBI->isTerminator()) {
// Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
// block, we have to make sure it isn't in the LoopHeaders set. We
- // reinsert afterward in the rare case when the block isn't deleted.
+ // reinsert afterward if needed.
bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+ BasicBlock *Succ = BI->getSuccessor(0);
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
Changed = true;
- else if (ErasedFromLoopHeaders)
+ // If we deleted BB and BB was the header of a loop, then the
+ // successor is now the header of the loop.
+ BB = Succ;
+ }
+
+ if (ErasedFromLoopHeaders)
LoopHeaders.insert(BB);
}
}
}
}
- AnotherIteration = Changed;
EverChanged |= Changed;
- }
+ } while (Changed);
LoopHeaders.clear();
return EverChanged;
/// predecessor based on its terminator.
//
if (LVI) {
+ // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+ // "I" is a non-local compare-with-a-constant instruction. This would be
+ // able to handle value inequalities better, for example if the compare is
+ // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+ // Perhaps getConstantOnEdge should be smart enough to do this?
+
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
else
InterestingVal = ConstantInt::getFalse(I->getContext());
- // Scan for the sentinel.
+ // Scan for the sentinel. If we find an undef, force it to the
+ // interesting value: x|undef -> true and x&undef -> false.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
- if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0)
+ if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
Result.push_back(LHSVals[i]);
+ Result.back().first = InterestingVal;
+ }
for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
- if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0)
+ if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
Result.push_back(RHSVals[i]);
+ Result.back().first = InterestingVal;
+ }
return !Result.empty();
}
// Invert the known values.
for (unsigned i = 0, e = Result.size(); i != e; ++i)
- Result[i].first =
- cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+ if (Result[i].first)
+ Result[i].first =
+ cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
return true;
}
}
Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
- if (Res == 0) continue;
+ if (Res == 0) {
+ if (!LVI || !isa<Constant>(RHS))
+ continue;
+
+ LazyValueInfo::Tristate
+ ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
+ cast<Constant>(RHS), PredBB, BB);
+ if (ResT == LazyValueInfo::Unknown)
+ continue;
+ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
+ }
if (isa<UndefValue>(Res))
Result.push_back(std::make_pair((ConstantInt*)0, PredBB));
// If comparing a live-in value against a constant, see if we know the
// live-in value on any predecessors.
if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
+ Cmp->getType()->isIntegerTy() && // Not vector compare.
(!isa<Instruction>(Cmp->getOperand(0)) ||
cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
- Constant *PredCst = LVI->getConstantOnEdge(Cmp->getOperand(0), *PI, BB);
- if (PredCst == 0)
+ LazyValueInfo::Tristate
+ Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, *PI, BB);
+ if (Res == LazyValueInfo::Unknown)
continue;
-
- // Constant fold the compare.
- Value *Res = SimplifyCmpInst(Cmp->getPredicate(), PredCst, RHSCst, TD);
- if (isa<ConstantInt>(Res) || isa<UndefValue>(Res))
- Result.push_back(std::make_pair(dyn_cast<ConstantInt>(Res), *PI));
+
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI));
}
return !Result.empty();
/// ProcessBlock - If there are any predecessors whose control can be threaded
/// through to a successor, transform them now.
bool JumpThreading::ProcessBlock(BasicBlock *BB) {
+ // If the block is trivially dead, just return and let the caller nuke it.
+ // This simplifies other transformations.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock())
+ return false;
+
// If this block has a single predecessor, and if that pred has a single
// successor, merge the blocks. This encourages recursive jump threading
// because now the condition in this block can be threaded through
// terminator to an unconditional branch. This can occur due to threading in
// other blocks.
if (isa<ConstantInt>(Condition)) {
- DEBUG(errs() << " In block '" << BB->getName()
+ DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding terminator: " << *BB->getTerminator() << '\n');
++NumFolds;
ConstantFoldTerminator(BB);
RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
}
- DEBUG(errs() << " In block '" << BB->getName()
+ DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding undef terminator: " << *BBTerm << '\n');
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
// br COND, BBX, BBY
// BBX:
// br COND, BBZ, BBW
- if (!Condition->hasOneUse() && // Multiple uses.
+ if (!LVI &&
+ !Condition->hasOneUse() && // Multiple uses.
(CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
if (isa<BranchInst>(BB->getTerminator())) {
}
// All the rest of our checks depend on the condition being an instruction.
- if (CondInst == 0)
+ if (CondInst == 0) {
+ // FIXME: Unify this with code below.
+ if (LVI && ProcessThreadableEdges(Condition, BB))
+ return true;
return false;
-
- // See if this is a phi node in the current block.
- if (PHINode *PN = dyn_cast<PHINode>(CondInst))
- if (PN->getParent() == BB)
- return ProcessJumpOnPHI(PN);
+ }
+
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
- if (!isa<PHINode>(CondCmp->getOperand(0)) ||
- cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB) {
+ if (!LVI &&
+ (!isa<PHINode>(CondCmp->getOperand(0)) ||
+ cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) {
// If we have a comparison, loop over the predecessors to see if there is
// a condition with a lexically identical value.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
// we see one, check to see if it's partially redundant. If so, insert a PHI
// which can then be used to thread the values.
//
- // This is particularly important because reg2mem inserts loads and stores all
- // over the place, and this blocks jump threading if we don't zap them.
Value *SimplifyValue = CondInst;
if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
if (isa<Constant>(CondCmp->getOperand(1)))
SimplifyValue = CondCmp->getOperand(0);
+ // TODO: There are other places where load PRE would be profitable, such as
+ // more complex comparisons.
if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
if (SimplifyPartiallyRedundantLoad(LI))
return true;
if (ProcessThreadableEdges(CondInst, BB))
return true;
+ // If this is an otherwise-unfoldable branch on a phi node in the current
+ // block, see if we can simplify.
+ if (PHINode *PN = dyn_cast<PHINode>(CondInst))
+ if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnPHI(PN);
+
+
+ // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
+ if (CondInst->getOpcode() == Instruction::Xor &&
+ CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
+
// TODO: If we have: "br (X > 0)" and we have a predecessor where we know
- // "(X == 4)" thread through this block.
+ // "(X == 4)", thread through this block.
return false;
}
else if (PredBI->getSuccessor(0) != BB)
BranchDir = false;
else {
- DEBUG(errs() << " In block '" << PredBB->getName()
+ DEBUG(dbgs() << " In block '" << PredBB->getName()
<< "' folding terminator: " << *PredBB->getTerminator() << '\n');
++NumFolds;
ConstantFoldTerminator(PredBB);
// If the dest block has one predecessor, just fix the branch condition to a
// constant and fold it.
if (BB->getSinglePredecessor()) {
- DEBUG(errs() << " In block '" << BB->getName()
+ DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding condition to '" << BranchDir << "': "
<< *BB->getTerminator() << '\n');
++NumFolds;
if (PredSI->getSuccessor(PredCase) != DestBB &&
DestSI->getSuccessor(i) != DestBB)
continue;
+
+ // Do not forward this if it already goes to this destination; doing so
+ // would cause an infinite loop.
+ if (PredSI->getSuccessor(PredCase) == DestSucc)
+ continue;
// Otherwise, we're safe to make the change. Make sure that the edge from
// DestSI to DestSucc is not critical and has no PHI nodes.
- DEBUG(errs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI);
- DEBUG(errs() << "THROUGH: " << *DestSI);
+ DEBUG(dbgs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI);
+ DEBUG(dbgs() << "THROUGH: " << *DestSI);
// If the destination has PHI nodes, just split the edge for updating
// simplicity.
Value *LoadedPtr = LI->getOperand(0);
// If the loaded operand is defined in the LoadBB, it can't be available.
- // FIXME: Could do PHI translation, that would be fun :)
+ // TODO: Could do simple PHI translation, that would be fun :)
if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
if (PtrOp->getParent() == LoadBB)
return false;
// the entry to its block.
BasicBlock::iterator BBIt = LI;
- if (Value *AvailableVal = FindAvailableLoadedValue(LoadedPtr, LoadBB,
- BBIt, 6)) {
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
// If the value if the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
//cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
// Split them out to their own block.
UnavailablePred =
SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-split", this);
+ "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr",
+ Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+ LI->getAlignment(),
UnavailablePred->getTerminator());
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
return MostPopularDest;
}
-bool JumpThreading::ProcessThreadableEdges(Instruction *CondInst,
- BasicBlock *BB) {
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
// If threading this would thread across a loop header, don't even try to
// thread the edge.
if (LoopHeaders.count(BB))
return false;
SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
- if (!ComputeValueKnownInPredecessors(CondInst, BB, PredValues))
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
return false;
assert(!PredValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
- DEBUG(errs() << "IN BB: " << *BB;
+ DEBUG(dbgs() << "IN BB: " << *BB;
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
- errs() << " BB '" << BB->getName() << "': FOUND condition = ";
+ dbgs() << " BB '" << BB->getName() << "': FOUND condition = ";
if (PredValues[i].first)
- errs() << *PredValues[i].first;
+ dbgs() << *PredValues[i].first;
else
- errs() << "UNDEF";
- errs() << " for pred '" << PredValues[i].second->getName()
+ dbgs() << "UNDEF";
+ dbgs() << " for pred '" << PredValues[i].second->getName()
<< "'.\n";
});
return ThreadEdge(BB, PredsToFactor, MostPopularDest);
}
-/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
-/// the current block. See if there are any simplifications we can do based on
-/// inputs to the phi node.
+/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
+/// a PHI node in the current block. Try to duplicate the block into a
+/// predecessor that ends in an unconditional branch; return true on success.
///
-bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
+bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
BasicBlock *BB = PN->getParent();
- // If any of the predecessor blocks end in an unconditional branch, we can
- // *duplicate* the jump into that block in order to further encourage jump
- // threading and to eliminate cases where we have branch on a phi of an icmp
- // (branch on icmp is much better).
-
- // We don't want to do this tranformation for switches, because we don't
- // really want to duplicate a switch.
- if (isa<SwitchInst>(BB->getTerminator()))
- return false;
+ // TODO: PredBBs could hold every predecessor that feeds a common PHI value,
+ // so that the duplication is done once for all of them.
+ SmallVector<BasicBlock*, 1> PredBBs;
+ PredBBs.resize(1); // Single slot, refilled for each candidate predecessor.
- // Look for unconditional branch predecessors.
+ // If any of the predecessor blocks end in an unconditional branch, we can
+ // *duplicate* the conditional branch into that block in order to further
+ // encourage jump threading and to eliminate cases where we have branch on a
+ // phi of an icmp (branch on icmp is much better).
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PredBB = PN->getIncomingBlock(i);
if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
- if (PredBr->isUnconditional() &&
- // Try to duplicate BB into PredBB.
- DuplicateCondBranchOnPHIIntoPred(BB, PredBB))
- return true;
+ if (PredBr->isUnconditional()) {
+ PredBBs[0] = PredBB;
+ // Try to duplicate BB into PredBB.
+ if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
+ return true;
+ }
}
return false;
}
+/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
+/// a xor instruction in the current block. See if there are any
+/// simplifications we can do based on inputs to the xor. Returns true if
+/// the xor was rewritten in place or the attempt to duplicate BB succeeded.
+bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
+ BasicBlock *BB = BO->getParent();
+
+ // If either the LHS or RHS of the xor is a constant, don't do this
+ // optimization.
+ if (isa<ConstantInt>(BO->getOperand(0)) ||
+ isa<ConstantInt>(BO->getOperand(1)))
+ return false;
+
+ // If the first instruction in BB isn't a phi, we won't be able to infer
+ // anything special about any particular predecessor.
+ if (!isa<PHINode>(BB->front()))
+ return false;
+
+ // If we have a xor as the branch input to this block, and we know that the
+ // LHS or RHS of the xor in any predecessor is true/false, then we can clone
+ // the condition into the predecessor and fix that value to true, saving some
+ // logical ops on that path and encouraging other paths to simplify.
+ //
+ // This copies something like this:
+ //
+ // BB:
+ // %X = phi i1 [1], [%X']
+ // %Y = icmp eq i32 %A, %B
+ // %Z = xor i1 %X, %Y
+ // br i1 %Z, ...
+ //
+ // Into:
+ // BB':
+ // %Y = icmp ne i32 %A, %B
+ // br i1 %Y, ...
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> XorOpValues;
+ bool isLHS = true; // True while operand 0 is the one with known values.
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) {
+ assert(XorOpValues.empty());
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues))
+ return false;
+ isLHS = false;
+ }
+
+ assert(!XorOpValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ // Scan the information to see which is most popular: true or false. The
+ // predecessors can be of the set true, false, or undef.
+ unsigned NumTrue = 0, NumFalse = 0;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (!XorOpValues[i].first) continue; // Ignore undefs for the count.
+ if (XorOpValues[i].first->isZero())
+ ++NumFalse;
+ else
+ ++NumTrue;
+ }
+
+ // Determine which value to split on: true, false, or undef (null) if neither.
+ ConstantInt *SplitVal = 0;
+ if (NumTrue > NumFalse)
+ SplitVal = ConstantInt::getTrue(BB->getContext());
+ else if (NumTrue != 0 || NumFalse != 0)
+ SplitVal = ConstantInt::getFalse(BB->getContext());
+
+ // Collect all of the blocks that this can be folded into so that we can
+ // factor this once and clone it once.
+ SmallVector<BasicBlock*, 8> BlocksToFoldInto;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue;
+
+ BlocksToFoldInto.push_back(XorOpValues[i].second);
+ }
+
+ // If we inferred a value for all of the predecessors, then duplication won't
+ // help us. However, we can just replace the LHS or RHS with the constant.
+ if (BlocksToFoldInto.size() ==
+ cast<PHINode>(BB->front()).getNumIncomingValues()) {
+ if (SplitVal == 0) {
+ // If all preds provide undef, just nuke the xor, because it is undef too.
+ BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
+ BO->eraseFromParent();
+ } else if (SplitVal->isZero()) {
+ // If all preds provide 0, replace the xor with the other input (x^0 == x).
+ BO->replaceAllUsesWith(BO->getOperand(isLHS));
+ BO->eraseFromParent();
+ } else {
+ // If all preds provide 1, replace the known operand with constant 1.
+ BO->setOperand(!isLHS, SplitVal);
+ }
+
+ return true;
+ }
+
+ // Try to duplicate BB into the collected predecessor blocks.
+ return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
+}
+
/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
if (SuccBB == BB) {
- DEBUG(errs() << " Not threading across BB '" << BB->getName()
+ DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
<< "' - would thread to self!\n");
return false;
}
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
- DEBUG(errs() << " Not threading across loop header BB '" << BB->getName()
+ DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName()
<< "' to dest BB '" << SuccBB->getName()
<< "' - it might create an irreducible loop!\n");
return false;
unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
if (JumpThreadCost > Threshold) {
- DEBUG(errs() << " Not threading BB '" << BB->getName()
+ DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
return false;
}
if (PredBBs.size() == 1)
PredBB = PredBBs[0];
else {
- DEBUG(errs() << " Factoring out " << PredBBs.size()
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
".thr_comm", this);
}
// And finally, do it!
- DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '"
+ DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
<< SuccBB->getName() << "' with cost: " << JumpThreadCost
<< ", across block:\n "
<< *BB << "\n");
if (UsesToRename.empty())
continue;
- DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
- DEBUG(errs() << "\n");
+ DEBUG(dbgs() << "\n");
}
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
- BI = NewBB->begin();
- for (BasicBlock::iterator E = NewBB->end(); BI != E; ) {
- Instruction *Inst = BI++;
-
- if (Value *V = SimplifyInstruction(Inst, TD)) {
- WeakVH BIHandle(BI);
- ReplaceAndSimplifyAllUses(Inst, V, TD);
- if (BIHandle == 0)
- BI = NewBB->begin();
- continue;
- }
-
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
- }
+ SimplifyInstructionsInBlock(NewBB, TD);
// Threaded an edge!
++NumThreads;
/// improves the odds that the branch will be on an analyzable instruction like
/// a compare.
bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
- BasicBlock *PredBB) {
+ const SmallVectorImpl<BasicBlock *> &PredBBs) {
+ assert(!PredBBs.empty() && "Can't handle an empty set");
+
// If BB is a loop header, then duplicating this block outside the loop would
// cause us to transform this into an irreducible loop, don't do this.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
- DEBUG(errs() << " Not duplicating loop header '" << BB->getName()
- << "' into predecessor block '" << PredBB->getName()
+ DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
+ << "' into predecessor block '" << PredBBs[0]->getName()
<< "' - it might create an irreducible loop!\n");
return false;
}
unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
if (DuplicationCost > Threshold) {
- DEBUG(errs() << " Not duplicating BB '" << BB->getName()
+ DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
return false;
}
+ // And finally, do it! Start by factoring the predecessors if needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+ ".thr_comm", this);
+ }
+
// Okay, we decided to do this! Clone all the instructions in BB onto the end
// of PredBB.
- DEBUG(errs() << " Duplicating block '" << BB->getName() << "' into end of '"
+ DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
<< PredBB->getName() << "' to eliminate branch on phi. Cost: "
<< DuplicationCost << " block is:" << *BB << "\n");
+ // Unless PredBB ends with an unconditional branch, split the edge so that we
+ // can just clone the bits from BB into the end of the new PredBB.
+ BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
+
+ if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
+ PredBB = SplitEdge(PredBB, BB, this);
+ OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+ }
+
// We are going to have to map operands from the original BB block into the
// PredBB block. Evaluate PHI nodes in BB.
DenseMap<Instruction*, Value*> ValueMapping;
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
- BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
-
// Clone the non-phi instructions of BB into PredBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; BI != BB->end(); ++BI) {
Instruction *New = BI->clone();
- New->setName(BI->getName());
- PredBB->getInstList().insert(OldPredBranch, New);
- ValueMapping[BI] = New;
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (I != ValueMapping.end())
New->setOperand(i, I->second);
}
+
+ // If this instruction can be simplified after the operands are updated,
+ // just use the simplified value instead. This frequently happens due to
+ // phi translation.
+ if (Value *IV = SimplifyInstruction(New, TD)) {
+ delete New;
+ ValueMapping[BI] = IV;
+ } else {
+ // Otherwise, insert the new instruction into the block.
+ New->setName(BI->getName());
+ PredBB->getInstList().insert(OldPredBranch, New);
+ ValueMapping[BI] = New;
+ }
}
// Check to see if the targets of the branch had PHI nodes. If so, we need to
if (UsesToRename.empty())
continue;
- DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
- DEBUG(errs() << "\n");
+ DEBUG(dbgs() << "\n");
}
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge