#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
using namespace llvm;
+using namespace llvm::PatternMatch;
+
+static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak",
+ cl::init(false), cl::Hidden);
namespace {
class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
+
+ /// BackEdges - Keep a set of all the loop back edges.
+ ///
+ SmallSet<std::pair<BasicBlock*,BasicBlock*>, 8> BackEdges;
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
bool OptimizeBlock(BasicBlock &BB);
- bool OptimizeLoadStoreInst(Instruction *I, Value *Addr,
- const Type *AccessTy,
- DenseMap<Value*,Value*> &SunkAddrs);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
+ DenseMap<Value*,Value*> &SunkAddrs);
bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
DenseMap<Value*,Value*> &SunkAddrs);
bool OptimizeExtUses(Instruction *I);
+ void findLoopBackEdges(Function &F);
};
}
return new CodeGenPrepare(TLI);
}
+/// findLoopBackEdges - Do a DFS walk to find loop back edges.
+///
+void CodeGenPrepare::findLoopBackEdges(Function &F) {
+ SmallPtrSet<BasicBlock*, 8> Visited;
+ SmallVector<std::pair<BasicBlock*, succ_iterator>, 8> VisitStack;
+ SmallPtrSet<BasicBlock*, 8> InStack;
+
+ BasicBlock *BB = &F.getEntryBlock();
+ if (succ_begin(BB) == succ_end(BB))
+ return;
+ Visited.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ InStack.insert(BB);
+ do {
+ std::pair<BasicBlock*, succ_iterator> &Top = VisitStack.back();
+ BasicBlock *ParentBB = Top.first;
+ succ_iterator &I = Top.second;
+
+ bool FoundNew = false;
+ while (I != succ_end(ParentBB)) {
+ BB = *I++;
+ if (Visited.insert(BB)) {
+ FoundNew = true;
+ break;
+ }
+ // Successor is on the DFS stack (InStack), so this is a back edge.
+ if (InStack.count(BB))
+ BackEdges.insert(std::make_pair(ParentBB, BB));
+ }
+
+ if (FoundNew) {
+ // Go down one level if there is an unvisited successor.
+ InStack.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ } else {
+ // Go up one level.
+ std::pair<BasicBlock*, succ_iterator> &Pop = VisitStack.back();
+ InStack.erase(Pop.first);
+ VisitStack.pop_back();
+ }
+ } while (!VisitStack.empty());
+}
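For reference, the same three-set iterative DFS (Visited, InStack, plus an explicit stack of (node, next-successor) frames) can be sketched standalone on a toy adjacency-list graph. This is a minimal illustration under invented names (Graph, findBackEdges), not code from this patch:

#include <set>
#include <utility>
#include <vector>

typedef std::vector<std::vector<int> > Graph;  // successor lists per node

static void findBackEdges(const Graph &G, int Entry,
                          std::set<std::pair<int, int> > &BackEdges) {
  std::set<int> Visited, InStack;
  // Each frame holds a node and the index of its next unexplored successor.
  std::vector<std::pair<int, unsigned> > Stack;

  if (G[Entry].empty())
    return;
  Visited.insert(Entry);
  InStack.insert(Entry);
  Stack.push_back(std::make_pair(Entry, 0u));
  do {
    int Parent = Stack.back().first;
    unsigned &I = Stack.back().second;

    int Found = -1;
    while (I < G[Parent].size()) {
      int Succ = G[Parent][I++];
      if (Visited.insert(Succ).second) {
        Found = Succ;                     // First visit: descend into Succ.
        break;
      }
      if (InStack.count(Succ))            // Succ is on the current DFS path,
        BackEdges.insert(std::make_pair(Parent, Succ));  // so it's a back edge.
    }

    if (Found != -1) {
      // Go down one level to the unvisited successor.
      InStack.insert(Found);
      Stack.push_back(std::make_pair(Found, 0u));
    } else {
      // All successors explored; go up one level.
      InStack.erase(Parent);
      Stack.pop_back();
    }
  } while (!Stack.empty());
}

On the CFG 0 -> 1 -> 2 -> 1, this records the single back edge (2, 1), matching what the pass's BackEdges set would hold for the corresponding blocks.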
+
bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
// First pass, eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+ // Now find loop back edges.
+ findLoopBackEdges(F);
+
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
return EverMadeChange;
}
-/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes
-/// and an unconditional branch. Passes before isel (e.g. LSR/loopsimplify)
-/// often split edges in ways that are non-optimal for isel. Start by
-/// eliminating these blocks so we can split them the way we want them.
+/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
+/// debug info directives, and an unconditional branch. Passes before isel
+/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
+/// isel. Start by eliminating these blocks so we can split them the way we
+/// want them.
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
bool MadeChange = false;
// Note that this intentionally skips the entry block.
if (!BI || !BI->isUnconditional())
continue;
- // If the instruction before the branch isn't a phi node, then other stuff
- // is happening here.
+ // If the instruction before the branch (skipping debug info) isn't a phi
+ // node, then other stuff is happening here.
BasicBlock::iterator BBI = BI;
if (BBI != BB->begin()) {
--BBI;
- if (!isa<PHINode>(BBI)) continue;
+ while (isa<DbgInfoIntrinsic>(BBI)) {
+ if (BBI == BB->begin())
+ break;
+ --BBI;
+ }
+ if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
+ continue;
}
// Do not break infinite loops.
// If the destination block has a single pred, then this is a trivial edge,
// just collapse it.
- if (DestBB->getSinglePredecessor()) {
- // If DestBB has single-entry PHI nodes, fold them.
- while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
- Value *NewVal = PN->getIncomingValue(0);
- // Replace self referencing PHI with undef, it must be dead.
- //if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
- PN->replaceAllUsesWith(NewVal);
- PN->eraseFromParent();
+ if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
+ if (SinglePred != DestBB) {
+ // Remember if SinglePred was the entry block of the function. If so, we
+ // will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(DestBB);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
+ return;
}
-
- // Splice all the PHI nodes from BB over to DestBB.
- DestBB->getInstList().splice(DestBB->begin(), BB->getInstList(),
- BB->begin(), BI);
-
- // Anything that branched to BB now branches to DestBB.
- BB->replaceAllUsesWith(DestBB);
-
- // Nuke BB.
- BB->eraseFromParent();
-
- DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
- return;
}
// Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
/// phi nodes (otherwise critical edges are ok). If there is already another
/// predecessor of the succ that is empty (and thus has no phi nodes), use it
/// instead of introducing a new block.
-static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, Pass *P) {
+static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
+ SmallSet<std::pair<BasicBlock*,BasicBlock*>, 8> &BackEdges,
+ Pass *P) {
BasicBlock *TIBB = TI->getParent();
BasicBlock *Dest = TI->getSuccessor(SuccNum);
assert(isa<PHINode>(Dest->begin()) &&
"This should only be called if Dest has a PHI!");
+ // Do not split edges to EH landing pads.
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) {
+ if (Invoke->getSuccessor(1) == Dest)
+ return;
+ }
+
// As a hack, never split backedges of loops. Even though the copy for any
// PHIs inserted on the backedge would be dead for exits from the loop, we
// assume that the cost of *splitting* the backedge would be too high.
- if (Dest == TIBB)
+ if (BackEdges.count(std::make_pair(TIBB, Dest)))
return;
- /// TIPHIValues - This array is lazily computed to determine the values of
- /// PHIs in Dest that TI would provide.
- SmallVector<Value*, 32> TIPHIValues;
+ if (!FactorCommonPreds) {
+ /// TIPHIValues - This array is lazily computed to determine the values of
+ /// PHIs in Dest that TI would provide.
+ SmallVector<Value*, 32> TIPHIValues;
+
+ // Check to see if Dest has any blocks that can be used as a split edge for
+ // this terminator.
+ for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ // To be usable, the pred has to end with an uncond branch to the dest.
+ BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (!PredBr || !PredBr->isUnconditional())
+ continue;
+ // Must be empty other than the branch and debug info.
+ BasicBlock::iterator I = Pred->begin();
+ while (isa<DbgInfoIntrinsic>(I))
+ I++;
+ if (dyn_cast<Instruction>(I) != PredBr)
+ continue;
+ // Cannot be the entry block; its label does not get emitted.
+ if (Pred == &(Dest->getParent()->getEntryBlock()))
+ continue;
- // Check to see if Dest has any blocks that can be used as a split edge for
- // this terminator.
- for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
- BasicBlock *Pred = *PI;
- // To be usable, the pred has to end with an uncond branch to the dest.
- BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
- if (!PredBr || !PredBr->isUnconditional() ||
- // Must be empty other than the branch.
- &Pred->front() != PredBr ||
- // Cannot be the entry block; its label does not get emitted.
- Pred == &(Dest->getParent()->getEntryBlock()))
- continue;
+ // Finally, since we know that Dest has phi nodes in it, we have to make
+ // sure that jumping to Pred will have the same effect as going to Dest in
+ // terms of PHI values.
+ PHINode *PN;
+ unsigned PHINo = 0;
+ bool FoundMatch = true;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
+ if (PHINo == TIPHIValues.size())
+ TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));
+
+ // If the PHI entry doesn't work, we can't use this pred.
+ if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
+ FoundMatch = false;
+ break;
+ }
+ }
- // Finally, since we know that Dest has phi nodes in it, we have to make
- // sure that jumping to Pred will have the same affect as going to Dest in
- // terms of PHI values.
- PHINode *PN;
- unsigned PHINo = 0;
- bool FoundMatch = true;
- for (BasicBlock::iterator I = Dest->begin();
- (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
- if (PHINo == TIPHIValues.size())
- TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));
-
- // If the PHI entry doesn't work, we can't use this pred.
- if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
- FoundMatch = false;
- break;
+ // If we found a workable predecessor, change TI to branch to Succ.
+ if (FoundMatch) {
+ Dest->removePredecessor(TIBB);
+ TI->setSuccessor(SuccNum, Pred);
+ return;
}
}
- // If we found a workable predecessor, change TI to branch to Succ.
- if (FoundMatch) {
- Dest->removePredecessor(TIBB);
- TI->setSuccessor(SuccNum, Pred);
- return;
+ SplitCriticalEdge(TI, SuccNum, P, true);
+ return;
+ }
+
+ PHINode *PN;
+ SmallVector<Value*, 8> TIPHIValues;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I)
+ TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));
+
+ SmallVector<BasicBlock*, 8> IdenticalPreds;
+ for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ if (BackEdges.count(std::make_pair(Pred, Dest)))
+ continue;
+ if (Pred == TIBB)
+ IdenticalPreds.push_back(Pred);
+ else {
+ bool Identical = true;
+ unsigned PHINo = 0;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo)
+ if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
+ Identical = false;
+ break;
+ }
+ if (Identical)
+ IdenticalPreds.push_back(Pred);
}
}
- SplitCriticalEdge(TI, SuccNum, P, true);
+ assert(!IdenticalPreds.empty());
+ SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(),
+ ".critedge", P);
}
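In other words, the FactorCommonPreds path computes TIBB's vector of incoming PHI values once, gathers every non-backedge predecessor that feeds Dest the identical vector, and reroutes the whole group through the one new .critedge block that SplitBlockPredecessors creates. A toy sketch of just that grouping step, with invented stand-in types (ints for blocks and values), might look like:

#include <utility>
#include <vector>

// One entry per predecessor: (pred id, its incoming value for each PHI).
typedef std::pair<int, std::vector<int> > PredInfo;

// Return the preds whose incoming PHI values all match Ref; these are the
// ones that can safely share a single split block in front of Dest.
static std::vector<int>
collectIdenticalPreds(const std::vector<PredInfo> &Preds,
                      const std::vector<int> &Ref) {
  std::vector<int> Identical;
  for (unsigned i = 0, e = Preds.size(); i != e; ++i)
    if (Preds[i].second == Ref)   // every PHI entry agrees
      Identical.push_back(Preds[i].first);
  return Identical;
}

Splitting the group together means each PHI in Dest ends up with one merged entry for the new block instead of one entry per predecessor, which is the factoring the hidden flag is experimenting with.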
+
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, int->uint, or
/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
+///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
// If this is a noop copy,
MVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
// Figure out which BB this cast is used in. For PHI's this is the
// appropriate predecessor block.
BasicBlock *UserBB = User->getParent();
if (PHINode *PN = dyn_cast<PHINode>(User)) {
- unsigned OpVal = UI.getOperandNo()/2;
- UserBB = PN->getIncomingBlock(OpVal);
+ UserBB = PN->getIncomingBlock(UI);
}
// Preincrement use iterator so we don't invalidate it.
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
-static bool OptimizeCmpExpression(CmpInst *CI){
-
+static bool OptimizeCmpExpression(CmpInst *CI) {
BasicBlock *DefBB = CI->getParent();
/// InsertedCmp - Only insert a cmp in each block once.
return MadeChange;
}
-/// EraseDeadInstructions - Erase any dead instructions
-static void EraseDeadInstructions(Value *V) {
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I || !I->use_empty()) return;
-
- SmallPtrSet<Instruction*, 16> Insts;
- Insts.insert(I);
-
- while (!Insts.empty()) {
- I = *Insts.begin();
- Insts.erase(I);
- if (isInstructionTriviallyDead(I)) {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i)))
- Insts.insert(U);
- I->eraseFromParent();
- }
- }
-}
-
-namespace {
-
-/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode which
-/// holds actual Value*'s for register values.
-struct ExtAddrMode : public TargetLowering::AddrMode {
- Value *BaseReg;
- Value *ScaledReg;
- ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
- void dump() const;
-};
-
-static std::ostream &operator<<(std::ostream &OS, const ExtAddrMode &AM) {
- bool NeedPlus = false;
- OS << "[";
- if (AM.BaseGV)
- OS << (NeedPlus ? " + " : "")
- << "GV:%" << AM.BaseGV->getName(), NeedPlus = true;
-
- if (AM.BaseOffs)
- OS << (NeedPlus ? " + " : "") << AM.BaseOffs, NeedPlus = true;
-
- if (AM.BaseReg)
- OS << (NeedPlus ? " + " : "")
- << "Base:%" << AM.BaseReg->getName(), NeedPlus = true;
- if (AM.Scale)
- OS << (NeedPlus ? " + " : "")
- << AM.Scale << "*%" << AM.ScaledReg->getName(), NeedPlus = true;
-
- return OS << "]";
-}
-
-void ExtAddrMode::dump() const {
- cerr << *this << "\n";
-}
-
-}
-
-static bool TryMatchingScaledValue(Value *ScaleReg, int64_t Scale,
- const Type *AccessTy, ExtAddrMode &AddrMode,
- SmallVector<Instruction*, 16> &AddrModeInsts,
- const TargetLowering &TLI, unsigned Depth);
-
-/// FindMaximalLegalAddressingMode - If we can, try to merge the computation of
-/// Addr into the specified addressing mode. If Addr can't be added to AddrMode
-/// this returns false. This assumes that Addr is either a pointer type or
-/// intptr_t for the target.
-static bool FindMaximalLegalAddressingMode(Value *Addr, const Type *AccessTy,
- ExtAddrMode &AddrMode,
- SmallVector<Instruction*, 16> &AddrModeInsts,
- const TargetLowering &TLI,
- unsigned Depth) {
-
- // If this is a global variable, fold it into the addressing mode if possible.
- if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
- if (AddrMode.BaseGV == 0) {
- AddrMode.BaseGV = GV;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.BaseGV = 0;
- }
- } else if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
- AddrMode.BaseOffs += CI->getSExtValue();
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.BaseOffs -= CI->getSExtValue();
- } else if (isa<ConstantPointerNull>(Addr)) {
- return true;
- }
-
- // Look through constant exprs and instructions.
- unsigned Opcode = ~0U;
- User *AddrInst = 0;
- if (Instruction *I = dyn_cast<Instruction>(Addr)) {
- Opcode = I->getOpcode();
- AddrInst = I;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
- Opcode = CE->getOpcode();
- AddrInst = CE;
- }
-
- // Limit recursion to avoid exponential behavior.
- if (Depth == 5) { AddrInst = 0; Opcode = ~0U; }
-
- // If this is really an instruction, add it to our list of related
- // instructions.
- if (Instruction *I = dyn_cast_or_null<Instruction>(AddrInst))
- AddrModeInsts.push_back(I);
-
- switch (Opcode) {
- case Instruction::PtrToInt:
- // PtrToInt is always a noop, as we know that the int type is pointer sized.
- if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
- AddrMode, AddrModeInsts, TLI, Depth))
- return true;
- break;
- case Instruction::IntToPtr:
- // This inttoptr is a no-op if the integer type is pointer sized.
- if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
- TLI.getPointerTy()) {
- if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
- AddrMode, AddrModeInsts, TLI, Depth))
- return true;
- }
- break;
- case Instruction::Add: {
- // Check to see if we can merge in the RHS then the LHS. If so, we win.
- ExtAddrMode BackupAddrMode = AddrMode;
- unsigned OldSize = AddrModeInsts.size();
- if (FindMaximalLegalAddressingMode(AddrInst->getOperand(1), AccessTy,
- AddrMode, AddrModeInsts, TLI, Depth+1) &&
- FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
- AddrMode, AddrModeInsts, TLI, Depth+1))
- return true;
-
- // Restore the old addr mode info.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
-
- // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
- if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
- AddrMode, AddrModeInsts, TLI, Depth+1) &&
- FindMaximalLegalAddressingMode(AddrInst->getOperand(1), AccessTy,
- AddrMode, AddrModeInsts, TLI, Depth+1))
- return true;
-
- // Otherwise we definitely can't merge the ADD in.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- break;
- }
- case Instruction::Or: {
- ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
- if (!RHS) break;
- // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
- break;
- }
- case Instruction::Mul:
- case Instruction::Shl: {
- // Can only handle X*C and X << C, and can only handle this when the scale
- // field is available.
- ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
- if (!RHS) break;
- int64_t Scale = RHS->getSExtValue();
- if (Opcode == Instruction::Shl)
- Scale = 1 << Scale;
-
- if (TryMatchingScaledValue(AddrInst->getOperand(0), Scale, AccessTy,
- AddrMode, AddrModeInsts, TLI, Depth))
- return true;
- break;
- }
- case Instruction::GetElementPtr: {
- // Scan the GEP. We check it if it contains constant offsets and at most
- // one variable offset.
- int VariableOperand = -1;
- unsigned VariableScale = 0;
-
- int64_t ConstantOffset = 0;
- const TargetData *TD = TLI.getTargetData();
- gep_type_iterator GTI = gep_type_begin(AddrInst);
- for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
- const StructLayout *SL = TD->getStructLayout(STy);
- unsigned Idx =
- cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
- ConstantOffset += SL->getElementOffset(Idx);
- } else {
- uint64_t TypeSize = TD->getABITypeSize(GTI.getIndexedType());
- if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
- ConstantOffset += CI->getSExtValue()*TypeSize;
- } else if (TypeSize) { // Scales of zero don't do anything.
- // We only allow one variable index at the moment.
- if (VariableOperand != -1) {
- VariableOperand = -2;
- break;
- }
-
- // Remember the variable index.
- VariableOperand = i;
- VariableScale = TypeSize;
- }
- }
- }
-
- // If the GEP had multiple variable indices, punt.
- if (VariableOperand == -2)
- break;
-
- // A common case is for the GEP to only do a constant offset. In this case,
- // just add it to the disp field and check validity.
- if (VariableOperand == -1) {
- AddrMode.BaseOffs += ConstantOffset;
- if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
- // Check to see if we can fold the base pointer in too.
- if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
- AddrMode, AddrModeInsts, TLI,
- Depth+1))
- return true;
- }
- AddrMode.BaseOffs -= ConstantOffset;
- } else {
- // Check that this has no base reg yet. If so, we won't have a place to
- // put the base of the GEP (assuming it is not a null ptr).
- bool SetBaseReg = false;
- if (AddrMode.HasBaseReg) {
- if (!isa<ConstantPointerNull>(AddrInst->getOperand(0)))
- break;
- } else {
- AddrMode.HasBaseReg = true;
- AddrMode.BaseReg = AddrInst->getOperand(0);
- SetBaseReg = true;
- }
-
- // See if the scale amount is valid for this target.
- AddrMode.BaseOffs += ConstantOffset;
- if (TryMatchingScaledValue(AddrInst->getOperand(VariableOperand),
- VariableScale, AccessTy, AddrMode,
- AddrModeInsts, TLI, Depth)) {
- if (!SetBaseReg) return true;
-
- // If this match succeeded, we know that we can form an address with the
- // GepBase as the basereg. See if we can match *more*.
- AddrMode.HasBaseReg = false;
- AddrMode.BaseReg = 0;
- if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
- AddrMode, AddrModeInsts, TLI,
- Depth+1))
- return true;
- // Strange, shouldn't happen. Restore the base reg and succeed the easy
- // way.
- AddrMode.HasBaseReg = true;
- AddrMode.BaseReg = AddrInst->getOperand(0);
- return true;
- }
-
- AddrMode.BaseOffs -= ConstantOffset;
- if (SetBaseReg) {
- AddrMode.HasBaseReg = false;
- AddrMode.BaseReg = 0;
- }
- }
- break;
- }
- }
-
- if (Instruction *I = dyn_cast_or_null<Instruction>(AddrInst)) {
- assert(AddrModeInsts.back() == I && "Stack imbalance"); I = I;
- AddrModeInsts.pop_back();
- }
-
- // Worse case, the target should support [reg] addressing modes. :)
- if (!AddrMode.HasBaseReg) {
- AddrMode.HasBaseReg = true;
- // Still check for legality in case the target supports [imm] but not [i+r].
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) {
- AddrMode.BaseReg = Addr;
- return true;
- }
- AddrMode.HasBaseReg = false;
- }
-
- // If the base register is already taken, see if we can do [r+r].
- if (AddrMode.Scale == 0) {
- AddrMode.Scale = 1;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) {
- AddrMode.ScaledReg = Addr;
- return true;
- }
- AddrMode.Scale = 0;
- }
- // Couldn't match.
- return false;
-}
-
-/// TryMatchingScaledValue - Try adding ScaleReg*Scale to the specified
-/// addressing mode. Return true if this addr mode is legal for the target,
-/// false if not.
-static bool TryMatchingScaledValue(Value *ScaleReg, int64_t Scale,
- const Type *AccessTy, ExtAddrMode &AddrMode,
- SmallVector<Instruction*, 16> &AddrModeInsts,
- const TargetLowering &TLI, unsigned Depth) {
- // If we already have a scale of this value, we can add to it, otherwise, we
- // need an available scale field.
- if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
- return false;
-
- ExtAddrMode InputAddrMode = AddrMode;
-
- // Add scale to turn X*4+X*3 -> X*7. This could also do things like
- // [A+B + A*7] -> [B+A*8].
- AddrMode.Scale += Scale;
- AddrMode.ScaledReg = ScaleReg;
-
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) {
- // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
- // to see if ScaleReg is actually X+C. If so, we can turn this into adding
- // X*Scale + C*Scale to addr mode.
- BinaryOperator *BinOp = dyn_cast<BinaryOperator>(ScaleReg);
- if (BinOp && BinOp->getOpcode() == Instruction::Add &&
- isa<ConstantInt>(BinOp->getOperand(1)) && InputAddrMode.ScaledReg ==0) {
-
- InputAddrMode.Scale = Scale;
- InputAddrMode.ScaledReg = BinOp->getOperand(0);
- InputAddrMode.BaseOffs +=
- cast<ConstantInt>(BinOp->getOperand(1))->getSExtValue()*Scale;
- if (TLI.isLegalAddressingMode(InputAddrMode, AccessTy)) {
- AddrModeInsts.push_back(BinOp);
- AddrMode = InputAddrMode;
- return true;
- }
- }
-
- // Otherwise, not (x+c)*scale, just return what we have.
- return true;
- }
-
- // Otherwise, back this attempt out.
- AddrMode.Scale -= Scale;
- if (AddrMode.Scale == 0) AddrMode.ScaledReg = 0;
-
- return false;
-}
+//===----------------------------------------------------------------------===//
+// Addressing Mode Analysis and Optimization
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Memory Optimization
+//===----------------------------------------------------------------------===//
/// IsNonLocalValue - Return true if the specified values are defined in a
/// different basic block than BB.
return false;
}
-/// OptimizeLoadStoreInst - Load and Store Instructions have often have
+/// OptimizeMemoryInst - Load and Store Instructions often have
/// addressing modes that can do significant amounts of computation. As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program. The problem is that isel can only
/// see within a single block. As such, we sink as much legal addressing mode
/// stuff into the block as possible.
-bool CodeGenPrepare::OptimizeLoadStoreInst(Instruction *LdStInst, Value *Addr,
- const Type *AccessTy,
- DenseMap<Value*,Value*> &SunkAddrs) {
+///
+/// This method is used to optimize both load/store and inline asms with memory
+/// operands.
+bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+ const Type *AccessTy,
+ DenseMap<Value*,Value*> &SunkAddrs) {
// Figure out what addressing mode will be built up for this operation.
SmallVector<Instruction*, 16> AddrModeInsts;
- ExtAddrMode AddrMode;
- bool Success = FindMaximalLegalAddressingMode(Addr, AccessTy, AddrMode,
- AddrModeInsts, *TLI, 0);
- Success = Success; assert(Success && "Couldn't select *anything*?");
+ ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy, MemoryInst,
+ AddrModeInsts, *TLI);
// Check to see if any of the instructions subsumed by this addr mode are
// non-local to I's BB.
bool AnyNonLocal = false;
for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
- if (IsNonLocalValue(AddrModeInsts[i], LdStInst->getParent())) {
+ if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
AnyNonLocal = true;
break;
}
// Insert this computation right after this user. Since our caller is
// scanning from the top of the BB to the bottom, reuses of the expr are
// guaranteed to happen later.
- BasicBlock::iterator InsertPt = LdStInst;
+ BasicBlock::iterator InsertPt = MemoryInst;
// Now that we determined the addressing expression we want to use and know
// that we have to sink it into this block. Check to see if we have already
// done this computation.
Value *&SunkAddr = SunkAddrs[Addr];
if (SunkAddr) {
- DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << "\n");
+ DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
if (SunkAddr->getType() != Addr->getType())
SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
} else {
- DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << "\n");
+ DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType();
Value *Result = 0;
SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
}
- LdStInst->replaceUsesOfWith(Addr, SunkAddr);
+ MemoryInst->replaceUsesOfWith(Addr, SunkAddr);
if (Addr->use_empty())
- EraseDeadInstructions(Addr);
+ RecursivelyDeleteTriviallyDeadInstructions(Addr);
return true;
}
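The SunkAddrs map above is what keeps the pass from duplicating work inside a block: the first memory instruction that needs a given address materializes the sunk computation, and every later one reuses it (modulo a bitcast when the pointer types differ). A toy sketch of that per-block memoization pattern, using invented stand-in types rather than the LLVM API:

#include <map>
#include <string>

typedef std::string AddrKey;     // stands in for the Value* address
typedef int SunkComputation;     // stands in for the sunk instruction

static SunkComputation getOrSinkAddr(std::map<AddrKey, SunkComputation> &SunkAddrs,
                                     const AddrKey &Addr, int &NextId) {
  std::map<AddrKey, SunkComputation>::iterator It = SunkAddrs.find(Addr);
  if (It != SunkAddrs.end())
    return It->second;             // Reuse the computation sunk earlier.
  SunkComputation C = NextId++;    // Otherwise "sink" a fresh computation
  SunkAddrs[Addr] = C;             // and remember it for later users.
  return C;
}

This also explains the SunkAddrs.clear() further down: once ExpandInlineAsm rewrites the block and scanning restarts, cached entries could otherwise be reused before their defining instruction.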
/// OptimizeInlineAsmInst - If there are any memory operands, use
-/// OptimizeLoadStoreInt to sink their address computing into the block when
+/// OptimizeMemoryInst to sink their address computations into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
DenseMap<Value*,Value*> &SunkAddrs) {
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
Value *OpVal = OpInfo.CallOperandVal;
- MadeChange |= OptimizeLoadStoreInst(I, OpVal, OpVal->getType(),
- SunkAddrs);
+ MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
}
}
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
bool MadeChange = false;
- // Split all critical edges where the dest block has a PHI and where the phi
- // has shared immediate operands.
+ // Split all critical edges where the dest block has a PHI.
TerminatorInst *BBTI = BB.getTerminator();
if (BBTI->getNumSuccessors() > 1) {
- for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i)
- if (isa<PHINode>(BBTI->getSuccessor(i)->begin()) &&
- isCriticalEdge(BBTI, i, true))
- SplitEdgeNicely(BBTI, i, this);
+ for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *SuccBB = BBTI->getSuccessor(i);
+ if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
+ SplitEdgeNicely(BBTI, i, BackEdges, this);
+ }
}
-
// Keep track of non-local addresses that have been sunk into this block.
// This allows us to avoid inserting duplicate code for blocks with multiple
// load/stores of the same address.
MadeChange |= OptimizeCmpExpression(CI);
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (TLI)
- MadeChange |= OptimizeLoadStoreInst(I, I->getOperand(0), LI->getType(),
- SunkAddrs);
+ MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
+ SunkAddrs);
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
if (TLI)
- MadeChange |= OptimizeLoadStoreInst(I, SI->getOperand(1),
- SI->getOperand(0)->getType(),
- SunkAddrs);
+ MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
+ SI->getOperand(0)->getType(),
+ SunkAddrs);
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
if (GEPI->hasAllZeroIndices()) {
/// The GEP operand must be a pointer, so must its result -> BitCast
if (TLI && isa<InlineAsm>(CI->getCalledValue()))
if (const TargetAsmInfo *TAI =
TLI->getTargetMachine().getTargetAsmInfo()) {
- if (TAI->ExpandInlineAsm(CI))
+ if (TAI->ExpandInlineAsm(CI)) {
BBI = BB.begin();
- else
+ // Avoid processing instructions out of order, which could cause
+ // reuse before a value is defined.
+ SunkAddrs.clear();
+ } else
// Sink address computation for memory operands into the block.
MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
}