//
// The LLVM Compiler Infrastructure
//
-// This file was developed by Chris Lattner and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
using namespace llvm;
+namespace {
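+  // OptExtUses - Guards the OptimizeExtUses transformation on {s|z}ext
+  // instructions (see runOnFunction below).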
+ cl::opt<bool> OptExtUses("optimize-ext-uses",
+ cl::init(true), cl::Hidden);
+}
+
namespace {
class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
public:
- static char ID; // Pass identifcation, replacement for typeid
- CodeGenPrepare(const TargetLowering *tli = 0) : FunctionPass((intptr_t)&ID),
- TLI(tli) {}
+ static char ID; // Pass identification, replacement for typeid
+ explicit CodeGenPrepare(const TargetLowering *tli = 0)
+ : FunctionPass((intptr_t)&ID), TLI(tli) {}
bool runOnFunction(Function &F);
private:
bool OptimizeLoadStoreInst(Instruction *I, Value *Addr,
const Type *AccessTy,
DenseMap<Value*,Value*> &SunkAddrs);
+ bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
+ DenseMap<Value*,Value*> &SunkAddrs);
+ bool OptimizeExtUses(Instruction *I);
};
}
if (!DestBBPN) return true; // no conflict.
// Collect the preds of BB.
- SmallPtrSet<BasicBlock*, 16> BBPreds;
+ SmallPtrSet<const BasicBlock*, 16> BBPreds;
if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
// It is faster to get preds from a PHI than with pred_iterator.
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
}
-/// SplitEdgeNicely - Split the critical edge from TI to it's specified
+/// SplitEdgeNicely - Split the critical edge from TI to its specified
/// successor if it will improve codegen. We only do this if the successor has
/// phi nodes (otherwise critical edges are ok). If there is already another
/// predecessor of the succ that is empty (and thus has no phi nodes), use it
assert(isa<PHINode>(Dest->begin()) &&
"This should only be called if Dest has a PHI!");
+ // As a hack, never split backedges of loops. Even though the copy for any
+ // PHIs inserted on the backedge would be dead for exits from the loop, we
+ // assume that the cost of *splitting* the backedge would be too high.
+ if (Dest == TIBB)
+ return;
+
/// TIPHIValues - This array is lazily computed to determine the values of
/// PHIs in Dest that TI would provide.
- std::vector<Value*> TIPHIValues;
+ SmallVector<Value*, 32> TIPHIValues;
// Check to see if Dest has any blocks that can be used as a split edge for
// this terminator.
BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
if (!PredBr || !PredBr->isUnconditional() ||
// Must be empty other than the branch.
- &Pred->front() != PredBr)
+ &Pred->front() != PredBr ||
+ // Cannot be the entry block; its label does not get emitted.
+ Pred == &(Dest->getParent()->getEntryBlock()))
continue;
// Finally, since we know that Dest has phi nodes in it, we have to make
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, int->uint, or
/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
-/// registers that must be created and coallesced.
+/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
BasicBlock *DefBB = CI->getParent();
/// InsertedCasts - Only insert a cast in each block once.
- std::map<BasicBlock*, CastInst*> InsertedCasts;
+ DenseMap<BasicBlock*, CastInst*> InsertedCasts;
bool MadeChange = false;
for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
MadeChange = true;
}
- // Replace a use of the cast with a use of the new casat.
+ // Replace a use of the cast with a use of the new cast.
TheUse = InsertedCast;
}
// If we removed all uses, nuke the cast.
+ if (CI->use_empty()) {
+ CI->eraseFromParent();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// OptimizeCmpExpression - Sink the given CmpInst into user blocks to reduce
+/// the number of virtual registers that must be created and coalesced. This is
+/// a clear win except on targets with multiple condition code registers
+/// (PowerPC), where it might lose; some adjustment may be wanted there.
+///
+/// Return true if any changes are made.
+static bool OptimizeCmpExpression(CmpInst *CI) {
+  BasicBlock *DefBB = CI->getParent();
+
+ /// InsertedCmp - Only insert a cmp in each block once.
+ DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(User))
+ continue;
+
+ // Figure out which BB this cmp is used in.
+ BasicBlock *UserBB = User->getParent();
+
+ // If this user is in the same block as the cmp, don't change the cmp.
+ if (UserBB == DefBB) continue;
+
+ // If we have already inserted a cmp into this block, use it.
+ CmpInst *&InsertedCmp = InsertedCmps[UserBB];
+
+ if (!InsertedCmp) {
+ BasicBlock::iterator InsertPt = UserBB->begin();
+ while (isa<PHINode>(InsertPt)) ++InsertPt;
+
+ InsertedCmp =
+ CmpInst::create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0),
+ CI->getOperand(1), "", InsertPt);
+ MadeChange = true;
+ }
+
+ // Replace a use of the cmp with a use of the new cmp.
+ TheUse = InsertedCmp;
+ }
+
+ // If we removed all uses, nuke the cmp.
if (CI->use_empty())
CI->eraseFromParent();
cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
ConstantOffset += SL->getElementOffset(Idx);
} else {
- uint64_t TypeSize = TD->getTypeSize(GTI.getIndexedType());
+ uint64_t TypeSize = TD->getABITypeSize(GTI.getIndexedType());
if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
ConstantOffset += CI->getSExtValue()*TypeSize;
} else if (TypeSize) { // Scales of zero don't do anything.
return true;
}
+/// OptimizeInlineAsmInst - If there are any memory operands, use
+/// OptimizeLoadStoreInst to sink their address computation into the block
+/// when possible / profitable.
+bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
+ DenseMap<Value*,Value*> &SunkAddrs) {
+ bool MadeChange = false;
+ InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ std::vector<InlineAsm::ConstraintInfo>
+ ConstraintInfos = IA->ParseConstraints();
+
+ /// ConstraintOperands - Information about all of the constraints.
+ std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    ConstraintOperands.push_back(
+      TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
+ TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ if (OpInfo.isIndirect)
+ OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ OpInfo.ComputeConstraintToUse(*TLI);
+
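+    // For a memory constraint, try to sink the operand's address computation
+    // into this block, just like a load/store address.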
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ Value *OpVal = OpInfo.CallOperandVal;
+ MadeChange |= OptimizeLoadStoreInst(I, OpVal, OpVal->getType(),
+ SunkAddrs);
+ }
+ }
+
+ return MadeChange;
+}
+
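+/// OptimizeExtUses - If both the result of a {s|z}ext and its source value are
+/// live out of the defining block, rewrite the out-of-block uses of the source
+/// with a (free) truncate of the extended value, so that only the extension
+/// result needs to be live across blocks.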
+bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
+ BasicBlock *DefBB = I->getParent();
+
+  // If both the result of the {s|z}ext and its source are live out, rewrite
+  // all other uses of the source with the result of the extension.
+ Value *Src = I->getOperand(0);
+ if (Src->hasOneUse())
+ return false;
+
+ // Only do this xform if truncating is free.
+ if (!TLI->isTruncateFree(I->getType(), Src->getType()))
+ return false;
+
+ // Only safe to perform the optimization if the source is also defined in
+ // this block.
+ if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
+ return false;
+
+ bool DefIsLiveOut = false;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+ DefIsLiveOut = true;
+ break;
+ }
+ if (!DefIsLiveOut)
+ return false;
+
+  // Make sure none of the uses are PHI nodes.
+ for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+ // Be conservative. We don't want this xform to end up introducing
+ // reloads just before load / store instructions.
+ if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
+ return false;
+ }
+
+  // InsertedTruncs - Only insert one trunc in each block.
+ DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
+ UI != E; ++UI) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+
+ // Both src and def are live in this block. Rewrite the use.
+ Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
+
+ if (!InsertedTrunc) {
+ BasicBlock::iterator InsertPt = UserBB->begin();
+ while (isa<PHINode>(InsertPt)) ++InsertPt;
+
+ InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
+ }
+
+ // Replace a use of the {s|z}ext source with a use of the result.
+ TheUse = InsertedTrunc;
+
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
if (isa<Constant>(CI->getOperand(0)))
continue;
- if (TLI)
- MadeChange |= OptimizeNoopCopyExpression(CI, *TLI);
+ bool Change = false;
+ if (TLI) {
+ Change = OptimizeNoopCopyExpression(CI, *TLI);
+ MadeChange |= Change;
+ }
+
+ if (OptExtUses && !Change && (isa<ZExtInst>(I) || isa<SExtInst>(I)))
+ MadeChange |= OptimizeExtUses(I);
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
+ MadeChange |= OptimizeCmpExpression(CI);
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (TLI)
MadeChange |= OptimizeLoadStoreInst(I, I->getOperand(0), LI->getType(),
TLI->getTargetMachine().getTargetAsmInfo()) {
if (TAI->ExpandInlineAsm(CI))
BBI = BB.begin();
+ else
+        // Sink address computation for memory operands into the block.
+ MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
}
}
}