friend class InstVisitor<CallAnalyzer, bool>;
// DataLayout if available, or null.
- const DataLayout *const TD;
+ const DataLayout *const DL;
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
bool ExposesReturnsTwice;
bool HasDynamicAlloca;
bool ContainsNoDuplicateCall;
+ bool HasReturn;
+ bool HasIndirectBr;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
bool visitExtractValue(ExtractValueInst &I);
bool visitInsertValue(InsertValueInst &I);
bool visitCallSite(CallSite CS);
+ bool visitReturnInst(ReturnInst &RI);
+ bool visitBranchInst(BranchInst &BI);
+ bool visitSwitchInst(SwitchInst &SI);
+ bool visitIndirectBrInst(IndirectBrInst &IBI);
+ bool visitResumeInst(ResumeInst &RI);
+ bool visitUnreachableInst(UnreachableInst &I);
public:
- CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI,
+ CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
Function &Callee, int Threshold)
- : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
+ : DL(DL), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
- ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0),
- NumVectorInstructions(0), FiftyPercentVectorBonus(0),
- TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
- NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
- NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
- SROACostSavings(0), SROACostSavingsLost(0) {}
+ ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
+ AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
+ FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0),
+ SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
- if (!TD)
+ if (!DL)
return false;
- unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ unsigned IntPtrWidth = DL->getPointerSizeInBits();
assert(IntPtrWidth == Offset.getBitWidth());
for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = TD->getStructLayout(STy);
+ const StructLayout *SL = DL->getStructLayout(STy);
Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
continue;
}
- APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType()));
+ APInt TypeSize(IntPtrWidth, DL->getTypeAllocSize(GTI.getIndexedType()));
Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
}
return true;
// Accumulate the allocated size.
if (I.isStaticAlloca()) {
Type *Ty = I.getAllocatedType();
- AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) :
+ AllocatedSize += (DL ? DL->getTypeAllocSize(Ty) :
Ty->getPrimitiveSizeInBits());
}
// Try to fold GEPs of constant-offset call site argument pointers. This
// requires target data and inbounds GEPs.
- if (TD && I.isInBounds()) {
+ if (DL && I.isInBounds()) {
// Check if we have a base + offset for the pointer.
Value *Ptr = I.getPointerOperand();
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
// Track base/offset pairs when converted to a plain integer provided the
// integer is large enough to represent the pointer.
unsigned IntegerSize = I.getType()->getScalarSizeInBits();
- if (TD && IntegerSize >= TD->getPointerSizeInBits()) {
+ if (DL && IntegerSize >= DL->getPointerSizeInBits()) {
std::pair<Value *, APInt> BaseAndOffset
= ConstantOffsetPtrs.lookup(I.getOperand(0));
if (BaseAndOffset.first)
// modifications provided the integer is not too large.
Value *Op = I.getOperand(0);
unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
- if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
+ if (DL && IntegerSize <= DL->getPointerSizeInBits()) {
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
if (BaseAndOffset.first)
ConstantOffsetPtrs[&I] = BaseAndOffset;
COp = SimplifiedValues.lookup(Operand);
if (COp)
if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
- COp, TD)) {
+ COp, DL)) {
SimplifiedValues[&I] = C;
return true;
}
if (!isa<Constant>(RHS))
if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
RHS = SimpleRHS;
- Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD);
+ Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
SimplifiedValues[&I] = C;
return true;
}
bool CallAnalyzer::visitCallSite(CallSite CS) {
- if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
+ if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
!F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::ReturnsTwice)) {
// This aborts the entire analysis.
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold);
+ CallAnalyzer CA(DL, TTI, *F, InlineConstants::IndirectCallThreshold);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// bonus we want to apply, but don't go below zero.
return Base::visitCallSite(CS);
}
+bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
+ // At least one return instruction will be free after inlining.
+ bool Free = !HasReturn;
+ HasReturn = true;
+ return Free;
+}
+
+bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
+ // We model unconditional branches as essentially free -- they really
+ // shouldn't exist at all, but handling them makes the behavior of the
+ // inliner more regular and predictable. Interestingly, conditional branches
+ // which will fold away are also free.
+ return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(BI.getCondition()));
+}
+
+bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
+ // We model unconditional switches as free, see the comments on handling
+ // branches.
+ return isa<ConstantInt>(SI.getCondition()) ||
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(SI.getCondition()));
+}
+
+bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
+ // We never want to inline functions that contain an indirectbr. This is
+ // incorrect because all the blockaddress's (in static global initializers
+ // for example) would be referring to the original function, and this
+ // indirect jump would jump from the inlined copy of the function into the
+ // original function which is extremely undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions with
+ // indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably don't
+ // want to inline this function.
+ HasIndirectBr = true;
+ return false;
+}
+
+bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
+ // FIXME: It's not clear that a single instruction is an accurate model for
+ // the inline cost of a resume instruction.
+ return false;
+}
+
+bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
+ // FIXME: It might be reasonably to discount the cost of instructions leading
+ // to unreachable as they have the lowest possible impact on both runtime and
+ // code size.
+ return true; // No actual code is needed for unreachable.
+}
+
bool CallAnalyzer::visitInstruction(Instruction &I) {
// Some instructions are free. All of the free intrinsics can also be
// handled by SROA, etc.
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
- for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
- I != E; ++I) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // FIXME: Currently, the number of instructions in a function regardless of
+ // our ability to simplify them during inline to constants or dead code,
+ // are actually used by the vector bonus heuristic. As long as that's true,
+ // we have to special case debug intrinsics here to prevent differences in
+ // inlining due to debug symbols. Eventually, the number of unsimplified
+ // instructions shouldn't factor into the cost computation, but until then,
+ // hack around it here.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
Cost += InlineConstants::InstrCost;
// If the visit this instruction detected an uninlinable pattern, abort.
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
- if (!TD || !V->getType()->isPointerTy())
+ if (!DL || !V->getType()->isPointerTy())
return 0;
- unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ unsigned IntPtrWidth = DL->getPointerSizeInBits();
APInt Offset = APInt::getNullValue(IntPtrWidth);
// Even though we don't look through PHI nodes, we could be called on an
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
} while (Visited.insert(V));
- Type *IntPtrTy = TD->getIntPtrType(V->getContext());
+ Type *IntPtrTy = DL->getIntPtrType(V->getContext());
return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}
// Give out bonuses per argument, as the instructions setting them up will
// be gone after inlining.
for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
- if (TD && CS.isByValArgument(I)) {
+ if (DL && CS.isByValArgument(I)) {
// We approximate the number of loads and stores needed by dividing the
// size of the byval type by the target's pointer size.
PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
- unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
- unsigned PointerSize = TD->getPointerSizeInBits();
+ unsigned TypeSize = DL->getTypeSizeInBits(PTy->getElementType());
+ unsigned PointerSize = DL->getPointerSizeInBits();
// Ceiling division.
unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
}
}
- // Track whether we've seen a return instruction. The first return
- // instruction is free, as at least one will usually disappear in inlining.
- bool HasReturn = false;
-
// Populate our simplified values by mapping from function arguments to call
// arguments with known important simplifications.
CallSite::arg_iterator CAI = CS.arg_begin();
if (BB->empty())
continue;
- // Handle the terminator cost here where we can track returns and other
- // function-wide constructs.
- TerminatorInst *TI = BB->getTerminator();
-
- // We never want to inline functions that contain an indirectbr. This is
- // incorrect because all the blockaddress's (in static global initializers
- // for example) would be referring to the original function, and this
- // indirect jump would jump from the inlined copy of the function into the
- // original function which is extremely undefined behavior.
- // FIXME: This logic isn't really right; we can safely inline functions
- // with indirectbr's as long as no other function or global references the
- // blockaddress of a block within the current function. And as a QOI issue,
- // if someone is using a blockaddress without an indirectbr, and that
- // reference somehow ends up in another function or global, we probably
- // don't want to inline this function.
- if (isa<IndirectBrInst>(TI))
- return false;
-
- if (!HasReturn && isa<ReturnInst>(TI))
- HasReturn = true;
- else
- Cost += InlineConstants::InstrCost;
-
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail on out.
if (!analyzeBlock(BB)) {
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
break;
}
+ TerminatorInst *TI = BB->getTerminator();
+
// Add in the live successors by first checking whether we have terminator
// that may be simplified based on the values simplified by this call.
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
}
}
- // If this is a noduplicate call, we can still inline as long as
+ // If this is a noduplicate call, we can still inline as long as
// inlining this would cause the removal of the caller (so the instruction
// is not actually duplicated, just moved).
if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
DEBUG_PRINT_STAT(SROACostSavings);
DEBUG_PRINT_STAT(SROACostSavingsLost);
DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
+ DEBUG_PRINT_STAT(Cost);
+ DEBUG_PRINT_STAT(Threshold);
+ DEBUG_PRINT_STAT(VectorBonus);
#undef DEBUG_PRINT_STAT
}
#endif
char InlineCostAnalysis::ID = 0;
-InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {}
+InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), DL(0) {}
InlineCostAnalysis::~InlineCostAnalysis() {}
}
bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
- TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
TTI = &getAnalysis<TargetTransformInfo>();
return false;
}
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(TD, *TTI, *Callee, Threshold);
+ CallAnalyzer CA(DL, *TTI, *Callee, Threshold);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());