#define LLVM_ANALYSIS_CODEMETRICS_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/CallSite.h"
namespace llvm {
+class AssumptionTracker;
class BasicBlock;
+class Loop;
class Function;
class Instruction;
class DataLayout;
NumInlineCandidates(0), NumVectorInsts(0), NumRets(0) {}
/// \brief Add information about a block to the current state.
- void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI);
+ void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI,
+ SmallPtrSetImpl<const Value*> &EphValues);
+
+ /// \brief Collect a loop's ephemeral values (those used only by an assume
+ /// or similar intrinsic in the loop).
+ static void collectEphemeralValues(const Loop *L, AssumptionTracker *AT,
+ SmallPtrSetImpl<const Value*> &EphValues);
+
+ /// \brief Collect a function's ephemeral values (those used only by an
+ /// assume or similar intrinsic in the function).
+ static void collectEphemeralValues(const Function *F, AssumptionTracker *AT,
+ SmallPtrSetImpl<const Value*> &EphValues);
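+
+ /// Typical usage, sketched from the callers updated in this patch (L, AT,
+ /// BB, TTI, and Metrics stand for whatever loop, AssumptionTracker, block,
+ /// TargetTransformInfo, and CodeMetrics object the caller already has):
+ ///   SmallPtrSet<const Value *, 32> EphValues;
+ ///   CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+ ///   Metrics.analyzeBasicBlock(BB, TTI, EphValues);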
};
}
#include <climits>
namespace llvm {
+class AssumptionTracker;
class CallSite;
class DataLayout;
class Function;
/// \brief Cost analyzer used by the inliner.
class InlineCostAnalysis : public CallGraphSCCPass {
const TargetTransformInfo *TTI;
+ AssumptionTracker *AT;
public:
static char ID;
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "code-metrics"
using namespace llvm;
+static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
+ SmallPtrSetImpl<const Value*> &EphValues) {
+ SmallPtrSet<const Value *, 32> Visited;
+
+ // Make sure that all of the items in WorkSet are in our EphValues set.
+ EphValues.insert(WorkSet.begin(), WorkSet.end());
+
+ // Note: We don't speculate PHIs here, so we'll miss instruction chains kept
+ // alive only by ephemeral values.
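+ //
+ // Illustrative example (not from this patch): given
+ //   %c = icmp sgt i32 %x, 0
+ //   call void @llvm.assume(i1 %c)
+ // the icmp's only user is the assume call, so %c is ephemeral, and the walk
+ // below then visits %x, which is ephemeral too if the icmp is its only user.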
+
+ while (!WorkSet.empty()) {
+ const Value *V = WorkSet.pop_back_val();
+ if (!Visited.insert(V))
+ continue;
+
+ // If all uses of this value are ephemeral, then so is this value.
+ bool FoundNEUse = false;
+ for (const User *I : V->users())
+ if (!EphValues.count(I)) {
+ FoundNEUse = true;
+ break;
+ }
+
+ if (FoundNEUse)
+ continue;
+
+ EphValues.insert(V);
+ DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n");
+
+ if (const User *U = dyn_cast<User>(V))
+ for (const Value *J : U->operands()) {
+ if (isSafeToSpeculativelyExecute(J))
+ WorkSet.push_back(J);
+ }
+ }
+}
+
+// Find all ephemeral values.
+void CodeMetrics::collectEphemeralValues(const Loop *L, AssumptionTracker *AT,
+ SmallPtrSetImpl<const Value*> &EphValues) {
+ SmallVector<const Value *, 16> WorkSet;
+
+ for (auto &I : AT->assumptions(L->getHeader()->getParent())) {
+ // Filter out call sites outside of the loop so we don't do a function's
+ // worth of work for each of its loops (and, in the common case, ephemeral
+ // values in the loop are likely due to @llvm.assume calls in the loop).
+ if (!L->contains(I->getParent()))
+ continue;
+
+ WorkSet.push_back(I);
+ }
+
+ completeEphemeralValues(WorkSet, EphValues);
+}
+
+void CodeMetrics::collectEphemeralValues(const Function *F, AssumptionTracker *AT,
+ SmallPtrSetImpl<const Value*> &EphValues) {
+ SmallVector<const Value *, 16> WorkSet;
+
+ for (auto &I : AT->assumptions(const_cast<Function*>(F)))
+ WorkSet.push_back(I);
+
+ completeEphemeralValues(WorkSet, EphValues);
+}
+
/// analyzeBasicBlock - Fill in the current structure with information gleaned
/// from the specified block.
void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ SmallPtrSetImpl<const Value*> &EphValues) {
++NumBlocks;
unsigned NumInstsBeforeThisBB = NumInsts;
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
II != E; ++II) {
+ // Skip ephemeral values.
+ if (EphValues.count(II))
+ continue;
+
// Special handling for calls.
if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
ImmutableCallSite CS(cast<Instruction>(II));
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
+ /// The cache of @llvm.assume intrinsics.
+ AssumptionTracker *AT;
+
// The called function.
Function &F;
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
// Custom analysis routines.
- bool analyzeBlock(BasicBlock *BB);
+ bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.
public:
CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
- Function &Callee, int Threshold)
- : DL(DL), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
+ AssumptionTracker *AT, Function &Callee, int Threshold)
+ : DL(DL), TTI(TTI), AT(AT), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(DL, TTI, *F, InlineConstants::IndirectCallThreshold);
+ CallAnalyzer CA(DL, TTI, AT, *F, InlineConstants::IndirectCallThreshold);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// bonus we want to apply, but don't go below zero.
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
-bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
+bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
+ SmallPtrSetImpl<const Value *> &EphValues) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
// FIXME: Currently, the number of instructions in a function regardless of
// our ability to simplify them during inline to constants or dead code,
if (isa<DbgInfoIntrinsic>(I))
continue;
+ // Skip ephemeral values.
+ if (EphValues.count(I))
+ continue;
+
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
NumAllocaArgs = SROAArgValues.size();
+ // FIXME: If a caller has multiple calls to a callee, we end up recomputing
+ // the ephemeral values multiple times (and they're completely determined by
+ // the callee, so this is purely duplicate work).
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(&F, AT, EphValues);
+
// The worklist of live basic blocks in the callee *after* inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this
// particular call site in order to get more accurate cost estimates. This
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail out.
- if (!analyzeBlock(BB)) {
+ if (!analyzeBlock(BB, EphValues)) {
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
HasIndirectBr)
return false;
INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<TargetTransformInfo>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
TTI = &getAnalysis<TargetTransformInfo>();
+ AT = &getAnalysis<AssumptionTracker>();
return false;
}
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(Callee->getDataLayout(), *TTI, *Callee, Threshold);
+ CallAnalyzer CA(Callee->getDataLayout(), *TTI, AT, *Callee, Threshold);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
// LCSSA form makes instruction renaming easier.
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
unsigned MaxHeaderSize;
LoopInfo *LI;
const TargetTransformInfo *TTI;
+ AssumptionTracker *AT;
};
}
char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
+ AT = &getAnalysis<AssumptionTracker>();
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
// Check size of original header and reject loop if it is very big or we can't
// duplicate blocks inside it.
{
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+
CodeMetrics Metrics;
- Metrics.analyzeBasicBlock(OrigHeader, *TTI);
+ Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
if (Metrics.notDuplicatable) {
DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
<< " instructions: "; L->dump());
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ AssumptionTracker *AT) {
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I, TTI);
+ Metrics.analyzeBasicBlock(*I, TTI, EphValues);
NumCalls = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;
unsigned NumInlineCandidates;
bool notDuplicatable;
unsigned LoopSize =
- ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI);
+ ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI, AT);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
uint64_t UnrolledSize = (uint64_t)LoopSize * Count;
if (notDuplicatable) {
// Analyze loop. Check its size and determine whether it is possible to
// unswitch it. Returns true if we can unswitch this loop.
- bool countLoop(const Loop *L, const TargetTransformInfo &TTI);
+ bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
+ AssumptionTracker *AT);
// Clean all data related to given loop.
void forgetLoop(const Loop *L);
// Analyze loop. Check its size and determine whether it is possible to
// unswitch it. Returns true if we can unswitch this loop.
-bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
+bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
+ AssumptionTracker *AT) {
LoopPropsMapIt PropsIt;
bool Inserted;
// large numbers of branches which cause loop unswitching to go crazy.
// This is a very ad-hoc heuristic.
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+
// FIXME: This is overly conservative because it does not take into
// consideration code simplification opportunities and code that can
// be shared by the resultant unswitched loops.
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I, TTI);
+ Metrics.analyzeBasicBlock(*I, TTI, EphValues);
Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
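// (Worked example with illustrative numbers, not from this change: with
// MaxSize == 100 and a loop of 60 instructions across 4 blocks,
// SizeEstimation is min(60, 4 * 5) == 20, so CanBeUnswitchedCount == 5.)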
// We have probably reached the quota of branches for this loop. If so,
// stop unswitching.
- if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>()))
+ if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>(),
+ AT))
return false;
// Loop over all of the basic blocks in the loop. If we find an interior
--- /dev/null
+; RUN: opt -S -Oz %s | FileCheck %s
+
+@a = global i32 4
+
+define i1 @inner() {
+ %a1 = load volatile i32* @a
+ %x1 = add i32 %a1, %a1
+ %c = icmp eq i32 %x1, 0
+
+ ; Here are enough instructions to prevent inlining, but because they are used
+ ; only by the @llvm.assume intrinsic, they're free (and, thus, inlining will
+ ; still happen).
+ %a2 = mul i32 %a1, %a1
+ %a3 = sub i32 %a1, 5
+ %a4 = udiv i32 %a3, -13
+ %a5 = mul i32 %a4, %a4
+ %a6 = add i32 %a5, %x1
+ %ca = icmp sgt i32 %a6, -7
+ tail call void @llvm.assume(i1 %ca)
+
+ ret i1 %c
+}
+
+; @inner() should be inlined for -Oz.
+; CHECK-NOT: call i1 @inner
+define i1 @outer() optsize {
+ %r = call i1 @inner()
+ ret i1 %r
+}
+
+declare void @llvm.assume(i1) nounwind
+
--- /dev/null
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 | FileCheck %s
+
+; Make sure this loop is completely unrolled...
+; CHECK-LABEL: @test1
+; CHECK: for.body:
+; CHECK-NOT: for.end:
+
+define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+
+ ; This loop will be completely unrolled, even with these extra instructions,
+ ; but only because they're ephemeral (and, thus, free).
+ %1 = add nsw i32 %0, 2
+ %2 = add nsw i32 %1, 4
+ %3 = add nsw i32 %2, 4
+ %4 = add nsw i32 %3, 4
+ %5 = add nsw i32 %4, 4
+ %6 = add nsw i32 %5, 4
+ %7 = add nsw i32 %6, 4
+ %8 = add nsw i32 %7, 4
+ %9 = add nsw i32 %8, 4
+ %10 = add nsw i32 %9, 4
+ %ca = icmp sgt i32 %10, -7
+ call void @llvm.assume(i1 %ca)
+
+ %add = add nsw i32 %0, %sum.01
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 5
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 %add
+}
+
+declare void @llvm.assume(i1) nounwind
+