//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopPass.h"
unsigned TripCount;
ScalarEvolution &SE;
const TargetTransformInfo &TTI;
- unsigned NumberOfOptimizedInstructions;
DenseMap<Value *, Constant *> SimplifiedValues;
DenseMap<LoadInst *, Value *> LoadBaseAddresses;
- SmallPtrSet<Instruction *, 32> CountedInsns;
+ SmallPtrSet<Instruction *, 32> CountedInstructions;
+
+ /// \brief Count the number of optimized instructions.
+ unsigned NumberOfOptimizedInstructions;
// Provide base case for our instruction visit.
bool visitInstruction(Instruction &I) { return false; };
else
SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS);
- if (SimpleV && CountedInsns.insert(&I).second)
+ if (SimpleV && CountedInstructions.insert(&I).second)
NumberOfOptimizedInstructions += TTI.getUserCost(&I);
if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
// Given a list of loads that could be constant-folded (LoadBaseAddresses),
// estimate number of optimized instructions after substituting the concrete
// values for the given Iteration.
- // Fill in SimplifiedInsns map for future use in DCE-estimation.
- unsigned estimateNumberOfSimplifiedInsns(unsigned Iteration) {
+ // Fill in SimplifiedValues map for future use in DCE-estimation.
+ unsigned estimateNumberOfSimplifiedInstructions(unsigned Iteration) {
SmallVector<Instruction *, 8> Worklist;
SimplifiedValues.clear();
- CountedInsns.clear();
-
+ CountedInstructions.clear();
NumberOfOptimizedInstructions = 0;
+
// We start by adding all loads to the worklist.
- for (auto LoadDescr : LoadBaseAddresses) {
+ for (auto &LoadDescr : LoadBaseAddresses) {
LoadInst *LI = LoadDescr.first;
SimplifiedValues[LI] = computeLoadValue(LI, Iteration);
- if (CountedInsns.insert(LI).second)
+ if (CountedInstructions.insert(LI).second)
NumberOfOptimizedInstructions += TTI.getUserCost(LI);
- for (auto U : LI->users()) {
+ for (User *U : LI->users()) {
Instruction *UI = dyn_cast<Instruction>(U);
if (!UI)
continue;
Instruction *I = Worklist.pop_back_val();
if (!visit(I))
continue;
- for (auto U : I->users()) {
+ for (User *U : I->users()) {
Instruction *UI = dyn_cast<Instruction>(U);
if (!UI)
continue;
// Given a list of potentially simplifed instructions, estimate number of
// instructions that would become dead if we do perform the simplification.
- unsigned estimateNumberOfDeadInsns() {
+ unsigned estimateNumberOfDeadInstructions() {
NumberOfOptimizedInstructions = 0;
- SmallVector<Instruction *, 8> Worklist;
- DenseMap<Instruction *, bool> DeadInstructions;
+
+ // We keep a set vector for the worklist so that we don't wast space in the
+ // worklist queuing up the same instruction repeatedly. This can happen due
+ // to multiple operands being the same instruction or due to the same
+ // instruction being an operand of lots of things that end up dead or
+ // simplified.
+ SmallSetVector<Instruction *, 8> Worklist;
+
+ // The dead instructions are held in a separate set. This is used to
+ // prevent us from re-examining instructions and make sure we only count
+ // the benifit once. The worklist's internal set handles insertion
+ // deduplication.
+ SmallPtrSet<Instruction *, 16> DeadInstructions;
+
+ // Lambda to enque operands onto the worklist.
+ auto EnqueueOperands = [&](Instruction &I) {
+ for (auto *Op : I.operand_values())
+ if (auto *OpI = dyn_cast<Instruction>(Op))
+ Worklist.insert(OpI);
+ };
+
// Start by initializing worklist with simplified instructions.
- for (auto Folded : SimplifiedValues) {
- if (auto FoldedInsn = dyn_cast<Instruction>(Folded.first)) {
- Worklist.push_back(FoldedInsn);
- DeadInstructions[FoldedInsn] = true;
+ for (auto &FoldedKeyValue : SimplifiedValues)
+ if (auto *FoldedInst = dyn_cast<Instruction>(FoldedKeyValue.first)) {
+ DeadInstructions.insert(FoldedInst);
+
+ // Add each instruction operand of this dead instruction to the
+ // worklist.
+ EnqueueOperands(*FoldedInst);
}
- }
+
// If a definition of an insn is only used by simplified or dead
// instructions, it's also dead. Check defs of all instructions from the
// worklist.
while (!Worklist.empty()) {
- Instruction *FoldedInsn = Worklist.pop_back_val();
- for (Value *Op : FoldedInsn->operands()) {
- if (auto I = dyn_cast<Instruction>(Op)) {
- if (!L->contains(I))
- continue;
- if (SimplifiedValues[I])
- continue; // This insn has been counted already.
- if (I->getNumUses() == 0)
- continue;
- bool AllUsersFolded = true;
- for (auto U : I->users()) {
- Instruction *UI = dyn_cast<Instruction>(U);
- if (!SimplifiedValues[UI] && !DeadInstructions[UI]) {
- AllUsersFolded = false;
- break;
- }
- }
- if (AllUsersFolded) {
- NumberOfOptimizedInstructions += TTI.getUserCost(I);
- Worklist.push_back(I);
- DeadInstructions[I] = true;
- }
+ Instruction *I = Worklist.pop_back_val();
+ if (!L->contains(I))
+ continue;
+ if (DeadInstructions.count(I))
+ continue;
+ if (I->getNumUses() == 0)
+ continue;
+ bool AllUsersFolded = true;
+ for (User *U : I->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!SimplifiedValues[UI] && !DeadInstructions.count(UI)) {
+ AllUsersFolded = false;
+ break;
}
}
+ if (AllUsersFolded) {
+ NumberOfOptimizedInstructions += TTI.getUserCost(I);
+ DeadInstructions.insert(I);
+ EnqueueOperands(*I);
+ }
}
return NumberOfOptimizedInstructions;
}
approximateNumberOfOptimizedInstructions(const Loop *L, ScalarEvolution &SE,
unsigned TripCount,
const TargetTransformInfo &TTI) {
- if (!TripCount)
+ if (!TripCount || !UnrollMaxIterationsCountToAnalyze)
return 0;
UnrollAnalyzer UA(L, TripCount, SE, TTI);
std::min<unsigned>(UnrollMaxIterationsCountToAnalyze, TripCount);
unsigned NumberOfOptimizedInstructions = 0;
for (unsigned i = 0; i < IterationsNumberForEstimate; ++i) {
- NumberOfOptimizedInstructions += UA.estimateNumberOfSimplifiedInsns(i);
- NumberOfOptimizedInstructions += UA.estimateNumberOfDeadInsns();
+ NumberOfOptimizedInstructions +=
+ UA.estimateNumberOfSimplifiedInstructions(i);
+ NumberOfOptimizedInstructions += UA.estimateNumberOfDeadInstructions();
}
NumberOfOptimizedInstructions *= TripCount / IterationsNumberForEstimate;