X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FLoopRerollPass.cpp;h=7f73baa6a8ffed24ae64f2a27a5db21bca6201b1;hp=144aa7e5c584ae4188514d733a2fe2a588971448;hb=7ba5769458c072a56fe2a9605e965d10229b54ce;hpb=7f2eff792a2e18758a25956abdac2440ee18dd7f diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index 144aa7e5c58..7f73baa6a8f 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -11,9 +11,10 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-reroll" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -22,6 +23,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -29,19 +31,26 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; +#define DEBUG_TYPE "loop-reroll" + STATISTIC(NumRerolledLoops, "Number of rerolled loops"); static cl::opt MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden, cl::desc("The maximum increment for loop rerolling")); +static cl::opt +NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), + cl::Hidden, + cl::desc("The maximum number of failures to tolerate" + " during fuzzy matching. (default: 400)")); + // This loop re-rolling transformation aims to transform loops like this: // // int foo(int a); @@ -118,6 +127,16 @@ MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden, // br %cmp, header, exit namespace { + enum IterationLimits { + /// The maximum number of iterations that we'll try and reroll. This + /// has to be less than 25 in order to fit into a SmallBitVector. + IL_MaxRerollIterations = 16, + /// The bitvector index used by loop induction variables and other + /// instructions that belong to all iterations. + IL_All, + IL_End + }; + class LoopReroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid @@ -125,30 +144,32 @@ namespace { initializeLoopRerollPass(*PassRegistry::getPassRegistry()); } - bool runOnLoop(Loop *L, LPPassManager &LPM); + bool runOnLoop(Loop *L, LPPassManager &LPM) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); } -protected: + protected: AliasAnalysis *AA; LoopInfo *LI; ScalarEvolution *SE; - DataLayout *DL; TargetLibraryInfo *TLI; DominatorTree *DT; typedef SmallVector SmallInstructionVector; typedef SmallSet SmallInstructionSet; - // A chain of isomorphic instructions, indentified by a single-use PHI, + // Map between induction variable and its increment + DenseMap IVToIncMap; + + // A chain of isomorphic instructions, identified by a single-use PHI // representing a reduction. Only the last value may be used outside the // loop. struct SimpleLoopReduction { @@ -190,12 +211,12 @@ protected: iterator begin() { assert(Valid && "Using invalid reduction"); - return llvm::next(Instructions.begin()); + return std::next(Instructions.begin()); } const_iterator begin() const { assert(Valid && "Using invalid reduction"); - return llvm::next(Instructions.begin()); + return std::next(Instructions.begin()); } iterator end() { return Instructions.end(); } @@ -214,9 +235,7 @@ protected: typedef SmallVector SmallReductionVector; // Add a new possible reduction. - void addSLR(SimpleLoopReduction &SLR) { - PossibleReds.push_back(SLR); - } + void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); } // Setup to track possible reductions corresponding to the provided // rerolling scale. Only reductions with a number of non-PHI instructions @@ -224,7 +243,8 @@ protected: // are filled in: // - A set of all possible instructions in eligible reductions. // - A set of all PHIs in eligible reductions - // - A set of all reduced values (last instructions) in eligible reductions. + // - A set of all reduced values (last instructions) in eligible + // reductions. void restrictToScale(uint64_t Scale, SmallInstructionSet &PossibleRedSet, SmallInstructionSet &PossibleRedPHISet, @@ -237,13 +257,12 @@ protected: if (PossibleReds[i].size() % Scale == 0) { PossibleRedLastSet.insert(PossibleReds[i].getReducedValue()); PossibleRedPHISet.insert(PossibleReds[i].getPHI()); - + PossibleRedSet.insert(PossibleReds[i].getPHI()); PossibleRedIdx[PossibleReds[i].getPHI()] = i; - for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(), - JE = PossibleReds[i].end(); J != JE; ++J) { - PossibleRedSet.insert(*J); - PossibleRedIdx[*J] = i; + for (Instruction *J : PossibleReds[i]) { + PossibleRedSet.insert(J); + PossibleRedIdx[J] = i; } } } @@ -284,22 +303,6 @@ protected: // The functions below can be called after we've finished processing all // instructions in the loop, and we know which reductions were selected. - // Is the provided instruction the PHI of a reduction selected for - // rerolling? - bool isSelectedPHI(Instruction *J) { - if (!isa(J)) - return false; - - for (DenseSet::iterator RI = Reds.begin(), RIE = Reds.end(); - RI != RIE; ++RI) { - int i = *RI; - if (cast(J) == PossibleReds[i].getPHI()) - return true; - } - - return false; - } - bool validateSelected(); void replaceSelected(); @@ -312,26 +315,117 @@ protected: DenseSet Reds; }; + // A DAGRootSet models an induction variable being used in a rerollable + // loop. For example, + // + // x[i*3+0] = y1 + // x[i*3+1] = y2 + // x[i*3+2] = y3 + // + // Base instruction -> i*3 + // +---+----+ + // / | \ + // ST[y1] +1 +2 <-- Roots + // | | + // ST[y2] ST[y3] + // + // There may be multiple DAGRoots, for example: + // + // x[i*2+0] = ... (1) + // x[i*2+1] = ... (1) + // x[i*2+4] = ... (2) + // x[i*2+5] = ... (2) + // x[(i+1234)*2+5678] = ... (3) + // x[(i+1234)*2+5679] = ... (3) + // + // The loop will be rerolled by adding a new loop induction variable, + // one for the Base instruction in each DAGRootSet. + // + struct DAGRootSet { + Instruction *BaseInst; + SmallInstructionVector Roots; + // The instructions between IV and BaseInst (but not including BaseInst). + SmallInstructionSet SubsumedInsts; + }; + + // The set of all DAG roots, and state tracking of all roots + // for a particular induction variable. + struct DAGRootTracker { + DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV, + ScalarEvolution *SE, AliasAnalysis *AA, + TargetLibraryInfo *TLI, + DenseMap &IncrMap) + : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV), + IVToIncMap(IncrMap) {} + + /// Stage 1: Find all the DAG roots for the induction variable. + bool findRoots(); + /// Stage 2: Validate if the found roots are valid. + bool validate(ReductionTracker &Reductions); + /// Stage 3: Assuming validate() returned true, perform the + /// replacement. + /// @param IterCount The maximum iteration count of L. + void replace(const SCEV *IterCount); + + protected: + typedef MapVector UsesTy; + + bool findRootsRecursive(Instruction *IVU, + SmallInstructionSet SubsumedInsts); + bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts); + bool collectPossibleRoots(Instruction *Base, + std::map &Roots); + + bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet); + void collectInLoopUserSet(const SmallInstructionVector &Roots, + const SmallInstructionSet &Exclude, + const SmallInstructionSet &Final, + DenseSet &Users); + void collectInLoopUserSet(Instruction *Root, + const SmallInstructionSet &Exclude, + const SmallInstructionSet &Final, + DenseSet &Users); + + UsesTy::iterator nextInstr(int Val, UsesTy &In, + const SmallInstructionSet &Exclude, + UsesTy::iterator *StartI=nullptr); + bool isBaseInst(Instruction *I); + bool isRootInst(Instruction *I); + bool instrDependsOn(Instruction *I, + UsesTy::iterator Start, + UsesTy::iterator End); + + LoopReroll *Parent; + + // Members of Parent, replicated here for brevity. + Loop *L; + ScalarEvolution *SE; + AliasAnalysis *AA; + TargetLibraryInfo *TLI; + + // The loop induction variable. + Instruction *IV; + // Loop step amount. + int64_t Inc; + // Loop reroll count; if Inc == 1, this records the scaling applied + // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ; + // If Inc is not 1, Scale = Inc. + uint64_t Scale; + // The roots themselves. + SmallVector RootSets; + // All increment instructions for IV. + SmallInstructionVector LoopIncs; + // Map of all instructions in the loop (in order) to the iterations + // they are used in (or specially, IL_All for instructions + // used in the loop increment mechanism). + UsesTy Uses; + // Map between induction variable and its increment + DenseMap &IVToIncMap; + }; + void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs); void collectPossibleReductions(Loop *L, ReductionTracker &Reductions); - void collectInLoopUserSet(Loop *L, - const SmallInstructionVector &Roots, - const SmallInstructionSet &Exclude, - const SmallInstructionSet &Final, - DenseSet &Users); - void collectInLoopUserSet(Loop *L, - Instruction * Root, - const SmallInstructionSet &Exclude, - const SmallInstructionSet &Final, - DenseSet &Users); - bool findScaleFromMul(Instruction *RealIV, uint64_t &Scale, - Instruction *&IV, - SmallInstructionVector &LoopIncs); - bool collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale, Instruction *IV, - SmallVector &Roots, - SmallInstructionSet &AllRoots, - SmallInstructionVector &LoopIncs); bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, ReductionTracker &Reductions); }; @@ -339,11 +433,11 @@ protected: char LoopReroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false) Pass *llvm::createLoopRerollPass() { @@ -354,13 +448,10 @@ Pass *llvm::createLoopRerollPass() { // This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in // non-loop blocks to be outside the loop. static bool hasUsesOutsideLoop(Instruction *I, Loop *L) { - for (Value::use_iterator UI = I->use_begin(), - UIE = I->use_end(); UI != UIE; ++UI) { - Instruction *User = cast(*UI); - if (!L->contains(User)) + for (User *U : I->users()) { + if (!L->contains(cast(U))) return true; } - return false; } @@ -377,21 +468,20 @@ void LoopReroll::collectPossibleIVs(Loop *L, continue; if (const SCEVAddRecExpr *PHISCEV = - dyn_cast(SE->getSCEV(I))) { + dyn_cast(SE->getSCEV(&*I))) { if (PHISCEV->getLoop() != L) continue; if (!PHISCEV->isAffine()) continue; if (const SCEVConstant *IncSCEV = dyn_cast(PHISCEV->getStepRecurrence(*SE))) { - if (!IncSCEV->getValue()->getValue().isStrictlyPositive()) + const APInt &AInt = IncSCEV->getValue()->getValue().abs(); + if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc)) continue; - if (IncSCEV->getValue()->uge(MaxInc)) - continue; - - DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << - *PHISCEV << "\n"); - PossibleIVs.push_back(I); + IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue(); + DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV + << "\n"); + PossibleIVs.push_back(&*I); } } } @@ -407,9 +497,11 @@ void LoopReroll::SimpleLoopReduction::add(Loop *L) { // (including the PHI), except for the last value (which is used by the PHI // and also outside the loop). Instruction *C = Instructions.front(); + if (C->user_empty()) + return; do { - C = cast(*C->use_begin()); + C = cast(*C->user_begin()); if (C->hasOneUse()) { if (!C->isBinaryOp()) return; @@ -424,15 +516,14 @@ void LoopReroll::SimpleLoopReduction::add(Loop *L) { if (Instructions.size() < 2 || !C->isSameOperationAs(Instructions.back()) || - C->use_begin() == C->use_end()) + C->use_empty()) return; // C is now the (potential) last instruction in the reduction chain. - for (Value::use_iterator UI = C->use_begin(), UIE = C->use_end(); - UI != UIE; ++UI) { + for (User *U : C->users()) { // The only in-loop user can be the initial PHI. - if (L->contains(cast(*UI))) - if (cast(*UI ) != Instructions.front()) + if (L->contains(cast(U))) + if (cast(U) != Instructions.front()) return; } @@ -451,7 +542,7 @@ void LoopReroll::collectPossibleReductions(Loop *L, if (!I->getType()->isSingleValueType()) continue; - SimpleLoopReduction SLR(I, L); + SimpleLoopReduction SLR(&*I, L); if (!SLR.valid()) continue; @@ -473,7 +564,7 @@ void LoopReroll::collectPossibleReductions(Loop *L, // if they are users, but their users are not added. This is used, for // example, to prevent a reduction update from forcing all later reduction // updates into the use set. -void LoopReroll::collectInLoopUserSet(Loop *L, +void LoopReroll::DAGRootTracker::collectInLoopUserSet( Instruction *Root, const SmallInstructionSet &Exclude, const SmallInstructionSet &Final, DenseSet &Users) { @@ -484,15 +575,14 @@ void LoopReroll::collectInLoopUserSet(Loop *L, continue; if (!Final.count(I)) - for (Value::use_iterator UI = I->use_begin(), - UIE = I->use_end(); UI != UIE; ++UI) { - Instruction *User = cast(*UI); + for (Use &U : I->uses()) { + Instruction *User = cast(U.getUser()); if (PHINode *PN = dyn_cast(User)) { // Ignore "wrap-around" uses to PHIs of this loop's header. - if (PN->getIncomingBlock(UI) == L->getHeader()) + if (PN->getIncomingBlock(U) == L->getHeader()) continue; } - + if (L->contains(User) && !Exclude.count(User)) { Queue.push_back(User); } @@ -511,14 +601,14 @@ void LoopReroll::collectInLoopUserSet(Loop *L, // Collect all of the users of all of the provided root instructions (combined // into a single set). -void LoopReroll::collectInLoopUserSet(Loop *L, +void LoopReroll::DAGRootTracker::collectInLoopUserSet( const SmallInstructionVector &Roots, const SmallInstructionSet &Exclude, const SmallInstructionSet &Final, DenseSet &Users) { for (SmallInstructionVector::const_iterator I = Roots.begin(), IE = Roots.end(); I != IE; ++I) - collectInLoopUserSet(L, *I, Exclude, Final, Users); + collectInLoopUserSet(*I, Exclude, Final, Users); } static bool isSimpleLoadStore(Instruction *I) { @@ -531,131 +621,709 @@ static bool isSimpleLoadStore(Instruction *I) { return false; } -// Recognize loops that are setup like this: -// -// %iv = phi [ (preheader, ...), (body, %iv.next) ] -// %scaled.iv = mul %iv, scale -// f(%scaled.iv) -// %scaled.iv.1 = add %scaled.iv, 1 -// f(%scaled.iv.1) -// %scaled.iv.2 = add %scaled.iv, 2 -// f(%scaled.iv.2) -// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1 -// f(%scaled.iv.scale_m_1) -// ... -// %iv.next = add %iv, 1 -// %cmp = icmp(%iv, ...) -// br %cmp, header, exit -// -// and, if found, set IV = %scaled.iv, and add %iv.next to LoopIncs. -bool LoopReroll::findScaleFromMul(Instruction *RealIV, uint64_t &Scale, - Instruction *&IV, - SmallInstructionVector &LoopIncs) { - // This is a special case: here we're looking for all uses (except for - // the increment) to be multiplied by a common factor. The increment must - // be by one. This is to capture loops like: - // for (int i = 0; i < 500; ++i) { - // foo(3*i); foo(3*i+1); foo(3*i+2); - // } - if (RealIV->getNumUses() != 2) +/// Return true if IVU is a "simple" arithmetic operation. +/// This is used for narrowing the search space for DAGRoots; only arithmetic +/// and GEPs can be part of a DAGRoot. +static bool isSimpleArithmeticOp(User *IVU) { + if (Instruction *I = dyn_cast(IVU)) { + switch (I->getOpcode()) { + default: return false; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::Shl: + case Instruction::AShr: + case Instruction::LShr: + case Instruction::GetElementPtr: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + return true; + } + } + return false; +} + +static bool isLoopIncrement(User *U, Instruction *IV) { + BinaryOperator *BO = dyn_cast(U); + if (!BO || BO->getOpcode() != Instruction::Add) return false; - const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(RealIV)); - Instruction *User1 = cast(*RealIV->use_begin()), - *User2 = cast(*llvm::next(RealIV->use_begin())); - if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType())) + + for (auto *UU : BO->users()) { + PHINode *PN = dyn_cast(UU); + if (PN && PN == IV) + return true; + } + return false; +} + +bool LoopReroll::DAGRootTracker:: +collectPossibleRoots(Instruction *Base, std::map &Roots) { + SmallInstructionVector BaseUsers; + + for (auto *I : Base->users()) { + ConstantInt *CI = nullptr; + + if (isLoopIncrement(I, IV)) { + LoopIncs.push_back(cast(I)); + continue; + } + + // The root nodes must be either GEPs, ORs or ADDs. + if (auto *BO = dyn_cast(I)) { + if (BO->getOpcode() == Instruction::Add || + BO->getOpcode() == Instruction::Or) + CI = dyn_cast(BO->getOperand(1)); + } else if (auto *GEP = dyn_cast(I)) { + Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1); + CI = dyn_cast(LastOperand); + } + + if (!CI) { + if (Instruction *II = dyn_cast(I)) { + BaseUsers.push_back(II); + continue; + } else { + DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I << "\n"); + return false; + } + } + + int64_t V = std::abs(CI->getValue().getSExtValue()); + if (Roots.find(V) != Roots.end()) + // No duplicates, please. + return false; + + Roots[V] = cast(I); + } + + if (Roots.empty()) return false; - const SCEVAddRecExpr *User1SCEV = - dyn_cast(SE->getSCEV(User1)), - *User2SCEV = - dyn_cast(SE->getSCEV(User2)); - if (!User1SCEV || !User1SCEV->isAffine() || - !User2SCEV || !User2SCEV->isAffine()) + + // If we found non-loop-inc, non-root users of Base, assume they are + // for the zeroth root index. This is because "add %a, 0" gets optimized + // away. + if (BaseUsers.size()) { + if (Roots.find(0) != Roots.end()) { + DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n"); + return false; + } + Roots[0] = Base; + } + + // Calculate the number of users of the base, or lowest indexed, iteration. + unsigned NumBaseUses = BaseUsers.size(); + if (NumBaseUses == 0) + NumBaseUses = Roots.begin()->second->getNumUses(); + + // Check that every node has the same number of users. + for (auto &KV : Roots) { + if (KV.first == 0) + continue; + if (KV.second->getNumUses() != NumBaseUses) { + DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: " + << "#Base=" << NumBaseUses << ", #Root=" << + KV.second->getNumUses() << "\n"); + return false; + } + } + + return true; +} + +bool LoopReroll::DAGRootTracker:: +findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) { + // Does the user look like it could be part of a root set? + // All its users must be simple arithmetic ops. + if (I->getNumUses() > IL_MaxRerollIterations) return false; - // We assume below that User1 is the scale multiply and User2 is the - // increment. If this can't be true, then swap them. - if (User1SCEV == RealIVSCEV->getPostIncExpr(*SE)) { - std::swap(User1, User2); - std::swap(User1SCEV, User2SCEV); + if ((I->getOpcode() == Instruction::Mul || + I->getOpcode() == Instruction::PHI) && + I != IV && + findRootsBase(I, SubsumedInsts)) + return true; + + SubsumedInsts.insert(I); + + for (User *V : I->users()) { + Instruction *I = dyn_cast(V); + if (std::find(LoopIncs.begin(), LoopIncs.end(), I) != LoopIncs.end()) + continue; + + if (!I || !isSimpleArithmeticOp(I) || + !findRootsRecursive(I, SubsumedInsts)) + return false; } + return true; +} + +bool LoopReroll::DAGRootTracker:: +findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) { - if (User2SCEV != RealIVSCEV->getPostIncExpr(*SE)) + // The base instruction needs to be a multiply so + // that we can erase it. + if (IVU->getOpcode() != Instruction::Mul && + IVU->getOpcode() != Instruction::PHI) return false; - assert(User2SCEV->getStepRecurrence(*SE)->isOne() && - "Invalid non-unit step for multiplicative scaling"); - LoopIncs.push_back(User2); - - if (const SCEVConstant *MulScale = - dyn_cast(User1SCEV->getStepRecurrence(*SE))) { - // Make sure that both the start and step have the same multiplier. - if (RealIVSCEV->getStart()->getType() != MulScale->getType()) + + std::map V; + if (!collectPossibleRoots(IVU, V)) + return false; + + // If we didn't get a root for index zero, then IVU must be + // subsumed. + if (V.find(0) == V.end()) + SubsumedInsts.insert(IVU); + + // Partition the vector into monotonically increasing indexes. + DAGRootSet DRS; + DRS.BaseInst = nullptr; + + for (auto &KV : V) { + if (!DRS.BaseInst) { + DRS.BaseInst = KV.second; + DRS.SubsumedInsts = SubsumedInsts; + } else if (DRS.Roots.empty()) { + DRS.Roots.push_back(KV.second); + } else if (V.find(KV.first - 1) != V.end()) { + DRS.Roots.push_back(KV.second); + } else { + // Linear sequence terminated. + RootSets.push_back(DRS); + DRS.BaseInst = KV.second; + DRS.SubsumedInsts = SubsumedInsts; + DRS.Roots.clear(); + } + } + RootSets.push_back(DRS); + + return true; +} + +bool LoopReroll::DAGRootTracker::findRoots() { + Inc = IVToIncMap[IV]; + + assert(RootSets.empty() && "Unclean state!"); + if (std::abs(Inc) == 1) { + for (auto *IVU : IV->users()) { + if (isLoopIncrement(IVU, IV)) + LoopIncs.push_back(cast(IVU)); + } + if (!findRootsRecursive(IV, SmallInstructionSet())) return false; - if (SE->getMulExpr(RealIVSCEV->getStart(), MulScale) != - User1SCEV->getStart()) + LoopIncs.push_back(IV); + } else { + if (!findRootsBase(IV, SmallInstructionSet())) return false; + } - ConstantInt *MulScaleCI = MulScale->getValue(); - if (!MulScaleCI->uge(2) || MulScaleCI->uge(MaxInc)) + // Ensure all sets have the same size. + if (RootSets.empty()) { + DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n"); + return false; + } + for (auto &V : RootSets) { + if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) { + DEBUG(dbgs() + << "LRR: Aborting because not all root sets have the same size\n"); return false; - Scale = MulScaleCI->getZExtValue(); - IV = User1; - } else + } + } + + // And ensure all loop iterations are consecutive. We rely on std::map + // providing ordered traversal. + for (auto &V : RootSets) { + const auto *ADR = dyn_cast(SE->getSCEV(V.BaseInst)); + if (!ADR) + return false; + + // Consider a DAGRootSet with N-1 roots (so N different values including + // BaseInst). + // Define d = Roots[0] - BaseInst, which should be the same as + // Roots[I] - Roots[I-1] for all I in [1..N). + // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the + // loop iteration J. + // + // Now, For the loop iterations to be consecutive: + // D = d * N + + unsigned N = V.Roots.size() + 1; + const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(V.Roots[0]), ADR); + const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N); + if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV)) { + DEBUG(dbgs() << "LRR: Aborting because iterations are not consecutive\n"); + return false; + } + } + Scale = RootSets[0].Roots.size() + 1; + + if (Scale > IL_MaxRerollIterations) { + DEBUG(dbgs() << "LRR: Aborting - too many iterations found. " + << "#Found=" << Scale << ", #Max=" << IL_MaxRerollIterations + << "\n"); return false; + } + + DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale << "\n"); - DEBUG(dbgs() << "LRR: Found possible scaling " << *User1 << "\n"); return true; } -// Collect all root increments with respect to the provided induction variable -// (normally the PHI, but sometimes a multiply). A root increment is an -// instruction, normally an add, with a positive constant less than Scale. In a -// rerollable loop, each of these increments is the root of an instruction -// graph isomorphic to the others. Also, we collect the final induction -// increment (the increment equal to the Scale), and its users in LoopIncs. -bool LoopReroll::collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale, - Instruction *IV, - SmallVector &Roots, - SmallInstructionSet &AllRoots, - SmallInstructionVector &LoopIncs) { - for (Value::use_iterator UI = IV->use_begin(), - UIE = IV->use_end(); UI != UIE; ++UI) { - Instruction *User = cast(*UI); - if (!SE->isSCEVable(User->getType())) - continue; - if (User->getType() != IV->getType()) - continue; - if (!L->contains(User)) - continue; - if (hasUsesOutsideLoop(User, L)) - continue; +bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) { + // Populate the MapVector with all instructions in the block, in order first, + // so we can iterate over the contents later in perfect order. + for (auto &I : *L->getHeader()) { + Uses[&I].resize(IL_End); + } - if (const SCEVConstant *Diff = dyn_cast(SE->getMinusSCEV( - SE->getSCEV(User), SE->getSCEV(IV)))) { - uint64_t Idx = Diff->getValue()->getValue().getZExtValue(); - if (Idx > 0 && Idx < Scale) { - Roots[Idx-1].push_back(User); - AllRoots.insert(User); - } else if (Idx == Scale && Inc > 1) { - LoopIncs.push_back(User); + SmallInstructionSet Exclude; + for (auto &DRS : RootSets) { + Exclude.insert(DRS.Roots.begin(), DRS.Roots.end()); + Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end()); + Exclude.insert(DRS.BaseInst); + } + Exclude.insert(LoopIncs.begin(), LoopIncs.end()); + + for (auto &DRS : RootSets) { + DenseSet VBase; + collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase); + for (auto *I : VBase) { + Uses[I].set(0); + } + + unsigned Idx = 1; + for (auto *Root : DRS.Roots) { + DenseSet V; + collectInLoopUserSet(Root, Exclude, PossibleRedSet, V); + + // While we're here, check the use sets are the same size. + if (V.size() != VBase.size()) { + DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n"); + return false; } + + for (auto *I : V) { + Uses[I].set(Idx); + } + ++Idx; + } + + // Make sure our subsumed instructions are remembered too. + for (auto *I : DRS.SubsumedInsts) { + Uses[I].set(IL_All); } } - if (Roots[0].empty()) + // Make sure the loop increments are also accounted for. + + Exclude.clear(); + for (auto &DRS : RootSets) { + Exclude.insert(DRS.Roots.begin(), DRS.Roots.end()); + Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end()); + Exclude.insert(DRS.BaseInst); + } + + DenseSet V; + collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V); + for (auto *I : V) { + Uses[I].set(IL_All); + } + + return true; + +} + +/// Get the next instruction in "In" that is a member of set Val. +/// Start searching from StartI, and do not return anything in Exclude. +/// If StartI is not given, start from In.begin(). +LoopReroll::DAGRootTracker::UsesTy::iterator +LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In, + const SmallInstructionSet &Exclude, + UsesTy::iterator *StartI) { + UsesTy::iterator I = StartI ? *StartI : In.begin(); + while (I != In.end() && (I->second.test(Val) == 0 || + Exclude.count(I->first) != 0)) + ++I; + return I; +} + +bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) { + for (auto &DRS : RootSets) { + if (DRS.BaseInst == I) + return true; + } + return false; +} + +bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) { + for (auto &DRS : RootSets) { + if (std::find(DRS.Roots.begin(), DRS.Roots.end(), I) != DRS.Roots.end()) + return true; + } + return false; +} + +/// Return true if instruction I depends on any instruction between +/// Start and End. +bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I, + UsesTy::iterator Start, + UsesTy::iterator End) { + for (auto *U : I->users()) { + for (auto It = Start; It != End; ++It) + if (U == It->first) + return true; + } + return false; +} + +static bool isIgnorableInst(const Instruction *I) { + if (isa(I)) + return true; + const IntrinsicInst* II = dyn_cast(I); + if (!II) return false; - bool AllSame = true; - for (unsigned i = 1; i < Scale-1; ++i) - if (Roots[i].size() != Roots[0].size()) { - AllSame = false; - break; - } + switch (II->getIntrinsicID()) { + default: + return false; + case llvm::Intrinsic::annotation: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // TODO: the following intrinsics may also be whitelisted: + // lifetime_start, lifetime_end, invariant_start, invariant_end + return true; + } + return false; +} + +bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { + // We now need to check for equivalence of the use graph of each root with + // that of the primary induction variable (excluding the roots). Our goal + // here is not to solve the full graph isomorphism problem, but rather to + // catch common cases without a lot of work. As a result, we will assume + // that the relative order of the instructions in each unrolled iteration + // is the same (although we will not make an assumption about how the + // different iterations are intermixed). Note that while the order must be + // the same, the instructions may not be in the same basic block. - if (!AllSame) + // An array of just the possible reductions for this scale factor. When we + // collect the set of all users of some root instructions, these reduction + // instructions are treated as 'final' (their uses are not considered). + // This is important because we don't want the root use set to search down + // the reduction chain. + SmallInstructionSet PossibleRedSet; + SmallInstructionSet PossibleRedLastSet; + SmallInstructionSet PossibleRedPHISet; + Reductions.restrictToScale(Scale, PossibleRedSet, + PossibleRedPHISet, PossibleRedLastSet); + + // Populate "Uses" with where each instruction is used. + if (!collectUsedInstructions(PossibleRedSet)) return false; + // Make sure we mark the reduction PHIs as used in all iterations. + for (auto *I : PossibleRedPHISet) { + Uses[I].set(IL_All); + } + + // Make sure all instructions in the loop are in one and only one + // set. + for (auto &KV : Uses) { + if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) { + DEBUG(dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: " + << *KV.first << " (#uses=" << KV.second.count() << ")\n"); + return false; + } + } + + DEBUG( + for (auto &KV : Uses) { + dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n"; + } + ); + + for (unsigned Iter = 1; Iter < Scale; ++Iter) { + // In addition to regular aliasing information, we need to look for + // instructions from later (future) iterations that have side effects + // preventing us from reordering them past other instructions with side + // effects. + bool FutureSideEffects = false; + AliasSetTracker AST(*AA); + // The map between instructions in f(%iv.(i+1)) and f(%iv). + DenseMap BaseMap; + + // Compare iteration Iter to the base. + SmallInstructionSet Visited; + auto BaseIt = nextInstr(0, Uses, Visited); + auto RootIt = nextInstr(Iter, Uses, Visited); + auto LastRootIt = Uses.begin(); + + while (BaseIt != Uses.end() && RootIt != Uses.end()) { + Instruction *BaseInst = BaseIt->first; + Instruction *RootInst = RootIt->first; + + // Skip over the IV or root instructions; only match their users. + bool Continue = false; + if (isBaseInst(BaseInst)) { + Visited.insert(BaseInst); + BaseIt = nextInstr(0, Uses, Visited); + Continue = true; + } + if (isRootInst(RootInst)) { + LastRootIt = RootIt; + Visited.insert(RootInst); + RootIt = nextInstr(Iter, Uses, Visited); + Continue = true; + } + if (Continue) continue; + + if (!BaseInst->isSameOperationAs(RootInst)) { + // Last chance saloon. We don't try and solve the full isomorphism + // problem, but try and at least catch the case where two instructions + // *of different types* are round the wrong way. We won't be able to + // efficiently tell, given two ADD instructions, which way around we + // should match them, but given an ADD and a SUB, we can at least infer + // which one is which. + // + // This should allow us to deal with a greater subset of the isomorphism + // problem. It does however change a linear algorithm into a quadratic + // one, so limit the number of probes we do. + auto TryIt = RootIt; + unsigned N = NumToleratedFailedMatches; + while (TryIt != Uses.end() && + !BaseInst->isSameOperationAs(TryIt->first) && + N--) { + ++TryIt; + TryIt = nextInstr(Iter, Uses, Visited, &TryIt); + } + + if (TryIt == Uses.end() || TryIt == RootIt || + instrDependsOn(TryIt->first, RootIt, TryIt)) { + DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst << + " vs. " << *RootInst << "\n"); + return false; + } + + RootIt = TryIt; + RootInst = TryIt->first; + } + + // All instructions between the last root and this root + // may belong to some other iteration. If they belong to a + // future iteration, then they're dangerous to alias with. + // + // Note that because we allow a limited amount of flexibility in the order + // that we visit nodes, LastRootIt might be *before* RootIt, in which + // case we've already checked this set of instructions so we shouldn't + // do anything. + for (; LastRootIt < RootIt; ++LastRootIt) { + Instruction *I = LastRootIt->first; + if (LastRootIt->second.find_first() < (int)Iter) + continue; + if (I->mayWriteToMemory()) + AST.add(I); + // Note: This is specifically guarded by a check on isa, + // which while a valid (somewhat arbitrary) micro-optimization, is + // needed because otherwise isSafeToSpeculativelyExecute returns + // false on PHI nodes. + if (!isa(I) && !isSimpleLoadStore(I) && + !isSafeToSpeculativelyExecute(I)) + // Intervening instructions cause side effects. + FutureSideEffects = true; + } + + // Make sure that this instruction, which is in the use set of this + // root instruction, does not also belong to the base set or the set of + // some other root instruction. + if (RootIt->second.count() > 1) { + DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst << + " vs. " << *RootInst << " (prev. case overlap)\n"); + return false; + } + + // Make sure that we don't alias with any instruction in the alias set + // tracker. If we do, then we depend on a future iteration, and we + // can't reroll. + if (RootInst->mayReadFromMemory()) + for (auto &K : AST) { + if (K.aliasesUnknownInst(RootInst, *AA)) { + DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst << + " vs. " << *RootInst << " (depends on future store)\n"); + return false; + } + } + + // If we've past an instruction from a future iteration that may have + // side effects, and this instruction might also, then we can't reorder + // them, and this matching fails. As an exception, we allow the alias + // set tracker to handle regular (simple) load/store dependencies. + if (FutureSideEffects && ((!isSimpleLoadStore(BaseInst) && + !isSafeToSpeculativelyExecute(BaseInst)) || + (!isSimpleLoadStore(RootInst) && + !isSafeToSpeculativelyExecute(RootInst)))) { + DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst << + " vs. " << *RootInst << + " (side effects prevent reordering)\n"); + return false; + } + + // For instructions that are part of a reduction, if the operation is + // associative, then don't bother matching the operands (because we + // already know that the instructions are isomorphic, and the order + // within the iteration does not matter). For non-associative reductions, + // we do need to match the operands, because we need to reject + // out-of-order instructions within an iteration! + // For example (assume floating-point addition), we need to reject this: + // x += a[i]; x += b[i]; + // x += a[i+1]; x += b[i+1]; + // x += b[i+2]; x += a[i+2]; + bool InReduction = Reductions.isPairInSame(BaseInst, RootInst); + + if (!(InReduction && BaseInst->isAssociative())) { + bool Swapped = false, SomeOpMatched = false; + for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) { + Value *Op2 = RootInst->getOperand(j); + + // If this is part of a reduction (and the operation is not + // associatve), then we match all operands, but not those that are + // part of the reduction. + if (InReduction) + if (Instruction *Op2I = dyn_cast(Op2)) + if (Reductions.isPairInSame(RootInst, Op2I)) + continue; + + DenseMap::iterator BMI = BaseMap.find(Op2); + if (BMI != BaseMap.end()) { + Op2 = BMI->second; + } else { + for (auto &DRS : RootSets) { + if (DRS.Roots[Iter-1] == (Instruction*) Op2) { + Op2 = DRS.BaseInst; + break; + } + } + } + + if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) { + // If we've not already decided to swap the matched operands, and + // we've not already matched our first operand (note that we could + // have skipped matching the first operand because it is part of a + // reduction above), and the instruction is commutative, then try + // the swapped match. + if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched && + BaseInst->getOperand(!j) == Op2) { + Swapped = true; + } else { + DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst + << " vs. " << *RootInst << " (operand " << j << ")\n"); + return false; + } + } + + SomeOpMatched = true; + } + } + + if ((!PossibleRedLastSet.count(BaseInst) && + hasUsesOutsideLoop(BaseInst, L)) || + (!PossibleRedLastSet.count(RootInst) && + hasUsesOutsideLoop(RootInst, L))) { + DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst << + " vs. " << *RootInst << " (uses outside loop)\n"); + return false; + } + + Reductions.recordPair(BaseInst, RootInst, Iter); + BaseMap.insert(std::make_pair(RootInst, BaseInst)); + + LastRootIt = RootIt; + Visited.insert(BaseInst); + Visited.insert(RootInst); + BaseIt = nextInstr(0, Uses, Visited); + RootIt = nextInstr(Iter, Uses, Visited); + } + assert (BaseIt == Uses.end() && RootIt == Uses.end() && + "Mismatched set sizes!"); + } + + DEBUG(dbgs() << "LRR: Matched all iteration increments for " << + *IV << "\n"); + return true; } +void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { + BasicBlock *Header = L->getHeader(); + // Remove instructions associated with non-base iterations. + for (BasicBlock::reverse_iterator J = Header->rbegin(); + J != Header->rend();) { + unsigned I = Uses[&*J].find_first(); + if (I > 0 && I < IL_All) { + Instruction *D = &*J; + DEBUG(dbgs() << "LRR: removing: " << *D << "\n"); + D->eraseFromParent(); + continue; + } + + ++J; + } + bool Negative = IVToIncMap[IV] < 0; + const DataLayout &DL = Header->getModule()->getDataLayout(); + + // We need to create a new induction variable for each different BaseInst. + for (auto &DRS : RootSets) { + // Insert the new induction variable. + const SCEVAddRecExpr *RealIVSCEV = + cast(SE->getSCEV(DRS.BaseInst)); + const SCEV *Start = RealIVSCEV->getStart(); + const SCEVAddRecExpr *H = cast(SE->getAddRecExpr( + Start, SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1), L, + SCEV::FlagAnyWrap)); + { // Limit the lifetime of SCEVExpander. + SCEVExpander Expander(*SE, DL, "reroll"); + Value *NewIV = Expander.expandCodeFor(H, IV->getType(), &Header->front()); + + for (auto &KV : Uses) { + if (KV.second.find_first() == 0) + KV.first->replaceUsesOfWith(DRS.BaseInst, NewIV); + } + + if (BranchInst *BI = dyn_cast(Header->getTerminator())) { + // FIXME: Why do we need this check? + if (Uses[BI].find_first() == IL_All) { + const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE); + + // Iteration count SCEV minus 1 + const SCEV *ICMinus1SCEV = SE->getMinusSCEV( + ICSCEV, SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1)); + + Value *ICMinus1; // Iteration count minus 1 + if (isa(ICMinus1SCEV)) { + ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI); + } else { + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) + Preheader = InsertPreheaderForLoop(L, Parent); + + ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), + Preheader->getTerminator()); + } + + Value *Cond = + new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond"); + BI->setCondition(Cond); + + if (BI->getSuccessor(1) != Header) + BI->swapSuccessors(); + } + } + } + } + + SimplifyInstructionsInBlock(Header, TLI); + DeleteDeadPHIs(Header, TLI); +} + // Validate the selected reductions. All iterations must have an isomorphic // part of the reduction chain and, for non-associative reductions, the chain // entries must appear in order. @@ -665,16 +1333,15 @@ bool LoopReroll::ReductionTracker::validateSelected() { RI != RIE; ++RI) { int i = *RI; int PrevIter = 0, BaseCount = 0, Count = 0; - for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(), - JE = PossibleReds[i].end(); J != JE; ++J) { - // Note that all instructions in the chain must have been found because - // all instructions in the function must have been assigned to some - // iteration. - int Iter = PossibleRedIter[*J]; + for (Instruction *J : PossibleReds[i]) { + // Note that all instructions in the chain must have been found because + // all instructions in the function must have been assigned to some + // iteration. + int Iter = PossibleRedIter[J]; if (Iter != PrevIter && Iter != PrevIter + 1 && !PossibleReds[i].getReducedValue()->isAssociative()) { DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " << - *J << "\n"); + J << "\n"); return false; } @@ -720,10 +1387,9 @@ void LoopReroll::ReductionTracker::replaceSelected() { // Replace users with the new end-of-chain value. SmallInstructionVector Users; - for (Value::use_iterator UI = - PossibleReds[i].getReducedValue()->use_begin(), - UIE = PossibleReds[i].getReducedValue()->use_end(); UI != UIE; ++UI) - Users.push_back(cast(*UI)); + for (User *U : PossibleReds[i].getReducedValue()->users()) { + Users.push_back(cast(U)); + } for (SmallInstructionVector::iterator J = Users.begin(), JE = Users.end(); J != JE; ++J) @@ -778,367 +1444,35 @@ void LoopReroll::ReductionTracker::replaceSelected() { bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, ReductionTracker &Reductions) { - const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(IV)); - uint64_t Inc = cast(RealIVSCEV->getOperand(1))-> - getValue()->getZExtValue(); - // The collection of loop increment instructions. - SmallInstructionVector LoopIncs; - uint64_t Scale = Inc; - - // The effective induction variable, IV, is normally also the real induction - // variable. When we're dealing with a loop like: - // for (int i = 0; i < 500; ++i) - // x[3*i] = ...; - // x[3*i+1] = ...; - // x[3*i+2] = ...; - // then the real IV is still i, but the effective IV is (3*i). - Instruction *RealIV = IV; - if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs)) - return false; - - assert(Scale <= MaxInc && "Scale is too large"); - assert(Scale > 1 && "Scale must be at least 2"); + DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, IVToIncMap); - // The set of increment instructions for each increment value. - SmallVector Roots(Scale-1); - SmallInstructionSet AllRoots; - if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs)) + if (!DAGRoots.findRoots()) return false; - DEBUG(dbgs() << "LRR: Found all root induction increments for: " << - *RealIV << "\n"); - - // An array of just the possible reductions for this scale factor. When we - // collect the set of all users of some root instructions, these reduction - // instructions are treated as 'final' (their uses are not considered). - // This is important because we don't want the root use set to search down - // the reduction chain. - SmallInstructionSet PossibleRedSet; - SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet; - Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet, - PossibleRedLastSet); - - // We now need to check for equivalence of the use graph of each root with - // that of the primary induction variable (excluding the roots). Our goal - // here is not to solve the full graph isomorphism problem, but rather to - // catch common cases without a lot of work. As a result, we will assume - // that the relative order of the instructions in each unrolled iteration - // is the same (although we will not make an assumption about how the - // different iterations are intermixed). Note that while the order must be - // the same, the instructions may not be in the same basic block. - SmallInstructionSet Exclude(AllRoots); - Exclude.insert(LoopIncs.begin(), LoopIncs.end()); - - DenseSet BaseUseSet; - collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet); - - DenseSet AllRootUses; - std::vector > RootUseSets(Scale-1); - - bool MatchFailed = false; - for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) { - DenseSet &RootUseSet = RootUseSets[i]; - collectInLoopUserSet(L, Roots[i], SmallInstructionSet(), - PossibleRedSet, RootUseSet); - - DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() << - " vs. iteration increment " << (i+1) << - " use set size: " << RootUseSet.size() << "\n"); - - if (BaseUseSet.size() != RootUseSet.size()) { - MatchFailed = true; - break; - } - - // In addition to regular aliasing information, we need to look for - // instructions from later (future) iterations that have side effects - // preventing us from reordering them past other instructions with side - // effects. - bool FutureSideEffects = false; - AliasSetTracker AST(*AA); - - // The map between instructions in f(%iv.(i+1)) and f(%iv). - DenseMap BaseMap; - - assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops"); - for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(), - JE = Header->end(); J1 != JE && !MatchFailed; ++J1) { - if (cast(J1) == RealIV) - continue; - if (cast(J1) == IV) - continue; - if (!BaseUseSet.count(J1)) - continue; - if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs. - continue; - - while (J2 != JE && (!RootUseSet.count(J2) || - std::find(Roots[i].begin(), Roots[i].end(), J2) != - Roots[i].end())) { - // As we iterate through the instructions, instructions that don't - // belong to previous iterations (or the base case), must belong to - // future iterations. We want to track the alias set of writes from - // previous iterations. - if (!isa(J2) && !BaseUseSet.count(J2) && - !AllRootUses.count(J2)) { - if (J2->mayWriteToMemory()) - AST.add(J2); - - // Note: This is specifically guarded by a check on isa, - // which while a valid (somewhat arbitrary) micro-optimization, is - // needed because otherwise isSafeToSpeculativelyExecute returns - // false on PHI nodes. - if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL)) - FutureSideEffects = true; - } - - ++J2; - } - - if (!J1->isSameOperationAs(J2)) { - DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << - " vs. " << *J2 << "\n"); - MatchFailed = true; - break; - } - - // Make sure that this instruction, which is in the use set of this - // root instruction, does not also belong to the base set or the set of - // some previous root instruction. - if (BaseUseSet.count(J2) || AllRootUses.count(J2)) { - DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << - " vs. " << *J2 << " (prev. case overlap)\n"); - MatchFailed = true; - break; - } - - // Make sure that we don't alias with any instruction in the alias set - // tracker. If we do, then we depend on a future iteration, and we - // can't reroll. - if (J2->mayReadFromMemory()) { - for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end(); - K != KE && !MatchFailed; ++K) { - if (K->aliasesUnknownInst(J2, *AA)) { - DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << - " vs. " << *J2 << " (depends on future store)\n"); - MatchFailed = true; - break; - } - } - } + *IV << "\n"); - // If we've past an instruction from a future iteration that may have - // side effects, and this instruction might also, then we can't reorder - // them, and this matching fails. As an exception, we allow the alias - // set tracker to handle regular (simple) load/store dependencies. - if (FutureSideEffects && - ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) || - (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) { - DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << - " vs. " << *J2 << - " (side effects prevent reordering)\n"); - MatchFailed = true; - break; - } - - // For instructions that are part of a reduction, if the operation is - // associative, then don't bother matching the operands (because we - // already know that the instructions are isomorphic, and the order - // within the iteration does not matter). For non-associative reductions, - // we do need to match the operands, because we need to reject - // out-of-order instructions within an iteration! - // For example (assume floating-point addition), we need to reject this: - // x += a[i]; x += b[i]; - // x += a[i+1]; x += b[i+1]; - // x += b[i+2]; x += a[i+2]; - bool InReduction = Reductions.isPairInSame(J1, J2); - - if (!(InReduction && J1->isAssociative())) { - bool Swapped = false, SomeOpMatched = false;; - for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) { - Value *Op2 = J2->getOperand(j); - - // If this is part of a reduction (and the operation is not - // associatve), then we match all operands, but not those that are - // part of the reduction. - if (InReduction) - if (Instruction *Op2I = dyn_cast(Op2)) - if (Reductions.isPairInSame(J2, Op2I)) - continue; - - DenseMap::iterator BMI = BaseMap.find(Op2); - if (BMI != BaseMap.end()) - Op2 = BMI->second; - else if (std::find(Roots[i].begin(), Roots[i].end(), - (Instruction*) Op2) != Roots[i].end()) - Op2 = IV; - - if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) { - // If we've not already decided to swap the matched operands, and - // we've not already matched our first operand (note that we could - // have skipped matching the first operand because it is part of a - // reduction above), and the instruction is commutative, then try - // the swapped match. - if (!Swapped && J1->isCommutative() && !SomeOpMatched && - J1->getOperand(!j) == Op2) { - Swapped = true; - } else { - DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << - " vs. " << *J2 << " (operand " << j << ")\n"); - MatchFailed = true; - break; - } - } - - SomeOpMatched = true; - } - } - - if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) || - (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) { - DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << - " vs. " << *J2 << " (uses outside loop)\n"); - MatchFailed = true; - break; - } - - if (!MatchFailed) - BaseMap.insert(std::pair(J2, J1)); - - AllRootUses.insert(J2); - Reductions.recordPair(J1, J2, i+1); - - ++J2; - } - } - - if (MatchFailed) - return false; - - DEBUG(dbgs() << "LRR: Matched all iteration increments for " << - *RealIV << "\n"); - - DenseSet LoopIncUseSet; - collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(), - SmallInstructionSet(), LoopIncUseSet); - DEBUG(dbgs() << "LRR: Loop increment set size: " << - LoopIncUseSet.size() << "\n"); - - // Make sure that all instructions in the loop have been included in some - // use set. - for (BasicBlock::iterator J = Header->begin(), JE = Header->end(); - J != JE; ++J) { - if (isa(J)) - continue; - if (cast(J) == RealIV) - continue; - if (cast(J) == IV) - continue; - if (BaseUseSet.count(J) || AllRootUses.count(J) || - (LoopIncUseSet.count(J) && (J->isTerminator() || - isSafeToSpeculativelyExecute(J, DL)))) - continue; - - if (AllRoots.count(J)) - continue; - - if (Reductions.isSelectedPHI(J)) - continue; - - DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV << - " unprocessed instruction found: " << *J << "\n"); - MatchFailed = true; - break; - } - - if (MatchFailed) + if (!DAGRoots.validate(Reductions)) return false; - - DEBUG(dbgs() << "LRR: all instructions processed from " << - *RealIV << "\n"); - if (!Reductions.validateSelected()) return false; - // At this point, we've validated the rerolling, and we're committed to // making changes! Reductions.replaceSelected(); + DAGRoots.replace(IterCount); - // Remove instructions associated with non-base iterations. - for (BasicBlock::reverse_iterator J = Header->rbegin(); - J != Header->rend();) { - if (AllRootUses.count(&*J)) { - Instruction *D = &*J; - DEBUG(dbgs() << "LRR: removing: " << *D << "\n"); - D->eraseFromParent(); - continue; - } - - ++J; - } - - // Insert the new induction variable. - const SCEV *Start = RealIVSCEV->getStart(); - if (Inc == 1) - Start = SE->getMulExpr(Start, - SE->getConstant(Start->getType(), Scale)); - const SCEVAddRecExpr *H = - cast(SE->getAddRecExpr(Start, - SE->getConstant(RealIVSCEV->getType(), 1), - L, SCEV::FlagAnyWrap)); - { // Limit the lifetime of SCEVExpander. - SCEVExpander Expander(*SE, "reroll"); - Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin()); - - for (DenseSet::iterator J = BaseUseSet.begin(), - JE = BaseUseSet.end(); J != JE; ++J) - (*J)->replaceUsesOfWith(IV, NewIV); - - if (BranchInst *BI = dyn_cast(Header->getTerminator())) { - if (LoopIncUseSet.count(BI)) { - const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE); - if (Inc == 1) - ICSCEV = - SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale)); - // Iteration count SCEV minus 1 - const SCEV *ICMinus1SCEV = - SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1)); - - Value *ICMinus1; // Iteration count minus 1 - if (isa(ICMinus1SCEV)) { - ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI); - } else { - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) - Preheader = InsertPreheaderForLoop(L, this); - - ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), - Preheader->getTerminator()); - } - - Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, - "exitcond"); - BI->setCondition(Cond); - - if (BI->getSuccessor(1) != Header) - BI->swapSuccessors(); - } - } - } - - SimplifyInstructionsInBlock(Header, DL, TLI); - DeleteDeadPHIs(Header, TLI); ++NumRerolledLoops; return true; } bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { - AA = &getAnalysis(); - LI = &getAnalysis(); - SE = &getAnalysis(); - TLI = &getAnalysis(); - DL = getAnalysisIfAvailable(); + if (skipOptnoneFunction(L)) + return false; + + AA = &getAnalysis().getAAResults(); + LI = &getAnalysis().getLoopInfo(); + SE = &getAnalysis().getSE(); + TLI = &getAnalysis().getTLI(); DT = &getAnalysis().getDomTree(); BasicBlock *Header = L->getHeader(); @@ -1156,13 +1490,13 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { return Changed; const SCEV *LIBETC = SE->getBackedgeTakenCount(L); - const SCEV *IterCount = - SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1)); + const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType())); DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n"); // First, we need to find the induction variable with respect to which we can // reroll (there may be several possible options). SmallInstructionVector PossibleIVs; + IVToIncMap.clear(); collectPossibleIVs(L, PossibleIVs); if (PossibleIVs.empty()) { @@ -1184,4 +1518,3 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { return Changed; } -