lib/Transforms/Scalar/LoopRerollPass.cpp

   1 //===-- LoopReroll.cpp - Loop rerolling pass ------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This pass implements a simple loop reroller.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #define DEBUG_TYPE "loop-reroll"
  15 #include "llvm/Transforms/Scalar.h"
  16 #include "llvm/ADT/SmallSet.h"
  17 #include "llvm/ADT/Statistic.h"
  18 #include "llvm/ADT/STLExtras.h"
  19 #include "llvm/Analysis/AliasAnalysis.h"
  20 #include "llvm/Analysis/AliasSetTracker.h"
  21 #include "llvm/Analysis/LoopPass.h"
  22 #include "llvm/Analysis/ScalarEvolution.h"
  23 #include "llvm/Analysis/ScalarEvolutionExpander.h"
  24 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  25 #include "llvm/Analysis/ValueTracking.h"
  26 #include "llvm/IR/DataLayout.h"
  27 #include "llvm/IR/IntrinsicInst.h"
  28 #include "llvm/Support/CommandLine.h"
  29 #include "llvm/Support/Debug.h"
  30 #include "llvm/Support/raw_ostream.h"
  31 #include "llvm/Target/TargetLibraryInfo.h"
  32 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  33 #include "llvm/Transforms/Utils/Local.h"
  34 #include "llvm/Transforms/Utils/LoopUtils.h"
  35
  36 using namespace llvm;
  37
  38 STATISTIC(NumRerolledLoops, "Number of rerolled loops");
  39
  40 static cl::opt<unsigned>
  41 MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
  42   cl::desc("The maximum increment for loop rerolling"));
  43
  44 // This loop re-rolling transformation aims to transform loops like this:
  45 //
  46 // int foo(int a);
  47 // void bar(int *x) {
  48 //   for (int i = 0; i < 500; i += 3) {
  49 //     foo(i);
  50 //     foo(i+1);
  51 //     foo(i+2);
  52 //   }
  53 // }
  54 //
  55 // into a loop like this:
  56 //
  57 // void bar(int *x) {
  58 //   for (int i = 0; i < 500; ++i)
  59 //     foo(i);
  60 // }
  61 //
  62 // It does this by looking for loops that, besides the latch code, are composed
  63 // of isomorphic DAGs of instructions, with each DAG rooted at some increment
  64 // to the induction variable, and where each DAG is isomorphic to the DAG
  65 // rooted at the induction variable (excepting the sub-DAGs which root the
  66 // other induction-variable increments). In other words, we're looking for loop
  67 // bodies of the form:
  68 //
  69 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
  70 // f(%iv)
  71 // %iv.1 = add %iv, 1                <-- a root increment
  72 // f(%iv.1)
  73 // %iv.2 = add %iv, 2                <-- a root increment
  74 // f(%iv.2)
  75 // %iv.scale_m_1 = add %iv, scale-1  <-- a root increment
  76 // f(%iv.scale_m_1)
  77 // ...
  78 // %iv.next = add %iv, scale
  79 // %cmp = icmp(%iv, ...)
  80 // br %cmp, header, exit
  81 //
  82 // where each f(i) is a set of instructions that, collectively, are a function
  83 // only of i (and other loop-invariant values).
  84 //
  85 // As a special case, we can also reroll loops like this:
  86 //
  87 // int foo(int);
  88 // void bar(int *x) {
  89 //   for (int i = 0; i < 500; ++i) {
  90 //     x[3*i] = foo(0);
  91 //     x[3*i+1] = foo(0);
  92 //     x[3*i+2] = foo(0);
  93 //   }
  94 // }
  95 //
  96 // into this:
  97 //
  98 // void bar(int *x) {
  99 //   for (int i = 0; i < 1500; ++i)
 100 //     x[i] = foo(0);
 101 // }
 102 //
 103 // in which case, we're looking for inputs like this:
 104 //
 105 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
 106 // %scaled.iv = mul %iv, scale
 107 // f(%scaled.iv)
 108 // %scaled.iv.1 = add %scaled.iv, 1
 109 // f(%scaled.iv.1)
 110 // %scaled.iv.2 = add %scaled.iv, 2
 111 // f(%scaled.iv.2)
 112 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
 113 // f(%scaled.iv.scale_m_1)
 114 // ...
 115 // %iv.next = add %iv, 1
 116 // %cmp = icmp(%iv, ...)
 117 // br %cmp, header, exit
 118
 119 namespace {
 120   class LoopReroll : public LoopPass {
 121   public:
 122     static char ID; // Pass ID, replacement for typeid
 123     LoopReroll() : LoopPass(ID) {
 124       initializeLoopRerollPass(*PassRegistry::getPassRegistry());
 125     }
 126
 127     bool runOnLoop(Loop *L, LPPassManager &LPM);
 128
 129     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
 130       AU.addRequired<AliasAnalysis>();
 131       AU.addRequired<LoopInfo>();
 132       AU.addPreserved<LoopInfo>();
 133       AU.addRequired<DominatorTree>();
 134       AU.addPreserved<DominatorTree>();
 135       AU.addRequired<ScalarEvolution>();
 136       AU.addRequired<TargetLibraryInfo>();
 137     }
 138
 139 protected:
 140     AliasAnalysis *AA;
 141     LoopInfo *LI;
 142     ScalarEvolution *SE;
 143     DataLayout *DL;
 144     TargetLibraryInfo *TLI;
 145     DominatorTree *DT;
 146
 147     typedef SmallVector<Instruction *, 16> SmallInstructionVector;
 148     typedef SmallSet<Instruction *, 16>   SmallInstructionSet;
 149
 150     // A chain of isomorphic instructions, indentified by a single-use PHI,
 151     // representing a reduction. Only the last value may be used outside the
 152     // loop.
 153     struct SimpleLoopReduction {
 154       SimpleLoopReduction(Instruction *P, Loop *L)
 155         : Valid(false), Instructions(1, P) {
 156         assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
 157         add(L);
 158       }
 159
 160       bool valid() const {
 161         return Valid;
 162       }
 163
 164       Instruction *getPHI() const {
 165         assert(Valid && "Using invalid reduction");
 166         return Instructions.front();
 167       }
 168
 169       Instruction *getReducedValue() const {
 170         assert(Valid && "Using invalid reduction");
 171         return Instructions.back();
 172       }
 173
 174       Instruction *get(size_t i) const {
 175         assert(Valid && "Using invalid reduction");
 176         return Instructions[i+1];
 177       }
 178
 179       Instruction *operator [] (size_t i) const { return get(i); }
 180
 181       // The size, ignoring the initial PHI.
 182       size_t size() const {
 183         assert(Valid && "Using invalid reduction");
 184         return Instructions.size()-1;
 185       }
 186
 187       typedef SmallInstructionVector::iterator iterator;
 188       typedef SmallInstructionVector::const_iterator const_iterator;
 189
 190       iterator begin() {
 191         assert(Valid && "Using invalid reduction");
 192         return llvm::next(Instructions.begin());
 193       }
 194
 195       const_iterator begin() const {
 196         assert(Valid && "Using invalid reduction");
 197         return llvm::next(Instructions.begin());
 198       }
 199
 200       iterator end() { return Instructions.end(); }
 201       const_iterator end() const { return Instructions.end(); }
 202
 203     protected:
 204       bool Valid;
 205       SmallInstructionVector Instructions;
 206
 207       void add(Loop *L);
 208     };
 209
 210     // The set of all reductions, and state tracking of possible reductions
 211     // during loop instruction processing.
 212     struct ReductionTracker {
 213       typedef SmallVector<SimpleLoopReduction, 16> SmallReductionVector;
 214
 215       // Add a new possible reduction.
 216       void addSLR(SimpleLoopReduction &SLR) {
 217         PossibleReds.push_back(SLR);
 218       }
 219
 220       // Setup to track possible reductions corresponding to the provided
 221       // rerolling scale. Only reductions with a number of non-PHI instructions
 222       // that is divisible by the scale are considered. Three instructions sets
 223       // are filled in:
 224       //   - A set of all possible instructions in eligible reductions.
 225       //   - A set of all PHIs in eligible reductions
 226       //   - A set of all reduced values (last instructions) in eligible reductions.
 227       void restrictToScale(uint64_t Scale,
 228                            SmallInstructionSet &PossibleRedSet,
 229                            SmallInstructionSet &PossibleRedPHISet,
 230                            SmallInstructionSet &PossibleRedLastSet) {
 231         PossibleRedIdx.clear();
 232         PossibleRedIter.clear();
 233         Reds.clear();
 234
 235         for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
 236           if (PossibleReds[i].size() % Scale == 0) {
 237             PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
 238             PossibleRedPHISet.insert(PossibleReds[i].getPHI());
 239
 240             PossibleRedSet.insert(PossibleReds[i].getPHI());
 241             PossibleRedIdx[PossibleReds[i].getPHI()] = i;
 242             for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(),
 243                  JE = PossibleReds[i].end(); J != JE; ++J) {
 244               PossibleRedSet.insert(*J);
 245               PossibleRedIdx[*J] = i;
 246             }
 247           }
 248       }
 249
 250       // The functions below are used while processing the loop instructions.
 251
 252       // Are the two instructions both from reductions, and furthermore, from
 253       // the same reduction?
 254       bool isPairInSame(Instruction *J1, Instruction *J2) {
 255         DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
 256         if (J1I != PossibleRedIdx.end()) {
 257           DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
 258           if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
 259             return true;
 260         }
 261
 262         return false;
 263       }
 264
 265       // The two provided instructions, the first from the base iteration, and
 266       // the second from iteration i, form a matched pair. If these are part of
 267       // a reduction, record that fact.
 268       void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
 269         if (PossibleRedIdx.count(J1)) {
 270           assert(PossibleRedIdx.count(J2) &&
 271                  "Recording reduction vs. non-reduction instruction?");
 272
 273           PossibleRedIter[J1] = 0;
 274           PossibleRedIter[J2] = i;
 275
 276           int Idx = PossibleRedIdx[J1];
 277           assert(Idx == PossibleRedIdx[J2] &&
 278                  "Recording pair from different reductions?");
 279           Reds.insert(Idx);
 280         }
 281       }
 282
 283       // The functions below can be called after we've finished processing all
 284       // instructions in the loop, and we know which reductions were selected.
 285
 286       // Is the provided instruction the PHI of a reduction selected for
 287       // rerolling?
 288       bool isSelectedPHI(Instruction *J) {
 289         if (!isa<PHINode>(J))
 290           return false;
 291
 292         for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
 293              RI != RIE; ++RI) {
 294           int i = *RI;
 295           if (cast<Instruction>(J) == PossibleReds[i].getPHI())
 296             return true;
 297         }
 298
 299         return false;
 300       }
 301
 302       bool validateSelected();
 303       void replaceSelected();
 304
 305     protected:
 306       // The vector of all possible reductions (for any scale).
 307       SmallReductionVector PossibleReds;
 308
 309       DenseMap<Instruction *, int> PossibleRedIdx;
 310       DenseMap<Instruction *, int> PossibleRedIter;
 311       DenseSet<int> Reds;
 312     };
 313
 314     void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
 315     void collectPossibleReductions(Loop *L,
 316            ReductionTracker &Reductions);
 317     void collectInLoopUserSet(Loop *L,
 318            const SmallInstructionVector &Roots,
 319            const SmallInstructionSet &Exclude,
 320            const SmallInstructionSet &Final,
 321            DenseSet<Instruction *> &Users);
 322     void collectInLoopUserSet(Loop *L,
 323            Instruction * Root,
 324            const SmallInstructionSet &Exclude,
 325            const SmallInstructionSet &Final,
 326            DenseSet<Instruction *> &Users);
 327     bool findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
 328                           Instruction *&IV,
 329                           SmallInstructionVector &LoopIncs);
 330     bool collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale, Instruction *IV,
 331                          SmallVector<SmallInstructionVector, 32> &Roots,
 332                          SmallInstructionSet &AllRoots,
 333                          SmallInstructionVector &LoopIncs);
 334     bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
 335                 ReductionTracker &Reductions);
 336   };
 337 }
 338
 339 char LoopReroll::ID = 0;
 340 INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
 341 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 342 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
 343 INITIALIZE_PASS_DEPENDENCY(DominatorTree)
 344 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 345 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
 346 INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
 347
 348 Pass *llvm::createLoopRerollPass() {
 349   return new LoopReroll;
 350 }
 351
 352 // Returns true if the provided instruction is used outside the given loop.
 353 // This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
 354 // non-loop blocks to be outside the loop.
 355 static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
 356   for (Value::use_iterator UI = I->use_begin(),
 357        UIE = I->use_end(); UI != UIE; ++UI) {
 358     Instruction *User = cast<Instruction>(*UI);
 359     if (!L->contains(User))
 360       return true;
 361   }
 362
 363   return false;
 364 }
 365
 366 // Collect the list of loop induction variables with respect to which it might
 367 // be possible to reroll the loop.
 368 void LoopReroll::collectPossibleIVs(Loop *L,
 369                                     SmallInstructionVector &PossibleIVs) {
 370   BasicBlock *Header = L->getHeader();
 371   for (BasicBlock::iterator I = Header->begin(),
 372        IE = Header->getFirstInsertionPt(); I != IE; ++I) {
 373     if (!isa<PHINode>(I))
 374       continue;
 375     if (!I->getType()->isIntegerTy())
 376       continue;
 377
 378     if (const SCEVAddRecExpr *PHISCEV =
 379         dyn_cast<SCEVAddRecExpr>(SE->getSCEV(I))) {
 380       if (PHISCEV->getLoop() != L)
 381         continue;
 382       if (!PHISCEV->isAffine())
 383         continue;
 384       if (const SCEVConstant *IncSCEV =
 385           dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) {
 386         if (!IncSCEV->getValue()->getValue().isStrictlyPositive())
 387           continue;
 388         if (IncSCEV->getValue()->uge(MaxInc))
 389           continue;
 390
 391         DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " <<
 392               *PHISCEV << "\n");
 393         PossibleIVs.push_back(I);
 394       }
 395     }
 396   }
 397 }
 398
 399 // Add the remainder of the reduction-variable chain to the instruction vector
 400 // (the initial PHINode has already been added). If successful, the object is
 401 // marked as valid.
 402 void LoopReroll::SimpleLoopReduction::add(Loop *L) {
 403   assert(!Valid && "Cannot add to an already-valid chain");
 404
 405   // The reduction variable must be a chain of single-use instructions
 406   // (including the PHI), except for the last value (which is used by the PHI
 407   // and also outside the loop).
 408   Instruction *C = Instructions.front();
 409
 410   do {
 411     C = cast<Instruction>(*C->use_begin());
 412     if (C->hasOneUse()) {
 413       if (!C->isBinaryOp())
 414         return;
 415
 416       if (!(isa<PHINode>(Instructions.back()) ||
 417             C->isSameOperationAs(Instructions.back())))
 418         return;
 419
 420       Instructions.push_back(C);
 421     }
 422   } while (C->hasOneUse());
 423
 424   if (Instructions.size() < 2 ||
 425       !C->isSameOperationAs(Instructions.back()) ||
 426       C->use_begin() == C->use_end())
 427     return;
 428
 429   // C is now the (potential) last instruction in the reduction chain.
 430   for (Value::use_iterator UI = C->use_begin(), UIE = C->use_end();
 431        UI != UIE; ++UI) {
 432     // The only in-loop user can be the initial PHI.
 433     if (L->contains(cast<Instruction>(*UI)))
 434       if (cast<Instruction>(*UI ) != Instructions.front())
 435         return;
 436   }
 437
 438   Instructions.push_back(C);
 439   Valid = true;
 440 }
 441
 442 // Collect the vector of possible reduction variables.
 443 void LoopReroll::collectPossibleReductions(Loop *L,
 444   ReductionTracker &Reductions) {
 445   BasicBlock *Header = L->getHeader();
 446   for (BasicBlock::iterator I = Header->begin(),
 447        IE = Header->getFirstInsertionPt(); I != IE; ++I) {
 448     if (!isa<PHINode>(I))
 449       continue;
 450     if (!I->getType()->isSingleValueType())
 451       continue;
 452
 453     SimpleLoopReduction SLR(I, L);
 454     if (!SLR.valid())
 455       continue;
 456
 457     DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with " <<
 458           SLR.size() << " chained instructions)\n");
 459     Reductions.addSLR(SLR);
 460   }
 461 }
 462
 463 // Collect the set of all users of the provided root instruction. This set of
 464 // users contains not only the direct users of the root instruction, but also
 465 // all users of those users, and so on. There are two exceptions:
 466 //
 467 //   1. Instructions in the set of excluded instructions are never added to the
 468 //   use set (even if they are users). This is used, for example, to exclude
 469 //   including root increments in the use set of the primary IV.
 470 //
 471 //   2. Instructions in the set of final instructions are added to the use set
 472 //   if they are users, but their users are not added. This is used, for
 473 //   example, to prevent a reduction update from forcing all later reduction
 474 //   updates into the use set.
 475 void LoopReroll::collectInLoopUserSet(Loop *L,
 476   Instruction *Root, const SmallInstructionSet &Exclude,
 477   const SmallInstructionSet &Final,
 478   DenseSet<Instruction *> &Users) {
 479   SmallInstructionVector Queue(1, Root);
 480   while (!Queue.empty()) {
 481     Instruction *I = Queue.pop_back_val();
 482     if (!Users.insert(I).second)
 483       continue;
 484
 485     if (!Final.count(I))
 486       for (Value::use_iterator UI = I->use_begin(),
 487            UIE = I->use_end(); UI != UIE; ++UI) {
 488         Instruction *User = cast<Instruction>(*UI);
 489         if (PHINode *PN = dyn_cast<PHINode>(User)) {
 490           // Ignore "wrap-around" uses to PHIs of this loop's header.
 491           if (PN->getIncomingBlock(UI) == L->getHeader())
 492             continue;
 493         }
 494
 495         if (L->contains(User) && !Exclude.count(User)) {
 496           Queue.push_back(User);
 497         }
 498       }
 499
 500     // We also want to collect single-user "feeder" values.
 501     for (User::op_iterator OI = I->op_begin(),
 502          OIE = I->op_end(); OI != OIE; ++OI) {
 503       if (Instruction *Op = dyn_cast<Instruction>(*OI))
 504         if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
 505             !Final.count(Op))
 506           Queue.push_back(Op);
 507     }
 508   }
 509 }
 510
 511 // Collect all of the users of all of the provided root instructions (combined
 512 // into a single set).
 513 void LoopReroll::collectInLoopUserSet(Loop *L,
 514   const SmallInstructionVector &Roots,
 515   const SmallInstructionSet &Exclude,
 516   const SmallInstructionSet &Final,
 517   DenseSet<Instruction *> &Users) {
 518   for (SmallInstructionVector::const_iterator I = Roots.begin(),
 519        IE = Roots.end(); I != IE; ++I)
 520     collectInLoopUserSet(L, *I, Exclude, Final, Users);
 521 }
 522
 523 static bool isSimpleLoadStore(Instruction *I) {
 524   if (LoadInst *LI = dyn_cast<LoadInst>(I))
 525     return LI->isSimple();
 526   if (StoreInst *SI = dyn_cast<StoreInst>(I))
 527     return SI->isSimple();
 528   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
 529     return !MI->isVolatile();
 530   return false;
 531 }
 532
 533 // Recognize loops that are setup like this:
 534 //
 535 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
 536 // %scaled.iv = mul %iv, scale
 537 // f(%scaled.iv)
 538 // %scaled.iv.1 = add %scaled.iv, 1
 539 // f(%scaled.iv.1)
 540 // %scaled.iv.2 = add %scaled.iv, 2
 541 // f(%scaled.iv.2)
 542 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
 543 // f(%scaled.iv.scale_m_1)
 544 // ...
 545 // %iv.next = add %iv, 1
 546 // %cmp = icmp(%iv, ...)
 547 // br %cmp, header, exit
 548 //
 549 // and, if found, set IV = %scaled.iv, and add %iv.next to LoopIncs.
 550 bool LoopReroll::findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
 551                                   Instruction *&IV,
 552                                   SmallInstructionVector &LoopIncs) {
 553   // This is a special case: here we're looking for all uses (except for
 554   // the increment) to be multiplied by a common factor. The increment must
 555   // be by one. This is to capture loops like:
 556   //   for (int i = 0; i < 500; ++i) {
 557   //     foo(3*i); foo(3*i+1); foo(3*i+2);
 558   //   }
 559   if (RealIV->getNumUses() != 2)
 560     return false;
 561   const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(RealIV));
 562   Instruction *User1 = cast<Instruction>(*RealIV->use_begin()),
 563               *User2 = cast<Instruction>(*llvm::next(RealIV->use_begin()));
 564   if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType()))
 565     return false;
 566   const SCEVAddRecExpr *User1SCEV =
 567                          dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User1)),
 568                        *User2SCEV =
 569                          dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User2));
 570   if (!User1SCEV || !User1SCEV->isAffine() ||
 571       !User2SCEV || !User2SCEV->isAffine())
 572     return false;
 573
 574   // We assume below that User1 is the scale multiply and User2 is the
 575   // increment. If this can't be true, then swap them.
 576   if (User1SCEV == RealIVSCEV->getPostIncExpr(*SE)) {
 577     std::swap(User1, User2);
 578     std::swap(User1SCEV, User2SCEV);
 579   }
 580
 581   if (User2SCEV != RealIVSCEV->getPostIncExpr(*SE))
 582     return false;
 583   assert(User2SCEV->getStepRecurrence(*SE)->isOne() &&
 584          "Invalid non-unit step for multiplicative scaling");
 585   LoopIncs.push_back(User2);
 586
 587   if (const SCEVConstant *MulScale =
 588       dyn_cast<SCEVConstant>(User1SCEV->getStepRecurrence(*SE))) {
 589     // Make sure that both the start and step have the same multiplier.
 590     if (RealIVSCEV->getStart()->getType() != MulScale->getType())
 591       return false;
 592     if (SE->getMulExpr(RealIVSCEV->getStart(), MulScale) !=
 593         User1SCEV->getStart())
 594       return false;
 595
 596     ConstantInt *MulScaleCI = MulScale->getValue();
 597     if (!MulScaleCI->uge(2) || MulScaleCI->uge(MaxInc))
 598       return false;
 599     Scale = MulScaleCI->getZExtValue();
 600     IV = User1;
 601   } else
 602     return false;
 603
 604   DEBUG(dbgs() << "LRR: Found possible scaling " << *User1 << "\n");
 605   return true;
 606 }
 607
 608 // Collect all root increments with respect to the provided induction variable
 609 // (normally the PHI, but sometimes a multiply). A root increment is an
 610 // instruction, normally an add, with a positive constant less than Scale. In a
 611 // rerollable loop, each of these increments is the root of an instruction
 612 // graph isomorphic to the others. Also, we collect the final induction
 613 // increment (the increment equal to the Scale), and its users in LoopIncs.
 614 bool LoopReroll::collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale,
 615                                  Instruction *IV,
 616                                  SmallVector<SmallInstructionVector, 32> &Roots,
 617                                  SmallInstructionSet &AllRoots,
 618                                  SmallInstructionVector &LoopIncs) {
 619   for (Value::use_iterator UI = IV->use_begin(),
 620        UIE = IV->use_end(); UI != UIE; ++UI) {
 621     Instruction *User = cast<Instruction>(*UI);
 622     if (!SE->isSCEVable(User->getType()))
 623       continue;
 624     if (User->getType() != IV->getType())
 625       continue;
 626     if (!L->contains(User))
 627       continue;
 628     if (hasUsesOutsideLoop(User, L))
 629       continue;
 630
 631     if (const SCEVConstant *Diff = dyn_cast<SCEVConstant>(SE->getMinusSCEV(
 632           SE->getSCEV(User), SE->getSCEV(IV)))) {
 633       uint64_t Idx = Diff->getValue()->getValue().getZExtValue();
 634       if (Idx > 0 && Idx < Scale) {
 635         Roots[Idx-1].push_back(User);
 636         AllRoots.insert(User);
 637       } else if (Idx == Scale && Inc > 1) {
 638         LoopIncs.push_back(User);
 639       }
 640     }
 641   }
 642
 643   if (Roots[0].empty())
 644     return false;
 645   bool AllSame = true;
 646   for (unsigned i = 1; i < Scale-1; ++i)
 647     if (Roots[i].size() != Roots[0].size()) {
 648       AllSame = false;
 649       break;
 650     }
 651
 652   if (!AllSame)
 653     return false;
 654
 655   return true;
 656 }
 657
 658 // Validate the selected reductions. All iterations must have an isomorphic
 659 // part of the reduction chain and, for non-associative reductions, the chain
 660 // entries must appear in order.
 661 bool LoopReroll::ReductionTracker::validateSelected() {
 662   // For a non-associative reduction, the chain entries must appear in order.
 663   for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
 664        RI != RIE; ++RI) {
 665     int i = *RI;
 666     int PrevIter = 0, BaseCount = 0, Count = 0;
 667     for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(),
 668          JE = PossibleReds[i].end(); J != JE; ++J) {
 669         // Note that all instructions in the chain must have been found because
 670         // all instructions in the function must have been assigned to some
 671         // iteration.
 672       int Iter = PossibleRedIter[*J];
 673       if (Iter != PrevIter && Iter != PrevIter + 1 &&
 674           !PossibleReds[i].getReducedValue()->isAssociative()) {
 675         DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " <<
 676                         *J << "\n");
 677         return false;
 678       }
 679
 680       if (Iter != PrevIter) {
 681         if (Count != BaseCount) {
 682           DEBUG(dbgs() << "LRR: Iteration " << PrevIter <<
 683                 " reduction use count " << Count <<
 684                 " is not equal to the base use count " <<
 685                 BaseCount << "\n");
 686           return false;
 687         }
 688
 689         Count = 0;
 690       }
 691
 692       ++Count;
 693       if (Iter == 0)
 694         ++BaseCount;
 695
 696       PrevIter = Iter;
 697     }
 698   }
 699
 700   return true;
 701 }
 702
 703 // For all selected reductions, remove all parts except those in the first
 704 // iteration (and the PHI). Replace outside uses of the reduced value with uses
 705 // of the first-iteration reduced value (in other words, reroll the selected
 706 // reductions).
 707 void LoopReroll::ReductionTracker::replaceSelected() {
 708   // Fixup reductions to refer to the last instruction associated with the
 709   // first iteration (not the last).
 710   for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
 711        RI != RIE; ++RI) {
 712     int i = *RI;
 713     int j = 0;
 714     for (int e = PossibleReds[i].size(); j != e; ++j)
 715       if (PossibleRedIter[PossibleReds[i][j]] != 0) {
 716         --j;
 717         break;
 718       }
 719
 720     // Replace users with the new end-of-chain value.
 721     SmallInstructionVector Users;
 722     for (Value::use_iterator UI =
 723            PossibleReds[i].getReducedValue()->use_begin(),
 724          UIE = PossibleReds[i].getReducedValue()->use_end(); UI != UIE; ++UI)
 725       Users.push_back(cast<Instruction>(*UI));
 726
 727     for (SmallInstructionVector::iterator J = Users.begin(),
 728          JE = Users.end(); J != JE; ++J)
 729       (*J)->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
 730                               PossibleReds[i][j]);
 731   }
 732 }
 733
 734 // Reroll the provided loop with respect to the provided induction variable.
 735 // Generally, we're looking for a loop like this:
 736 //
 737 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
 738 // f(%iv)
 739 // %iv.1 = add %iv, 1                <-- a root increment
 740 // f(%iv.1)
 741 // %iv.2 = add %iv, 2                <-- a root increment
 742 // f(%iv.2)
 743 // %iv.scale_m_1 = add %iv, scale-1  <-- a root increment
 744 // f(%iv.scale_m_1)
 745 // ...
 746 // %iv.next = add %iv, scale
 747 // %cmp = icmp(%iv, ...)
 748 // br %cmp, header, exit
 749 //
 750 // Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
 751 // instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
 752 // be intermixed with eachother. The restriction imposed by this algorithm is
 753 // that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
 754 // etc. be the same.
 755 //
 756 // First, we collect the use set of %iv, excluding the other increment roots.
 757 // This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
 758 // times, having collected the use set of f(%iv.(i+1)), during which we:
 759 //   - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
 760 //     the next unmatched instruction in f(%iv.(i+1)).
 761 //   - Ensure that both matched instructions don't have any external users
 762 //     (with the exception of last-in-chain reduction instructions).
 763 //   - Track the (aliasing) write set, and other side effects, of all
 764 //     instructions that belong to future iterations that come before the matched
 765 //     instructions. If the matched instructions read from that write set, then
 766 //     f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
 767 //     f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
 768 //     if any of these future instructions had side effects (could not be
 769 //     speculatively executed), and so do the matched instructions, when we
 770 //     cannot reorder those side-effect-producing instructions, and rerolling
 771 //     fails.
 772 //
 773 // Finally, we make sure that all loop instructions are either loop increment
 774 // roots, belong to simple latch code, parts of validated reductions, part of
 775 // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
 776 // have been validated), then we reroll the loop.
 777 bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
 778                         const SCEV *IterCount,
 779                         ReductionTracker &Reductions) {
 780   const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
 781   uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
 782                    getValue()->getZExtValue();
 783   // The collection of loop increment instructions.
 784   SmallInstructionVector LoopIncs;
 785   uint64_t Scale = Inc;
 786
 787   // The effective induction variable, IV, is normally also the real induction
 788   // variable. When we're dealing with a loop like:
 789   //   for (int i = 0; i < 500; ++i)
 790   //     x[3*i] = ...;
 791   //     x[3*i+1] = ...;
 792   //     x[3*i+2] = ...;
 793   // then the real IV is still i, but the effective IV is (3*i).
 794   Instruction *RealIV = IV;
 795   if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs))
 796     return false;
 797
 798   assert(Scale <= MaxInc && "Scale is too large");
 799   assert(Scale > 1 && "Scale must be at least 2");
 800
 801   // The set of increment instructions for each increment value.
 802   SmallVector<SmallInstructionVector, 32> Roots(Scale-1);
 803   SmallInstructionSet AllRoots;
 804   if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs))
 805     return false;
 806
 807   DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
 808                   *RealIV << "\n");
 809
 810   // An array of just the possible reductions for this scale factor. When we
 811   // collect the set of all users of some root instructions, these reduction
 812   // instructions are treated as 'final' (their uses are not considered).
 813   // This is important because we don't want the root use set to search down
 814   // the reduction chain.
 815   SmallInstructionSet PossibleRedSet;
 816   SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet;
 817   Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet,
 818                              PossibleRedLastSet);
 819
 820   // We now need to check for equivalence of the use graph of each root with
 821   // that of the primary induction variable (excluding the roots). Our goal
 822   // here is not to solve the full graph isomorphism problem, but rather to
 823   // catch common cases without a lot of work. As a result, we will assume
 824   // that the relative order of the instructions in each unrolled iteration
 825   // is the same (although we will not make an assumption about how the
 826   // different iterations are intermixed). Note that while the order must be
 827   // the same, the instructions may not be in the same basic block.
 828   SmallInstructionSet Exclude(AllRoots);
 829   Exclude.insert(LoopIncs.begin(), LoopIncs.end());
 830
 831   DenseSet<Instruction *> BaseUseSet;
 832   collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet);
 833
 834   DenseSet<Instruction *> AllRootUses;
 835   std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1);
 836
 837   bool MatchFailed = false;
 838   for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) {
 839     DenseSet<Instruction *> &RootUseSet = RootUseSets[i];
 840     collectInLoopUserSet(L, Roots[i], SmallInstructionSet(),
 841                          PossibleRedSet, RootUseSet);
 842
 843     DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() <<
 844                     " vs. iteration increment " << (i+1) <<
 845                     " use set size: " << RootUseSet.size() << "\n");
 846
 847     if (BaseUseSet.size() != RootUseSet.size()) {
 848       MatchFailed = true;
 849       break;
 850     }
 851
 852     // In addition to regular aliasing information, we need to look for
 853     // instructions from later (future) iterations that have side effects
 854     // preventing us from reordering them past other instructions with side
 855     // effects.
 856     bool FutureSideEffects = false;
 857     AliasSetTracker AST(*AA);
 858
 859     // The map between instructions in f(%iv.(i+1)) and f(%iv).
 860     DenseMap<Value *, Value *> BaseMap;
 861
 862     assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops");
 863     for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(),
 864          JE = Header->end(); J1 != JE && !MatchFailed; ++J1) {
 865       if (cast<Instruction>(J1) == RealIV)
 866         continue;
 867       if (cast<Instruction>(J1) == IV)
 868         continue;
 869       if (!BaseUseSet.count(J1))
 870         continue;
 871       if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs.
 872         continue;
 873
 874       while (J2 != JE && (!RootUseSet.count(J2) ||
 875              std::find(Roots[i].begin(), Roots[i].end(), J2) !=
 876                Roots[i].end())) {
 877         // As we iterate through the instructions, instructions that don't
 878         // belong to previous iterations (or the base case), must belong to
 879         // future iterations. We want to track the alias set of writes from
 880         // previous iterations.
 881         if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) &&
 882             !AllRootUses.count(J2)) {
 883           if (J2->mayWriteToMemory())
 884             AST.add(J2);
 885
 886           // Note: This is specifically guarded by a check on isa<PHINode>,
 887           // which while a valid (somewhat arbitrary) micro-optimization, is
 888           // needed because otherwise isSafeToSpeculativelyExecute returns
 889           // false on PHI nodes.
 890           if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL))
 891             FutureSideEffects = true;
 892         }
 893
 894         ++J2;
 895       }
 896
 897       if (!J1->isSameOperationAs(J2)) {
 898         DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
 899                         " vs. " << *J2 << "\n");
 900         MatchFailed = true;
 901         break;
 902       }
 903
 904       // Make sure that this instruction, which is in the use set of this
 905       // root instruction, does not also belong to the base set or the set of
 906       // some previous root instruction.
 907       if (BaseUseSet.count(J2) || AllRootUses.count(J2)) {
 908         DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
 909                         " vs. " << *J2 << " (prev. case overlap)\n");
 910         MatchFailed = true;
 911         break;
 912       }
 913
 914       // Make sure that we don't alias with any instruction in the alias set
 915       // tracker. If we do, then we depend on a future iteration, and we
 916       // can't reroll.
 917       if (J2->mayReadFromMemory()) {
 918         for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end();
 919              K != KE && !MatchFailed; ++K) {
 920           if (K->aliasesUnknownInst(J2, *AA)) {
 921             DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
 922                             " vs. " << *J2 << " (depends on future store)\n");
 923             MatchFailed = true;
 924             break;
 925           }
 926         }
 927       }
 928
 929       // If we've past an instruction from a future iteration that may have
 930       // side effects, and this instruction might also, then we can't reorder
 931       // them, and this matching fails. As an exception, we allow the alias
 932       // set tracker to handle regular (simple) load/store dependencies.
 933       if (FutureSideEffects &&
 934             ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) ||
 935              (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) {
 936         DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
 937                         " vs. " << *J2 <<
 938                         " (side effects prevent reordering)\n");
 939         MatchFailed = true;
 940         break;
 941       }
 942
 943       // For instructions that are part of a reduction, if the operation is
 944       // associative, then don't bother matching the operands (because we
 945       // already know that the instructions are isomorphic, and the order
 946       // within the iteration does not matter). For non-associative reductions,
 947       // we do need to match the operands, because we need to reject
 948       // out-of-order instructions within an iteration!
 949       // For example (assume floating-point addition), we need to reject this:
 950       //   x += a[i]; x += b[i];
 951       //   x += a[i+1]; x += b[i+1];
 952       //   x += b[i+2]; x += a[i+2];
 953       bool InReduction = Reductions.isPairInSame(J1, J2);
 954
 955       if (!(InReduction && J1->isAssociative())) {
 956         bool Swapped = false, SomeOpMatched = false;;
 957         for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
 958           Value *Op2 = J2->getOperand(j);
 959
 960           // If this is part of a reduction (and the operation is not
 961           // associatve), then we match all operands, but not those that are
 962           // part of the reduction.
 963           if (InReduction)
 964             if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
 965               if (Reductions.isPairInSame(J2, Op2I))
 966                 continue;
 967
 968           DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
 969           if (BMI != BaseMap.end())
 970             Op2 = BMI->second;
 971           else if (std::find(Roots[i].begin(), Roots[i].end(),
 972                              (Instruction*) Op2) != Roots[i].end())
 973             Op2 = IV;
 974
 975           if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
 976             // If we've not already decided to swap the matched operands, and
 977             // we've not already matched our first operand (note that we could
 978             // have skipped matching the first operand because it is part of a
 979             // reduction above), and the instruction is commutative, then try
 980             // the swapped match.
 981             if (!Swapped && J1->isCommutative() && !SomeOpMatched &&
 982                 J1->getOperand(!j) == Op2) {
 983               Swapped = true;
 984             } else {
 985               DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
 986                               " vs. " << *J2 << " (operand " << j << ")\n");
 987               MatchFailed = true;
 988               break;
 989             }
 990           }
 991
 992           SomeOpMatched = true;
 993         }
 994       }
 995
 996       if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) ||
 997           (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) {
 998         DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
 999                         " vs. " << *J2 << " (uses outside loop)\n");
1000         MatchFailed = true;
1001         break;
1002       }
1003
1004       if (!MatchFailed)
1005         BaseMap.insert(std::pair<Value *, Value *>(J2, J1));
1006
1007       AllRootUses.insert(J2);
1008       Reductions.recordPair(J1, J2, i+1);
1009
1010       ++J2;
1011     }
1012   }
1013
1014   if (MatchFailed)
1015     return false;
1016
1017   DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
1018                   *RealIV << "\n");
1019
1020   DenseSet<Instruction *> LoopIncUseSet;
1021   collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(),
1022                        SmallInstructionSet(), LoopIncUseSet);
1023   DEBUG(dbgs() << "LRR: Loop increment set size: " <<
1024                   LoopIncUseSet.size() << "\n");
1025
1026   // Make sure that all instructions in the loop have been included in some
1027   // use set.
1028   for (BasicBlock::iterator J = Header->begin(), JE = Header->end();
1029        J != JE; ++J) {
1030     if (isa<DbgInfoIntrinsic>(J))
1031       continue;
1032     if (cast<Instruction>(J) == RealIV)
1033       continue;
1034     if (cast<Instruction>(J) == IV)
1035       continue;
1036     if (BaseUseSet.count(J) || AllRootUses.count(J) ||
1037         (LoopIncUseSet.count(J) && (J->isTerminator() ||
1038                                     isSafeToSpeculativelyExecute(J, DL))))
1039       continue;
1040
1041     if (AllRoots.count(J))
1042       continue;
1043
1044     if (Reductions.isSelectedPHI(J))
1045       continue;
1046
1047     DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV <<
1048                     " unprocessed instruction found: " << *J << "\n");
1049     MatchFailed = true;
1050     break;
1051   }
1052
1053   if (MatchFailed)
1054     return false;
1055
1056   DEBUG(dbgs() << "LRR: all instructions processed from " <<
1057                   *RealIV << "\n");
1058
1059   if (!Reductions.validateSelected())
1060     return false;
1061
1062   // At this point, we've validated the rerolling, and we're committed to
1063   // making changes!
1064
1065   Reductions.replaceSelected();
1066
1067   // Remove instructions associated with non-base iterations.
1068   for (BasicBlock::reverse_iterator J = Header->rbegin();
1069        J != Header->rend();) {
1070     if (AllRootUses.count(&*J)) {
1071       Instruction *D = &*J;
1072       DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
1073       D->eraseFromParent();
1074       continue;
1075     }
1076
1077     ++J;
1078   }
1079
1080   // Insert the new induction variable.
1081   const SCEV *Start = RealIVSCEV->getStart();
1082   if (Inc == 1)
1083     Start = SE->getMulExpr(Start,
1084                            SE->getConstant(Start->getType(), Scale));
1085   const SCEVAddRecExpr *H =
1086     cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start,
1087                            SE->getConstant(RealIVSCEV->getType(), 1),
1088                            L, SCEV::FlagAnyWrap));
1089   { // Limit the lifetime of SCEVExpander.
1090     SCEVExpander Expander(*SE, "reroll");
1091     PHINode *NewIV =
1092       cast<PHINode>(Expander.expandCodeFor(H, IV->getType(),
1093                                            Header->begin()));
1094     for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
1095          JE = BaseUseSet.end(); J != JE; ++J)
1096       (*J)->replaceUsesOfWith(IV, NewIV);
1097
1098     if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
1099       if (LoopIncUseSet.count(BI)) {
1100         const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
1101         if (Inc == 1)
1102           ICSCEV =
1103             SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
1104         Value *IC;
1105         if (isa<SCEVConstant>(ICSCEV)) {
1106           IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI);
1107         } else {
1108           BasicBlock *Preheader = L->getLoopPreheader();
1109           if (!Preheader)
1110             Preheader = InsertPreheaderForLoop(L, this);
1111
1112           IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(),
1113                                       Preheader->getTerminator());
1114         }
1115
1116         Value *NewIVNext = NewIV->getIncomingValueForBlock(Header);
1117         Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC,
1118                                    "exitcond");
1119         BI->setCondition(Cond);
1120
1121         if (BI->getSuccessor(1) != Header)
1122           BI->swapSuccessors();
1123       }
1124     }
1125   }
1126
1127   SimplifyInstructionsInBlock(Header, DL, TLI);
1128   DeleteDeadPHIs(Header, TLI);
1129   ++NumRerolledLoops;
1130   return true;
1131 }
1132
1133 bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
1134   AA = &getAnalysis<AliasAnalysis>();
1135   LI = &getAnalysis<LoopInfo>();
1136   SE = &getAnalysis<ScalarEvolution>();
1137   TLI = &getAnalysis<TargetLibraryInfo>();
1138   DL = getAnalysisIfAvailable<DataLayout>();
1139   DT = &getAnalysis<DominatorTree>();
1140
1141   BasicBlock *Header = L->getHeader();
1142   DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
1143         "] Loop %" << Header->getName() << " (" <<
1144         L->getNumBlocks() << " block(s))\n");
1145
1146   bool Changed = false;
1147
1148   // For now, we'll handle only single BB loops.
1149   if (L->getNumBlocks() > 1)
1150     return Changed;
1151
1152   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
1153     return Changed;
1154
1155   const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
1156   const SCEV *IterCount =
1157     SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1));
1158   DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
1159
1160   // First, we need to find the induction variable with respect to which we can
1161   // reroll (there may be several possible options).
1162   SmallInstructionVector PossibleIVs;
1163   collectPossibleIVs(L, PossibleIVs);
1164
1165   if (PossibleIVs.empty()) {
1166     DEBUG(dbgs() << "LRR: No possible IVs found\n");
1167     return Changed;
1168   }
1169
1170   ReductionTracker Reductions;
1171   collectPossibleReductions(L, Reductions);
1172
1173   // For each possible IV, collect the associated possible set of 'root' nodes
1174   // (i+1, i+2, etc.).
1175   for (SmallInstructionVector::iterator I = PossibleIVs.begin(),
1176        IE = PossibleIVs.end(); I != IE; ++I)
1177     if (reroll(*I, L, Header, IterCount, Reductions)) {
1178       Changed = true;
1179       break;
1180     }
1181
1182   return Changed;
1183 }
1184