lib/Transforms/Scalar/LoopRotation.cpp

   1 //===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
   10 // This file implements the Loop Rotation pass.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #define DEBUG_TYPE "loop-rotate"
  15 #include "llvm/Transforms/Scalar.h"
  16 #include "llvm/Function.h"
  17 #include "llvm/Analysis/CodeMetrics.h"
  18 #include "llvm/Analysis/DominanceFrontier.h"
  19 #include "llvm/Analysis/LoopPass.h"
  20 #include "llvm/Analysis/InstructionSimplify.h"
  21 #include "llvm/Analysis/ScalarEvolution.h"
  22 #include "llvm/Transforms/Utils/Local.h"
  23 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  24 #include "llvm/Transforms/Utils/SSAUpdater.h"
  25 #include "llvm/Transforms/Utils/ValueMapper.h"
  26 #include "llvm/Support/Debug.h"
  27 #include "llvm/ADT/Statistic.h"
  28 using namespace llvm;
  29
   // Upper bound on the header's instruction count (CodeMetrics::NumInsts):
   // rotateLoop() refuses to rotate a loop whose header is larger than this.
   30 #define MAX_HEADER_SIZE 16
   31
   // Statistic counter; rotateLoop() increments it once per successful rotation.
   32 STATISTIC(NumRotated, "Number of loops rotated");
  33 namespace {
  34
  35   class LoopRotate : public LoopPass {
  36   public:
  37     static char ID; // Pass ID, replacement for typeid
  38     LoopRotate() : LoopPass(ID) {
  39       initializeLoopRotatePass(*PassRegistry::getPassRegistry());
  40     }
  41
  42     // Rotate Loop L as many times as possible. Return true if
  43     // loop is rotated at least once.
  44     bool runOnLoop(Loop *L, LPPassManager &LPM);
  45
  46     // LCSSA form makes instruction renaming easier.
  47     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
  48       AU.addPreserved<DominatorTree>();
  49       AU.addPreserved<DominanceFrontier>();
  50       AU.addRequired<LoopInfo>();
  51       AU.addPreserved<LoopInfo>();
  52       AU.addRequiredID(LoopSimplifyID);
  53       AU.addPreservedID(LoopSimplifyID);
  54       AU.addRequiredID(LCSSAID);
  55       AU.addPreservedID(LCSSAID);
  56       AU.addPreserved<ScalarEvolution>();
  57     }
  58
  59     // Helper functions
  60
  61     /// Do actual work
  62     bool rotateLoop(Loop *L, LPPassManager &LPM);
  63
  64     /// Initialize local data
  65     void initialize();
  66
  67     /// After loop rotation, loop pre-header has multiple sucessors.
  68     /// Insert one forwarding basic block to ensure that loop pre-header
  69     /// has only one successor.
  70     void preserveCanonicalLoopForm(LPPassManager &LPM);
  71
  72   private:
  73     LoopInfo *LI;
  74     Loop *L;
  75     BasicBlock *OrigHeader;
  76     BasicBlock *OrigPreHeader;
  77     BasicBlock *OrigLatch;
  78     BasicBlock *NewHeader;
  79     BasicBlock *Exit;
  80     LPPassManager *LPM_Ptr;
  81   };
  82 }
  83
   // Definition of the static pass identifier declared inside the class.
   84 char LoopRotate::ID = 0;
   // Pass registration: argument string "loop-rotate", description
   // "Rotate Loops", with LoopInfo, LoopSimplify and LCSSA declared as
   // dependencies. NOTE(review): the two trailing `false` arguments are
   // presumably the CFG-only and is-analysis flags -- confirm in PassSupport.h.
   85 INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
   86 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
   87 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
   88 INITIALIZE_PASS_DEPENDENCY(LCSSA)
   89 INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
  90
  91 Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
  92
  93 /// Initialize local data
  94 void LoopRotate::initialize() {
  95   L = NULL;
  96   OrigHeader = NULL;
  97   OrigPreHeader = NULL;
  98   NewHeader = NULL;
  99   Exit = NULL;
 100 }
 101
 102 /// Rotate Loop L as many times as possible. Return true if
 103 /// the loop is rotated at least once.
 104 bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
 105   LI = &getAnalysis<LoopInfo>();
 106
 107   initialize();
 108   LPM_Ptr = &LPM;
 109
 110   // One loop can be rotated multiple times.
 111   bool MadeChange = false;
 112   while (rotateLoop(Lp,LPM)) {
 113     MadeChange = true;
 114     initialize();
 115   }
 116
 117   return MadeChange;
 118 }
 119
 120 /// Rotate loop LP. Return true if the loop is rotated.
 121 bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
 122   L = Lp;
 123
 124   OrigPreHeader = L->getLoopPreheader();
 125   if (!OrigPreHeader) return false;
 126
 127   OrigLatch = L->getLoopLatch();
 128   if (!OrigLatch) return false;
 129
 130   OrigHeader =  L->getHeader();
 131
 132   // If the loop has only one block then there is not much to rotate.
 133   if (L->getBlocks().size() == 1)
 134     return false;
 135
 136   // If the loop header is not one of the loop exiting blocks then
 137   // either this loop is already rotated or it is not
 138   // suitable for loop rotation transformations.
 139   if (!L->isLoopExiting(OrigHeader))
 140     return false;
 141
 142   BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
 143   if (!BI)
 144     return false;
 145   assert(BI->isConditional() && "Branch Instruction is not conditional");
 146
 147   // Updating PHInodes in loops with multiple exits adds complexity.
 148   // Keep it simple, and restrict loop rotation to loops with one exit only.
 149   // In future, lift this restriction and support for multiple exits if
 150   // required.
 151   SmallVector<BasicBlock*, 8> ExitBlocks;
 152   L->getExitBlocks(ExitBlocks);
 153   if (ExitBlocks.size() > 1)
 154     return false;
 155
 156   // Check size of original header and reject loop if it is very big.
 157   {
 158     CodeMetrics Metrics;
 159     Metrics.analyzeBasicBlock(OrigHeader);
 160     if (Metrics.NumInsts > MAX_HEADER_SIZE)
 161       return false;
 162   }
 163
 164   // Now, this loop is suitable for rotation.
 165
 166   // Anything ScalarEvolution may know about this loop or the PHI nodes
 167   // in its header will soon be invalidated.
 168   if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
 169     SE->forgetLoop(L);
 170
 171   // Find new Loop header. NewHeader is a Header's one and only successor
 172   // that is inside loop.  Header's other successor is outside the
 173   // loop.  Otherwise loop is not suitable for rotation.
 174   Exit = BI->getSuccessor(0);
 175   NewHeader = BI->getSuccessor(1);
 176   if (L->contains(Exit))
 177     std::swap(Exit, NewHeader);
 178   assert(NewHeader && "Unable to determine new loop header");
 179   assert(L->contains(NewHeader) && !L->contains(Exit) &&
 180          "Unable to determine loop header and exit blocks");
 181
 182   // This code assumes that the new header has exactly one predecessor.
 183   // Remove any single-entry PHI nodes in it.
 184   assert(NewHeader->getSinglePredecessor() &&
 185          "New header doesn't have one pred!");
 186   FoldSingleEntryPHINodes(NewHeader);
 187
 188   // Begin by walking OrigHeader and populating ValueMap with an entry for
 189   // each Instruction.
 190   BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
 191   ValueToValueMapTy ValueMap;
 192
 193   // For PHI nodes, the value available in OldPreHeader is just the
 194   // incoming value from OldPreHeader.
 195   for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
 196     ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
 197
 198   // For the rest of the instructions, either hoist to the OrigPreheader if
 199   // possible or create a clone in the OldPreHeader if not.
 200   TerminatorInst *LoopEntryBranch = OrigPreHeader->getTerminator();
 201   while (I != E) {
 202     Instruction *Inst = I++;
 203
 204     // If the instruction's operands are invariant and it doesn't read or write
 205     // memory, then it is safe to hoist.  Doing this doesn't change the order of
 206     // execution in the preheader, but does prevent the instruction from
 207     // executing in each iteration of the loop.  This means it is safe to hoist
 208     // something that might trap, but isn't safe to hoist something that reads
 209     // memory (without proving that the loop doesn't write).
 210     if (L->hasLoopInvariantOperands(Inst) &&
 211         !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
 212         !isa<TerminatorInst>(Inst)) {
 213       Inst->moveBefore(LoopEntryBranch);
 214       continue;
 215     }
 216
 217     // Otherwise, create a duplicate of the instruction.
 218     Instruction *C = Inst->clone();
 219
 220     // Eagerly remap the operands of the instruction.
 221     RemapInstruction(C, ValueMap,
 222                      RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
 223
 224     // With the operands remapped, see if the instruction constant folds or is
 225     // otherwise simplifyable.  This commonly occurs because the entry from PHI
 226     // nodes allows icmps and other instructions to fold.
 227     Value *V = SimplifyInstruction(C);
 228     if (V && LI->replacementPreservesLCSSAForm(C, V)) {
 229       // If so, then delete the temporary instruction and stick the folded value
 230       // in the map.
 231       delete C;
 232       ValueMap[Inst] = V;
 233     } else {
 234       // Otherwise, stick the new instruction into the new block!
 235       C->setName(Inst->getName());
 236       C->insertBefore(LoopEntryBranch);
 237       ValueMap[Inst] = C;
 238     }
 239   }
 240
 241   // Along with all the other instructions, we just cloned OrigHeader's
 242   // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
 243   // successors by duplicating their incoming values for OrigHeader.
 244   TerminatorInst *TI = OrigHeader->getTerminator();
 245   for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
 246     for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
 247          PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
 248       PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreHeader);
 249
 250   // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
 251   // OrigPreHeader's old terminator (the original branch into the loop), and
 252   // remove the corresponding incoming values from the PHI nodes in OrigHeader.
 253   LoopEntryBranch->eraseFromParent();
 254   for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
 255     PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
 256
 257   // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
 258   // as necessary.
 259   SSAUpdater SSA;
 260   for (I = OrigHeader->begin(); I != E; ++I) {
 261     Value *OrigHeaderVal = I;
 262     Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
 263
 264     // If there are no uses of the value (e.g. because it returns void), there
 265     // is nothing to rewrite.
 266     if (OrigHeaderVal->use_empty() && OrigPreHeaderVal->use_empty())
 267       continue;
 268
 269     // The value now exits in two versions: the initial value in the preheader
 270     // and the loop "next" value in the original header.
 271     SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
 272     SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
 273     SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal);
 274
 275     // Visit each use of the OrigHeader instruction.
 276     for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
 277          UE = OrigHeaderVal->use_end(); UI != UE; ) {
 278       // Grab the use before incrementing the iterator.
 279       Use &U = UI.getUse();
 280
 281       // Increment the iterator before removing the use from the list.
 282       ++UI;
 283
 284       // SSAUpdater can't handle a non-PHI use in the same block as an
 285       // earlier def. We can easily handle those cases manually.
 286       Instruction *UserInst = cast<Instruction>(U.getUser());
 287       if (!isa<PHINode>(UserInst)) {
 288         BasicBlock *UserBB = UserInst->getParent();
 289
 290         // The original users in the OrigHeader are already using the
 291         // original definitions.
 292         if (UserBB == OrigHeader)
 293           continue;
 294
 295         // Users in the OrigPreHeader need to use the value to which the
 296         // original definitions are mapped.
 297         if (UserBB == OrigPreHeader) {
 298           U = OrigPreHeaderVal;
 299           continue;
 300         }
 301       }
 302
 303       // Anything else can be handled by SSAUpdater.
 304       SSA.RewriteUse(U);
 305     }
 306   }
 307
 308   // NewHeader is now the header of the loop.
 309   L->moveToHeader(NewHeader);
 310
 311   // Move the original header to the bottom of the loop, where it now more
 312   // naturally belongs. This isn't necessary for correctness, and CodeGen can
 313   // usually reorder blocks on its own to fix things like this up, but it's
 314   // still nice to keep the IR readable.
 315   //
 316   // The original header should have only one predecessor at this point, since
 317   // we checked that the loop had a proper preheader and unique backedge before
 318   // we started.
 319   assert(OrigHeader->getSinglePredecessor() &&
 320          "Original loop header has too many predecessors after loop rotation!");
 321   OrigHeader->moveAfter(OrigHeader->getSinglePredecessor());
 322
 323   // Also, since this original header only has one predecessor, zap its
 324   // PHI nodes, which are now trivial.
 325   FoldSingleEntryPHINodes(OrigHeader);
 326
 327   // TODO: We could just go ahead and merge OrigHeader into its predecessor
 328   // at this point, if we don't mind updating dominator info.
 329
 330   // Establish a new preheader, update dominators, etc.
 331   preserveCanonicalLoopForm(LPM);
 332
 333   ++NumRotated;
 334   return true;
 335 }
 336
 337
 338 /// After loop rotation, loop pre-header has multiple sucessors.
 339 /// Insert one forwarding basic block to ensure that loop pre-header
 340 /// has only one successor.
 341 void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
 342
 343   // Right now original pre-header has two successors, new header and
 344   // exit block. Insert new block between original pre-header and
 345   // new header such that loop's new pre-header has only one successor.
 346   BasicBlock *NewPreHeader = BasicBlock::Create(OrigHeader->getContext(),
 347                                                 "bb.nph",
 348                                                 OrigHeader->getParent(),
 349                                                 NewHeader);
 350   LoopInfo &LI = getAnalysis<LoopInfo>();
 351   if (Loop *PL = LI.getLoopFor(OrigPreHeader))
 352     PL->addBasicBlockToLoop(NewPreHeader, LI.getBase());
 353   BranchInst::Create(NewHeader, NewPreHeader);
 354
 355   BranchInst *OrigPH_BI = cast<BranchInst>(OrigPreHeader->getTerminator());
 356   if (OrigPH_BI->getSuccessor(0) == NewHeader)
 357     OrigPH_BI->setSuccessor(0, NewPreHeader);
 358   else {
 359     assert(OrigPH_BI->getSuccessor(1) == NewHeader &&
 360            "Unexpected original pre-header terminator");
 361     OrigPH_BI->setSuccessor(1, NewPreHeader);
 362   }
 363
 364   PHINode *PN;
 365   for (BasicBlock::iterator I = NewHeader->begin();
 366        (PN = dyn_cast<PHINode>(I)); ++I) {
 367     int index = PN->getBasicBlockIndex(OrigPreHeader);
 368     assert(index != -1 && "Expected incoming value from Original PreHeader");
 369     PN->setIncomingBlock(index, NewPreHeader);
 370     assert(PN->getBasicBlockIndex(OrigPreHeader) == -1 &&
 371            "Expected only one incoming value from Original PreHeader");
 372   }
 373
 374   if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
 375     DT->addNewBlock(NewPreHeader, OrigPreHeader);
 376     DT->changeImmediateDominator(L->getHeader(), NewPreHeader);
 377     DT->changeImmediateDominator(Exit, OrigPreHeader);
 378     for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
 379          BI != BE; ++BI) {
 380       BasicBlock *B = *BI;
 381       if (L->getHeader() != B) {
 382         DomTreeNode *Node = DT->getNode(B);
 383         if (Node && Node->getBlock() == OrigHeader)
 384           DT->changeImmediateDominator(*BI, L->getHeader());
 385       }
 386     }
 387     DT->changeImmediateDominator(OrigHeader, OrigLatch);
 388   }
 389
 390   if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>()) {
 391     // New Preheader's dominance frontier is Exit block.
 392     DominanceFrontier::DomSetType NewPHSet;
 393     NewPHSet.insert(Exit);
 394     DF->addBasicBlock(NewPreHeader, NewPHSet);
 395
 396     // New Header's dominance frontier now includes itself and Exit block
 397     DominanceFrontier::iterator HeadI = DF->find(L->getHeader());
 398     if (HeadI != DF->end()) {
 399       DominanceFrontier::DomSetType & HeaderSet = HeadI->second;
 400       HeaderSet.clear();
 401       HeaderSet.insert(L->getHeader());
 402       HeaderSet.insert(Exit);
 403     } else {
 404       DominanceFrontier::DomSetType HeaderSet;
 405       HeaderSet.insert(L->getHeader());
 406       HeaderSet.insert(Exit);
 407       DF->addBasicBlock(L->getHeader(), HeaderSet);
 408     }
 409
 410     // Original header (new Loop Latch)'s dominance frontier is Exit.
 411     DominanceFrontier::iterator LatchI = DF->find(L->getLoopLatch());
 412     if (LatchI != DF->end()) {
 413       DominanceFrontier::DomSetType &LatchSet = LatchI->second;
 414       LatchSet = LatchI->second;
 415       LatchSet.clear();
 416       LatchSet.insert(Exit);
 417     } else {
 418       DominanceFrontier::DomSetType LatchSet;
 419       LatchSet.insert(Exit);
 420       DF->addBasicBlock(L->getHeader(), LatchSet);
 421     }
 422
 423     // If a loop block dominates new loop latch then add to its frontiers
 424     // new header and Exit and remove new latch (which is equal to original
 425     // header).
 426     BasicBlock *NewLatch = L->getLoopLatch();
 427
 428     assert(NewLatch == OrigHeader && "NewLatch is inequal to OrigHeader");
 429
 430     if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
 431       for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
 432            BI != BE; ++BI) {
 433         BasicBlock *B = *BI;
 434         if (DT->dominates(B, NewLatch)) {
 435           DominanceFrontier::iterator BDFI = DF->find(B);
 436           if (BDFI != DF->end()) {
 437             DominanceFrontier::DomSetType &BSet = BDFI->second;
 438             BSet.erase(NewLatch);
 439             BSet.insert(L->getHeader());
 440             BSet.insert(Exit);
 441           } else {
 442             DominanceFrontier::DomSetType BSet;
 443             BSet.insert(L->getHeader());
 444             BSet.insert(Exit);
 445             DF->addBasicBlock(B, BSet);
 446           }
 447         }
 448       }
 449     }
 450   }
 451
 452   // Preserve canonical loop form, which means Exit block should
 453   // have only one predecessor.
 454   SplitEdge(L->getLoopLatch(), Exit, this);
 455
 456   assert(NewHeader && L->getHeader() == NewHeader &&
 457          "Invalid loop header after loop rotation");
 458   assert(NewPreHeader && L->getLoopPreheader() == NewPreHeader &&
 459          "Invalid loop preheader after loop rotation");
 460   assert(L->getLoopLatch() &&
 461          "Invalid loop latch after loop rotation");
 462 }