X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FLoopDistribute.cpp;h=6c211458100ca9bf88ad45772655679db467b3b4;hp=3d2972eb7f07943931256453224762322e5786a1;hb=8452a3f46e1636321765e2022b14b789f2b99ec4;hpb=e7beeb8ea166e44fef3efe1e88c2b8006efbdd53 diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp index 3d2972eb7f0..6c211458100 100644 --- a/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/lib/Transforms/Scalar/LoopDistribute.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" #include #define LDIST_NAME "loop-distribute" @@ -55,70 +56,6 @@ static cl::opt DistributeNonIfConvertible( STATISTIC(NumLoopsDistributed, "Number of loops distributed"); -/// \brief Remaps instructions in a loop including the preheader. -static void remapInstructionsInLoop(const SmallVectorImpl &Blocks, - ValueToValueMapTy &VMap) { - // Rewrite the code to refer to itself. - for (auto *BB : Blocks) - for (auto &Inst : *BB) - RemapInstruction(&Inst, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); -} - -/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p -/// Blocks. -/// -/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block -/// \p LoopDomBB. Insert the new blocks before block specified in \p Before. -static Loop *cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, - Loop *OrigLoop, ValueToValueMapTy &VMap, - const Twine &NameSuffix, LoopInfo *LI, - DominatorTree *DT, - SmallVectorImpl &Blocks) { - Function *F = OrigLoop->getHeader()->getParent(); - Loop *ParentLoop = OrigLoop->getParentLoop(); - - Loop *NewLoop = new Loop(); - if (ParentLoop) - ParentLoop->addChildLoop(NewLoop); - else - LI->addTopLevelLoop(NewLoop); - - BasicBlock *OrigPH = OrigLoop->getLoopPreheader(); - BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F); - // To rename the loop PHIs. - VMap[OrigPH] = NewPH; - Blocks.push_back(NewPH); - - // Update LoopInfo. - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewPH, *LI); - - // Update DominatorTree. - DT->addNewBlock(NewPH, LoopDomBB); - - for (BasicBlock *BB : OrigLoop->getBlocks()) { - BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F); - VMap[BB] = NewBB; - - // Update LoopInfo. - NewLoop->addBasicBlockToLoop(NewBB, *LI); - - // Update DominatorTree. - BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); - DT->addNewBlock(NewBB, cast(VMap[IDomBB])); - - Blocks.push_back(NewBB); - } - - // Move them physically from the end of the block list. - F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH); - F->getBasicBlockList().splice(Before, F->getBasicBlockList(), - NewLoop->getHeader(), F->end()); - - return NewLoop; -} - namespace { /// \brief Maintains the set of instructions of the loop for a partition before /// cloning. After cloning, it hosts the new loop. @@ -204,7 +141,9 @@ public: ValueToValueMapTy &getVMap() { return VMap; } /// \brief Remaps the cloned instructions using VMap. - void remapInstructions() { remapInstructionsInLoop(ClonedLoopBlocks, VMap); } + void remapInstructions() { + remapInstructionsInBlocks(ClonedLoopBlocks, VMap); + } /// \brief Based on the set of instructions selected for this partition, /// removes the unnecessary ones. @@ -225,16 +164,14 @@ public: // Delete the instructions backwards, as it has a reduced likelihood of // having to update as many def-use and use-def chains. - for (auto I = Unused.rbegin(), E = Unused.rend(); I != E; ++I) { - auto *Inst = *I; - + for (auto *Inst : make_range(Unused.rbegin(), Unused.rend())) { if (!Inst->use_empty()) Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); Inst->eraseFromParent(); } } - void print() { + void print() const { if (DepCycle) dbgs() << " (cycle)\n"; for (auto *I : Set) @@ -287,11 +224,10 @@ public: /// contain cycles. Otherwise start a new partition for it. void addToCyclicPartition(Instruction *Inst) { // If the current partition is non-cyclic. Start a new one. - if (PartitionContainer.empty() || !PartitionContainer.back()->hasDepCycle()) - PartitionContainer.push_back( - llvm::make_unique(Inst, L, true)); + if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle()) + PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true); else - PartitionContainer.back()->add(Inst); + PartitionContainer.back().add(Inst); } /// \brief Adds \p Inst into a partition that is not marked to contain @@ -300,7 +236,7 @@ public: // Initially we isolate memory instructions into as many partitions as // possible, then later we may merge them back together. void addToNewNonCyclicPartition(Instruction *Inst) { - PartitionContainer.push_back(llvm::make_unique(Inst, L)); + PartitionContainer.emplace_back(Inst, L); } /// \brief Merges adjacent non-cyclic partitions. @@ -360,7 +296,7 @@ public: for (PartitionContainerT::iterator I = PartitionContainer.begin(), E = PartitionContainer.end(); I != E; ++I) { - auto *PartI = I->get(); + auto *PartI = &*I; // If a load occurs in two partitions PartI and PartJ, merge all // partitions (PartI, PartJ] into PartI. @@ -379,8 +315,8 @@ public: auto PartJ = I; do { --PartJ; - ToBeMerged.unionSets(PartI, PartJ->get()); - } while (PartJ->get() != LoadToPart->second); + ToBeMerged.unionSets(PartI, &*PartJ); + } while (&*PartJ != LoadToPart->second); } } } @@ -402,13 +338,8 @@ public: } // Remove the empty partitions. - for (PartitionContainerT::iterator PartI = PartitionContainer.begin(), - E = PartitionContainer.end(); - PartI != E;) - if ((*PartI)->empty()) - PartI = PartitionContainer.erase(PartI); - else - ++PartI; + PartitionContainer.remove_if( + [](const InstPartition &P) { return P.empty(); }); return true; } @@ -417,8 +348,8 @@ public: /// instruction is duplicated across multiple partitions, set the entry to -1. void setupPartitionIdOnInstructions() { int PartitionID = 0; - for (auto &PartitionPtr : PartitionContainer) { - for (Instruction *Inst : *PartitionPtr) { + for (const auto &Partition : PartitionContainer) { + for (Instruction *Inst : Partition) { bool NewElt; InstToPartitionIdT::iterator Iter; @@ -435,7 +366,7 @@ public: /// instructions require. void populateUsedSet() { for (auto &P : PartitionContainer) - P->populateUsedSet(); + P.populateUsedSet(); } /// \brief This performs the main chunk of the work of cloning the loops for @@ -461,10 +392,10 @@ public: // update PH to point to the newly added preheader. BasicBlock *TopPH = OrigPH; unsigned Index = getSize() - 1; - for (auto I = std::next(PartitionContainer.crbegin()), - E = PartitionContainer.crend(); + for (auto I = std::next(PartitionContainer.rbegin()), + E = PartitionContainer.rend(); I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) { - auto &Part = *I; + auto *Part = &*I; NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT); @@ -481,14 +412,14 @@ public: E = PartitionContainer.cend(); Next != E; ++Curr, ++Next) DT->changeImmediateDominator( - (*Next)->getDistributedLoop()->getLoopPreheader(), - (*Curr)->getDistributedLoop()->getExitingBlock()); + Next->getDistributedLoop()->getLoopPreheader(), + Curr->getDistributedLoop()->getExitingBlock()); } /// \brief Removes the dead instructions from the cloned loops. void removeUnusedInsts() { - for (auto &PartitionPtr : PartitionContainer) - PartitionPtr->removeUnusedInsts(); + for (auto &Partition : PartitionContainer) + Partition.removeUnusedInsts(); } /// \brief For each memory pointer, it computes the partitionId the pointer is @@ -499,15 +430,14 @@ public: /// partitions its entry is set to -1. SmallVector computePartitionSetForPointers(const LoopAccessInfo &LAI) { - const LoopAccessInfo::RuntimePointerCheck *RtPtrCheck = - LAI.getRuntimePointerCheck(); + const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking(); unsigned N = RtPtrCheck->Pointers.size(); SmallVector PtrToPartitions(N); for (unsigned I = 0; I < N; ++I) { - Value *Ptr = RtPtrCheck->Pointers[I]; + Value *Ptr = RtPtrCheck->Pointers[I].PointerValue; auto Instructions = - LAI.getInstructionsForAccess(Ptr, RtPtrCheck->IsWritePtr[I]); + LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr); int &Partition = PtrToPartitions[I]; // First set it to uninitialized. @@ -532,9 +462,9 @@ public: void print(raw_ostream &OS) const { unsigned Index = 0; - for (auto &P : PartitionContainer) { - OS << "Partition " << Index++ << " (" << P.get() << "):\n"; - P->print(); + for (const auto &P : PartitionContainer) { + OS << "Partition " << Index++ << " (" << &P << "):\n"; + P.print(); } } @@ -550,14 +480,14 @@ public: void printBlocks() const { unsigned Index = 0; - for (auto &P : PartitionContainer) { - dbgs() << "\nPartition " << Index++ << " (" << P.get() << "):\n"; - P->printBlocks(); + for (const auto &P : PartitionContainer) { + dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n"; + P.printBlocks(); } } private: - typedef std::list> PartitionContainerT; + typedef std::list PartitionContainerT; /// \brief List of partitions. PartitionContainerT PartitionContainer; @@ -576,12 +506,12 @@ private: void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) { InstPartition *PrevMatch = nullptr; for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) { - auto DoesMatch = Predicate(I->get()); + auto DoesMatch = Predicate(&*I); if (PrevMatch == nullptr && DoesMatch) { - PrevMatch = I->get(); + PrevMatch = &*I; ++I; } else if (PrevMatch != nullptr && DoesMatch) { - (*I)->moveTo(*PrevMatch); + I->moveTo(*PrevMatch); I = PartitionContainer.erase(I); } else { PrevMatch = nullptr; @@ -616,9 +546,7 @@ public: MemoryInstructionDependences( const SmallVectorImpl &Instructions, const SmallVectorImpl &InterestingDependences) { - std::transform(Instructions.begin(), Instructions.end(), - std::back_inserter(Accesses), - [](Instruction *Inst) { return Entry(Inst); }); + Accesses.append(Instructions.begin(), Instructions.end()); DEBUG(dbgs() << "Backward dependences:\n"); for (auto &Dep : InterestingDependences) @@ -637,126 +565,6 @@ private: AccessesType Accesses; }; -/// \brief Handles the loop versioning based on memchecks. -class RuntimeCheckEmitter { -public: - RuntimeCheckEmitter(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, - DominatorTree *DT) - : OrigLoop(L), NonDistributedLoop(nullptr), LAI(LAI), LI(LI), DT(DT) {} - - /// \brief Given the \p Partitions formed by Loop Distribution, it determines - /// in which partition each pointer is used. - void partitionPointers(InstPartitionContainer &Partitions) { - // Set up partition id in PtrRtChecks. Ptr -> Access -> Intruction -> - // Partition. - PtrToPartition = Partitions.computePartitionSetForPointers(LAI); - - DEBUG(dbgs() << "\nPointers:\n"); - DEBUG(LAI.getRuntimePointerCheck()->print(dbgs(), 0, &PtrToPartition)); - } - - /// \brief Returns true if we need memchecks to distribute the loop. - bool needsRuntimeChecks() const { - return LAI.getRuntimePointerCheck()->needsAnyChecking(&PtrToPartition); - } - - /// \brief Performs the CFG manipulation part of versioning the loop including - /// the DominatorTree and LoopInfo updates. - void versionLoop(Pass *P) { - Instruction *FirstCheckInst; - Instruction *MemRuntimeCheck; - // Add the memcheck in the original preheader (this is empty initially). - BasicBlock *MemCheckBB = OrigLoop->getLoopPreheader(); - std::tie(FirstCheckInst, MemRuntimeCheck) = - LAI.addRuntimeCheck(MemCheckBB->getTerminator(), &PtrToPartition); - assert(MemRuntimeCheck && "called even though needsAnyChecking = false"); - - // Rename the block to make the IR more readable. - MemCheckBB->setName(OrigLoop->getHeader()->getName() + ".ldist.memcheck"); - - // Create empty preheader for the loop (and after cloning for the - // original/nondist loop). - BasicBlock *PH = - SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI); - PH->setName(OrigLoop->getHeader()->getName() + ".ph"); - - // Clone the loop including the preheader. - // - // FIXME: This does not currently preserve SimplifyLoop because the exit - // block is a join between the two loops. - SmallVector NonDistributedLoopBlocks; - NonDistributedLoop = - cloneLoopWithPreheader(PH, MemCheckBB, OrigLoop, VMap, ".ldist.nondist", - LI, DT, NonDistributedLoopBlocks); - remapInstructionsInLoop(NonDistributedLoopBlocks, VMap); - - // Insert the conditional branch based on the result of the memchecks. - Instruction *OrigTerm = MemCheckBB->getTerminator(); - BranchInst::Create(NonDistributedLoop->getLoopPreheader(), - OrigLoop->getLoopPreheader(), MemRuntimeCheck, OrigTerm); - OrigTerm->eraseFromParent(); - - // The loops merge in the original exit block. This is now dominated by the - // memchecking block. - DT->changeImmediateDominator(OrigLoop->getExitBlock(), MemCheckBB); - } - - /// \brief Adds the necessary PHI nodes for the versioned loops based on the - /// loop-defined values used outside of the loop. - void addPHINodes(const SmallVectorImpl &DefsUsedOutside) { - BasicBlock *PHIBlock = OrigLoop->getExitBlock(); - assert(PHIBlock && "No single successor to loop exit block"); - - for (auto *Inst : DefsUsedOutside) { - auto *NonDistInst = cast(VMap[Inst]); - PHINode *PN; - BasicBlock::iterator I; - - // First see if we have a single-operand PHI with the value defined by the - // original loop. - for (I = PHIBlock->begin(); (PN = dyn_cast(I)); ++I) { - assert(PN->getNumOperands() == 1 && - "Exit block should only have on predecessor"); - if (PN->getIncomingValue(0) == Inst) - break; - } - // If not create it. - if (!PN) { - PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".ldist", - PHIBlock->begin()); - for (auto *User : Inst->users()) - if (!OrigLoop->contains(cast(User)->getParent())) - User->replaceUsesOfWith(Inst, PN); - PN->addIncoming(Inst, OrigLoop->getExitingBlock()); - } - // Add the new incoming value from the non-distributed loop. - PN->addIncoming(NonDistInst, NonDistributedLoop->getExitingBlock()); - } - } - -private: - /// \brief The original loop. This becomes the "versioned" one, i.e. control - /// goes if the memchecks all pass. - Loop *OrigLoop; - /// \brief The fall-back loop, i.e. if any of the memchecks fail. - Loop *NonDistributedLoop; - - /// \brief For each memory pointer it contains the partitionId it is used in. - /// - /// The I-th entry corresponds to I-th entry in LAI.getRuntimePointerCheck(). - /// If the pointer is used in multiple partitions the entry is set to -1. - SmallVector PtrToPartition; - - /// \brief This maps the instructions from OrigLoop to their counterpart in - /// NonDistributedLoop. - ValueToValueMapTy VMap; - - /// \brief Analyses used. - const LoopAccessInfo &LAI; - LoopInfo *LI; - DominatorTree *DT; -}; - /// \brief Returns the instructions that use values defined in the loop. static SmallVector findDefsUsedOutsideOfLoop(Loop *L) { SmallVector UsedOutside; @@ -819,6 +627,45 @@ public: static char ID; private: + /// \brief Filter out checks between pointers from the same partition. + /// + /// \p PtrToPartition contains the partition number for pointers. Partition + /// number -1 means that the pointer is used in multiple partitions. In this + /// case we can't safely omit the check. + SmallVector + includeOnlyCrossPartitionChecks( + const SmallVectorImpl &AllChecks, + const SmallVectorImpl &PtrToPartition, + const RuntimePointerChecking *RtPtrChecking) { + SmallVector Checks; + + std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks), + [&](const RuntimePointerChecking::PointerCheck &Check) { + for (unsigned PtrIdx1 : Check.first->Members) + for (unsigned PtrIdx2 : Check.second->Members) + // Only include this check if there is a pair of pointers + // that require checking and the pointers fall into + // separate partitions. + // + // (Note that we already know at this point that the two + // pointer groups need checking but it doesn't follow + // that each pair of pointers within the two groups need + // checking as well. + // + // In other words we don't want to include a check just + // because there is a pair of pointers between the two + // pointer groups that require checks and a different + // pair whose pointers fall into different partitions.) + if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) && + !RuntimePointerChecking::arePointersInSamePartition( + PtrToPartition, PtrIdx1, PtrIdx2)) + return true; + return false; + }); + + return Checks; + } + /// \brief Try to distribute an inner-most loop. bool processLoop(Loop *L) { assert(L->empty() && "Only process inner loops."); @@ -938,11 +785,17 @@ private: // If we need run-time checks to disambiguate pointers are run-time, version // the loop now. - RuntimeCheckEmitter RtCheckEmitter(LAI, L, LI, DT); - RtCheckEmitter.partitionPointers(Partitions); - if (RtCheckEmitter.needsRuntimeChecks()) { - RtCheckEmitter.versionLoop(this); - RtCheckEmitter.addPHINodes(DefsUsedOutside); + auto PtrToPartition = Partitions.computePartitionSetForPointers(LAI); + const auto *RtPtrChecking = LAI.getRuntimePointerChecking(); + const auto &AllChecks = RtPtrChecking->getChecks(); + auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition, + RtPtrChecking); + if (!Checks.empty()) { + DEBUG(dbgs() << "\nPointers:\n"); + DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks)); + LoopVersioning LVer(std::move(Checks), LAI, L, LI, DT); + LVer.versionLoop(); + LVer.addPHINodes(DefsUsedOutside); } // Create identical copies of the original loop for each partition and hook