#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
#include <list>
#define LDIST_NAME "loop-distribute"
STATISTIC(NumLoopsDistributed, "Number of loops distributed");
-/// \brief Remaps instructions in a loop including the preheader.
-static void remapInstructionsInLoop(const SmallVectorImpl<BasicBlock *> &Blocks,
- ValueToValueMapTy &VMap) {
- // Rewrite the code to refer to itself.
- for (auto *BB : Blocks)
- for (auto &Inst : *BB)
- RemapInstruction(&Inst, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
-}
-
-/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
-/// Blocks.
-///
-/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block
-/// \p LoopDomBB. Insert the new blocks before block specified in \p Before.
-static Loop *cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
- Loop *OrigLoop, ValueToValueMapTy &VMap,
- const Twine &NameSuffix, LoopInfo *LI,
- DominatorTree *DT,
- SmallVectorImpl<BasicBlock *> &Blocks) {
- Function *F = OrigLoop->getHeader()->getParent();
- Loop *ParentLoop = OrigLoop->getParentLoop();
-
- Loop *NewLoop = new Loop();
- if (ParentLoop)
- ParentLoop->addChildLoop(NewLoop);
- else
- LI->addTopLevelLoop(NewLoop);
-
- BasicBlock *OrigPH = OrigLoop->getLoopPreheader();
- BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F);
- // To rename the loop PHIs.
- VMap[OrigPH] = NewPH;
- Blocks.push_back(NewPH);
-
- // Update LoopInfo.
- if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewPH, *LI);
-
- // Update DominatorTree.
- DT->addNewBlock(NewPH, LoopDomBB);
-
- for (BasicBlock *BB : OrigLoop->getBlocks()) {
- BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
- VMap[BB] = NewBB;
-
- // Update LoopInfo.
- NewLoop->addBasicBlockToLoop(NewBB, *LI);
-
- // Update DominatorTree.
- BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock();
- DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
-
- Blocks.push_back(NewBB);
- }
-
- // Move them physically from the end of the block list.
- F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH);
- F->getBasicBlockList().splice(Before, F->getBasicBlockList(),
- NewLoop->getHeader(), F->end());
-
- return NewLoop;
-}
-
namespace {
/// \brief Maintains the set of instructions of the loop for a partition before
/// cloning. After cloning, it hosts the new loop.
ValueToValueMapTy &getVMap() { return VMap; }
/// \brief Remaps the cloned instructions using VMap.
- void remapInstructions() { remapInstructionsInLoop(ClonedLoopBlocks, VMap); }
+ void remapInstructions() {
+ remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
+ }
/// \brief Based on the set of instructions selected for this partition,
/// removes the unnecessary ones.
// Delete the instructions backwards, as it has a reduced likelihood of
// having to update as many def-use and use-def chains.
- for (auto I = Unused.rbegin(), E = Unused.rend(); I != E; ++I) {
- auto *Inst = *I;
-
+ for (auto *Inst : make_range(Unused.rbegin(), Unused.rend())) {
if (!Inst->use_empty())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
Inst->eraseFromParent();
}
}
- void print() {
+ void print() const {
if (DepCycle)
dbgs() << " (cycle)\n";
for (auto *I : Set)
/// contain cycles. Otherwise start a new partition for it.
void addToCyclicPartition(Instruction *Inst) {
// If the current partition is non-cyclic. Start a new one.
- if (PartitionContainer.empty() || !PartitionContainer.back()->hasDepCycle())
- PartitionContainer.push_back(
- llvm::make_unique<InstPartition>(Inst, L, true));
+ if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
+ PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
else
- PartitionContainer.back()->add(Inst);
+ PartitionContainer.back().add(Inst);
}
/// \brief Adds \p Inst into a partition that is not marked to contain
// Initially we isolate memory instructions into as many partitions as
// possible, then later we may merge them back together.
void addToNewNonCyclicPartition(Instruction *Inst) {
- PartitionContainer.push_back(llvm::make_unique<InstPartition>(Inst, L));
+ PartitionContainer.emplace_back(Inst, L);
}
/// \brief Merges adjacent non-cyclic partitions.
for (PartitionContainerT::iterator I = PartitionContainer.begin(),
E = PartitionContainer.end();
I != E; ++I) {
- auto *PartI = I->get();
+ auto *PartI = &*I;
// If a load occurs in two partitions PartI and PartJ, merge all
// partitions (PartI, PartJ] into PartI.
auto PartJ = I;
do {
--PartJ;
- ToBeMerged.unionSets(PartI, PartJ->get());
- } while (PartJ->get() != LoadToPart->second);
+ ToBeMerged.unionSets(PartI, &*PartJ);
+ } while (&*PartJ != LoadToPart->second);
}
}
}
}
// Remove the empty partitions.
- for (PartitionContainerT::iterator PartI = PartitionContainer.begin(),
- E = PartitionContainer.end();
- PartI != E;)
- if ((*PartI)->empty())
- PartI = PartitionContainer.erase(PartI);
- else
- ++PartI;
+ PartitionContainer.remove_if(
+ [](const InstPartition &P) { return P.empty(); });
return true;
}
/// instruction is duplicated across multiple partitions, set the entry to -1.
void setupPartitionIdOnInstructions() {
int PartitionID = 0;
- for (auto &PartitionPtr : PartitionContainer) {
- for (Instruction *Inst : *PartitionPtr) {
+ for (const auto &Partition : PartitionContainer) {
+ for (Instruction *Inst : Partition) {
bool NewElt;
InstToPartitionIdT::iterator Iter;
/// instructions require.
void populateUsedSet() {
for (auto &P : PartitionContainer)
- P->populateUsedSet();
+ P.populateUsedSet();
}
/// \brief This performs the main chunk of the work of cloning the loops for
// update PH to point to the newly added preheader.
BasicBlock *TopPH = OrigPH;
unsigned Index = getSize() - 1;
- for (auto I = std::next(PartitionContainer.crbegin()),
- E = PartitionContainer.crend();
+ for (auto I = std::next(PartitionContainer.rbegin()),
+ E = PartitionContainer.rend();
I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
- auto &Part = *I;
+ auto *Part = &*I;
NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
E = PartitionContainer.cend();
Next != E; ++Curr, ++Next)
DT->changeImmediateDominator(
- (*Next)->getDistributedLoop()->getLoopPreheader(),
- (*Curr)->getDistributedLoop()->getExitingBlock());
+ Next->getDistributedLoop()->getLoopPreheader(),
+ Curr->getDistributedLoop()->getExitingBlock());
}
/// \brief Removes the dead instructions from the cloned loops.
void removeUnusedInsts() {
- for (auto &PartitionPtr : PartitionContainer)
- PartitionPtr->removeUnusedInsts();
+ for (auto &Partition : PartitionContainer)
+ Partition.removeUnusedInsts();
}
/// \brief For each memory pointer, it computes the partitionId the pointer is
/// partitions its entry is set to -1.
SmallVector<int, 8>
computePartitionSetForPointers(const LoopAccessInfo &LAI) {
- const LoopAccessInfo::RuntimePointerCheck *RtPtrCheck =
- LAI.getRuntimePointerCheck();
+ const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
unsigned N = RtPtrCheck->Pointers.size();
SmallVector<int, 8> PtrToPartitions(N);
for (unsigned I = 0; I < N; ++I) {
- Value *Ptr = RtPtrCheck->Pointers[I];
+ Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
auto Instructions =
- LAI.getInstructionsForAccess(Ptr, RtPtrCheck->IsWritePtr[I]);
+ LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
int &Partition = PtrToPartitions[I];
// First set it to uninitialized.
void print(raw_ostream &OS) const {
unsigned Index = 0;
- for (auto &P : PartitionContainer) {
- OS << "Partition " << Index++ << " (" << P.get() << "):\n";
- P->print();
+ for (const auto &P : PartitionContainer) {
+ OS << "Partition " << Index++ << " (" << &P << "):\n";
+ P.print();
}
}
void printBlocks() const {
unsigned Index = 0;
- for (auto &P : PartitionContainer) {
- dbgs() << "\nPartition " << Index++ << " (" << P.get() << "):\n";
- P->printBlocks();
+ for (const auto &P : PartitionContainer) {
+ dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
+ P.printBlocks();
}
}
private:
- typedef std::list<std::unique_ptr<InstPartition>> PartitionContainerT;
+ typedef std::list<InstPartition> PartitionContainerT;
/// \brief List of partitions.
PartitionContainerT PartitionContainer;
void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
InstPartition *PrevMatch = nullptr;
for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
- auto DoesMatch = Predicate(I->get());
+ auto DoesMatch = Predicate(&*I);
if (PrevMatch == nullptr && DoesMatch) {
- PrevMatch = I->get();
+ PrevMatch = &*I;
++I;
} else if (PrevMatch != nullptr && DoesMatch) {
- (*I)->moveTo(*PrevMatch);
+ I->moveTo(*PrevMatch);
I = PartitionContainer.erase(I);
} else {
PrevMatch = nullptr;
MemoryInstructionDependences(
const SmallVectorImpl<Instruction *> &Instructions,
const SmallVectorImpl<Dependence> &InterestingDependences) {
- std::transform(Instructions.begin(), Instructions.end(),
- std::back_inserter(Accesses),
- [](Instruction *Inst) { return Entry(Inst); });
+ Accesses.append(Instructions.begin(), Instructions.end());
DEBUG(dbgs() << "Backward dependences:\n");
for (auto &Dep : InterestingDependences)
AccessesType Accesses;
};
-/// \brief Handles the loop versioning based on memchecks.
-class RuntimeCheckEmitter {
-public:
- RuntimeCheckEmitter(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
- DominatorTree *DT)
- : OrigLoop(L), NonDistributedLoop(nullptr), LAI(LAI), LI(LI), DT(DT) {}
-
- /// \brief Given the \p Partitions formed by Loop Distribution, it determines
- /// in which partition each pointer is used.
- void partitionPointers(InstPartitionContainer &Partitions) {
- // Set up partition id in PtrRtChecks. Ptr -> Access -> Intruction ->
- // Partition.
- PtrToPartition = Partitions.computePartitionSetForPointers(LAI);
-
- DEBUG(dbgs() << "\nPointers:\n");
- DEBUG(LAI.getRuntimePointerCheck()->print(dbgs(), 0, &PtrToPartition));
- }
-
- /// \brief Returns true if we need memchecks to distribute the loop.
- bool needsRuntimeChecks() const {
- return LAI.getRuntimePointerCheck()->needsAnyChecking(&PtrToPartition);
- }
-
- /// \brief Performs the CFG manipulation part of versioning the loop including
- /// the DominatorTree and LoopInfo updates.
- void versionLoop(Pass *P) {
- Instruction *FirstCheckInst;
- Instruction *MemRuntimeCheck;
- // Add the memcheck in the original preheader (this is empty initially).
- BasicBlock *MemCheckBB = OrigLoop->getLoopPreheader();
- std::tie(FirstCheckInst, MemRuntimeCheck) =
- LAI.addRuntimeCheck(MemCheckBB->getTerminator(), &PtrToPartition);
- assert(MemRuntimeCheck && "called even though needsAnyChecking = false");
-
- // Rename the block to make the IR more readable.
- MemCheckBB->setName(OrigLoop->getHeader()->getName() + ".ldist.memcheck");
-
- // Create empty preheader for the loop (and after cloning for the
- // original/nondist loop).
- BasicBlock *PH =
- SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI);
- PH->setName(OrigLoop->getHeader()->getName() + ".ph");
-
- // Clone the loop including the preheader.
- //
- // FIXME: This does not currently preserve SimplifyLoop because the exit
- // block is a join between the two loops.
- SmallVector<BasicBlock *, 8> NonDistributedLoopBlocks;
- NonDistributedLoop =
- cloneLoopWithPreheader(PH, MemCheckBB, OrigLoop, VMap, ".ldist.nondist",
- LI, DT, NonDistributedLoopBlocks);
- remapInstructionsInLoop(NonDistributedLoopBlocks, VMap);
-
- // Insert the conditional branch based on the result of the memchecks.
- Instruction *OrigTerm = MemCheckBB->getTerminator();
- BranchInst::Create(NonDistributedLoop->getLoopPreheader(),
- OrigLoop->getLoopPreheader(), MemRuntimeCheck, OrigTerm);
- OrigTerm->eraseFromParent();
-
- // The loops merge in the original exit block. This is now dominated by the
- // memchecking block.
- DT->changeImmediateDominator(OrigLoop->getExitBlock(), MemCheckBB);
- }
-
- /// \brief Adds the necessary PHI nodes for the versioned loops based on the
- /// loop-defined values used outside of the loop.
- void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
- BasicBlock *PHIBlock = OrigLoop->getExitBlock();
- assert(PHIBlock && "No single successor to loop exit block");
-
- for (auto *Inst : DefsUsedOutside) {
- auto *NonDistInst = cast<Instruction>(VMap[Inst]);
- PHINode *PN;
- BasicBlock::iterator I;
-
- // First see if we have a single-operand PHI with the value defined by the
- // original loop.
- for (I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
- assert(PN->getNumOperands() == 1 &&
- "Exit block should only have on predecessor");
- if (PN->getIncomingValue(0) == Inst)
- break;
- }
- // If not create it.
- if (!PN) {
- PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".ldist",
- PHIBlock->begin());
- for (auto *User : Inst->users())
- if (!OrigLoop->contains(cast<Instruction>(User)->getParent()))
- User->replaceUsesOfWith(Inst, PN);
- PN->addIncoming(Inst, OrigLoop->getExitingBlock());
- }
- // Add the new incoming value from the non-distributed loop.
- PN->addIncoming(NonDistInst, NonDistributedLoop->getExitingBlock());
- }
- }
-
-private:
- /// \brief The original loop. This becomes the "versioned" one, i.e. control
- /// goes if the memchecks all pass.
- Loop *OrigLoop;
- /// \brief The fall-back loop, i.e. if any of the memchecks fail.
- Loop *NonDistributedLoop;
-
- /// \brief For each memory pointer it contains the partitionId it is used in.
- ///
- /// The I-th entry corresponds to I-th entry in LAI.getRuntimePointerCheck().
- /// If the pointer is used in multiple partitions the entry is set to -1.
- SmallVector<int, 8> PtrToPartition;
-
- /// \brief This maps the instructions from OrigLoop to their counterpart in
- /// NonDistributedLoop.
- ValueToValueMapTy VMap;
-
- /// \brief Analyses used.
- const LoopAccessInfo &LAI;
- LoopInfo *LI;
- DominatorTree *DT;
-};
-
/// \brief Returns the instructions that use values defined in the loop.
static SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L) {
SmallVector<Instruction *, 8> UsedOutside;
static char ID;
private:
+ /// \brief Filter out checks between pointers from the same partition.
+ ///
+ /// \p PtrToPartition contains the partition number for pointers. Partition
+ /// number -1 means that the pointer is used in multiple partitions. In this
+ /// case we can't safely omit the check.
+ SmallVector<RuntimePointerChecking::PointerCheck, 4>
+ includeOnlyCrossPartitionChecks(
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &AllChecks,
+ const SmallVectorImpl<int> &PtrToPartition,
+ const RuntimePointerChecking *RtPtrChecking) {
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
+
+ std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ for (unsigned PtrIdx1 : Check.first->Members)
+ for (unsigned PtrIdx2 : Check.second->Members)
+ // Only include this check if there is a pair of pointers
+ // that require checking and the pointers fall into
+ // separate partitions.
+ //
+ // (Note that we already know at this point that the two
+ // pointer groups need checking but it doesn't follow
+ // that each pair of pointers within the two groups need
+ // checking as well.
+ //
+ // In other words we don't want to include a check just
+ // because there is a pair of pointers between the two
+ // pointer groups that require checks and a different
+ // pair whose pointers fall into different partitions.)
+ if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
+ !RuntimePointerChecking::arePointersInSamePartition(
+ PtrToPartition, PtrIdx1, PtrIdx2))
+ return true;
+ return false;
+ });
+
+ return Checks;
+ }
+
/// \brief Try to distribute an inner-most loop.
bool processLoop(Loop *L) {
assert(L->empty() && "Only process inner loops.");
// If we need run-time checks to disambiguate pointers are run-time, version
// the loop now.
- RuntimeCheckEmitter RtCheckEmitter(LAI, L, LI, DT);
- RtCheckEmitter.partitionPointers(Partitions);
- if (RtCheckEmitter.needsRuntimeChecks()) {
- RtCheckEmitter.versionLoop(this);
- RtCheckEmitter.addPHINodes(DefsUsedOutside);
+ auto PtrToPartition = Partitions.computePartitionSetForPointers(LAI);
+ const auto *RtPtrChecking = LAI.getRuntimePointerChecking();
+ const auto &AllChecks = RtPtrChecking->getChecks();
+ auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
+ RtPtrChecking);
+ if (!Checks.empty()) {
+ DEBUG(dbgs() << "\nPointers:\n");
+ DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
+ LoopVersioning LVer(std::move(Checks), LAI, L, LI, DT);
+ LVer.versionLoop();
+ LVer.addPHINodes(DefsUsedOutside);
}
// Create identical copies of the original loop for each partition and hook