diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 2c1aef68fbb..1ab77151682 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -29,7 +29,6 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/PtrUseVisitor.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -38,6 +37,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
@@ -244,8 +244,8 @@ public:
   void printUse(raw_ostream &OS, const_iterator I,
                 StringRef Indent = "  ") const;
   void print(raw_ostream &OS) const;
-  void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump(const_iterator I) const;
-  void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump() const;
+  void dump(const_iterator I) const;
+  void dump() const;
 #endif
 
 private:
@@ -480,7 +480,7 @@ private:
     if (!II.isVolatile())
       return markAsDead(II);
 
-    return insertUse(II, Offset, Size, /*IsSplittable=*/false);;
+    return insertUse(II, Offset, Size, /*IsSplittable=*/false);
   }
 
   // If we have seen both source and destination for a mem transfer, then
@@ -653,12 +653,6 @@ private:
   }
 };
 
-namespace {
-struct IsSliceDead {
-  bool operator()(const Slice &S) { return S.isDead(); }
-};
-}
-
 AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
     :
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -676,12 +670,13 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
     return;
   }
 
+  Slices.erase(std::remove_if(Slices.begin(), Slices.end(),
+                              std::mem_fun_ref(&Slice::isDead)),
+               Slices.end());
+
   // Sort the uses. This arranges for the offsets to be in ascending order,
   // and the sizes to be in descending order.
   std::sort(Slices.begin(), Slices.end());
-
-  Slices.erase(std::remove_if(Slices.begin(), Slices.end(), IsSliceDead()),
-               Slices.end());
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -717,8 +712,10 @@ void AllocaSlices::print(raw_ostream &OS) const {
     print(OS, I);
 }
 
-void AllocaSlices::dump(const_iterator I) const { print(dbgs(), I); }
-void AllocaSlices::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
+  print(dbgs(), I);
+}
+LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
 
 #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 
@@ -738,12 +735,13 @@ class AllocaPromoter : public LoadAndStorePromoter {
   SmallVector<DbgValueInst *, 4> DVIs;
 
 public:
-  AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+  AllocaPromoter(const SmallVectorImpl<Instruction *> &Insts, SSAUpdater &S,
                  AllocaInst &AI, DIBuilder &DIB)
-    : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
+      : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
 
   void run(const SmallVectorImpl<Instruction *> &Insts) {
-    // Remember which alloca we're promoting (for isInstInList).
+    // Retain the debug information attached to the alloca for use when
+    // rewriting loads and stores.
     if (MDNode *DebugNode = MDNode::getIfExists(AI.getContext(), &AI)) {
       for (Value::use_iterator UI = DebugNode->use_begin(),
                                UE = DebugNode->use_end();
@@ -755,7 +753,9 @@ public:
     }
 
     LoadAndStorePromoter::run(Insts);
-    AI.eraseFromParent();
+
+    // While we have the debug information, clear it off of the alloca. The
+    // caller takes care of deleting the alloca.
     while (!DDIs.empty())
       DDIs.pop_back_val()->eraseFromParent();
     while (!DVIs.empty())
@@ -764,9 +764,30 @@
 
   virtual bool isInstInList(Instruction *I,
                             const SmallVectorImpl<Instruction *> &Insts) const {
+    Value *Ptr;
     if (LoadInst *LI = dyn_cast<LoadInst>(I))
-      return LI->getOperand(0) == &AI;
-    return cast<StoreInst>(I)->getPointerOperand() == &AI;
+      Ptr = LI->getOperand(0);
+    else
+      Ptr = cast<StoreInst>(I)->getPointerOperand();
+
+    // Only used to detect cycles, which will be rare and quickly found as
+    // we're walking up a chain of defs rather than down through uses.
+    SmallPtrSet<Value *, 4> Visited;
+
+    do {
+      if (Ptr == &AI)
+        return true;
+
+      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
+        Ptr = BCI->getOperand(0);
+      else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
+        Ptr = GEPI->getPointerOperand();
+      else
+        return false;
+
+    } while (Visited.insert(Ptr));
+
+    return false;
   }
 
   virtual void updateDebugInfo(Instruction *Inst) const {
@@ -919,6 +940,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
                             AllocaSlices::const_iterator E,
                             uint64_t EndOffset) {
   Type *Ty = 0;
+  bool IgnoreNonIntegralTypes = false;
   for (AllocaSlices::const_iterator I = B; I != E; ++I) {
     Use *U = I->getUse();
     if (isa<IntrinsicInst>(*U->getUser()))
@@ -927,29 +949,37 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
       continue;
 
     Type *UserTy = 0;
-    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
+    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
       UserTy = LI->getType();
-    else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
       UserTy = SI->getValueOperand()->getType();
-    else
-      return 0; // Bail if we have weird uses.
+    } else {
+      IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
+      continue;
+    }
 
     if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
       // If the type is larger than the partition, skip it. We only encounter
      // this for split integer operations where we want to use the type of the
-      // entity causing the split.
-      if (ITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
+      // entity causing the split. Also skip if the type is not a byte width
+      // multiple.
+      if (ITy->getBitWidth() % 8 != 0 ||
+          ITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
        continue;
 
       // If we have found an integer type use covering the alloca, use that
-      // regardless of the other types, as integers are often used for a
-      // "bucket
-      // of bits" type.
+      // regardless of the other types, as integers are often used for
+      // a "bucket of bits" type.
+      //
+      // NB: This *must* be the only return from inside the loop so that the
+      // order of slices doesn't impact the computed type.
       return ITy;
+    } else if (IgnoreNonIntegralTypes) {
+      continue;
     }
 
     if (Ty && Ty != UserTy)
-      return 0;
+      IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
 
     Ty = UserTy;
   }
@@ -1433,6 +1463,10 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
   if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
     return false;
 
+  // We can convert pointers to integers and vice-versa. Same for vectors
+  // of pointers and integers.
+  OldTy = OldTy->getScalarType();
+  NewTy = NewTy->getScalarType();
   if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
     if (NewTy->isPointerTy() && OldTy->isPointerTy())
       return true;
@@ -1451,21 +1485,53 @@
 /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
 /// two types for viability with this routine.
 static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
-                           Type *Ty) {
-  assert(canConvertValue(DL, V->getType(), Ty) &&
-         "Value not convertable to type");
-  if (V->getType() == Ty)
+                           Type *NewTy) {
+  Type *OldTy = V->getType();
+  assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
+
+  if (OldTy == NewTy)
     return V;
-  if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
-    if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+
+  if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+    if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
       if (NewITy->getBitWidth() > OldITy->getBitWidth())
         return IRB.CreateZExt(V, NewITy);
-  if (V->getType()->isIntegerTy() && Ty->isPointerTy())
-    return IRB.CreateIntToPtr(V, Ty);
-  if (V->getType()->isPointerTy() && Ty->isIntegerTy())
-    return IRB.CreatePtrToInt(V, Ty);
-  return IRB.CreateBitCast(V, Ty);
+
+  // See if we need inttoptr for this type pair. A cast involving both scalars
+  // and vectors requires an additional bitcast.
+  if (OldTy->getScalarType()->isIntegerTy() &&
+      NewTy->getScalarType()->isPointerTy()) {
+    // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
+    if (OldTy->isVectorTy() && !NewTy->isVectorTy())
+      return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
+                                NewTy);
+
+    // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
+    if (!OldTy->isVectorTy() && NewTy->isVectorTy())
+      return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
+                                NewTy);
+
+    return IRB.CreateIntToPtr(V, NewTy);
+  }
+
+  // See if we need ptrtoint for this type pair. A cast involving both scalars
+  // and vectors requires an additional bitcast.
+  if (OldTy->getScalarType()->isPointerTy() &&
+      NewTy->getScalarType()->isIntegerTy()) {
+    // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
+    if (OldTy->isVectorTy() && !NewTy->isVectorTy())
+      return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+                               NewTy);
+
+    // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
+    if (!OldTy->isVectorTy() && NewTy->isVectorTy())
+      return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+                               NewTy);
+
+    return IRB.CreatePtrToInt(V, NewTy);
+  }
+
+  return IRB.CreateBitCast(V, NewTy);
 }
 
 /// \brief Test whether the given slice use can be promoted to a vector.
@@ -1874,6 +1940,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
   Use *OldUse;
   Instruction *OldPtr;
 
+  // Output members carrying state about the result of visiting and rewriting
+  // the slice of the alloca.
+  bool IsUsedByRewrittenSpeculatableInstructions;
+
   // Utility IR builder, whose name prefix is setup for each visited use, and
   // the insertion point is set to point to the user.
   IRBuilderTy IRB;
@@ -1896,7 +1966,8 @@ public:
                          DL.getTypeSizeInBits(NewAI.getAllocatedType()))
                   : 0),
         BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
-        OldPtr(), IRB(NewAI.getContext(), ConstantFolder()) {
+        OldPtr(), IsUsedByRewrittenSpeculatableInstructions(false),
+        IRB(NewAI.getContext(), ConstantFolder()) {
     if (VecTy) {
       assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
              "Only multiple-of-8 sized vector elements are viable");
@@ -1928,6 +1999,20 @@ public:
     return CanSROA;
   }
 
+  /// \brief Query whether this slice is used by speculatable instructions after
+  /// rewriting.
+  ///
+  /// These instructions (PHIs and Selects currently) require the alloca slice
+  /// to run back through the rewriter. Thus, they are promotable, but not on
+  /// this iteration. This is distinct from a slice which is unpromotable for
+  /// some other reason, in which case we don't even want to perform the
+  /// speculation. This can be queried at any time and reflects whether (at
+  /// that point) a visit call has rewritten a speculatable instruction on the
+  /// current slice.
+  bool isUsedByRewrittenSpeculatableInstructions() const {
+    return IsUsedByRewrittenSpeculatableInstructions;
+  }
+
 private:
   // Make sure the other visit overloads are visible.
   using Base::visit;
@@ -2545,7 +2630,7 @@ private:
     // as local as possible to the PHI. To do that, we re-use the location of
     // the old pointer, which necessarily must be in the right position to
     // dominate the PHI.
-    IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr));
+    IRBuilderTy PtrBuilder(OldPtr);
     PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
                              ".");
 
@@ -2558,10 +2643,11 @@ private:
     deleteIfTriviallyDead(OldPtr);
 
     // Check whether we can speculate this PHI node, and if so remember that
-    // fact and return that this alloca remains viable for promotion to an SSA
-    // value.
+    // fact and queue it up for another iteration after the speculation
+    // occurs.
     if (isSafePHIToSpeculate(PN, &DL)) {
       Pass.SpeculatablePHIs.insert(&PN);
+      IsUsedByRewrittenSpeculatableInstructions = true;
       return true;
     }
 
@@ -2586,10 +2672,11 @@ private:
     deleteIfTriviallyDead(OldPtr);
 
     // Check whether we can speculate this select instruction, and if so
-    // remember that fact and return that this alloca remains viable for
-    // promotion to an SSA value.
+    // remember that fact and queue it up for another iteration after the
+    // speculation occurs.
     if (isSafeSelectToSpeculate(SI, &DL)) {
       Pass.SpeculatableSelects.insert(&SI);
+      IsUsedByRewrittenSpeculatableInstructions = true;
       return true;
     }
 
@@ -3035,10 +3122,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
   unsigned PPWOldSize = PostPromotionWorklist.size();
   unsigned SPOldSize = SpeculatablePHIs.size();
   unsigned SSOldSize = SpeculatableSelects.size();
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
   unsigned NumUses = 0;
-#endif
 
   AllocaSliceRewriter Rewriter(*DL, S, *this, AI, *NewAI, BeginOffset,
                                EndOffset, IsVectorPromotable,
@@ -3050,38 +3134,34 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
     DEBUG(dbgs() << "  rewriting split ");
     DEBUG(S.printSlice(dbgs(), *SUI, ""));
     Promotable &= Rewriter.visit(*SUI);
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
     ++NumUses;
-#endif
   }
 
   for (AllocaSlices::iterator I = B; I != E; ++I) {
     DEBUG(dbgs() << "  rewriting ");
     DEBUG(S.printSlice(dbgs(), I, ""));
     Promotable &= Rewriter.visit(I);
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
     ++NumUses;
-#endif
   }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
   NumAllocaPartitionUses += NumUses;
   MaxUsesPerAllocaPartition =
       std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
-#endif
 
-  if (Promotable && (SpeculatablePHIs.size() > SPOldSize ||
-                     SpeculatableSelects.size() > SSOldSize)) {
-    // If we have a promotable alloca except for some unspeculated loads below
-    // PHIs or Selects, iterate once. We will speculate the loads and on the
-    // next iteration rewrite them into a promotable form.
-    Worklist.insert(NewAI);
-  } else if (Promotable) {
+  if (Promotable && !Rewriter.isUsedByRewrittenSpeculatableInstructions()) {
     DEBUG(dbgs() << "  and queuing for promotion\n");
     PromotableAllocas.push_back(NewAI);
-  } else if (NewAI != &AI) {
+  } else if (NewAI != &AI ||
+             (Promotable &&
+              Rewriter.isUsedByRewrittenSpeculatableInstructions())) {
     // If we can't promote the alloca, iterate on it to check for new
     // refinements exposed by splitting the current alloca. Don't iterate on an
     // alloca which didn't actually change and didn't get promoted.
+    //
+    // Alternatively, if we could promote the alloca but have speculatable
+    // instructions then we will speculate them after finishing our processing
+    // of the original alloca. Mark the new one for re-visiting in the next
+    // iteration so the speculated operations can be rewritten.
+    //
     // FIXME: We should actually track whether the rewriter changed anything.
     Worklist.insert(NewAI);
   }
@@ -3142,10 +3222,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
   if (S.begin() == S.end())
     return false;
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
   unsigned NumPartitions = 0;
-#endif
-
   bool Changed = false;
   SmallVector<AllocaSlices::iterator, 4> SplitUses;
   uint64_t MaxSplitUseEndOffset = 0;
@@ -3192,9 +3269,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
     // Rewrite a sequence of overlapping slices.
     Changed |= rewritePartition(AI, S, SI, SJ, BeginOffset, MaxEndOffset,
                                 SplitUses);
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
     ++NumPartitions;
-#endif
 
     removeFinishedSplitUses(SplitUses, MaxSplitUseEndOffset, MaxEndOffset);
   }
@@ -3234,9 +3309,7 @@
     Changed |= rewritePartition(AI, S, SJ, SJ, MaxEndOffset,
                                 PostSplitEndOffset, SplitUses);
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
     ++NumPartitions;
-#endif
 
     if (SJ == SE)
       break; // Skip the rest, we don't need to do any cleanup.
@@ -3248,11 +3321,9 @@
     BeginOffset = SJ->beginOffset();
   }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
   NumAllocaPartitions += NumPartitions;
   MaxPartitionsPerAlloca =
       std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
-#endif
 
   return Changed;
 }
@@ -3360,6 +3431,15 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas) {
     }
   }
 }
 
+static void enqueueUsersInWorklist(Instruction &I,
+                                   SmallVectorImpl<Instruction *> &Worklist,
+                                   SmallPtrSet<Instruction *, 8> &Visited) {
+  for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
+       ++UI)
+    if (Visited.insert(cast<Instruction>(*UI)))
+      Worklist.push_back(cast<Instruction>(*UI));
+}
+
 /// \brief Promote the allocas, using the best available technique.
 ///
 /// This attempts to promote whatever allocas have been identified as viable in
@@ -3384,25 +3464,28 @@ bool SROA::promoteAllocas(Function &F) {
   DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
   SSAUpdater SSA;
   DIBuilder DIB(*F.getParent());
-  SmallVector<Instruction*, 64> Insts;
+  SmallVector<Instruction *, 8> Insts;
+
+  // We need a worklist to walk the uses of each alloca.
+  SmallVector<Instruction *, 8> Worklist;
+  SmallPtrSet<Instruction *, 8> Visited;
+  SmallVector<Instruction *, 8> DeadInsts;
 
   for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
     AllocaInst *AI = PromotableAllocas[Idx];
-    for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
-         UI != UE;) {
-      Instruction *I = cast<Instruction>(*UI++);
+    Insts.clear();
+    Worklist.clear();
+    Visited.clear();
+
+    enqueueUsersInWorklist(*AI, Worklist, Visited);
+
+    while (!Worklist.empty()) {
+      Instruction *I = Worklist.pop_back_val();
+
       // FIXME: Currently the SSAUpdater infrastructure doesn't reason about
       // lifetime intrinsics and so we strip them (and the bitcasts+GEPs
      // leading to them) here. Eventually it should use them to optimize the
       // scalar values produced.
-      if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
-        assert(onlyUsedByLifetimeMarkers(I) &&
-               "Found a bitcast used outside of a lifetime marker.");
-        while (!I->use_empty())
-          cast<Instruction>(*I->use_begin())->eraseFromParent();
-        I->eraseFromParent();
-        continue;
-      }
       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
         assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
                II->getIntrinsicID() == Intrinsic::lifetime_end);
@@ -3410,10 +3493,30 @@ bool SROA::promoteAllocas(Function &F) {
         continue;
       }
 
-      Insts.push_back(I);
+      // Push the loads and stores we find onto the list. SROA will already
+      // have validated that all loads and stores are viable candidates for
+      // promotion.
+      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+        assert(LI->getType() == AI->getAllocatedType());
+        Insts.push_back(LI);
+        continue;
+      }
+      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+        assert(SI->getValueOperand()->getType() == AI->getAllocatedType());
+        Insts.push_back(SI);
+        continue;
+      }
+
+      // For everything else, we know that only no-op bitcasts and GEPs will
+      // make it this far, just recurse through them and recall them for later
+      // removal.
+      DeadInsts.push_back(I);
+      enqueueUsersInWorklist(*I, Worklist, Visited);
    }
     AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
-    Insts.clear();
+    while (!DeadInsts.empty())
+      DeadInsts.pop_back_val()->eraseFromParent();
+    AI->eraseFromParent();
  }
 
   PromotableAllocas.clear();
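
The subtle part of the convertValue() change above is the two-step lowering used when exactly one side of a pointer/integer conversion is a vector: a single inttoptr or ptrtoint between a scalar and a vector is not valid IR, so the value is first bitcast to an integer of pointer width and only then converted. A minimal standalone sketch of one direction of that lowering, assuming the in-tree IRBuilder and DataLayout APIs the patch itself calls (the helper name is hypothetical, not part of the patch):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Hypothetical helper mirroring the new convertValue() path for a
// vector-of-integers source and a scalar pointer destination, e.g.
// <2 x i32> -> i8* becomes <2 x i32> -> i64 -> i8* on a 64-bit target.
static Value *vectorIntToScalarPtr(const DataLayout &DL, IRBuilder<> &IRB,
                                   Value *V, Type *NewTy) {
  // The bitcast is legal because both types have the same total bit width;
  // DL.getIntPtrType(NewTy) yields the pointer-sized integer type.
  Value *AsInt = IRB.CreateBitCast(V, DL.getIntPtrType(NewTy));
  // A plain inttoptr then finishes the conversion.
  return IRB.CreateIntToPtr(AsInt, NewTy);
}

The ptrtoint direction in the patch is symmetric: CreatePtrToInt to the pointer-sized integer (or vector of them) first, then a bitcast to the requested type.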