X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=lib%2FTransforms%2FScalar%2FSROA.cpp;h=d37d1e753eb86583e5b6d5c7786d9bd105898515;hb=16d857e06ed633a18d3cc6fe0caa7e9421a5dc9e;hp=e8310d7f53ee485f273a5b183434236a7c72aadd;hpb=15552c46a0f3307e5e363dcce33638aa489fe928;p=oota-llvm.git

diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index e8310d7f53e..d37d1e753eb 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -55,7 +55,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
 
 #if __cplusplus >= 201103L && !defined(NDEBUG)
 // We only use this for a debug check in C++11
@@ -77,11 +76,6 @@
 STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
 STATISTIC(NumDeleted, "Number of instructions deleted");
 STATISTIC(NumVectorized, "Number of vectorized aggregates");
 
-/// Hidden option to force the pass to not use DomTree and mem2reg, instead
-/// forming SSA values through the SSAUpdater infrastructure.
-static cl::opt<bool> ForceSSAUpdater("force-ssa-updater", cl::init(false),
-                                     cl::Hidden);
-
 /// Hidden option to enable randomly shuffling the slices to help uncover
 /// instability in their order.
 static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices",
@@ -270,7 +264,8 @@ public:
   friend class AllocaSlices;
   friend class AllocaSlices::partition_iterator;
 
-  /// \brief The begining and ending offsets of the alloca for this partition.
+  /// \brief The beginning and ending offsets of the alloca for this
+  /// partition.
   uint64_t BeginOffset, EndOffset;
 
   /// \brief The start and end iterators of this partition.
@@ -439,7 +434,7 @@ public:
 
       // OK, we need to consume new slices. Set the end offset based on the
       // current slice, and step SJ past it. The beginning offset of the
-      // parttion is the beginning offset of the next slice unless we have
+      // partition is the beginning offset of the next slice unless we have
      // pre-existing split slices that are continuing, in which case we begin
       // at the prior end offset.
       P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
@@ -493,7 +488,7 @@ public:
            "End iterators don't match between compared partition iterators!");
 
     // The observed positions of partitions are marked by the P.SI iterator and
-    // the emptyness of the split slices. The latter is only relevant when
+    // the emptiness of the split slices. The latter is only relevant when
     // P.SI == SE, as the end iterator will additionally have an empty split
     // slices list, but the prior may have the same P.SI and a tail of split
     // slices.
@@ -1072,109 +1067,6 @@ LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
 
 #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 
-namespace {
-/// \brief Implementation of LoadAndStorePromoter for promoting allocas.
-///
-/// This subclass of LoadAndStorePromoter adds overrides to handle promoting
-/// the loads and stores of an alloca instruction, as well as updating its
-/// debug information. This is used when a domtree is unavailable and thus
-/// mem2reg in its full form can't be used to handle promotion of allocas to
-/// scalar values.
-class AllocaPromoter : public LoadAndStorePromoter {
-  AllocaInst &AI;
-  DIBuilder &DIB;
-
-  SmallVector<DbgDeclareInst *, 4> DDIs;
-  SmallVector<DbgValueInst *, 4> DVIs;
-
-public:
-  AllocaPromoter(const SmallVectorImpl<Instruction *> &Insts, SSAUpdater &S,
-                 AllocaInst &AI, DIBuilder &DIB)
-      : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
-
-  void run(const SmallVectorImpl<Instruction *> &Insts) {
-    // Retain the debug information attached to the alloca for use when
-    // rewriting loads and stores.
-    if (auto *L = LocalAsMetadata::getIfExists(&AI)) {
-      if (auto *DebugNode = MetadataAsValue::getIfExists(AI.getContext(), L)) {
-        for (User *U : DebugNode->users())
-          if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
-            DDIs.push_back(DDI);
-          else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
-            DVIs.push_back(DVI);
-      }
-    }
-
-    LoadAndStorePromoter::run(Insts);
-
-    // While we have the debug information, clear it off of the alloca. The
-    // caller takes care of deleting the alloca.
-    while (!DDIs.empty())
-      DDIs.pop_back_val()->eraseFromParent();
-    while (!DVIs.empty())
-      DVIs.pop_back_val()->eraseFromParent();
-  }
-
-  bool
-  isInstInList(Instruction *I,
-               const SmallVectorImpl<Instruction *> &Insts) const override {
-    Value *Ptr;
-    if (LoadInst *LI = dyn_cast<LoadInst>(I))
-      Ptr = LI->getOperand(0);
-    else
-      Ptr = cast<StoreInst>(I)->getPointerOperand();
-
-    // Only used to detect cycles, which will be rare and quickly found as
-    // we're walking up a chain of defs rather than down through uses.
-    SmallPtrSet<Value *, 4> Visited;
-
-    do {
-      if (Ptr == &AI)
-        return true;
-
-      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
-        Ptr = BCI->getOperand(0);
-      else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
-        Ptr = GEPI->getPointerOperand();
-      else
-        return false;
-
-    } while (Visited.insert(Ptr).second);
-
-    return false;
-  }
-
-  void updateDebugInfo(Instruction *Inst) const override {
-    for (DbgDeclareInst *DDI : DDIs)
-      if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
-        ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
-      else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
-        ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
-    for (DbgValueInst *DVI : DVIs) {
-      Value *Arg = nullptr;
-      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-        // If an argument is zero extended then use argument directly. The ZExt
-        // may be zapped by an optimization pass in future.
-        if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
-          Arg = dyn_cast<Argument>(ZExt->getOperand(0));
-        else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
-          Arg = dyn_cast<Argument>(SExt->getOperand(0));
-        if (!Arg)
-          Arg = SI->getValueOperand();
-      } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-        Arg = LI->getPointerOperand();
-      } else {
-        continue;
-      }
-      Instruction *DbgVal =
-          DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
-                                      DIExpression(DVI->getExpression()), Inst);
-      DbgVal->setDebugLoc(DVI->getDebugLoc());
-    }
-  }
-};
-} // end anon namespace
-
 namespace {
 /// \brief An optimization pass providing Scalar Replacement of Aggregates.
 ///
@@ -1195,8 +1087,6 @@ namespace {
 /// this form. By doing so, it will enable promotion of vector aggregates to
 /// SSA vector values.
 class SROA : public FunctionPass {
-  const bool RequiresDomTree;
-
   LLVMContext *C;
   DominatorTree *DT;
   AssumptionCache *AC;
@@ -1244,9 +1134,7 @@ class SROA : public FunctionPass {
   SetVector<SelectInst *, SmallVector<SelectInst *, 2>> SpeculatableSelects;
 
 public:
-  SROA(bool RequiresDomTree = true)
-      : FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr),
-        DT(nullptr) {
+  SROA() : FunctionPass(ID), C(nullptr), DT(nullptr) {
     initializeSROAPass(*PassRegistry::getPassRegistry());
   }
   bool runOnFunction(Function &F) override;
@@ -1272,8 +1160,8 @@ private:
 
 char SROA::ID = 0;
 
-FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
-  return new SROA(RequiresDomTree);
+FunctionPass *llvm::createSROAPass() {
+  return new SROA();
 }
 
 INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
@@ -1407,7 +1295,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
     // If this pointer is always safe to load, or if we can prove that there
     // is already a load in the block, then we can move the load to the pred
     // block.
-    if (InVal->isDereferenceablePointer(DL) ||
+    if (isDereferenceablePointer(InVal, DL) ||
         isSafeToLoadUnconditionally(InVal, TI, MaxAlign))
       continue;
@@ -1477,8 +1365,8 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) {
   Value *TValue = SI.getTrueValue();
   Value *FValue = SI.getFalseValue();
   const DataLayout &DL = SI.getModule()->getDataLayout();
-  bool TDerefable = TValue->isDereferenceablePointer(DL);
-  bool FDerefable = FValue->isDereferenceablePointer(DL);
+  bool TDerefable = isDereferenceablePointer(TValue, DL);
+  bool FDerefable = isDereferenceablePointer(FValue, DL);
 
   for (User *U : SI.users()) {
     LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1847,10 +1735,17 @@ static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset,
 static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
   if (OldTy == NewTy)
     return true;
-  if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
-    if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
-      if (NewITy->getBitWidth() >= OldITy->getBitWidth())
-        return true;
+
+  // For integer types, we can't handle any bit-width differences. This would
+  // break both vector conversions with extension and introduce endianness
+  // issues when used in conjunction with loads and stores.
+  if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
+    assert(cast<IntegerType>(OldTy)->getBitWidth() !=
+               cast<IntegerType>(NewTy)->getBitWidth() &&
+           "We can't have the same bitwidth for different int types");
+    return false;
+  }
+
   if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
     return false;
   if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@@ -1885,10 +1780,8 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
   if (OldTy == NewTy)
     return V;
 
-  if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
-    if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
-      if (NewITy->getBitWidth() > OldITy->getBitWidth())
-        return IRB.CreateZExt(V, NewITy);
+  assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
+         "Integer types must be the exact same to convert.");
 
   // See if we need inttoptr for this type pair. A cast involving both scalars
   // and vectors requires an additional bitcast.
@@ -1929,7 +1822,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
 
 /// \brief Test whether the given slice use can be promoted to a vector.
 ///
-/// This function is called to test each entry in a partioning which is slated
+/// This function is called to test each entry in a partition which is slated
 /// for a single slice.
 static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P,
                                             const Slice &S, VectorType *Ty,
@@ -2125,7 +2018,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
   uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
 
   // We can't reasonably handle cases where the load or store extends past
-  // the end of the aloca's type and into its padding.
+  // the end of the alloca's type and into its padding.
   if (RelEnd > Size)
     return false;
 
@@ -2134,6 +2027,9 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
   if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
     if (LI->isVolatile())
       return false;
+    // We can't handle loads that extend past the allocated memory.
+    if (DL.getTypeStoreSize(LI->getType()) > Size)
+      return false;
     // Note that we don't count vector loads or stores as whole-alloca
     // operations which enable integer widening because we would prefer to use
     // vector widening instead.
@@ -2152,6 +2048,9 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
     Type *ValueTy = SI->getValueOperand()->getType();
     if (SI->isVolatile())
       return false;
+    // We can't handle stores that extend past the allocated memory.
+    if (DL.getTypeStoreSize(ValueTy) > Size)
+      return false;
     // Note that we don't count vector loads or stores as whole-alloca
     // operations which enable integer widening because we would prefer to use
     // vector widening instead.
@@ -2585,6 +2484,7 @@ private:
     Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
                              : LI.getType();
+    const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize;
     bool IsPtrAdjusted = false;
     Value *V;
     if (VecTy) {
@@ -2592,14 +2492,36 @@ private:
    } else if (IntTy && LI.getType()->isIntegerTy()) {
       V = rewriteIntegerLoad(LI);
     } else if (NewBeginOffset == NewAllocaBeginOffset &&
-               canConvertValue(DL, NewAllocaTy, LI.getType())) {
-      V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), LI.isVolatile(),
-                                LI.getName());
+               NewEndOffset == NewAllocaEndOffset &&
+               (canConvertValue(DL, NewAllocaTy, TargetTy) ||
+                (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
+                 TargetTy->isIntegerTy()))) {
+      LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                              LI.isVolatile(), LI.getName());
+      if (LI.isVolatile())
+        NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+      V = NewLI;
+
+      // If this is an integer load past the end of the slice (which means the
+      // bytes outside the slice are undef or this load is dead) just forcibly
+      // fix the integer size with correct handling of endianness.
+      if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
+        if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
+          if (AITy->getBitWidth() < TITy->getBitWidth()) {
+            V = IRB.CreateZExt(V, TITy, "load.ext");
+            if (DL.isBigEndian())
+              V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
+                                "endian_shift");
+          }
     } else {
       Type *LTy = TargetTy->getPointerTo();
-      V = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
-                                getSliceAlign(TargetTy), LI.isVolatile(),
-                                LI.getName());
+      LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
+                                              getSliceAlign(TargetTy),
+                                              LI.isVolatile(), LI.getName());
+      if (LI.isVolatile())
+        NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
+      V = NewLI;
       IsPtrAdjusted = true;
     }
     V = convertValue(DL, IRB, V, TargetTy);
@@ -2710,10 +2632,25 @@ private:
     if (IntTy && V->getType()->isIntegerTy())
       return rewriteIntegerStore(V, SI);
 
+    const bool IsStorePastEnd = DL.getTypeStoreSize(V->getType()) > SliceSize;
     StoreInst *NewSI;
     if (NewBeginOffset == NewAllocaBeginOffset &&
         NewEndOffset == NewAllocaEndOffset &&
-        canConvertValue(DL, V->getType(), NewAllocaTy)) {
+        (canConvertValue(DL, V->getType(), NewAllocaTy) ||
+         (IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
+          V->getType()->isIntegerTy()))) {
+      // If this is an integer store past the end of the slice (and thus the
+      // bytes past that point are irrelevant or this is unreachable), truncate
+      // the value prior to storing.
+      if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
+        if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
+          if (VITy->getBitWidth() > AITy->getBitWidth()) {
+            if (DL.isBigEndian())
+              V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
+                                 "endian_shift");
+            V = IRB.CreateTrunc(V, AITy, "load.trunc");
+          }
+
       V = convertValue(DL, IRB, V, NewAllocaTy);
       NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
                                      SI.isVolatile());
@@ -2722,7 +2659,8 @@
       NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
                                      SI.isVolatile());
     }
-    (void)NewSI;
+    if (SI.isVolatile())
+      NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope());
     Pass.DeadInsts.insert(&SI);
     deleteIfTriviallyDead(OldOp);
@@ -3661,7 +3599,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
                        return true;
                      }),
       Stores.end());
-  // Now we have to go *back* through all te stores, because a later store may
+  // Now we have to go *back* through all the stores, because a later store may
   // have caused an earlier store's load to become unsplittable and if it is
   // unsplittable for the later store, then we can't rely on it being split in
   // the earlier store either.
@@ -3922,7 +3860,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
 
     // Mark the original store as dead now that we've split it up and kill its
     // slice. Note that we leave the original load in place unless this store
-    // was its ownly use. It may in turn be split up if it is an alloca load
+    // was its only use. It may in turn be split up if it is an alloca load
     // for some other alloca, but it may be a normal load. This may introduce
     // redundant loads, but where those can be merged the rest of the optimizer
     // should handle the merging, and this uncovers SSA splits which is more
@@ -4180,17 +4118,17 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
       std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
 
   // Migrate debug information from the old alloca to the new alloca(s)
-  // and the individial partitions.
+  // and the individual partitions.
   if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) {
-    DIVariable Var(DbgDecl->getVariable());
-    DIExpression Expr(DbgDecl->getExpression());
+    auto *Var = DbgDecl->getVariable();
+    auto *Expr = DbgDecl->getExpression();
     DIBuilder DIB(*AI.getParent()->getParent()->getParent(),
                   /*AllowUnresolved*/ false);
     bool IsSplit = Pieces.size() > 1;
     for (auto Piece : Pieces) {
       // Create a piece expression describing the new partition or reuse AI's
       // expression if there is only one partition.
-      DIExpression PieceExpr = Expr;
+      auto *PieceExpr = Expr;
       if (IsSplit || Expr->isBitPiece()) {
         // If this alloca is already a scalar replacement of a larger aggregate,
         // Piece.Offset describes the offset inside the scalar.
@@ -4211,8 +4149,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
       if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca))
         OldDDI->eraseFromParent();
 
-      auto *NewDDI = DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, &AI);
-      NewDDI->setDebugLoc(DbgDecl->getDebugLoc());
+      DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(),
+                        &AI);
     }
   }
   return Changed;
@@ -4338,93 +4276,19 @@ void SROA::deleteDeadInstructions(
   }
 }
 
-static void enqueueUsersInWorklist(Instruction &I,
-                                   SmallVectorImpl<Instruction *> &Worklist,
-                                   SmallPtrSetImpl<Instruction *> &Visited) {
-  for (User *U : I.users())
-    if (Visited.insert(cast<Instruction>(U)).second)
-      Worklist.push_back(cast<Instruction>(U));
-}
-
 /// \brief Promote the allocas, using the best available technique.
 ///
 /// This attempts to promote whatever allocas have been identified as viable in
 /// the PromotableAllocas list. If that list is empty, there is nothing to do.
-/// If there is a domtree available, we attempt to promote using the full power
-/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is
-/// based on the SSAUpdater utilities. This function returns whether any
-/// promotion occurred.
+/// This function returns whether any promotion occurred.
 bool SROA::promoteAllocas(Function &F) {
   if (PromotableAllocas.empty())
     return false;
 
   NumPromoted += PromotableAllocas.size();
 
-  if (DT && !ForceSSAUpdater) {
-    DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
-    PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
-    PromotableAllocas.clear();
-    return true;
-  }
-
-  DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
-  SSAUpdater SSA;
-  DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
-  SmallVector<Instruction *, 64> Insts;
-
-  // We need a worklist to walk the uses of each alloca.
-  SmallVector<Instruction *, 8> Worklist;
-  SmallPtrSet<Instruction *, 8> Visited;
-  SmallVector<Instruction *, 32> DeadInsts;
-
-  for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
-    AllocaInst *AI = PromotableAllocas[Idx];
-    Insts.clear();
-    Worklist.clear();
-    Visited.clear();
-
-    enqueueUsersInWorklist(*AI, Worklist, Visited);
-
-    while (!Worklist.empty()) {
-      Instruction *I = Worklist.pop_back_val();
-
-      // FIXME: Currently the SSAUpdater infrastructure doesn't reason about
-      // lifetime intrinsics and so we strip them (and the bitcasts+GEPs
-      // leading to them) here. Eventually it should use them to optimize the
-      // scalar values produced.
-      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-        assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
-               II->getIntrinsicID() == Intrinsic::lifetime_end);
-        II->eraseFromParent();
-        continue;
-      }
-
-      // Push the loads and stores we find onto the list. SROA will already
-      // have validated that all loads and stores are viable candidates for
-      // promotion.
-      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-        assert(LI->getType() == AI->getAllocatedType());
-        Insts.push_back(LI);
-        continue;
-      }
-      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-        assert(SI->getValueOperand()->getType() == AI->getAllocatedType());
-        Insts.push_back(SI);
-        continue;
-      }
-
-      // For everything else, we know that only no-op bitcasts and GEPs will
-      // make it this far, just recurse through them and recall them for later
-      // removal.
-      DeadInsts.push_back(I);
-      enqueueUsersInWorklist(*I, Worklist, Visited);
-    }
-    AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
-    while (!DeadInsts.empty())
-      DeadInsts.pop_back_val()->eraseFromParent();
-    AI->eraseFromParent();
-  }
-
+  DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
+  PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
   PromotableAllocas.clear();
   return true;
 }
@@ -4435,9 +4299,7 @@ bool SROA::runOnFunction(Function &F) {
   DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
   C = &F.getContext();
-  DominatorTreeWrapperPass *DTWP =
-      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  DT = DTWP ? &DTWP->getDomTree() : nullptr;
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
 
   BasicBlock &EntryBB = F.getEntryBlock();
@@ -4482,7 +4344,6 @@ bool SROA::runOnFunction(Function &F) {
 
 void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<AssumptionCacheTracker>();
-  if (RequiresDomTree)
-    AU.addRequired<DominatorTreeWrapperPass>();
+  AU.addRequired<DominatorTreeWrapperPass>();
   AU.setPreservesCFG();
 }
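
To make the endianness logic in the rewritten loads concrete: when the new alloca's integer type is narrower than the type a load expects, the bytes past the end of the slice are undef, so the rewriter zero-extends the narrow value ("load.ext") and, on big-endian targets, shifts it left ("endian_shift") so the defined bytes land where the original wide load would have found them. Below is a minimal standalone sketch of that rule. It is ordinary C++ written for this explanation, not code from the patch, and the helper names are invented; byte order is modeled explicitly so the check does not depend on the host.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Serialize the low Bytes bytes of V into Buf using the modeled byte order.
static void storeBytes(uint8_t *Buf, uint64_t V, unsigned Bytes, bool BE) {
  for (unsigned i = 0; i != Bytes; ++i)
    Buf[BE ? Bytes - 1 - i : i] = uint8_t(V >> (8 * i));
}

// Read Bytes bytes from Buf using the modeled byte order.
static uint64_t loadBytes(const uint8_t *Buf, unsigned Bytes, bool BE) {
  uint64_t V = 0;
  for (unsigned i = 0; i != Bytes; ++i)
    V |= uint64_t(Buf[BE ? Bytes - 1 - i : i]) << (8 * i);
  return V;
}

int main() {
  for (bool BE : {false, true}) {
    // A 2-byte integer slice lives at offset 0 of a 4-byte region; the two
    // bytes past the slice are undef (modeled as zero on both sides).
    uint8_t Mem[4] = {};
    const uint16_t Slice = 0x1234;
    storeBytes(Mem, Slice, 2, BE);

    // Original IR: an i32 load that reads past the end of the slice.
    uint32_t WideLoad = uint32_t(loadBytes(Mem, 4, BE));

    // Rewritten IR: load i16, zext to i32 ("load.ext"), then on big-endian
    // targets shift left by the bit-width difference ("endian_shift").
    uint32_t Rewritten = Slice; // zext i16 -> i32
    if (BE)
      Rewritten <<= 32 - 16;    // shl i32 %ext, 16

    std::printf("%s: wide load = %#010x, rewritten = %#010x\n",
                BE ? "BE" : "LE", (unsigned)WideLoad, (unsigned)Rewritten);
    assert(WideLoad == Rewritten && "defined bytes must stay in place");
  }
  return 0;
}

The store path added by the patch is the mirror image: on big-endian targets the wide value is shifted right by the width difference before being truncated, so the bytes that survive the truncation are exactly the ones the original wide store would have placed inside the slice.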