X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FSROA.cpp;h=cb9838ef67443ed1dc79e482ebaf04db0cacc840;hp=b0b618860411fc4ea138852cdc90ca20d29209c9;hb=d04a8d4b33ff316ca4cf961e06c9e312eff8e64f;hpb=fd22019ec36e8335254430e61a1e5a3f8a681f6a diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index b0b61886041..cb9838ef674 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -25,33 +25,33 @@ #define DEBUG_TYPE "sroa" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/DIBuilder.h" +#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" +#include "llvm/InstVisitor.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Pass.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -334,7 +334,7 @@ private: class UseBuilder; friend class AllocaPartitioning::UseBuilder; -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// \brief Handle to alloca instruction to simplify method interfaces. AllocaInst &AI; #endif @@ -541,33 +541,25 @@ private: void insertUse(Instruction &I, int64_t Offset, uint64_t Size, bool IsSplittable = false) { - // Completely skip uses which have a zero size or don't overlap the - // allocation. - if (Size == 0 || - (Offset >= 0 && (uint64_t)Offset >= AllocSize) || - (Offset < 0 && (uint64_t)-Offset >= Size)) { + // Completely skip uses which have a zero size or start either before or + // past the end of the allocation. + if (Size == 0 || Offset < 0 || (uint64_t)Offset >= AllocSize) { DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset - << " which starts past the end of the " << AllocSize - << " byte alloca:\n" + << " which has zero size or starts outside of the " + << AllocSize << " byte alloca:\n" << " alloca: " << P.AI << "\n" << " use: " << I << "\n"); return; } - // Clamp the start to the beginning of the allocation. - if (Offset < 0) { - DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset - << " to start at the beginning of the alloca:\n" - << " alloca: " << P.AI << "\n" - << " use: " << I << "\n"); - Size -= (uint64_t)-Offset; - Offset = 0; - } - uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size; // Clamp the end offset to the end of the allocation. Note that this is // formulated to handle even the case where "BeginOffset + Size" overflows. + // NOTE! 
This may appear superficially to be something we could ignore + // entirely, but that is not so! There may be PHI-node uses where some + // instructions are dead but not others. We can't completely ignore the + // PHI node, and so have to record at least the information here. assert(AllocSize >= BeginOffset); // Established above. if (Size > AllocSize - BeginOffset) { DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset @@ -877,16 +869,9 @@ private: void insertUse(Instruction &User, int64_t Offset, uint64_t Size) { // If the use has a zero size or extends outside of the allocation, record // it as a dead use for elimination later. - if (Size == 0 || (uint64_t)Offset >= AllocSize || - (Offset < 0 && (uint64_t)-Offset >= Size)) + if (Size == 0 || Offset < 0 || (uint64_t)Offset >= AllocSize) return markAsDead(User); - // Clamp the start to the beginning of the allocation. - if (Offset < 0) { - Size -= (uint64_t)-Offset; - Offset = 0; - } - uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size; // Clamp the end offset to the end of the allocation. Note that this is @@ -1122,7 +1107,7 @@ void AllocaPartitioning::splitAndMergePartitions() { AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) : -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) AI(AI), #endif PointerEscapingInstr(0) { @@ -1382,11 +1367,7 @@ class SROA : public FunctionPass { /// \brief A collection of instructions to delete. /// We try to batch deletions to simplify code and make things a bit more /// efficient. - SmallVector DeadInsts; - - /// \brief A set to prevent repeatedly marking an instruction split into many - /// uses as dead. Only used to guard insertion into DeadInsts. - SmallPtrSet DeadSplitInsts; + SetVector > DeadInsts; /// \brief Post-promotion worklist. /// @@ -1573,7 +1554,7 @@ private: do { LoadInst *LI = Loads.pop_back_val(); LI->replaceAllUsesWith(NewPN); - Pass.DeadInsts.push_back(LI); + Pass.DeadInsts.insert(LI); } while (!Loads.empty()); // Inject loads into all of the pred blocks. @@ -1717,7 +1698,7 @@ private: DEBUG(dbgs() << " speculated to: " << *V << "\n"); LI->replaceAllUsesWith(V); - Pass.DeadInsts.push_back(LI); + Pass.DeadInsts.insert(LI); } } }; @@ -2116,11 +2097,11 @@ static bool isVectorPromotionViable(const DataLayout &TD, EndIndex > Ty->getNumElements()) return false; - // FIXME: We should build shuffle vector instructions to handle - // non-element-sized accesses. - if ((EndOffset - BeginOffset) != ElementSize && - (EndOffset - BeginOffset) != VecSize) - return false; + assert(EndIndex > BeginIndex && "Empty vector!"); + uint64_t NumElements = EndIndex - BeginIndex; + Type *PartitionTy + = (NumElements == 1) ? Ty->getElementType() + : VectorType::get(Ty->getElementType(), NumElements); if (MemIntrinsic *MI = dyn_cast(I->U->getUser())) { if (MI->isVolatile()) @@ -2134,8 +2115,17 @@ static bool isVectorPromotionViable(const DataLayout &TD, } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) { // Disable vector promotion when there are loads or stores of an FCA. 
return false; - } else if (!isa(I->U->getUser()) && - !isa(I->U->getUser())) { + } else if (LoadInst *LI = dyn_cast(I->U->getUser())) { + if (LI->isVolatile()) + return false; + if (!canConvertValue(TD, PartitionTy, LI->getType())) + return false; + } else if (StoreInst *SI = dyn_cast(I->U->getUser())) { + if (SI->isVolatile()) + return false; + if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy)) + return false; + } else { return false; } } @@ -2155,6 +2145,9 @@ static bool isIntegerWideningViable(const DataLayout &TD, AllocaPartitioning::const_use_iterator I, AllocaPartitioning::const_use_iterator E) { uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy); + // Don't create integer types larger than the maximum bitwidth. + if (SizeInBits > IntegerType::MAX_INT_BITS) + return false; // Don't try to handle allocas with bit-padding. if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy)) @@ -2241,18 +2234,23 @@ static bool isIntegerWideningViable(const DataLayout &TD, static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name) { + DEBUG(dbgs() << " start: " << *V << "\n"); IntegerType *IntTy = cast(V->getType()); assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && "Element extends past full value"); uint64_t ShAmt = 8*Offset; if (DL.isBigEndian()) ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); - if (ShAmt) + if (ShAmt) { V = IRB.CreateLShr(V, ShAmt, Name + ".shift"); + DEBUG(dbgs() << " shifted: " << *V << "\n"); + } assert(Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot extract to a larger integer!"); - if (Ty != IntTy) + if (Ty != IntTy) { V = IRB.CreateTrunc(V, Ty, Name + ".trunc"); + DEBUG(dbgs() << " trunced: " << *V << "\n"); + } return V; } @@ -2262,20 +2260,27 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, IntegerType *Ty = cast(V->getType()); assert(Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot insert a larger integer!"); - if (Ty != IntTy) + DEBUG(dbgs() << " start: " << *V << "\n"); + if (Ty != IntTy) { V = IRB.CreateZExt(V, IntTy, Name + ".ext"); + DEBUG(dbgs() << " extended: " << *V << "\n"); + } assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && "Element store outside of alloca store"); uint64_t ShAmt = 8*Offset; if (DL.isBigEndian()) ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); - if (ShAmt) + if (ShAmt) { V = IRB.CreateShl(V, ShAmt, Name + ".shift"); + DEBUG(dbgs() << " shifted: " << *V << "\n"); + } if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) { APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt); Old = IRB.CreateAnd(Old, Mask, Name + ".mask"); + DEBUG(dbgs() << " masked: " << *Old << "\n"); V = IRB.CreateOr(Old, V, Name + ".insert"); + DEBUG(dbgs() << " inserted: " << *V << "\n"); } return V; } @@ -2430,42 +2435,47 @@ private: return getOffsetTypeAlign(Ty, BeginOffset - NewAllocaBeginOffset); } - ConstantInt *getIndex(IRBuilder<> &IRB, uint64_t Offset) { + unsigned getIndex(uint64_t Offset) { assert(VecTy && "Can only call getIndex when rewriting a vector"); uint64_t RelOffset = Offset - NewAllocaBeginOffset; assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds"); uint32_t Index = RelOffset / ElementSize; assert(Index * ElementSize == RelOffset); - return IRB.getInt32(Index); + return Index; } void deleteIfTriviallyDead(Value *V) { Instruction *I = cast(V); if (isInstructionTriviallyDead(I)) - 
Pass.DeadInsts.push_back(I); + Pass.DeadInsts.insert(I); } - bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { - Value *Result; - if (LI.getType() == VecTy->getElementType() || - BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { - Result = IRB.CreateExtractElement( - IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), - getIndex(IRB, BeginOffset), getName(".extract")); - } else { - Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { + Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")); + unsigned BeginIndex = getIndex(BeginOffset); + unsigned EndIndex = getIndex(EndOffset); + assert(EndIndex > BeginIndex && "Empty vector!"); + unsigned NumElements = EndIndex - BeginIndex; + assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); + if (NumElements == 1) { + V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), + getName(".extract")); + DEBUG(dbgs() << " extract: " << *V << "\n"); + } else if (NumElements < VecTy->getNumElements()) { + SmallVector Mask; + Mask.reserve(NumElements); + for (unsigned i = BeginIndex; i != EndIndex; ++i) + Mask.push_back(IRB.getInt32(i)); + V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(Mask), + getName(".extract")); + DEBUG(dbgs() << " shuffle: " << *V << "\n"); } - if (Result->getType() != LI.getType()) - Result = convertValue(TD, IRB, Result, LI.getType()); - LI.replaceAllUsesWith(Result); - Pass.DeadInsts.push_back(&LI); - - DEBUG(dbgs() << " to: " << *Result << "\n"); - return true; + return V; } - bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { + Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { assert(IntTy && "We cannot insert an integer to the alloca"); assert(!LI.isVolatile()); Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), @@ -2473,12 +2483,10 @@ private: V = convertValue(TD, IRB, V, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - V = extractInteger(TD, IRB, V, cast(LI.getType()), Offset, - getName(".extract")); - LI.replaceAllUsesWith(V); - Pass.DeadInsts.push_back(&LI); - DEBUG(dbgs() << " to: " << *V << "\n"); - return true; + if (Offset > 0 || EndOffset < NewAllocaEndOffset) + V = extractInteger(TD, IRB, V, cast(LI.getType()), Offset, + getName(".extract")); + return V; } bool visitLoadInst(LoadInst &LI) { @@ -2488,7 +2496,46 @@ private: IRBuilder<> IRB(&LI); uint64_t Size = EndOffset - BeginOffset; - if (Size < TD.getTypeStoreSize(LI.getType())) { + bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of the original allocation it's behavior is undefined. Rather + // than trying to transform it, just replace it with undef. + // FIXME: We should do something more clever for functions being + // instrumented by asan. + // FIXME: Eventually, once ASan and friends can flush out bugs here, this + // should be transformed to a load of null making it unreachable. 
+ uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType()); + if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) { + LI.replaceAllUsesWith(UndefValue::get(LI.getType())); + Pass.DeadInsts.insert(&LI); + deleteIfTriviallyDead(OldOp); + DEBUG(dbgs() << " to: undef!!\n"); + return true; + } + + Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8) + : LI.getType(); + bool IsPtrAdjusted = false; + Value *V; + if (VecTy) { + V = rewriteVectorizedLoadInst(IRB, LI, OldOp); + } else if (IntTy && LI.getType()->isIntegerTy()) { + V = rewriteIntegerLoad(IRB, LI); + } else if (BeginOffset == NewAllocaBeginOffset && + canConvertValue(TD, NewAllocaTy, LI.getType())) { + V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + LI.isVolatile(), getName(".load")); + } else { + Type *LTy = TargetTy->getPointerTo(); + V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy), + getPartitionTypeAlign(TargetTy), + LI.isVolatile(), getName(".load")); + IsPtrAdjusted = true; + } + V = convertValue(TD, IRB, V, TargetTy); + + if (IsSplitIntLoad) { assert(!LI.isVolatile()); assert(LI.getType()->isIntegerTy() && "Only integer type loads and stores are split"); @@ -2498,21 +2545,8 @@ private: assert(LI.getType()->getIntegerBitWidth() == TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && "Only alloca-wide loads can be split and recomposed"); - IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8); - bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) && - canConvertValue(TD, NewAllocaTy, NarrowTy); - Value *V; // Move the insertion point just past the load so that we can refer to it. IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI))); - if (IsConvertable) - V = convertValue(TD, IRB, - IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")), - NarrowTy); - else - V = IRB.CreateAlignedLoad( - getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()), - getPartitionTypeAlign(NarrowTy), getName(".load")); // Create a placeholder value with the same type as LI to use as the // basis for the new value. 
This allows us to replace the uses of LI with // the computed value, and then replace the placeholder with LI, leaving @@ -2524,104 +2558,77 @@ private: LI.replaceAllUsesWith(V); Placeholder->replaceAllUsesWith(&LI); delete Placeholder; - if (Pass.DeadSplitInsts.insert(&LI)) - Pass.DeadInsts.push_back(&LI); - DEBUG(dbgs() << " to: " << *V << "\n"); - return IsConvertable; - } - - if (VecTy) - return rewriteVectorizedLoadInst(IRB, LI, OldOp); - if (IntTy && LI.getType()->isIntegerTy()) - return rewriteIntegerLoad(IRB, LI); - - if (BeginOffset == NewAllocaBeginOffset && - canConvertValue(TD, NewAllocaTy, LI.getType())) { - Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - LI.isVolatile(), getName(".load")); - Value *NewV = convertValue(TD, IRB, NewLI, LI.getType()); - LI.replaceAllUsesWith(NewV); - Pass.DeadInsts.push_back(&LI); - - DEBUG(dbgs() << " to: " << *NewLI << "\n"); - return !LI.isVolatile(); + } else { + LI.replaceAllUsesWith(V); } - assert(!IntTy && "Invalid load found with int-op widening enabled"); - - Value *NewPtr = getAdjustedAllocaPtr(IRB, - LI.getPointerOperand()->getType()); - LI.setOperand(0, NewPtr); - LI.setAlignment(getPartitionTypeAlign(LI.getType())); - DEBUG(dbgs() << " to: " << LI << "\n"); - + Pass.DeadInsts.insert(&LI); deleteIfTriviallyDead(OldOp); - return NewPtr == &NewAI && !LI.isVolatile(); - } - - bool rewriteWideStoreInst(IRBuilder<> &IRB, StoreInst &SI, Type *ValueTy, - unsigned Size) { - assert(!SI.isVolatile()); - assert(ValueTy->isIntegerTy() && - "Only integer type loads and stores are split"); - assert(ValueTy->getIntegerBitWidth() == - TD.getTypeStoreSizeInBits(ValueTy) && - "Non-byte-multiple bit width"); - assert(ValueTy->getIntegerBitWidth() == - TD.getTypeSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide stores can be split and recomposed"); - IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); - Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy, - BeginOffset, getName(".extract")); - StoreInst *NewSI; - bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) && - canConvertValue(TD, NarrowTy, NewAllocaTy); - if (IsConvertable) - NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy), - &NewAI, NewAI.getAlignment()); - else - NewSI = IRB.CreateAlignedStore( - V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()), - getPartitionTypeAlign(NarrowTy)); - (void)NewSI; - if (Pass.DeadSplitInsts.insert(&SI)) - Pass.DeadInsts.push_back(&SI); - - DEBUG(dbgs() << " to: " << *NewSI << "\n"); - return IsConvertable; - } - - bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, StoreInst &SI, - Value *OldOp) { - Value *V = SI.getValueOperand(); - Type *ValueTy = V->getType(); - if (ValueTy == ElementTy || - BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { - if (ValueTy != ElementTy) - V = convertValue(TD, IRB, V, ElementTy); + DEBUG(dbgs() << " to: " << *V << "\n"); + return !LI.isVolatile() && !IsPtrAdjusted; + } + + bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V, + StoreInst &SI, Value *OldOp) { + unsigned BeginIndex = getIndex(BeginOffset); + unsigned EndIndex = getIndex(EndOffset); + assert(EndIndex > BeginIndex && "Empty vector!"); + unsigned NumElements = EndIndex - BeginIndex; + assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); + Type *PartitionTy + = (NumElements == 1) ? 
ElementTy + : VectorType::get(ElementTy, NumElements); + if (V->getType() != PartitionTy) + V = convertValue(TD, IRB, V, PartitionTy); + if (NumElements < VecTy->getNumElements()) { + // We need to mix in the existing elements. LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")); - V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset), - getName(".insert")); - } else if (ValueTy != VecTy) { - uint64_t Size = EndOffset - BeginOffset; - if (Size < TD.getTypeStoreSize(ValueTy)) - return rewriteWideStoreInst(IRB, SI, ValueTy, Size); - + if (NumElements == 1) { + V = IRB.CreateInsertElement(LI, V, IRB.getInt32(BeginIndex), + getName(".insert")); + DEBUG(dbgs() << " insert: " << *V << "\n"); + } else { + // When inserting a smaller vector into the larger to store, we first + // use a shuffle vector to widen it with undef elements, and then + // a second shuffle vector to select between the loaded vector and the + // incoming vector. + SmallVector Mask; + Mask.reserve(VecTy->getNumElements()); + for (unsigned i = 0; i != VecTy->getNumElements(); ++i) + if (i >= BeginIndex && i < EndIndex) + Mask.push_back(IRB.getInt32(i - BeginIndex)); + else + Mask.push_back(UndefValue::get(IRB.getInt32Ty())); + V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(Mask), + getName(".expand")); + DEBUG(dbgs() << " shuffle1: " << *V << "\n"); + + Mask.clear(); + for (unsigned i = 0; i != VecTy->getNumElements(); ++i) + if (i >= BeginIndex && i < EndIndex) + Mask.push_back(IRB.getInt32(i)); + else + Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); + V = IRB.CreateShuffleVector(V, LI, ConstantVector::get(Mask), + getName("insert")); + DEBUG(dbgs() << " shuffle2: " << *V << "\n"); + } + } else { V = convertValue(TD, IRB, V, VecTy); } StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); - Pass.DeadInsts.push_back(&SI); + Pass.DeadInsts.insert(&SI); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return true; } - bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) { + bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) { assert(IntTy && "We cannot extract an integer from the alloca"); assert(!SI.isVolatile()); - Value *V = SI.getValueOperand(); if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".oldload")); @@ -2633,7 +2640,7 @@ private: } V = convertValue(TD, IRB, V, NewAllocaTy); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); - Pass.DeadInsts.push_back(&SI); + Pass.DeadInsts.insert(&SI); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return true; @@ -2645,46 +2652,53 @@ private: assert(OldOp == OldPtr); IRBuilder<> IRB(&SI); - if (VecTy) - return rewriteVectorizedStoreInst(IRB, SI, OldOp); - Type *ValueTy = SI.getValueOperand()->getType(); - - uint64_t Size = EndOffset - BeginOffset; - if (Size < TD.getTypeStoreSize(ValueTy)) - return rewriteWideStoreInst(IRB, SI, ValueTy, Size); - - if (IntTy && ValueTy->isIntegerTy()) - return rewriteIntegerStore(IRB, SI); + Value *V = SI.getValueOperand(); // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after promoting this alloca. 
- if (ValueTy->isPointerTy()) - if (AllocaInst *AI = dyn_cast(SI.getValueOperand() - ->stripInBoundsOffsets())) + if (V->getType()->isPointerTy()) + if (AllocaInst *AI = dyn_cast(V->stripInBoundsOffsets())) Pass.PostPromotionWorklist.insert(AI); - if (BeginOffset == NewAllocaBeginOffset && - canConvertValue(TD, ValueTy, NewAllocaTy)) { - Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy); - StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(), - SI.isVolatile()); - (void)NewSI; - Pass.DeadInsts.push_back(&SI); - - DEBUG(dbgs() << " to: " << *NewSI << "\n"); - return !SI.isVolatile(); + uint64_t Size = EndOffset - BeginOffset; + if (Size < TD.getTypeStoreSize(V->getType())) { + assert(!SI.isVolatile()); + assert(V->getType()->isIntegerTy() && + "Only integer type loads and stores are split"); + assert(V->getType()->getIntegerBitWidth() == + TD.getTypeStoreSizeInBits(V->getType()) && + "Non-byte-multiple bit width"); + assert(V->getType()->getIntegerBitWidth() == + TD.getTypeSizeInBits(OldAI.getAllocatedType()) && + "Only alloca-wide stores can be split and recomposed"); + IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); + V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, + getName(".extract")); } - assert(!IntTy && "Invalid store found with int-op widening enabled"); - - Value *NewPtr = getAdjustedAllocaPtr(IRB, - SI.getPointerOperand()->getType()); - SI.setOperand(1, NewPtr); - SI.setAlignment(getPartitionTypeAlign(SI.getValueOperand()->getType())); - DEBUG(dbgs() << " to: " << SI << "\n"); + if (VecTy) + return rewriteVectorizedStoreInst(IRB, V, SI, OldOp); + if (IntTy && V->getType()->isIntegerTy()) + return rewriteIntegerStore(IRB, V, SI); + StoreInst *NewSI; + if (BeginOffset == NewAllocaBeginOffset && + canConvertValue(TD, V->getType(), NewAllocaTy)) { + V = convertValue(TD, IRB, V, NewAllocaTy); + NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), + SI.isVolatile()); + } else { + Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo()); + NewSI = IRB.CreateAlignedStore(V, NewPtr, + getPartitionTypeAlign(V->getType()), + SI.isVolatile()); + } + (void)NewSI; + Pass.DeadInsts.insert(&SI); deleteIfTriviallyDead(OldOp); - return NewPtr == &NewAI && !SI.isVolatile(); + + DEBUG(dbgs() << " to: " << *NewSI << "\n"); + return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); } bool visitMemSetInst(MemSetInst &II) { @@ -2704,8 +2718,7 @@ private: } // Record this instruction for deletion. - if (Pass.DeadSplitInsts.insert(&II)) - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.insert(&II); Type *AllocaTy = NewAI.getAllocatedType(); Type *ScalarTy = AllocaTy->getScalarType(); @@ -2756,7 +2769,7 @@ private: IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), - V, getIndex(IRB, BeginOffset), + V, IRB.getInt32(getIndex(BeginOffset)), getName(".insert")), &NewAI, NewAI.getAlignment()); (void)Store; @@ -2861,8 +2874,7 @@ private: return false; } // Record this instruction for deletion. - if (Pass.DeadSplitInsts.insert(&II)) - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.insert(&II); bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && EndOffset == NewAllocaEndOffset; @@ -2925,7 +2937,7 @@ private: // We have to extract rather than load. 
Src = IRB.CreateExtractElement( IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")), - getIndex(IRB, BeginOffset), + IRB.getInt32(getIndex(BeginOffset)), getName(".copyextract")); } else if (IntTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), @@ -2953,7 +2965,7 @@ private: // We have to insert into a loaded copy before storing. Src = IRB.CreateInsertElement( IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), - Src, getIndex(IRB, BeginOffset), + Src, IRB.getInt32(getIndex(BeginOffset)), getName(".insert")); } @@ -2972,8 +2984,7 @@ private: assert(II.getArgOperand(1) == OldPtr); // Record this instruction for deletion. - if (Pass.DeadSplitInsts.insert(&II)) - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.insert(&II); ConstantInt *Size = ConstantInt::get(cast(II.getArgOperand(0)->getType()), @@ -3542,7 +3553,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) { DI != DE; ++DI) { Changed = true; (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType())); - DeadInsts.push_back(*DI); + DeadInsts.insert(*DI); } for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(), DE = P.dead_op_end(); @@ -3553,7 +3564,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) { if (Instruction *OldI = dyn_cast(OldV)) if (isInstructionTriviallyDead(OldI)) { Changed = true; - DeadInsts.push_back(OldI); + DeadInsts.insert(OldI); } } @@ -3574,7 +3585,6 @@ bool SROA::runOnAlloca(AllocaInst &AI) { /// We also record the alloca instructions deleted here so that they aren't /// subsequently handed to mem2reg to promote. void SROA::deleteDeadInstructions(SmallPtrSet &DeletedAllocas) { - DeadSplitInsts.clear(); while (!DeadInsts.empty()) { Instruction *I = DeadInsts.pop_back_val(); DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); @@ -3586,7 +3596,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet &DeletedAllocas) { // Zero out the operand and see if it becomes trivially dead. *OI = 0; if (isInstructionTriviallyDead(U)) - DeadInsts.push_back(U); + DeadInsts.insert(U); } if (AllocaInst *AI = dyn_cast(I))
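
Note on the vector-rewriting changes above: the patch replaces the old single-element extractelement/insertelement handling in rewriteVectorizedLoadInst and rewriteVectorizedStoreInst with shufflevector-based handling of multi-element sub-ranges of the alloca's vector type. Below is a minimal standalone sketch of the extract side, assuming an existing IRBuilder and an already-loaded whole-vector value; the helper name extractSubVector and its parameters are illustrative only and are not part of the patch.

  // Illustrative sketch mirroring the shuffle-based sub-vector extraction in
  // rewriteVectorizedLoadInst above. V is a value of vector type, and
  // [BeginIndex, EndIndex) is the element range covered by the partition.
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/Twine.h"
  #include "llvm/Constants.h"
  #include "llvm/IRBuilder.h"
  #include <cassert>
  using namespace llvm;

  static Value *extractSubVector(IRBuilder<> &IRB, Value *V,
                                 unsigned BeginIndex, unsigned EndIndex,
                                 const Twine &Name) {
    assert(EndIndex > BeginIndex && "Empty vector range!");
    unsigned NumElements = EndIndex - BeginIndex;

    // A single element is extracted directly.
    if (NumElements == 1)
      return IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
                                      Name + ".extract");

    // For a proper sub-range, build a shuffle mask selecting the elements
    // [BeginIndex, EndIndex) out of V; the second shuffle operand is unused.
    SmallVector<Constant *, 8> Mask;
    Mask.reserve(NumElements);
    for (unsigned i = BeginIndex; i != EndIndex; ++i)
      Mask.push_back(IRB.getInt32(i));
    return IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
                                   ConstantVector::get(Mask),
                                   Name + ".extract");
  }

The store side in rewriteVectorizedStoreInst above uses the inverse sequence: one shufflevector widens the incoming sub-vector with undef lanes, and a second shufflevector blends those lanes into the freshly loaded whole vector before the aligned store back to the new alloca.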