#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/ValueTracking.h"
/// hold.
void insert(ArrayRef<Slice> NewSlices) {
int OldSize = Slices.size();
- std::move(NewSlices.begin(), NewSlices.end(), std::back_inserter(Slices));
+ Slices.append(NewSlices.begin(), NewSlices.end());
auto SliceI = Slices.begin() + OldSize;
std::sort(SliceI, Slices.end());
std::inplace_merge(Slices.begin(), SliceI, Slices.end());
friend class AllocaSlices;
friend class AllocaSlices::partition_iterator;
- /// \brief The begining and ending offsets of the alloca for this partition.
+ /// \brief The beginning and ending offsets of the alloca for this
+ /// partition.
uint64_t BeginOffset, EndOffset;
/// \brief The start end end iterators of this partition.
// OK, we need to consume new slices. Set the end offset based on the
// current slice, and step SJ past it. The beginning offset of the
- // parttion is the beginning offset of the next slice unless we have
+ // partition is the beginning offset of the next slice unless we have
// pre-existing split slices that are continuing, in which case we begin
// at the prior end offset.
P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
"End iterators don't match between compared partition iterators!");
// The observed positions of partitions is marked by the P.SI iterator and
- // the emptyness of the split slices. The latter is only relevant when
+ // the emptiness of the split slices. The latter is only relevant when
// P.SI == SE, as the end iterator will additionally have an empty split
// slices list, but the prior may have the same P.SI and a tail of split
// slices.
// by writing out the code here where we have tho underlying allocation
// size readily available.
APInt GEPOffset = Offset;
+ const DataLayout &DL = GEPI.getModule()->getDataLayout();
for (gep_type_iterator GTI = gep_type_begin(GEPI),
GTE = gep_type_end(GEPI);
GTI != GTE; ++GTI) {
void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
uint64_t Size, bool IsVolatile) {
- // We allow splitting of loads and stores where the type is an integer type
- // and cover the entire alloca. This prevents us from splitting over
- // eagerly.
- // FIXME: In the great blue eventually, we should eagerly split all integer
- // loads and stores, and then have a separate step that merges adjacent
- // alloca partitions into a single partition suitable for integer widening.
- // Or we should skip the merge step and rely on GVN and other passes to
- // merge adjacent loads and stores that survive mem2reg.
- bool IsSplittable =
- Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
+ // We allow splitting of non-volatile loads and stores where the type is an
+ // integer type. These may be used to implement 'memcpy' or other "transfer
+ // of bits" patterns.
+ bool IsSplittable = Ty->isIntegerTy() && !IsVolatile;
insertUse(I, Offset, Size, IsSplittable);
}
if (!IsOffsetKnown)
return PI.setAborted(&LI);
+ const DataLayout &DL = LI.getModule()->getDataLayout();
uint64_t Size = DL.getTypeStoreSize(LI.getType());
return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
if (!IsOffsetKnown)
return PI.setAborted(&SI);
+ const DataLayout &DL = SI.getModule()->getDataLayout();
uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
// If this memory access can be shown to *statically* extend outside the
SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
Visited.insert(Root);
Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
+ const DataLayout &DL = Root->getModule()->getDataLayout();
// If there are no loads or stores, the access is dead. We mark that as
// a size zero access.
Size = 0;
SmallVector<DbgValueInst *, 4> DVIs;
public:
- AllocaPromoter(const SmallVectorImpl<Instruction *> &Insts, SSAUpdater &S,
+ AllocaPromoter(ArrayRef<const Instruction *> Insts,
+ SSAUpdater &S,
AllocaInst &AI, DIBuilder &DIB)
: LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
// Retain the debug information attached to the alloca for use when
// rewriting loads and stores.
if (auto *L = LocalAsMetadata::getIfExists(&AI)) {
- if (auto *DebugNode = MetadataAsValue::getIfExists(AI.getContext(), L)) {
- for (User *U : DebugNode->users())
+ if (auto *DINode = MetadataAsValue::getIfExists(AI.getContext(), L)) {
+ for (User *U : DINode->users())
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
DDIs.push_back(DDI);
else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
} else {
continue;
}
- Instruction *DbgVal =
- DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
- DIExpression(DVI->getExpression()), Inst);
- DbgVal->setDebugLoc(DVI->getDebugLoc());
+ DIB.insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
+ DVI->getExpression(), DVI->getDebugLoc(),
+ Inst);
}
}
};
const bool RequiresDomTree;
LLVMContext *C;
- const DataLayout *DL;
DominatorTree *DT;
- AssumptionTracker *AT;
+ AssumptionCache *AC;
/// \brief Worklist of alloca instructions to simplify.
///
public:
SROA(bool RequiresDomTree = true)
: FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr),
- DL(nullptr), DT(nullptr) {
+ DT(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
friend class AllocaSliceRewriter;
bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
- bool rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- AllocaSlices::Partition &P);
+ AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS,
+ AllocaSlices::Partition &P);
bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
bool runOnAlloca(AllocaInst &AI);
void clobberUse(Use &U);
INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
false)
///
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
-static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) {
+static bool isSafePHIToSpeculate(PHINode &PN) {
// For now, we can only do this promotion if the load is in the same block
// as the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
if (!HaveLoad)
return false;
+ const DataLayout &DL = PN.getModule()->getDataLayout();
+
// We can only transform this if it is safe to push the loads into the
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
// If this pointer is always safe to load, or if we can prove that there
// is already a load in the block, then we can move the load to the pred
// block.
- if (InVal->isDereferenceablePointer(DL) ||
- isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL))
+ if (isDereferenceablePointer(InVal, DL) ||
+ isSafeToLoadUnconditionally(InVal, TI, MaxAlign))
continue;
return false;
///
/// We can do this to a select if its only uses are loads and if the operand
/// to the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst &SI,
- const DataLayout *DL = nullptr) {
+static bool isSafeSelectToSpeculate(SelectInst &SI) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
- bool TDerefable = TValue->isDereferenceablePointer(DL);
- bool FDerefable = FValue->isDereferenceablePointer(DL);
+ const DataLayout &DL = SI.getModule()->getDataLayout();
+ bool TDerefable = isDereferenceablePointer(TValue, DL);
+ bool FDerefable = isDereferenceablePointer(FValue, DL);
for (User *U : SI.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
// absolutely (e.g. allocas) or at this point because we can see other
// accesses to it.
if (!TDerefable &&
- !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment(), DL))
+ !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment()))
return false;
if (!FDerefable &&
- !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment(), DL))
+ !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment()))
return false;
}
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx");
+ return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices,
+ NamePrefix + "sroa_idx");
}
/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
// We may end up computing an offset pointer that has the wrong type. If we
// never are able to compute one directly that has the correct type, we'll
- // fall back to it, so keep it around here.
+ // fall back to it, so keep it and the base it was computed from around here.
Value *OffsetPtr = nullptr;
+ Value *OffsetBasePtr;
// Remember any i8 pointer we come across to re-use if we need to do a raw
// byte offset.
Indices.clear();
if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
Indices, NamePrefix)) {
- if (P->getType() == PointerTy) {
- // Zap any offset pointer that we ended up computing in previous rounds.
- if (OffsetPtr && OffsetPtr->use_empty())
- if (Instruction *I = dyn_cast<Instruction>(OffsetPtr))
- I->eraseFromParent();
+ // If we have a new natural pointer at the offset, clear out any old
+ // offset pointer we computed. Unless it is the base pointer or
+ // a non-instruction, we built a GEP we don't need. Zap it.
+ if (OffsetPtr && OffsetPtr != OffsetBasePtr)
+ if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) {
+ assert(I->use_empty() && "Built a GEP with uses some how!");
+ I->eraseFromParent();
+ }
+ OffsetPtr = P;
+ OffsetBasePtr = Ptr;
+ // If we also found a pointer of the right type, we're done.
+ if (P->getType() == PointerTy)
return P;
- }
- if (!OffsetPtr) {
- OffsetPtr = P;
- }
}
// Stash this pointer if we've found an i8*.
OffsetPtr = Int8PtrOffset == 0
? Int8Ptr
- : IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
+ : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
+ IRB.getInt(Int8PtrOffset),
NamePrefix + "sroa_raw_idx");
}
Ptr = OffsetPtr;
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (OldTy == NewTy)
return true;
- if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
- if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
- if (NewITy->getBitWidth() >= OldITy->getBitWidth())
- return true;
+
+ // For integer types, we can't handle any bit-width differences. This would
+ // break both vector conversions with extension and introduce endianness
+ // issues when in conjunction with loads and stores.
+ if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
+ assert(cast<IntegerType>(OldTy)->getBitWidth() !=
+ cast<IntegerType>(NewTy)->getBitWidth() &&
+ "We can't have the same bitwidth for different int types");
+ return false;
+ }
+
if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
return false;
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
if (OldTy == NewTy)
return V;
- if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
- if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
- if (NewITy->getBitWidth() > OldITy->getBitWidth())
- return IRB.CreateZExt(V, NewITy);
+ assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
+ "Integer types must be the exact same to convert.");
// See if we need inttoptr for this type pair. A cast involving both scalars
// and vectors requires and additional bitcast.
/// \brief Test whether the given slice use can be promoted to a vector.
///
-/// This function is called to test each entry in a partioning which is slated
+/// This function is called to test each entry in a partition which is slated
/// for a single slice.
static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P,
const Slice &S, VectorType *Ty,
uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
// We can't reasonably handle cases where the load or store extends past
- // the end of the aloca's type and into its padding.
+ // the end of the alloca's type and into its padding.
if (RelEnd > Size)
return false;
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
+ // We can't handle loads that extend past the allocated memory.
+ if (DL.getTypeStoreSize(LI->getType()) > Size)
+ return false;
// Note that we don't count vector loads or stores as whole-alloca
// operations which enable integer widening because we would prefer to use
// vector widening instead.
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
+ // We can't handle stores that extend past the allocated memory.
+ if (DL.getTypeStoreSize(ValueTy) > Size)
+ return false;
// Note that we don't count vector loads or stores as whole-alloca
// operations which enable integer widening because we would prefer to use
// vector widening instead.
Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
: LI.getType();
+ const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize;
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
} else if (IntTy && LI.getType()->isIntegerTy()) {
V = rewriteIntegerLoad(LI);
} else if (NewBeginOffset == NewAllocaBeginOffset &&
- canConvertValue(DL, NewAllocaTy, LI.getType())) {
- V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), LI.isVolatile(),
- LI.getName());
+ NewEndOffset == NewAllocaEndOffset &&
+ (canConvertValue(DL, NewAllocaTy, TargetTy) ||
+ (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
+ TargetTy->isIntegerTy()))) {
+ LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ LI.isVolatile(), LI.getName());
+ if (LI.isVolatile())
+ NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ V = NewLI;
+
+ // If this is an integer load past the end of the slice (which means the
+ // bytes outside the slice are undef or this load is dead) just forcibly
+ // fix the integer size with correct handling of endianness.
+ if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
+ if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
+ if (AITy->getBitWidth() < TITy->getBitWidth()) {
+ V = IRB.CreateZExt(V, TITy, "load.ext");
+ if (DL.isBigEndian())
+ V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
+ "endian_shift");
+ }
} else {
Type *LTy = TargetTy->getPointerTo();
- V = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
- getSliceAlign(TargetTy), LI.isVolatile(),
- LI.getName());
+ LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
+ getSliceAlign(TargetTy),
+ LI.isVolatile(), LI.getName());
+ if (LI.isVolatile())
+ NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
+ V = NewLI;
IsPtrAdjusted = true;
}
V = convertValue(DL, IRB, V, TargetTy);
// LI only used for this computation.
Value *Placeholder =
new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
- V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset, "insert");
+ V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
+ "insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
delete Placeholder;
DL.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
- V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset, "extract");
+ V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
+ "extract");
}
if (VecTy)
if (IntTy && V->getType()->isIntegerTy())
return rewriteIntegerStore(V, SI);
+ const bool IsStorePastEnd = DL.getTypeStoreSize(V->getType()) > SliceSize;
StoreInst *NewSI;
if (NewBeginOffset == NewAllocaBeginOffset &&
NewEndOffset == NewAllocaEndOffset &&
- canConvertValue(DL, V->getType(), NewAllocaTy)) {
+ (canConvertValue(DL, V->getType(), NewAllocaTy) ||
+ (IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
+ V->getType()->isIntegerTy()))) {
+ // If this is an integer store past the end of slice (and thus the bytes
+ // past that point are irrelevant or this is unreachable), truncate the
+ // value prior to storing.
+ if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
+ if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
+ if (VITy->getBitWidth() > AITy->getBitWidth()) {
+ if (DL.isBigEndian())
+ V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
+ "endian_shift");
+ V = IRB.CreateTrunc(V, AITy, "load.trunc");
+ }
+
V = convertValue(DL, IRB, V, NewAllocaTy);
NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
SI.isVolatile());
NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
SI.isVolatile());
}
- (void)NewSI;
+ if (SI.isVolatile())
+ NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope());
Pass.DeadInsts.insert(&SI);
deleteIfTriviallyDead(OldOp);
void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
- Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep");
+ Value *GEP =
+ IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
Value *Load = IRB.CreateLoad(GEP, Name + ".load");
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
DEBUG(dbgs() << " to: " << *Load << "\n");
// Extract the single value and store it using the indices.
Value *Store = IRB.CreateStore(
IRB.CreateExtractValue(Agg, Indices, Name + ".extract"),
- IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"));
+ IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"));
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
}
};
SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
+ // Track loads out of this alloca which cannot, for any reason, be pre-split.
+ // This is important as we also cannot pre-split stores of those loads!
+ // FIXME: This is all pretty gross. It means that we can be more aggressive
+ // in pre-splitting when the load feeding the store happens to come from
+ // a separate alloca. Put another way, the effectiveness of SROA would be
+ // decreased by a frontend which just concatenated all of its local allocas
+ // into one big flat alloca. But defeating such patterns is exactly the job
+ // SROA is tasked with! Sadly, to not have this discrepancy we would have
+ // change store pre-splitting to actually force pre-splitting of the load
+ // that feeds it *and all stores*. That makes pre-splitting much harder, but
+ // maybe it would make it more principled?
+ SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
+
DEBUG(dbgs() << " Searching for candidate loads and stores\n");
for (auto &P : AS.partitions()) {
for (Slice &S : P) {
- if (!S.isSplittable())
- continue;
- if (S.endOffset() <= P.endOffset())
+ Instruction *I = cast<Instruction>(S.getUse()->getUser());
+ if (!S.isSplittable() ||S.endOffset() <= P.endOffset()) {
+ // If this was a load we have to track that it can't participate in any
+ // pre-splitting!
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ UnsplittableLoads.insert(LI);
continue;
+ }
assert(P.endOffset() > S.beginOffset() &&
"Empty or backwards partition!");
// Determine if this is a pre-splittable slice.
- Instruction *I = cast<Instruction>(S.getUse()->getUser());
if (auto *LI = dyn_cast<LoadInst>(I)) {
assert(!LI->isVolatile() && "Cannot split volatile loads!");
}
return true;
};
- if (!IsLoadSimplyStored(LI))
+ if (!IsLoadSimplyStored(LI)) {
+ UnsplittableLoads.insert(LI);
continue;
+ }
Loads.push_back(LI);
} else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser())) {
assert(Offsets.Splits.empty() &&
"Should not have splits the first time we see an instruction!");
Offsets.S = &S;
- Offsets.Splits.push_back(P.endOffset());
+ Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
}
// Now scan the already split slices, and add a split for any of them which
assert(Offsets.S == S && "Found a mismatched slice!");
assert(!Offsets.Splits.empty() &&
"Cannot have an empty set of splits on the second partition!");
- assert(Offsets.Splits.back() == P.beginOffset() &&
+ assert(Offsets.Splits.back() ==
+ P.beginOffset() - Offsets.S->beginOffset() &&
"Previous split does not end where this one begins!");
// Record each split. The last partition's end isn't needed as the size
// of the slice dictates that.
if (S->endOffset() > P.endOffset())
- Offsets.Splits.push_back(P.endOffset());
+ Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
}
}
// such loads and stores, we can only pre-split them if their splits exactly
// match relative to their starting offset. We have to verify this prior to
// any rewriting.
- SmallPtrSet<LoadInst *, 4> BadSplitLoads;
Stores.erase(
std::remove_if(Stores.begin(), Stores.end(),
- [&BadSplitLoads, &SplitOffsetsMap](StoreInst *SI) {
+ [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
// Lookup the load we are storing in our map of split
// offsets.
auto *LI = cast<LoadInst>(SI->getValueOperand());
+ // If it was completely unsplittable, then we're done,
+ // and this store can't be pre-split.
+ if (UnsplittableLoads.count(LI))
+ return true;
+
auto LoadOffsetsI = SplitOffsetsMap.find(LI);
if (LoadOffsetsI == SplitOffsetsMap.end())
- return false; // Unrelated loads are always safe.
+ return false; // Unrelated loads are definitely safe.
auto &LoadOffsets = LoadOffsetsI->second;
// Now lookup the store's offsets.
// with mismatched relative splits. Just give up on them
// and remove both instructions from our list of
// candidates.
- BadSplitLoads.insert(LI);
+ UnsplittableLoads.insert(LI);
return true;
}),
Stores.end());
+ // Now we have to go *back* through all the stores, because a later store may
+ // have caused an earlier store's load to become unsplittable and if it is
+ // unsplittable for the later store, then we can't rely on it being split in
+ // the earlier store either.
+ Stores.erase(std::remove_if(Stores.begin(), Stores.end(),
+ [&UnsplittableLoads](StoreInst *SI) {
+ auto *LI =
+ cast<LoadInst>(SI->getValueOperand());
+ return UnsplittableLoads.count(LI);
+ }),
+ Stores.end());
+ // Once we've established all the loads that can't be split for some reason,
+ // filter any that made it into our list out.
Loads.erase(std::remove_if(Loads.begin(), Loads.end(),
- [&BadSplitLoads](LoadInst *LI) {
- return BadSplitLoads.count(LI);
+ [&UnsplittableLoads](LoadInst *LI) {
+ return UnsplittableLoads.count(LI);
}),
Loads.end());
+
// If no loads or stores are left, there is no pre-splitting to be done for
// this alloca.
if (Loads.empty() && Stores.empty())
// them to the alloca slices.
SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
std::vector<LoadInst *> SplitLoads;
+ const DataLayout &DL = AI.getModule()->getDataLayout();
for (LoadInst *LI : Loads) {
SplitLoads.clear();
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
LoadInst *PLoad = IRB.CreateAlignedLoad(
- getAdjustedPtr(IRB, *DL, BasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ getAdjustedPtr(IRB, DL, BasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, BasePtr->getName() + "."),
- getAdjustedAlignment(LI, PartOffset, *DL), /*IsVolatile*/ false,
+ getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
// Append this load onto the list of split loads so we can find it later
NewSlices.push_back(
Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
&PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
- /*IsSplittable*/ true));
+ /*IsSplittable*/ false));
DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
<< ", " << NewSlices.back().endOffset() << "): " << *PLoad
<< "\n");
PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
StoreInst *PStore = IRB.CreateAlignedStore(
- PLoad, getAdjustedPtr(IRB, *DL, StoreBasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
- getAdjustedAlignment(SI, PartOffset, *DL), /*IsVolatile*/ false);
+ getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
(void)PStore;
DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
}
} else {
IRB.SetInsertPoint(BasicBlock::iterator(LI));
PLoad = IRB.CreateAlignedLoad(
- getAdjustedPtr(IRB, *DL, LoadBasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ getAdjustedPtr(IRB, DL, LoadBasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, LoadBasePtr->getName() + "."),
- getAdjustedAlignment(LI, PartOffset, *DL), /*IsVolatile*/ false,
+ getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
}
// And store this partition.
IRB.SetInsertPoint(BasicBlock::iterator(SI));
StoreInst *PStore = IRB.CreateAlignedStore(
- PLoad, getAdjustedPtr(IRB, *DL, StoreBasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
- getAdjustedAlignment(SI, PartOffset, *DL), /*IsVolatile*/ false);
+ getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
// Now build a new slice for the alloca.
NewSlices.push_back(
Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
&PStore->getOperandUse(PStore->getPointerOperandIndex()),
- /*IsSplittable*/ true));
+ /*IsSplittable*/ false));
DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
<< ", " << NewSlices.back().endOffset() << "): " << *PStore
<< "\n");
}
// Mark the original store as dead now that we've split it up and kill its
- // slice. Note that we leave the original load in place. It may in turn be
- // split up if it is an alloca load for some other alloca, but it may be
- // a normal load. This may introduce redundant loads, but where those can
- // be merged the rest of the optimizer should handle the merging, and this
- // uncovers SSA splits which is more important. In practice, the original
- // loads will almost always be fully split and removed eventually, and the
- // splits will be merged by any trivial CSE, including instcombine.
+ // slice. Note that we leave the original load in place unless this store
+ // was its only use. It may in turn be split up if it is an alloca load
+ // for some other alloca, but it may be a normal load. This may introduce
+ // redundant loads, but where those can be merged the rest of the optimizer
+ // should handle the merging, and this uncovers SSA splits which is more
+ // important. In practice, the original loads will almost always be fully
+ // split and removed eventually, and the splits will be merged by any
+ // trivial CSE, including instcombine.
+ if (LI->hasOneUse()) {
+ assert(*LI->user_begin() == SI && "Single use isn't this store!");
+ DeadInsts.insert(LI);
+ }
DeadInsts.insert(SI);
Offsets.S->kill();
}
- // Now we need to remove the killed slices, sort the newly added slices, and
- // merge the two sorted ranges of slices so that the entire range is sorted
- // properly for us to re-compute the partitions.
+ // Remove the killed slices that have ben pre-split.
AS.erase(std::remove_if(AS.begin(), AS.end(), [](const Slice &S) {
return S.isDead();
}), AS.end());
+ // Insert our new slices. This will sort and merge them into the sorted
+ // sequence.
AS.insert(NewSlices);
DEBUG(dbgs() << " Pre-split slices:\n");
/// appropriate new offsets. It also evaluates how successful the rewrite was
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
-bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- AllocaSlices::Partition &P) {
+AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
+ AllocaSlices::Partition &P) {
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
Type *SliceTy = nullptr;
+ const DataLayout &DL = AI.getModule()->getDataLayout();
if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
- if (DL->getTypeAllocSize(CommonUseTy) >= P.size())
+ if (DL.getTypeAllocSize(CommonUseTy) >= P.size())
SliceTy = CommonUseTy;
if (!SliceTy)
- if (Type *TypePartitionTy = getTypePartition(*DL, AI.getAllocatedType(),
+ if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
P.beginOffset(), P.size()))
SliceTy = TypePartitionTy;
if ((!SliceTy || (SliceTy->isArrayTy() &&
SliceTy->getArrayElementType()->isIntegerTy())) &&
- DL->isLegalInteger(P.size() * 8))
+ DL.isLegalInteger(P.size() * 8))
SliceTy = Type::getIntNTy(*C, P.size() * 8);
if (!SliceTy)
SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
- assert(DL->getTypeAllocSize(SliceTy) >= P.size());
+ assert(DL.getTypeAllocSize(SliceTy) >= P.size());
- bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, *DL);
+ bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
VectorType *VecTy =
- IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, *DL);
+ IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
if (VecTy)
SliceTy = VecTy;
NewAI = &AI;
// FIXME: We should be able to bail at this point with "nothing changed".
// FIXME: We might want to defer PHI speculation until after here.
+ // FIXME: return nullptr;
} else {
unsigned Alignment = AI.getAlignment();
if (!Alignment) {
// The minimum alignment which users can rely on when the explicit
// alignment is omitted or zero is that required by the ABI for this
// type.
- Alignment = DL->getABITypeAlignment(AI.getAllocatedType());
+ Alignment = DL.getABITypeAlignment(AI.getAllocatedType());
}
Alignment = MinAlign(Alignment, P.beginOffset());
// If we will get at least this much alignment from the type alone, leave
// the alloca's alignment unconstrained.
- if (Alignment <= DL->getABITypeAlignment(SliceTy))
+ if (Alignment <= DL.getABITypeAlignment(SliceTy))
Alignment = 0;
NewAI = new AllocaInst(
SliceTy, nullptr, Alignment,
SmallPtrSet<PHINode *, 8> PHIUsers;
SmallPtrSet<SelectInst *, 8> SelectUsers;
- AllocaSliceRewriter Rewriter(*DL, AS, *this, AI, *NewAI, P.beginOffset(),
+ AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
P.endOffset(), IsIntegerPromotable, VecTy,
PHIUsers, SelectUsers);
bool Promotable = true;
for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
E = PHIUsers.end();
I != E; ++I)
- if (!isSafePHIToSpeculate(**I, DL)) {
+ if (!isSafePHIToSpeculate(**I)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
E = SelectUsers.end();
I != E; ++I)
- if (!isSafeSelectToSpeculate(**I, DL)) {
+ if (!isSafeSelectToSpeculate(**I)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
PostPromotionWorklist.pop_back();
}
- return true;
+ return NewAI;
}
/// \brief Walks the slices of an alloca and form partitions based on them,
unsigned NumPartitions = 0;
bool Changed = false;
+ const DataLayout &DL = AI.getModule()->getDataLayout();
+ // First try to pre-split loads and stores.
Changed |= presplitLoadsAndStores(AI, AS);
+ // Now that we have identified any pre-splitting opportunities, mark any
+ // splittable (non-whole-alloca) loads and stores as unsplittable. If we fail
+ // to split these during pre-splitting, we want to force them to be
+ // rewritten into a partition.
+ bool IsSorted = true;
+ for (Slice &S : AS) {
+ if (!S.isSplittable())
+ continue;
+ // FIXME: We currently leave whole-alloca splittable loads and stores. This
+ // used to be the only splittable loads and stores and we need to be
+ // confident that the above handling of splittable loads and stores is
+ // completely sufficient before we forcibly disable the remaining handling.
+ if (S.beginOffset() == 0 &&
+ S.endOffset() >= DL.getTypeAllocSize(AI.getAllocatedType()))
+ continue;
+ if (isa<LoadInst>(S.getUse()->getUser()) ||
+ isa<StoreInst>(S.getUse()->getUser())) {
+ S.makeUnsplittable();
+ IsSorted = false;
+ }
+ }
+ if (!IsSorted)
+ std::sort(AS.begin(), AS.end());
+
+ /// \brief Describes the allocas introduced by rewritePartition
+ /// in order to migrate the debug info.
+ struct Piece {
+ AllocaInst *Alloca;
+ uint64_t Offset;
+ uint64_t Size;
+ Piece(AllocaInst *AI, uint64_t O, uint64_t S)
+ : Alloca(AI), Offset(O), Size(S) {}
+ };
+ SmallVector<Piece, 4> Pieces;
+
// Rewrite each partition.
for (auto &P : AS.partitions()) {
- Changed |= rewritePartition(AI, AS, P);
+ if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
+ Changed = true;
+ if (NewAI != &AI) {
+ uint64_t SizeOfByte = 8;
+ uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType());
+ // Don't include any padding.
+ uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
+ Pieces.push_back(Piece(NewAI, P.beginOffset() * SizeOfByte, Size));
+ }
+ }
++NumPartitions;
}
MaxPartitionsPerAlloca =
std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
+ // Migrate debug information from the old alloca to the new alloca(s)
+ // and the individual partitions.
+ if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) {
+ auto *Var = DbgDecl->getVariable();
+ auto *Expr = DbgDecl->getExpression();
+ DIBuilder DIB(*AI.getParent()->getParent()->getParent(),
+ /*AllowUnresolved*/ false);
+ bool IsSplit = Pieces.size() > 1;
+ for (auto Piece : Pieces) {
+ // Create a piece expression describing the new partition or reuse AI's
+ // expression if there is only one partition.
+ auto *PieceExpr = Expr;
+ if (IsSplit || Expr->isBitPiece()) {
+ // If this alloca is already a scalar replacement of a larger aggregate,
+ // Piece.Offset describes the offset inside the scalar.
+ uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;
+ uint64_t Start = Offset + Piece.Offset;
+ uint64_t Size = Piece.Size;
+ if (Expr->isBitPiece()) {
+ uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize();
+ if (Start >= AbsEnd)
+ // No need to describe a SROAed padding.
+ continue;
+ Size = std::min(Size, AbsEnd - Start);
+ }
+ PieceExpr = DIB.createBitPieceExpression(Start, Size);
+ }
+
+ // Remove any existing dbg.declare intrinsic describing the same alloca.
+ if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca))
+ OldDDI->eraseFromParent();
+
+ DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(),
+ &AI);
+ }
+ }
return Changed;
}
AI.eraseFromParent();
return true;
}
+ const DataLayout &DL = AI.getModule()->getDataLayout();
// Skip alloca forms that this analysis can't handle.
if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
- DL->getTypeAllocSize(AI.getAllocatedType()) == 0)
+ DL.getTypeAllocSize(AI.getAllocatedType()) == 0)
return false;
bool Changed = false;
// First, split any FCA loads and stores touching this alloca to promote
// better splitting and promotion opportunities.
- AggLoadStoreRewriter AggRewriter(*DL);
+ AggLoadStoreRewriter AggRewriter(DL);
Changed |= AggRewriter.rewrite(AI);
// Build the slices using a recursive instruction-visiting builder.
- AllocaSlices AS(*DL, AI);
+ AllocaSlices AS(DL, AI);
DEBUG(AS.print(dbgs()));
if (AS.isEscaped())
return Changed;
DeadInsts.insert(U);
}
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
DeletedAllocas.insert(AI);
+ if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(AI))
+ DbgDecl->eraseFromParent();
+ }
++NumDeleted;
I->eraseFromParent();
if (DT && !ForceSSAUpdater) {
DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
- PromoteMemToReg(PromotableAllocas, *DT, nullptr, AT);
+ PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
PromotableAllocas.clear();
return true;
}
DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP) {
- DEBUG(dbgs() << " Skipping SROA -- no target data!\n");
- return false;
- }
- DL = &DLP->getDataLayout();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
- AT = &getAnalysis<AssumptionTracker>();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
- I != E; ++I)
+ I != E; ++I) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
Worklist.insert(AI);
+ }
bool Changed = false;
// A set of deleted alloca instruction pointers which should be removed from
}
void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AssumptionTracker>();
+ AU.addRequired<AssumptionCacheTracker>();
if (RequiresDomTree)
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();