#define DEBUG_TYPE "sroa"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Constants.h"
#include "llvm/DIBuilder.h"
+#include "llvm/DataLayout.h"
#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/IRBuilder.h"
+#include "llvm/InstVisitor.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
class UseBuilder;
friend class AllocaPartitioning::UseBuilder;
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Handle to alloca instruction to simplify method interfaces.
AllocaInst &AI;
#endif
void insertUse(Instruction &I, int64_t Offset, uint64_t Size,
bool IsSplittable = false) {
- // Completely skip uses which have a zero size or don't overlap the
- // allocation.
- if (Size == 0 ||
- (Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
- (Offset < 0 && (uint64_t)-Offset >= Size)) {
+ // Completely skip uses which have a zero size or start either before or
+ // past the end of the allocation.
+ if (Size == 0 || Offset < 0 || (uint64_t)Offset >= AllocSize) {
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
- << " which starts past the end of the " << AllocSize
- << " byte alloca:\n"
+ << " which has zero size or starts outside of the "
+ << AllocSize << " byte alloca:\n"
<< " alloca: " << P.AI << "\n"
<< " use: " << I << "\n");
return;
}
- // Clamp the start to the beginning of the allocation.
- if (Offset < 0) {
- DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
- << " to start at the beginning of the alloca:\n"
- << " alloca: " << P.AI << "\n"
- << " use: " << I << "\n");
- Size -= (uint64_t)-Offset;
- Offset = 0;
- }
-
uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size;
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
+ // NOTE! This may appear superficially to be something we could ignore
+ // entirely, but that is not so! There may be PHI-node uses where some
+ // instructions are dead but not others. We can't completely ignore the
+ // PHI node, and so have to record at least the information here.
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
void insertUse(Instruction &User, int64_t Offset, uint64_t Size) {
// If the use has a zero size or extends outside of the allocation, record
// it as a dead use for elimination later.
- if (Size == 0 || (uint64_t)Offset >= AllocSize ||
- (Offset < 0 && (uint64_t)-Offset >= Size))
+ if (Size == 0 || Offset < 0 || (uint64_t)Offset >= AllocSize)
return markAsDead(User);
- // Clamp the start to the beginning of the allocation.
- if (Offset < 0) {
- Size -= (uint64_t)-Offset;
- Offset = 0;
- }
-
uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size;
// Clamp the end offset to the end of the allocation. Note that this is
AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
:
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
PointerEscapingInstr(0) {
/// \brief A collection of instructions to delete.
/// We try to batch deletions to simplify code and make things a bit more
/// efficient.
- SmallVector<Instruction *, 8> DeadInsts;
-
- /// \brief A set to prevent repeatedly marking an instruction split into many
- /// uses as dead. Only used to guard insertion into DeadInsts.
- SmallPtrSet<Instruction *, 4> DeadSplitInsts;
+ SetVector<Instruction *, SmallVector<Instruction *, 8> > DeadInsts;
/// \brief Post-promotion worklist.
///
do {
LoadInst *LI = Loads.pop_back_val();
LI->replaceAllUsesWith(NewPN);
- Pass.DeadInsts.push_back(LI);
+ Pass.DeadInsts.insert(LI);
} while (!Loads.empty());
// Inject loads into all of the pred blocks.
DEBUG(dbgs() << " speculated to: " << *V << "\n");
LI->replaceAllUsesWith(V);
- Pass.DeadInsts.push_back(LI);
+ Pass.DeadInsts.insert(LI);
}
}
};
EndIndex > Ty->getNumElements())
return false;
- // FIXME: We should build shuffle vector instructions to handle
- // non-element-sized accesses.
- if ((EndOffset - BeginOffset) != ElementSize &&
- (EndOffset - BeginOffset) != VecSize)
- return false;
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ uint64_t NumElements = EndIndex - BeginIndex;
+ Type *PartitionTy
+ = (NumElements == 1) ? Ty->getElementType()
+ : VectorType::get(Ty->getElementType(), NumElements);
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
if (MI->isVolatile())
} else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
return false;
- } else if (!isa<LoadInst>(I->U->getUser()) &&
- !isa<StoreInst>(I->U->getUser())) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ if (LI->isVolatile())
+ return false;
+ if (!canConvertValue(TD, PartitionTy, LI->getType()))
+ return false;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ if (SI->isVolatile())
+ return false;
+ if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
+ return false;
+ } else {
return false;
}
}
AllocaPartitioning::const_use_iterator I,
AllocaPartitioning::const_use_iterator E) {
uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy);
+ // Don't create integer types larger than the maximum bitwidth.
+ if (SizeInBits > IntegerType::MAX_INT_BITS)
+ return false;
// Don't try to handle allocas with bit-padding.
if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
IntegerType *Ty, uint64_t Offset,
const Twine &Name) {
+ DEBUG(dbgs() << " start: " << *V << "\n");
IntegerType *IntTy = cast<IntegerType>(V->getType());
assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
"Element extends past full value");
uint64_t ShAmt = 8*Offset;
if (DL.isBigEndian())
ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
- if (ShAmt)
+ if (ShAmt) {
V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
+ DEBUG(dbgs() << " shifted: " << *V << "\n");
+ }
assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot extract to a larger integer!");
- if (Ty != IntTy)
+ if (Ty != IntTy) {
V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
+ DEBUG(dbgs() << " trunced: " << *V << "\n");
+ }
return V;
}
IntegerType *Ty = cast<IntegerType>(V->getType());
assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot insert a larger integer!");
- if (Ty != IntTy)
+ DEBUG(dbgs() << " start: " << *V << "\n");
+ if (Ty != IntTy) {
V = IRB.CreateZExt(V, IntTy, Name + ".ext");
+ DEBUG(dbgs() << " extended: " << *V << "\n");
+ }
assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
"Element store outside of alloca store");
uint64_t ShAmt = 8*Offset;
if (DL.isBigEndian())
ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
- if (ShAmt)
+ if (ShAmt) {
V = IRB.CreateShl(V, ShAmt, Name + ".shift");
+ DEBUG(dbgs() << " shifted: " << *V << "\n");
+ }
if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
+ DEBUG(dbgs() << " masked: " << *Old << "\n");
V = IRB.CreateOr(Old, V, Name + ".insert");
+ DEBUG(dbgs() << " inserted: " << *V << "\n");
}
return V;
}
return getOffsetTypeAlign(Ty, BeginOffset - NewAllocaBeginOffset);
}
- ConstantInt *getIndex(IRBuilder<> &IRB, uint64_t Offset) {
+ unsigned getIndex(uint64_t Offset) {
assert(VecTy && "Can only call getIndex when rewriting a vector");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
uint32_t Index = RelOffset / ElementSize;
assert(Index * ElementSize == RelOffset);
- return IRB.getInt32(Index);
+ return Index;
}
void deleteIfTriviallyDead(Value *V) {
Instruction *I = cast<Instruction>(V);
if (isInstructionTriviallyDead(I))
- Pass.DeadInsts.push_back(I);
+ Pass.DeadInsts.insert(I);
}
- bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
- Value *Result;
- if (LI.getType() == VecTy->getElementType() ||
- BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
- Result = IRB.CreateExtractElement(
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
- getIndex(IRB, BeginOffset), getName(".extract"));
- } else {
- Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
+ Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".load"));
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+ if (NumElements == 1) {
+ V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
+ getName(".extract"));
+ DEBUG(dbgs() << " extract: " << *V << "\n");
+ } else if (NumElements < VecTy->getNumElements()) {
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(NumElements);
+ for (unsigned i = BeginIndex; i != EndIndex; ++i)
+ Mask.push_back(IRB.getInt32(i));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ getName(".extract"));
+ DEBUG(dbgs() << " shuffle: " << *V << "\n");
}
- if (Result->getType() != LI.getType())
- Result = convertValue(TD, IRB, Result, LI.getType());
- LI.replaceAllUsesWith(Result);
- Pass.DeadInsts.push_back(&LI);
-
- DEBUG(dbgs() << " to: " << *Result << "\n");
- return true;
+ return V;
}
- bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+ Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
V = convertValue(TD, IRB, V, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
- getName(".extract"));
- LI.replaceAllUsesWith(V);
- Pass.DeadInsts.push_back(&LI);
- DEBUG(dbgs() << " to: " << *V << "\n");
- return true;
+ if (Offset > 0 || EndOffset < NewAllocaEndOffset)
+ V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
+ getName(".extract"));
+ return V;
}
bool visitLoadInst(LoadInst &LI) {
IRBuilder<> IRB(&LI);
uint64_t Size = EndOffset - BeginOffset;
- if (Size < TD.getTypeStoreSize(LI.getType())) {
+ bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of the original allocation it's behavior is undefined. Rather
+ // than trying to transform it, just replace it with undef.
+ // FIXME: We should do something more clever for functions being
+ // instrumented by asan.
+ // FIXME: Eventually, once ASan and friends can flush out bugs here, this
+ // should be transformed to a load of null making it unreachable.
+ uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
+ if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
+ LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
+ Pass.DeadInsts.insert(&LI);
+ deleteIfTriviallyDead(OldOp);
+ DEBUG(dbgs() << " to: undef!!\n");
+ return true;
+ }
+
+ Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
+ : LI.getType();
+ bool IsPtrAdjusted = false;
+ Value *V;
+ if (VecTy) {
+ V = rewriteVectorizedLoadInst(IRB, LI, OldOp);
+ } else if (IntTy && LI.getType()->isIntegerTy()) {
+ V = rewriteIntegerLoad(IRB, LI);
+ } else if (BeginOffset == NewAllocaBeginOffset &&
+ canConvertValue(TD, NewAllocaTy, LI.getType())) {
+ V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ LI.isVolatile(), getName(".load"));
+ } else {
+ Type *LTy = TargetTy->getPointerTo();
+ V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
+ getPartitionTypeAlign(TargetTy),
+ LI.isVolatile(), getName(".load"));
+ IsPtrAdjusted = true;
+ }
+ V = convertValue(TD, IRB, V, TargetTy);
+
+ if (IsSplitIntLoad) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(LI.getType()->getIntegerBitWidth() ==
TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
"Only alloca-wide loads can be split and recomposed");
- IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8);
- bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
- canConvertValue(TD, NewAllocaTy, NarrowTy);
- Value *V;
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
- if (IsConvertable)
- V = convertValue(TD, IRB,
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load")),
- NarrowTy);
- else
- V = IRB.CreateAlignedLoad(
- getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
- getPartitionTypeAlign(NarrowTy), getName(".load"));
// Create a placeholder value with the same type as LI to use as the
// basis for the new value. This allows us to replace the uses of LI with
// the computed value, and then replace the placeholder with LI, leaving
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
delete Placeholder;
- if (Pass.DeadSplitInsts.insert(&LI))
- Pass.DeadInsts.push_back(&LI);
- DEBUG(dbgs() << " to: " << *V << "\n");
- return IsConvertable;
- }
-
- if (VecTy)
- return rewriteVectorizedLoadInst(IRB, LI, OldOp);
- if (IntTy && LI.getType()->isIntegerTy())
- return rewriteIntegerLoad(IRB, LI);
-
- if (BeginOffset == NewAllocaBeginOffset &&
- canConvertValue(TD, NewAllocaTy, LI.getType())) {
- Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- LI.isVolatile(), getName(".load"));
- Value *NewV = convertValue(TD, IRB, NewLI, LI.getType());
- LI.replaceAllUsesWith(NewV);
- Pass.DeadInsts.push_back(&LI);
-
- DEBUG(dbgs() << " to: " << *NewLI << "\n");
- return !LI.isVolatile();
+ } else {
+ LI.replaceAllUsesWith(V);
}
- assert(!IntTy && "Invalid load found with int-op widening enabled");
-
- Value *NewPtr = getAdjustedAllocaPtr(IRB,
- LI.getPointerOperand()->getType());
- LI.setOperand(0, NewPtr);
- LI.setAlignment(getPartitionTypeAlign(LI.getType()));
- DEBUG(dbgs() << " to: " << LI << "\n");
-
+ Pass.DeadInsts.insert(&LI);
deleteIfTriviallyDead(OldOp);
- return NewPtr == &NewAI && !LI.isVolatile();
- }
-
- bool rewriteWideStoreInst(IRBuilder<> &IRB, StoreInst &SI, Type *ValueTy,
- unsigned Size) {
- assert(!SI.isVolatile());
- assert(ValueTy->isIntegerTy() &&
- "Only integer type loads and stores are split");
- assert(ValueTy->getIntegerBitWidth() ==
- TD.getTypeStoreSizeInBits(ValueTy) &&
- "Non-byte-multiple bit width");
- assert(ValueTy->getIntegerBitWidth() ==
- TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide stores can be split and recomposed");
- IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
- Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy,
- BeginOffset, getName(".extract"));
- StoreInst *NewSI;
- bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
- canConvertValue(TD, NarrowTy, NewAllocaTy);
- if (IsConvertable)
- NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy),
- &NewAI, NewAI.getAlignment());
- else
- NewSI = IRB.CreateAlignedStore(
- V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
- getPartitionTypeAlign(NarrowTy));
- (void)NewSI;
- if (Pass.DeadSplitInsts.insert(&SI))
- Pass.DeadInsts.push_back(&SI);
-
- DEBUG(dbgs() << " to: " << *NewSI << "\n");
- return IsConvertable;
- }
-
- bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, StoreInst &SI,
- Value *OldOp) {
- Value *V = SI.getValueOperand();
- Type *ValueTy = V->getType();
- if (ValueTy == ElementTy ||
- BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
- if (ValueTy != ElementTy)
- V = convertValue(TD, IRB, V, ElementTy);
+ DEBUG(dbgs() << " to: " << *V << "\n");
+ return !LI.isVolatile() && !IsPtrAdjusted;
+ }
+
+ bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V,
+ StoreInst &SI, Value *OldOp) {
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+ Type *PartitionTy
+ = (NumElements == 1) ? ElementTy
+ : VectorType::get(ElementTy, NumElements);
+ if (V->getType() != PartitionTy)
+ V = convertValue(TD, IRB, V, PartitionTy);
+ if (NumElements < VecTy->getNumElements()) {
+ // We need to mix in the existing elements.
LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".load"));
- V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
- getName(".insert"));
- } else if (ValueTy != VecTy) {
- uint64_t Size = EndOffset - BeginOffset;
- if (Size < TD.getTypeStoreSize(ValueTy))
- return rewriteWideStoreInst(IRB, SI, ValueTy, Size);
-
+ if (NumElements == 1) {
+ V = IRB.CreateInsertElement(LI, V, IRB.getInt32(BeginIndex),
+ getName(".insert"));
+ DEBUG(dbgs() << " insert: " << *V << "\n");
+ } else {
+ // When inserting a smaller vector into the larger to store, we first
+ // use a shuffle vector to widen it with undef elements, and then
+ // a second shuffle vector to select between the loaded vector and the
+ // incoming vector.
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(VecTy->getNumElements());
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i - BeginIndex));
+ else
+ Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ getName(".expand"));
+ DEBUG(dbgs() << " shuffle1: " << *V << "\n");
+
+ Mask.clear();
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i));
+ else
+ Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
+ V = IRB.CreateShuffleVector(V, LI, ConstantVector::get(Mask),
+ getName("insert"));
+ DEBUG(dbgs() << " shuffle2: " << *V << "\n");
+ }
+ } else {
V = convertValue(TD, IRB, V, VecTy);
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
- Pass.DeadInsts.push_back(&SI);
+ Pass.DeadInsts.insert(&SI);
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
- bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) {
+ bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
- Value *V = SI.getValueOperand();
if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".oldload"));
}
V = convertValue(TD, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
- Pass.DeadInsts.push_back(&SI);
+ Pass.DeadInsts.insert(&SI);
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
assert(OldOp == OldPtr);
IRBuilder<> IRB(&SI);
- if (VecTy)
- return rewriteVectorizedStoreInst(IRB, SI, OldOp);
- Type *ValueTy = SI.getValueOperand()->getType();
-
- uint64_t Size = EndOffset - BeginOffset;
- if (Size < TD.getTypeStoreSize(ValueTy))
- return rewriteWideStoreInst(IRB, SI, ValueTy, Size);
-
- if (IntTy && ValueTy->isIntegerTy())
- return rewriteIntegerStore(IRB, SI);
+ Value *V = SI.getValueOperand();
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after promoting this alloca.
- if (ValueTy->isPointerTy())
- if (AllocaInst *AI = dyn_cast<AllocaInst>(SI.getValueOperand()
- ->stripInBoundsOffsets()))
+ if (V->getType()->isPointerTy())
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
Pass.PostPromotionWorklist.insert(AI);
- if (BeginOffset == NewAllocaBeginOffset &&
- canConvertValue(TD, ValueTy, NewAllocaTy)) {
- Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy);
- StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(),
- SI.isVolatile());
- (void)NewSI;
- Pass.DeadInsts.push_back(&SI);
-
- DEBUG(dbgs() << " to: " << *NewSI << "\n");
- return !SI.isVolatile();
+ uint64_t Size = EndOffset - BeginOffset;
+ if (Size < TD.getTypeStoreSize(V->getType())) {
+ assert(!SI.isVolatile());
+ assert(V->getType()->isIntegerTy() &&
+ "Only integer type loads and stores are split");
+ assert(V->getType()->getIntegerBitWidth() ==
+ TD.getTypeStoreSizeInBits(V->getType()) &&
+ "Non-byte-multiple bit width");
+ assert(V->getType()->getIntegerBitWidth() ==
+ TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
+ "Only alloca-wide stores can be split and recomposed");
+ IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
+ V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
+ getName(".extract"));
}
- assert(!IntTy && "Invalid store found with int-op widening enabled");
-
- Value *NewPtr = getAdjustedAllocaPtr(IRB,
- SI.getPointerOperand()->getType());
- SI.setOperand(1, NewPtr);
- SI.setAlignment(getPartitionTypeAlign(SI.getValueOperand()->getType()));
- DEBUG(dbgs() << " to: " << SI << "\n");
+ if (VecTy)
+ return rewriteVectorizedStoreInst(IRB, V, SI, OldOp);
+ if (IntTy && V->getType()->isIntegerTy())
+ return rewriteIntegerStore(IRB, V, SI);
+ StoreInst *NewSI;
+ if (BeginOffset == NewAllocaBeginOffset &&
+ canConvertValue(TD, V->getType(), NewAllocaTy)) {
+ V = convertValue(TD, IRB, V, NewAllocaTy);
+ NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+ SI.isVolatile());
+ } else {
+ Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo());
+ NewSI = IRB.CreateAlignedStore(V, NewPtr,
+ getPartitionTypeAlign(V->getType()),
+ SI.isVolatile());
+ }
+ (void)NewSI;
+ Pass.DeadInsts.insert(&SI);
deleteIfTriviallyDead(OldOp);
- return NewPtr == &NewAI && !SI.isVolatile();
+
+ DEBUG(dbgs() << " to: " << *NewSI << "\n");
+ return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
}
bool visitMemSetInst(MemSetInst &II) {
}
// Record this instruction for deletion.
- if (Pass.DeadSplitInsts.insert(&II))
- Pass.DeadInsts.push_back(&II);
+ Pass.DeadInsts.insert(&II);
Type *AllocaTy = NewAI.getAllocatedType();
Type *ScalarTy = AllocaTy->getScalarType();
IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
NewAI.getAlignment(),
getName(".load")),
- V, getIndex(IRB, BeginOffset),
+ V, IRB.getInt32(getIndex(BeginOffset)),
getName(".insert")),
&NewAI, NewAI.getAlignment());
(void)Store;
return false;
}
// Record this instruction for deletion.
- if (Pass.DeadSplitInsts.insert(&II))
- Pass.DeadInsts.push_back(&II);
+ Pass.DeadInsts.insert(&II);
bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
EndOffset == NewAllocaEndOffset;
// We have to extract rather than load.
Src = IRB.CreateExtractElement(
IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
- getIndex(IRB, BeginOffset),
+ IRB.getInt32(getIndex(BeginOffset)),
getName(".copyextract"));
} else if (IntTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
// We have to insert into a loaded copy before storing.
Src = IRB.CreateInsertElement(
IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
- Src, getIndex(IRB, BeginOffset),
+ Src, IRB.getInt32(getIndex(BeginOffset)),
getName(".insert"));
}
assert(II.getArgOperand(1) == OldPtr);
// Record this instruction for deletion.
- if (Pass.DeadSplitInsts.insert(&II))
- Pass.DeadInsts.push_back(&II);
+ Pass.DeadInsts.insert(&II);
ConstantInt *Size
= ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
DI != DE; ++DI) {
Changed = true;
(*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
- DeadInsts.push_back(*DI);
+ DeadInsts.insert(*DI);
}
for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(),
DE = P.dead_op_end();
if (Instruction *OldI = dyn_cast<Instruction>(OldV))
if (isInstructionTriviallyDead(OldI)) {
Changed = true;
- DeadInsts.push_back(OldI);
+ DeadInsts.insert(OldI);
}
}
/// We also record the alloca instructions deleted here so that they aren't
/// subsequently handed to mem2reg to promote.
void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
- DeadSplitInsts.clear();
while (!DeadInsts.empty()) {
Instruction *I = DeadInsts.pop_back_val();
DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
// Zero out the operand and see if it becomes trivially dead.
*OI = 0;
if (isInstructionTriviallyDead(U))
- DeadInsts.push_back(U);
+ DeadInsts.insert(U);
}
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))