#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
+STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals, "Number of allocas copied from constant global");
/// The alloca to promote.
AllocaInst *AI;
+ /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
+ /// looping and avoid redundant work.
+ SmallPtrSet<PHINode*, 8> CheckedPHIs;
+
/// isUnsafe - This is set to true if the alloca cannot be SROA'd.
bool isUnsafe : 1;
bool isSafeAllocaToScalarRepl(AllocaInst *AI);
void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
+ void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
+ AllocaInfo &Info);
void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
const Type *MemOpType, bool isStore, AllocaInfo &Info,
- Instruction *TheAccess);
+ Instruction *TheAccess, bool AllowWholeAccess);
bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
const Type *&IdxTy);
/// optimization, which scans the uses of an alloca and determines if it can
/// rewrite it in terms of a single new alloca that can be mem2reg'd.
class ConvertToScalarInfo {
- /// AllocaSize - The size of the alloca being considered.
+ /// AllocaSize - The size of the alloca being considered in bytes.
unsigned AllocaSize;
const TargetData &TD;
/// also declared as a vector, we do want to promote to a vector.
bool HadAVector;
+ /// HadNonMemTransferAccess - True if there is at least one access to the
+ /// alloca that is not a MemTransferInst. We don't want to turn structs into
+ /// large integers unless there is some potential for optimization.
+ bool HadNonMemTransferAccess;
+
public:
explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
- : AllocaSize(Size), TD(td) {
- IsNotTrivial = false;
- VectorTy = 0;
- HadAVector = false;
- }
+ : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0),
+ HadAVector(false), HadNonMemTransferAccess(false) { }
AllocaInst *TryConvert(AllocaInst *AI);
private:
bool CanConvertToScalar(Value *V, uint64_t Offset);
- void MergeInType(const Type *In, uint64_t Offset);
+ void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore);
+ bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
<< *VectorTy << '\n');
NewTy = VectorTy; // Use the vector type.
} else {
+ unsigned BitWidth = AllocaSize * 8;
+ if (!HadAVector && !HadNonMemTransferAccess &&
+ !TD.fitsInLegalInteger(BitWidth))
+ return 0;
+
DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
- NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
+ NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
ConvertUsesToScalar(AI, NewAI, 0);
/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
/// so far at the offset specified by Offset (which is specified in bytes).
///
-/// There are two cases we handle here:
+/// There are three cases we handle here:
/// 1) A union of vector types of the same size and potentially its elements.
/// Here we turn element accesses into insert/extract element operations.
/// This promotes a <4 x float> with a store of float to the third element
/// into a <4 x float> that uses insert element.
-/// 2) A fully general blob of memory, which we turn into some (potentially
+/// 2) A union of vector types with power-of-2 size differences, e.g. a float,
+/// <2 x float> and <4 x float>. Here we turn element accesses into insert
+/// and extract element operations, and <2 x float> accesses into a cast to
+/// <2 x double>, an extract, and a cast back to <2 x float>.
+/// 3) A fully general blob of memory, which we turn into some (potentially
/// large) integer type with extract and insert operations where the loads
/// and stores would mutate the memory. We mark this by setting VectorTy
/// to VoidTy.
-void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
+void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
+ bool IsLoadOrStore) {
// If we already decided to turn this into a blob of integer memory, there is
// nothing to be done.
if (VectorTy && VectorTy->isVoidTy())
// If the In type is a vector that is the same size as the alloca, see if it
// matches the existing VecTy.
if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
- // Remember if we saw a vector type.
- HadAVector = true;
-
- if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
- // If we're storing/loading a vector of the right size, allow it as a
- // vector. If this the first vector we see, remember the type so that
- // we know the element size. If this is a subsequent access, ignore it
- // even if it is a differing type but the same size. Worst case we can
- // bitcast the resultant vectors.
- if (VectorTy == 0)
- VectorTy = VInTy;
+ if (MergeInVectorType(VInTy, Offset))
return;
- }
} else if (In->isFloatTy() || In->isDoubleTy() ||
(In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+ // Full width accesses can be ignored, because they can always be turned
+ // into bitcasts.
+ unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+ if (IsLoadOrStore && EltSize == AllocaSize)
+ return;
// If we're accessing something that could be an element of a vector, see
// if the implied vector agrees with what we already have and if Offset is
// compatible with it.
- unsigned EltSize = In->getPrimitiveSizeInBits()/8;
if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
(VectorTy == 0 ||
cast<VectorType>(VectorTy)->getElementType()
VectorTy = Type::getVoidTy(In->getContext());
}
+/// MergeInVectorType - Handles the vector case of MergeInType, returning true
+/// if the type was successfully merged and false otherwise.
+///
+/// VInTy is the vector type of the access being merged and Offset is its byte
+/// offset within the alloca.  HadAVector is set regardless of the outcome.  On
+/// success VectorTy is either left unchanged or replaced by VInTy when VInTy
+/// is the first vector seen or is wider than the recorded type.
+bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
+                                            uint64_t Offset) {
+  // Remember if we saw a vector type.
+  HadAVector = true;
+
+  // TODO: Support nonzero offsets?
+  if (Offset != 0)
+    return false;
+
+  // Only allow vectors that are a power-of-2 away from the size of the alloca.
+  // A vector narrower than one byte has a byte size of zero and must be
+  // rejected before dividing.  The alloca size must also be an exact multiple
+  // of the vector size, otherwise the truncating division could report a
+  // power-of-2 ratio for sizes that are not actually related that way.
+  unsigned InSizeInBytes = VInTy->getBitWidth() / 8;
+  if (InSizeInBytes == 0 || AllocaSize % InSizeInBytes != 0 ||
+      !isPowerOf2_64(AllocaSize / InSizeInBytes))
+    return false;
+
+  // If this is the first vector we see, remember the type so that we know the
+  // element size.
+  if (!VectorTy) {
+    VectorTy = VInTy;
+    return true;
+  }
+
+  unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+  unsigned InBitWidth = VInTy->getBitWidth();
+
+  // Vectors of the same size can be converted using a simple bitcast.
+  if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8))
+    return true;
+
+  const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType();
+  const Type *InElementTy = cast<VectorType>(VInTy)->getElementType();
+
+  // Do not allow mixed integer and floating-point accesses from vectors of
+  // different sizes.
+  if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
+    return false;
+
+  if (ElementTy->isFloatingPointTy()) {
+    // Only allow floating-point vectors of different sizes if they have the
+    // same element type.
+    // TODO: This could be loosened a bit, but would anything benefit?
+    if (ElementTy != InElementTy)
+      return false;
+
+    // There are no arbitrary-precision floating-point types, which limits the
+    // number of legal vector types with larger element types that we can form
+    // to bitcast and extract a subvector.
+    // TODO: We could support some more cases with mixed fp128 and double here.
+    if (!(BitWidth == 64 || BitWidth == 128) ||
+        !(InBitWidth == 64 || InBitWidth == 128))
+      return false;
+  } else {
+    assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
+           "or floating-point.");
+    // Element widths get their own names so they are not confused with the
+    // whole-vector widths computed above.
+    unsigned EltBitWidth = ElementTy->getPrimitiveSizeInBits();
+    unsigned InEltBitWidth = InElementTy->getPrimitiveSizeInBits();
+
+    // Do not allow integer types smaller than a byte or types whose widths are
+    // not a multiple of a byte.
+    if (EltBitWidth < 8 || InEltBitWidth < 8 ||
+        EltBitWidth % 8 != 0 || InEltBitWidth % 8 != 0)
+      return false;
+  }
+
+  // Pick the largest of the two vector types.
+  if (InBitWidth > BitWidth)
+    VectorTy = VInTy;
+
+  return true;
+}
+
/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
/// its accesses to a single vector type, return true and set VecTy to
/// the new type. If we could convert the alloca into a single promotable
// Don't touch MMX operations.
if (LI->getType()->isX86_MMXTy())
return false;
- MergeInType(LI->getType(), Offset);
+ HadNonMemTransferAccess = true;
+ MergeInType(LI->getType(), Offset, true);
continue;
}
// Don't touch MMX operations.
if (SI->getOperand(0)->getType()->isX86_MMXTy())
return false;
- MergeInType(SI->getOperand(0)->getType(), Offset);
+ HadNonMemTransferAccess = true;
+ MergeInType(SI->getOperand(0)->getType(), Offset, true);
continue;
}
if (!CanConvertToScalar(GEP, Offset+GEPOffset))
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
continue;
}
!isa<ConstantInt>(MSI->getLength()))
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
continue;
}
// If the source and destination are both to the same alloca, then this is
// a noop copy-to-self, just delete it. Otherwise, emit a load and store
// as appropriate.
- AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, 0));
+ AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));
- if (GetUnderlyingObject(MTI->getSource(), 0) != OrigAI) {
+ if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
// Dest must be OrigAI, change this to be a load from the original
// pointer (bitcasted), then a store to our new alloca.
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
SrcVal->setAlignment(MTI->getAlignment());
Builder.CreateStore(SrcVal, NewAI);
- } else if (GetUnderlyingObject(MTI->getDest(), 0) != OrigAI) {
+ } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
// Src must be OrigAI, change this to be a load from NewAI then a store
// through the original dest pointer (bitcasted).
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
}
}
+/// getScaledElementType - Choose the element type used when a partial vector
+/// access of an alloca is rescaled to NewBitWidth bits.  OldTy must be an
+/// integer or float type.  Integer inputs yield an integer of exactly
+/// NewBitWidth bits; float inputs yield float for 32 bits and double for 64
+/// bits.  Any other request is a programming error.
+static const Type *getScaledElementType(const Type *OldTy,
+                                        unsigned NewBitWidth) {
+  assert((OldTy->isIntegerTy() || OldTy->isFloatTy()) && "Partial vector "
+         "accesses must be scaled from integer or float elements.");
+
+  LLVMContext &Ctx = OldTy->getContext();
+
+  // Integers scale to an integer of the requested width.
+  if (OldTy->isIntegerTy())
+    return Type::getIntNTy(Ctx, NewBitWidth);
+
+  // A float element can only be rescaled to float or double.
+  switch (NewBitWidth) {
+  case 32:
+    return Type::getFloatTy(Ctx);
+  case 64:
+    return Type::getDoubleTy(Ctx);
+  default:
+    break;
+  }
+
+  llvm_unreachable("Invalid type for a partial vector access of an alloca!");
+}
+
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset. This returns the value, which is of type ToType.
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
- if (ToType->isVectorTy())
+ unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+ if (ToTypeSize == AllocaSize)
return Builder.CreateBitCast(FromVal, ToType, "tmp");
+ if (ToType->isVectorTy()) {
+ assert(isPowerOf2_64(AllocaSize / ToTypeSize) &&
+ "Partial vector access of an alloca must have a power-of-2 size "
+ "ratio.");
+ assert(Offset == 0 && "Can't extract a value of a smaller vector type "
+ "from a nonzero offset.");
+
+ const Type *ToElementTy = cast<VectorType>(ToType)->getElementType();
+ const Type *CastElementTy = getScaledElementType(ToElementTy,
+ ToTypeSize * 8);
+ unsigned NumCastVectorElements = AllocaSize / ToTypeSize;
+
+ LLVMContext &Context = FromVal->getContext();
+ const Type *CastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
+ Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
+ Type::getInt32Ty(Context), 0), "tmp");
+ return Builder.CreateBitCast(Extract, ToType, "tmp");
+ }
+
// Otherwise it must be an element access.
unsigned Elt = 0;
if (Offset) {
if (ValSize == VecSize)
return Builder.CreateBitCast(SV, AllocaType, "tmp");
+ if (SV->getType()->isVectorTy() && isPowerOf2_64(VecSize / ValSize)) {
+ assert(Offset == 0 && "Can't insert a value of a smaller vector type at "
+ "a nonzero offset.");
+
+ const Type *ToElementTy =
+ cast<VectorType>(SV->getType())->getElementType();
+ const Type *CastElementTy = getScaledElementType(ToElementTy, ValSize);
+ unsigned NumCastVectorElements = VecSize / ValSize;
+
+ LLVMContext &Context = SV->getContext();
+ const Type *OldCastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
+
+ Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
+ Value *Insert =
+ Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
+ Type::getInt32Ty(Context), 0), "tmp");
+ return Builder.CreateBitCast(Insert, AllocaType, "tmp");
+ }
+
uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
// Must be an element insertion.
};
} // end anon namespace
+/// isSafeSelectToSpeculate - Decide whether every load of a pointer select
+/// can be replaced by a load of each operand followed by a select of the
+/// loaded values, so that the load of the alloca becomes promotable.
+/// From this:
+///   %P2 = select i1 %cond, i32* %Alloca, i32* %Other
+///   %V = load i32* %P2
+/// to:
+///   %V1 = load i32* %Alloca      -> will be mem2reg'd
+///   %V2 = load i32* %Other
+///   %V = select i1 %cond, i32 %V1, i32 %V2
+///
+/// This holds when all users of the select are non-volatile loads and both
+/// select operands can be loaded unconditionally at each of those loads.
+static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
+  Value *TrueVal = SI->getTrueValue();
+  Value *FalseVal = SI->getFalseValue();
+
+  // Operands that are dereferenceable everywhere need no per-load check.
+  const bool TrueAlwaysSafe = TrueVal->isDereferenceablePointer();
+  const bool FalseAlwaysSafe = FalseVal->isDereferenceablePointer();
+
+  Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+  for (; UI != UE; ++UI) {
+    LoadInst *Load = dyn_cast<LoadInst>(*UI);
+    if (!Load || Load->isVolatile())
+      return false;
+
+    // Each operand must be dereferenceable, either absolutely (e.g. an
+    // alloca) or at this load because other accesses to it are visible.
+    const bool TrueOK =
+      TrueAlwaysSafe ||
+      isSafeToLoadUnconditionally(TrueVal, Load, Load->getAlignment(), TD);
+    if (!TrueOK)
+      return false;
+
+    const bool FalseOK =
+      FalseAlwaysSafe ||
+      isSafeToLoadUnconditionally(FalseVal, Load, Load->getAlignment(), TD);
+    if (!FalseOK)
+      return false;
+  }
+
+  return true;
+}
+
+/// isSafePHIToSpeculate - Decide whether every load of a pointer PHI can be
+/// replaced by loads of the incoming pointers in the predecessor blocks plus
+/// a PHI of the loaded values, so that the load of the alloca becomes
+/// promotable.
+/// From this:
+///   %P2 = phi [i32* %Alloca, i32* %Other]
+///   %V = load i32* %P2
+/// to:
+///   %V1 = load i32* %Alloca      -> will be mem2reg'd
+///   ...
+///   %V2 = load i32* %Other
+///   ...
+///   %V = phi [i32 %V1, i32 %V2]
+///
+/// This holds when all users of the PHI are non-volatile loads in the PHI's
+/// own block with nothing between the PHI and the load that may write memory,
+/// and when each incoming pointer can be loaded at the end of its predecessor.
+static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
+  // For now, we can only do this promotion if the load is in the same block as
+  // the PHI, and if there are no stores between the phi and load.
+  // TODO: Allow recursive phi users.
+  // TODO: Allow stores.
+  BasicBlock *PHIBlock = PN->getParent();
+  unsigned MaxAlign = 0;
+
+  Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+  for (; UI != UE; ++UI) {
+    LoadInst *Load = dyn_cast<LoadInst>(*UI);
+    if (!Load || Load->isVolatile())
+      return false;
+
+    // Only loads in the PHI's block are handled.  This is the common case
+    // that arises when instcombine merges two loads through a PHI.
+    if (Load->getParent() != PHIBlock)
+      return false;
+
+    // Walk from the PHI down to the load and give up if anything on the way
+    // could store.
+    BasicBlock::iterator Scan = PN;
+    while (&*Scan != Load) {
+      if (Scan->mayWriteToMemory())
+        return false;
+      ++Scan;
+    }
+
+    // Track the largest alignment requested by any of the loads.
+    if (Load->getAlignment() > MaxAlign)
+      MaxAlign = Load->getAlignment();
+  }
+
+  // All users are loads in the PHI's block.  The transform is legal if the
+  // loads can be pushed into the predecessors.  The thing to watch out for is
+  // placing a possibly trapping load in a predecessor across a critical edge.
+  const unsigned NumIncoming = PN->getNumIncomingValues();
+  for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
+    BasicBlock *Pred = PN->getIncomingBlock(Idx);
+
+    // A predecessor with a single successor means the edge isn't critical.
+    if (Pred->getTerminator()->getNumSuccessors() == 1)
+      continue;
+
+    Value *Incoming = PN->getIncomingValue(Idx);
+
+    // If the incoming value is an invoke in the pred, we can't put a load on
+    // the edge.
+    InvokeInst *Invoke = dyn_cast<InvokeInst>(Incoming);
+    if (Invoke && Invoke->getParent() == Pred)
+      return false;
+
+    // The load may be moved into the pred only if the pointer is always safe
+    // to load, or a load at the pred's terminator is provably safe.
+    const bool CanLoadInPred =
+      Incoming->isDereferenceablePointer() ||
+      isSafeToLoadUnconditionally(Incoming, Pred->getTerminator(), MaxAlign,
+                                  TD);
+    if (!CanLoadInPred)
+      return false;
+  }
+
+  return true;
+}
+
+
+/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
+/// direct (non-volatile) loads and stores to it.  If the alloca is close but
+/// not quite there, this will transform the code to allow promotion.  As such,
+/// it is a non-pure predicate.
+///
+/// Besides plain loads and stores, two kinds of users are accepted and
+/// rewritten: selects accepted by isSafeSelectToSpeculate and PHI nodes
+/// accepted by isSafePHIToSpeculate (dead PHIs are simply erased).  Selects
+/// with a constant condition are folded as soon as they are seen, even if the
+/// function later returns false.  NumAdjusted is bumped whenever at least one
+/// instruction had to be rewritten.
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
+  // Selects and PHIs to rewrite.  A set, because the same user can appear
+  // more than once in the alloca's use list.
+  SetVector<Instruction*, SmallVector<Instruction*, 4>,
+            SmallPtrSet<Instruction*, 4> > InstsToRewrite;
+
+  for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+       UI != UE; ++UI) {
+    User *U = *UI;
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      if (LI->isVolatile())
+        return false;
+      continue;
+    }
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      if (SI->getOperand(0) == AI || SI->isVolatile())
+        return false;   // Don't allow a store OF the AI, only INTO the AI.
+      continue;
+    }
+
+    if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
+      // If the condition being selected on is a constant, fold the select, yes
+      // this does (rarely) happen early on.
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
+        // Operand 1 is the true value, operand 2 the false value.
+        Value *Result = SI->getOperand(1+CI->isZero());
+        SI->replaceAllUsesWith(Result);
+        SI->eraseFromParent();
+
+        // This is very rare and we just scrambled the use list of AI, start
+        // over completely.
+        return tryToMakeAllocaBePromotable(AI, TD);
+      }
+
+      // If it is safe to turn "load (select c, AI, ptr)" into a select of two
+      // loads, then we can transform this by rewriting the select.
+      if (!isSafeSelectToSpeculate(SI, TD))
+        return false;
+
+      InstsToRewrite.insert(SI);
+      continue;
+    }
+
+    if (PHINode *PN = dyn_cast<PHINode>(U)) {
+      if (PN->use_empty()) {  // Dead PHIs can be stripped.
+        InstsToRewrite.insert(PN);
+        continue;
+      }
+
+      // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
+      // in the pred blocks, then we can transform this by rewriting the PHI.
+      if (!isSafePHIToSpeculate(PN, TD))
+        return false;
+
+      InstsToRewrite.insert(PN);
+      continue;
+    }
+
+    // Any other kind of user prevents promotion.
+    return false;
+  }
+
+  // If there are no instructions to rewrite, then all uses are load/stores and
+  // we're done!
+  if (InstsToRewrite.empty())
+    return true;
+
+  // If we have instructions that need to be rewritten for this to be promotable
+  // take care of it now.
+  for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
+    if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
+      // Selects in InstsToRewrite only have load uses.  Rewrite each as two
+      // loads with a new select.
+      while (!SI->use_empty()) {
+        LoadInst *LI = cast<LoadInst>(SI->use_back());
+
+        IRBuilder<> Builder(LI);
+        LoadInst *TrueLoad =
+          Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
+        // Use a distinct ".f" suffix so the false-side load is not named like
+        // the true-side one.
+        LoadInst *FalseLoad =
+          Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
+
+        // Transfer alignment and TBAA info if present.
+        TrueLoad->setAlignment(LI->getAlignment());
+        FalseLoad->setAlignment(LI->getAlignment());
+        if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
+          TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+          FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+        }
+
+        Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
+        V->takeName(LI);
+        LI->replaceAllUsesWith(V);
+        LI->eraseFromParent();
+      }
+
+      // Now that all the loads are gone, the select is gone too.
+      SI->eraseFromParent();
+      continue;
+    }
+
+    // Otherwise, we have a PHI node which allows us to push the loads into the
+    // predecessors.
+    PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
+    if (PN->use_empty()) {
+      PN->eraseFromParent();
+      continue;
+    }
+
+    const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
+    PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
+                                     PN->getName()+".ld", PN);
+
+    // Get the TBAA tag and alignment to use from one of the loads.  It doesn't
+    // matter which one we get and if any differ, it doesn't matter.
+    LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
+    MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+    unsigned Align = SomeLoad->getAlignment();
+
+    // Rewrite all loads of the PN to use the new PHI.
+    while (!PN->use_empty()) {
+      LoadInst *LI = cast<LoadInst>(PN->use_back());
+      LI->replaceAllUsesWith(NewPN);
+      LI->eraseFromParent();
+    }
+
+    // Inject loads into all of the pred blocks.  Keep track of which blocks we
+    // insert them into in case we have multiple edges from the same block.
+    DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
+
+    // The index is named differently from the outer loop's so it does not
+    // shadow it.
+    for (unsigned Idx = 0, NumIncoming = PN->getNumIncomingValues();
+         Idx != NumIncoming; ++Idx) {
+      BasicBlock *Pred = PN->getIncomingBlock(Idx);
+      LoadInst *&Load = InsertedLoads[Pred];
+      if (Load == 0) {
+        Load = new LoadInst(PN->getIncomingValue(Idx),
+                            PN->getName() + "." + Pred->getName(),
+                            Pred->getTerminator());
+        Load->setAlignment(Align);
+        if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+      }
+
+      NewPN->addIncoming(Load, Pred);
+    }
+
+    PN->eraseFromParent();
+  }
+
+  ++NumAdjusted;
+  return true;
+}
+
+
bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
DominatorTree *DT = 0;
// the entry node
for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
- if (isAllocaPromotable(AI))
+ if (tryToMakeAllocaBePromotable(AI, TD))
Allocas.push_back(AI);
if (Allocas.empty()) break;
if (Length == 0)
return MarkUnsafe(Info, User);
isSafeMemAccess(Offset, Length->getZExtValue(), 0,
- UI.getOperandNo() == 0, Info, MI);
+ UI.getOperandNo() == 0, Info, MI,
+ true /*AllowWholeAccess*/);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
if (LI->isVolatile())
return MarkUnsafe(Info, User);
const Type *LIType = LI->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
- LIType, false, Info, LI);
+ LIType, false, Info, LI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
const Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
- SIType, true, Info, SI);
+ SIType, true, Info, SI, true /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
+ } else {
+ return MarkUnsafe(Info, User);
+ }
+ if (Info.isUnsafe) return;
+ }
+}
+
+
+/// isSafePHISelectUseForScalarRepl - If we see a PHI node or select using a
+/// pointer derived from the alloca, we can often still split the alloca into
+/// elements.
+/// This is useful if we have a large alloca where one element is phi'd
+/// together somewhere: we can SRoA and promote all the other elements even if
+/// we end up not being able to promote this one.
+///
+/// All we require is that the uses of the PHI do not index into other parts of
+/// the alloca. The most important use case for this is single load and stores
+/// that are PHI'd together, which can happen due to code sinking.
+void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
+ AllocaInfo &Info) {
+ // If we've already checked this PHI, don't do it again.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ if (!Info.CheckedPHIs.insert(PN))
+ return;
+
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ isSafePHISelectUseForScalarRepl(BC, Offset, Info);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ // Only allow "bitcast" GEPs for simplicity. We could generalize this,
+ // but would have to prove that we're staying inside of an element being
+ // promoted.
+ if (!GEPI->hasAllZeroIndices())
+ return MarkUnsafe(Info, User);
+ isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (LI->isVolatile())
+ return MarkUnsafe(Info, User);
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info, LI, false /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Store is ok if storing INTO the pointer, not storing the pointer
+ if (SI->isVolatile() || SI->getOperand(0) == I)
+ return MarkUnsafe(Info, User);
+
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info, SI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
} else {
return MarkUnsafe(Info, User);
}
/// alloca or has an offset and size that corresponds to a component element
/// within it. The offset checked here may have been formed from a GEP with a
/// pointer bitcasted to a different type.
+///
+/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
+/// unit. If false, it only allows accesses known to be in a single element.
void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
const Type *MemOpType, bool isStore,
- AllocaInfo &Info, Instruction *TheAccess) {
+ AllocaInfo &Info, Instruction *TheAccess,
+ bool AllowWholeAccess) {
// Check if this is a load/store of the entire alloca.
- if (Offset == 0 &&
+ if (Offset == 0 && AllowWholeAccess &&
MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) {
// This can be safe for MemIntrinsics (where MemOpType is 0) and integer
// loads/stores (which are essentially the same as the MemIntrinsics with
/// instruction.
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts) {
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI++);
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
RewriteBitCast(BC, AI, Offset, NewElts);
- } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ continue;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
RewriteGEP(GEPI, AI, Offset, NewElts);
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ continue;
+ }
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
uint64_t MemSize = Length->getZExtValue();
if (Offset == 0 &&
RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
// Otherwise the intrinsic can only touch a single element and the
// address operand will be updated, so nothing else needs to be done.
- } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ continue;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
const Type *LIType = LI->getType();
if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
// If this is a load of the entire alloca to an integer, rewrite it.
RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
}
- } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
Value *Val = SI->getOperand(0);
const Type *SIType = Val->getType();
if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
// If this is a store of the entire alloca from an integer, rewrite it.
RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
}
+ continue;
+ }
+
+ if (isa<SelectInst>(User) || isa<PHINode>(User)) {
+ // If we have a PHI user of the alloca itself (as opposed to a GEP or
+ // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to
+ // the new pointer.
+ if (!isa<AllocaInst>(I)) continue;
+
+ assert(Offset == 0 && NewElts[0] &&
+ "Direct alloca use should have a zero offset");
+
+ // If we have a use of the alloca, we know the derived uses will be
+ // utilizing just the first element of the scalarized result. Insert a
+ // bitcast of the first alloca before the user as required.
+ AllocaInst *NewAI = NewElts[0];
+ BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI);
+ NewAI->moveBefore(BCI);
+ TheUse = BCI;
+ continue;
}
}
}
if (EltTy != ValTy) {
unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
- StoreVal = ConstantVector::get(&Elts[0], NumElts);
+ StoreVal = ConstantVector::get(Elts);
}
}
new StoreInst(StoreVal, EltPtr, MI);
return false;
}
}
+
return true;
}