#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
- struct VISIBILITY_HIDDEN SROA : public FunctionPass {
+ struct SROA : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
explicit SROA(signed T = -1) : FunctionPass(&ID) {
if (T == -1)
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
AU.addRequired<DominanceFrontier>();
- AU.addRequired<TargetData>();
AU.setPreservesCFG();
}
private:
TargetData *TD;
+ /// DeadInsts - Keep track of instructions we have made dead, so that
+ /// we can remove them after we are done working.
+ SmallVector<Value*, 32> DeadInsts;
+
/// AllocaInfo - When analyzing uses of an alloca instruction, this captures
/// information about the uses. All these fields are initialized to false
/// and set to true when something is learned.
/// isUnsafe - This is set to true if the alloca cannot be SROA'd.
bool isUnsafe : 1;
- /// needsCleanup - This is set to true if there is some use of the alloca
- /// that requires cleanup.
- bool needsCleanup : 1;
-
/// isMemCpySrc - This is true if this aggregate is memcpy'd from.
bool isMemCpySrc : 1;
bool isMemCpyDst : 1;
AllocaInfo()
- : isUnsafe(false), needsCleanup(false),
- isMemCpySrc(false), isMemCpyDst(false) {}
+ : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {}
};
unsigned SRThreshold;
void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
- int isSafeAllocaToScalarRepl(AllocationInst *AI);
-
- void isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
- AllocaInfo &Info);
- void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
- AllocaInfo &Info);
- void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
- unsigned OpNo, AllocaInfo &Info);
- void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocationInst *AI,
- AllocaInfo &Info);
+ bool isSafeAllocaToScalarRepl(AllocaInst *AI);
+
+ void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ AllocaInfo &Info);
+ void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
+ AllocaInfo &Info);
+ void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
+ const Type *MemOpType, bool isStore, AllocaInfo &Info);
+ bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
+ uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
+ const Type *&IdxTy);
- void DoScalarReplacement(AllocationInst *AI,
- std::vector<AllocationInst*> &WorkList);
- void CleanupGEP(GetElementPtrInst *GEP);
- void CleanupAllocaUsers(AllocationInst *AI);
- AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base);
+ void DoScalarReplacement(AllocaInst *AI,
+ std::vector<AllocaInst*> &WorkList);
+ void DeleteDeadInstructions();
+ AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);
- void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
- SmallVector<AllocaInst*, 32> &NewElts);
-
- void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
- AllocationInst *AI,
+ void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+ AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
- void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI,
+ void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
- void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
+ void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
uint64_t Offset, IRBuilder<> &Builder);
Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
uint64_t Offset, IRBuilder<> &Builder);
- static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI);
+ static Instruction *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
};
}
bool SROA::runOnFunction(Function &F) {
- TD = &getAnalysis<TargetData>();
-
+ TD = getAnalysisIfAvailable<TargetData>();
+
bool Changed = performPromotion(F);
+
+ // FIXME: ScalarRepl currently depends on TargetData more than it
+ // theoretically needs to. It should be refactored in order to support
+ // target-independent IR. Until this is done, just skip the actual
+ // scalar-replacement portion of this pass.
+ if (!TD) return Changed;
+
while (1) {
bool LocalChange = performScalarRepl(F);
if (!LocalChange) break; // No need to repromote if no scalarrepl
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, DF, F.getContext());
+ PromoteMemToReg(Allocas, DT, DF);
NumPromoted += Allocas.size();
Changed = true;
}
return Changed;
}
-/// getNumSAElements - Return the number of elements in the specific struct or
-/// array.
-static uint64_t getNumSAElements(const Type *T) {
+/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
+/// SROA. It must be a struct or array type with a small number of elements.
+static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
+ const Type *T = AI->getAllocatedType();
+ // Do not promote any struct into more than 32 separate vars.
if (const StructType *ST = dyn_cast<StructType>(T))
- return ST->getNumElements();
- return cast<ArrayType>(T)->getNumElements();
+ return ST->getNumElements() <= 32;
+ // Arrays are much less likely to be safe for SROA; only consider
+ // them if they are very small.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(T))
+ return AT->getNumElements() <= 8;
+ return false;
}
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// them if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
- std::vector<AllocationInst*> WorkList;
+ std::vector<AllocaInst*> WorkList;
// Scan the entry basic block, adding any alloca's and mallocs to the worklist
BasicBlock &BB = F.getEntryBlock();
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
- if (AllocationInst *A = dyn_cast<AllocationInst>(I))
+ if (AllocaInst *A = dyn_cast<AllocaInst>(I))
WorkList.push_back(A);
// Process the worklist
bool Changed = false;
while (!WorkList.empty()) {
- AllocationInst *AI = WorkList.back();
+ AllocaInst *AI = WorkList.back();
WorkList.pop_back();
// Handle dead allocas trivially. These can be formed by SROA'ing arrays
// constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
// is only subsequently read.
if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
- DOUT << "Found alloca equal to global: " << *AI;
- DOUT << " memcpy = " << *TheCopy;
- Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
+ DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n');
+ Constant *TheSrc = cast<Constant>(TheCopy->getOperand(1));
AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
TheCopy->eraseFromParent(); // Don't mutate the global.
AI->eraseFromParent();
// Do not promote [0 x %struct].
if (AllocaSize == 0) continue;
+ // If the alloca looks like a good candidate for scalar replacement, and if
+ // all its users can be transformed, then split up the aggregate into its
+ // separate elements.
+ if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
+ DoScalarReplacement(AI, WorkList);
+ Changed = true;
+ continue;
+ }
+
// Do not promote any struct whose size is too big.
if (AllocaSize > SRThreshold) continue;
- if ((isa<StructType>(AI->getAllocatedType()) ||
- isa<ArrayType>(AI->getAllocatedType())) &&
- // Do not promote any struct into more than "32" separate vars.
- getNumSAElements(AI->getAllocatedType()) <= SRThreshold/4) {
- // Check that all of the users of the allocation are capable of being
- // transformed.
- switch (isSafeAllocaToScalarRepl(AI)) {
- default: llvm_unreachable("Unexpected value!");
- case 0: // Not safe to scalar replace.
- break;
- case 1: // Safe, but requires cleanup/canonicalizations first
- CleanupAllocaUsers(AI);
- // FALL THROUGH.
- case 3: // Safe to scalar replace.
- DoScalarReplacement(AI, WorkList);
- Changed = true;
- continue;
- }
- }
-
// If we can turn this aggregate value (potentially with casts) into a
// simple scalar value that can be mem2reg'd into a register value.
// IsNotTrivial tracks whether this is something that mem2reg could have
// random stuff that doesn't use vectors (e.g. <9 x double>) because then
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
- if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
- DOUT << "CONVERT TO VECTOR: " << *AI << " TYPE = " << *VectorTy <<"\n";
+ if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
+ DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
+ << *VectorTy << '\n');
// Create and insert the vector alloca.
NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
ConvertUsesToScalar(AI, NewAI, 0);
} else {
- DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n";
+ DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
/// predicate, do SROA now.
-void SROA::DoScalarReplacement(AllocationInst *AI,
- std::vector<AllocationInst*> &WorkList) {
- DOUT << "Found inst to SROA: " << *AI;
+void SROA::DoScalarReplacement(AllocaInst *AI,
+ std::vector<AllocaInst*> &WorkList) {
+ DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
SmallVector<AllocaInst*, 32> ElementAllocas;
if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
ElementAllocas.reserve(ST->getNumContainedTypes());
}
}
- // Now that we have created the alloca instructions that we want to use,
- // expand the getelementptr instructions to use them.
- //
- while (!AI->use_empty()) {
- Instruction *User = cast<Instruction>(AI->use_back());
- if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) {
- RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas);
- BCInst->eraseFromParent();
- continue;
- }
-
- // Replace:
- // %res = load { i32, i32 }* %alloc
- // with:
- // %load.0 = load i32* %alloc.0
- // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
- // %load.1 = load i32* %alloc.1
- // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
- // (Also works for arrays instead of structs)
- if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- Value *Insert = UndefValue::get(LI->getType());
- for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
- Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
- Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
- }
- LI->replaceAllUsesWith(Insert);
- LI->eraseFromParent();
- continue;
- }
+ // Now that we have created the new alloca instructions, rewrite all the
+ // uses of the old alloca.
+ RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
- // Replace:
- // store { i32, i32 } %val, { i32, i32 }* %alloc
- // with:
- // %val.0 = extractvalue { i32, i32 } %val, 0
- // store i32 %val.0, i32* %alloc.0
- // %val.1 = extractvalue { i32, i32 } %val, 1
- // store i32 %val.1, i32* %alloc.1
- // (Also works for arrays instead of structs)
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- Value *Val = SI->getOperand(0);
- for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
- Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
- new StoreInst(Extract, ElementAllocas[i], SI);
- }
- SI->eraseFromParent();
- continue;
- }
-
- GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
- // We now know that the GEP is of the form: GEP <ptr>, 0, <cst>
- unsigned Idx =
- (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
-
- assert(Idx < ElementAllocas.size() && "Index out of range?");
- AllocaInst *AllocaToUse = ElementAllocas[Idx];
-
- Value *RepValue;
- if (GEPI->getNumOperands() == 3) {
- // Do not insert a new getelementptr instruction with zero indices, only
- // to have it optimized out later.
- RepValue = AllocaToUse;
- } else {
- // We are indexing deeply into the structure, so we still need a
- // getelement ptr instruction to finish the indexing. This may be
- // expanded itself once the worklist is rerun.
- //
- SmallVector<Value*, 8> NewArgs;
- NewArgs.push_back(Constant::getNullValue(
- Type::getInt32Ty(AI->getContext())));
- NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
- RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),
- NewArgs.end(), "", GEPI);
- RepValue->takeName(GEPI);
- }
-
- // If this GEP is to the start of the aggregate, check for memcpys.
- if (Idx == 0 && GEPI->hasAllZeroIndices())
- RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas);
-
- // Move all of the users over to the new GEP.
- GEPI->replaceAllUsesWith(RepValue);
- // Delete the old GEP
- GEPI->eraseFromParent();
- }
-
- // Finally, delete the Alloca instruction
+ // Now erase any instructions that were made dead while rewriting the alloca.
+ DeleteDeadInstructions();
AI->eraseFromParent();
+
NumReplaced++;
}
-
-/// isSafeElementUse - Check to see if this use is an allowed use for a
-/// getelementptr instruction of an array aggregate allocation. isFirstElt
-/// indicates whether Ptr is known to the start of the aggregate.
-///
-void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
- AllocaInfo &Info) {
- for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
- I != E; ++I) {
- Instruction *User = cast<Instruction>(*I);
- switch (User->getOpcode()) {
- case Instruction::Load: break;
- case Instruction::Store:
- // Store is ok if storing INTO the pointer, not storing the pointer
- if (User->getOperand(0) == Ptr) return MarkUnsafe(Info);
- break;
- case Instruction::GetElementPtr: {
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(User);
- bool AreAllZeroIndices = isFirstElt;
- if (GEP->getNumOperands() > 1) {
- if (!isa<ConstantInt>(GEP->getOperand(1)) ||
- !cast<ConstantInt>(GEP->getOperand(1))->isZero())
- // Using pointer arithmetic to navigate the array.
- return MarkUnsafe(Info);
-
- if (AreAllZeroIndices)
- AreAllZeroIndices = GEP->hasAllZeroIndices();
- }
- isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
- if (Info.isUnsafe) return;
- break;
- }
- case Instruction::BitCast:
- if (isFirstElt) {
- isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info);
- if (Info.isUnsafe) return;
- break;
- }
- DOUT << " Transformation preventing inst: " << *User;
- return MarkUnsafe(Info);
- case Instruction::Call:
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
- if (isFirstElt) {
- isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info);
- if (Info.isUnsafe) return;
- break;
- }
+/// DeleteDeadInstructions - Erase instructions on the DeadInsts list,
+/// recursively including all their operands that become trivially dead.
+void SROA::DeleteDeadInstructions() {
+ while (!DeadInsts.empty()) {
+ Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
+
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ // Zero out the operand and see if it becomes trivially dead.
+ // (But, don't add allocas to the dead instruction list -- they are
+ // already on the worklist and will be deleted separately.)
+ *OI = 0;
+ if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
+ DeadInsts.push_back(U);
}
- DOUT << " Transformation preventing inst: " << *User;
- return MarkUnsafe(Info);
- default:
- DOUT << " Transformation preventing inst: " << *User;
- return MarkUnsafe(Info);
- }
- }
- return; // All users look ok :)
-}
-/// AllUsersAreLoads - Return true if all users of this value are loads.
-static bool AllUsersAreLoads(Value *Ptr) {
- for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
- I != E; ++I)
- if (cast<Instruction>(*I)->getOpcode() != Instruction::Load)
- return false;
- return true;
+ I->eraseFromParent();
+ }
}
+
+/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
+/// performing scalar replacement of alloca AI. The results are flagged in
+/// the Info parameter: Info.isUnsafe is set as soon as one user of I cannot
+/// be handled, and the walk over the remaining users stops at that point.
+/// Offset indicates the position within AI that is referenced by this
+/// instruction.
+void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ AllocaInfo &Info) {
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
-/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an
-/// aggregate allocation.
-///
-void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
- AllocaInfo &Info) {
- if (BitCastInst *C = dyn_cast<BitCastInst>(User))
- return isSafeUseOfBitCastedAllocation(C, AI, Info);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(User))
- if (!LI->isVolatile())
- return;// Loads (returning a first class aggregrate) are always rewritable
-
- if (StoreInst *SI = dyn_cast<StoreInst>(User))
- if (!SI->isVolatile() && SI->getOperand(0) != AI)
- return;// Store is ok if storing INTO the pointer, not storing the pointer
-
- GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User);
- if (GEPI == 0)
- return MarkUnsafe(Info);
-
- gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI);
-
- // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".
- if (I == E ||
- I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) {
- return MarkUnsafe(Info);
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ // The users of the bitcast are checked against the same Offset.
+ isSafeForScalarRepl(BC, AI, Offset, Info);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ // isSafeGEP advances GEPOffset by the GEP's own offset; work on a copy so
+ // that the other users of I are still checked against the original Offset.
+ uint64_t GEPOffset = Offset;
+ isSafeGEP(GEPI, AI, GEPOffset, Info);
+ if (!Info.isUnsafe)
+ isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ // Only constant-length memory intrinsics can be checked. I being operand
+ // 0 of the intrinsic is what is reported to isSafeMemAccess as a store.
+ ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+ if (Length)
+ isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
+ UI.getOperandNo() == 0, Info);
+ else
+ MarkUnsafe(Info);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (!LI->isVolatile()) {
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info);
+ } else
+ MarkUnsafe(Info);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Store is ok if storing INTO the pointer, not storing the pointer
+ if (!SI->isVolatile() && SI->getOperand(0) != I) {
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info);
+ } else
+ MarkUnsafe(Info);
+ } else {
+ // Use dbgs() like the other DEBUG output in this file.
+ DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
+ MarkUnsafe(Info);
+ }
+ if (Info.isUnsafe) return;
}
+}
- ++I;
- if (I == E) return MarkUnsafe(Info); // ran out of GEP indices??
+/// isSafeGEP - Check if a GEP instruction can be handled for scalar
+/// replacement. It is safe when all the indices are constant, in-bounds
+/// references, and when the resulting offset corresponds to an element within
+/// the alloca type. The results are flagged in the Info parameter. Upon
+/// return, Offset is adjusted as specified by the GEP indices.
+void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
+ uint64_t &Offset, AllocaInfo &Info) {
+ gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
+ if (GEPIt == E)
+ return;
- bool IsAllZeroIndices = true;
-
- // If the first index is a non-constant index into an array, see if we can
- // handle it as a special case.
- if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
- if (!isa<ConstantInt>(I.getOperand())) {
- IsAllZeroIndices = 0;
- uint64_t NumElements = AT->getNumElements();
-
- // If this is an array index and the index is not constant, we cannot
- // promote... that is unless the array has exactly one or two elements in
- // it, in which case we CAN promote it, but we have to canonicalize this
- // out if this is the only problem.
- if ((NumElements == 1 || NumElements == 2) &&
- AllUsersAreLoads(GEPI)) {
- Info.needsCleanup = true;
- return; // Canonicalization required!
- }
- return MarkUnsafe(Info);
- }
- }
-
// Walk through the GEP type indices, checking the types that this indexes
// into.
- for (; I != E; ++I) {
+ for (; GEPIt != E; ++GEPIt) {
// Ignore struct elements, no extra checking needed for these.
- if (isa<StructType>(*I))
+ if ((*GEPIt)->isStructTy())
continue;
-
- ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
- if (!IdxVal) return MarkUnsafe(Info);
- // Are all indices still zero?
- IsAllZeroIndices &= IdxVal->isZero();
-
- if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
- // This GEP indexes an array. Verify that this is an in-range constant
- // integer. Specifically, consider A[0][i]. We cannot know that the user
- // isn't doing invalid things like allowing i to index an out-of-range
- // subscript that accesses A[1]. Because of this, we have to reject SROA
- // of any accesses into structs where any of the components are variables.
- if (IdxVal->getZExtValue() >= AT->getNumElements())
- return MarkUnsafe(Info);
- } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) {
- if (IdxVal->getZExtValue() >= VT->getNumElements())
- return MarkUnsafe(Info);
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
+ if (!IdxVal)
+ return MarkUnsafe(Info);
+ }
+
+ // Compute the offset due to this GEP and check if the alloca has a
+ // component element at that offset.
+ SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0))
+ MarkUnsafe(Info);
+}
+
+/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
+/// alloca or has an offset and size that corresponds to a component element
+/// within it. The offset checked here may have been formed from a GEP with a
+/// pointer bitcasted to a different type.
+void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
+ const Type *MemOpType, bool isStore,
+ AllocaInfo &Info) {
+ // Check if this is a load/store of the entire alloca.
+ if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) {
+ bool UsesAggregateType = (MemOpType == AI->getAllocatedType());
+ // This is safe for MemIntrinsics (where MemOpType is 0), integer types
+ // (which are essentially the same as the MemIntrinsics, especially with
+ // regard to copying padding between elements), or references using the
+ // aggregate type of the alloca.
+ if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) {
+ if (!UsesAggregateType) {
+ if (isStore)
+ Info.isMemCpyDst = true;
+ else
+ Info.isMemCpySrc = true;
+ }
+ return;
}
}
-
- // If there are any non-simple uses of this getelementptr, make sure to reject
- // them.
- return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info);
+ // Check if the offset/size correspond to a component within the alloca type.
+ const Type *T = AI->getAllocatedType();
+ if (TypeHasComponent(T, Offset, MemSize))
+ return;
+
+ return MarkUnsafe(Info);
}
-/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory
-/// intrinsic can be promoted by SROA. At this point, we know that the operand
-/// of the memintrinsic is a pointer to the beginning of the allocation.
-void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
- unsigned OpNo, AllocaInfo &Info) {
- // If not constant length, give up.
- ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
- if (!Length) return MarkUnsafe(Info);
-
- // If not the whole aggregate, give up.
- if (Length->getZExtValue() !=
- TD->getTypeAllocSize(AI->getType()->getElementType()))
- return MarkUnsafe(Info);
-
- // We only know about memcpy/memset/memmove.
- if (!isa<MemIntrinsic>(MI))
- return MarkUnsafe(Info);
-
- // Otherwise, we can transform it. Determine whether this is a memcpy/set
- // into or out of the aggregate.
- if (OpNo == 1)
- Info.isMemCpyDst = true;
- else {
- assert(OpNo == 2);
- Info.isMemCpySrc = true;
+/// TypeHasComponent - Return true if T has a component (found by recursing into
+/// struct and array elements) at Offset with alloc size Size; Size 0 = any size.
+bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
+ const Type *EltTy;
+ uint64_t EltSize;
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ const StructLayout *Layout = TD->getStructLayout(ST);
+ unsigned EltIdx = Layout->getElementContainingOffset(Offset);
+ EltTy = ST->getContainedType(EltIdx);
+ EltSize = TD->getTypeAllocSize(EltTy);
+ Offset -= Layout->getElementOffset(EltIdx);
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ EltTy = AT->getElementType();
+ EltSize = TD->getTypeAllocSize(EltTy);
+ if (Offset >= AT->getNumElements() * EltSize)
+ return false;
+ Offset %= EltSize;
+ } else {
+ return false;
}
+ if (Offset == 0 && (Size == 0 || EltSize == Size))
+ return true;
+ // An access that extends past the end of this element spans multiple elements.
+ if (Offset + Size > EltSize)
+ return false;
+ return TypeHasComponent(EltTy, Offset, Size);
}
-/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast
-/// are
-void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI,
- AllocaInfo &Info) {
- for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
- UI != E; ++UI) {
- if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) {
- isSafeUseOfBitCastedAllocation(BCU, AI, Info);
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
- isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
- if (SI->isVolatile())
- return MarkUnsafe(Info);
-
- // If storing the entire alloca in one chunk through a bitcasted pointer
- // to integer, we can transform it. This happens (for example) when you
- // cast a {i32,i32}* to i64* and store through it. This is similar to the
- // memcpy case and occurs in various "byval" cases and emulated memcpys.
- if (isa<IntegerType>(SI->getOperand(0)->getType()) &&
- TD->getTypeAllocSize(SI->getOperand(0)->getType()) ==
- TD->getTypeAllocSize(AI->getType()->getElementType())) {
- Info.isMemCpyDst = true;
- continue;
- }
- return MarkUnsafe(Info);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
- if (LI->isVolatile())
- return MarkUnsafe(Info);
-
- // If loading the entire alloca in one chunk through a bitcasted pointer
- // to integer, we can transform it. This happens (for example) when you
- // cast a {i32,i32}* to i64* and load through it. This is similar to the
- // memcpy case and occurs in various "byval" cases and emulated memcpys.
- if (isa<IntegerType>(LI->getType()) &&
- TD->getTypeAllocSize(LI->getType()) ==
- TD->getTypeAllocSize(AI->getType()->getElementType())) {
- Info.isMemCpySrc = true;
- continue;
+/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
+/// the instruction I, which references it, to use the separate elements.
+/// Offset indicates the position within AI that is referenced by this
+/// instruction. Users that become dead are queued on DeadInsts rather than
+/// erased here.
+void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ RewriteBitCast(BC, AI, Offset, NewElts);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ RewriteGEP(GEPI, AI, Offset, NewElts);
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ // isSafeForScalarRepl rejects non-constant lengths, so the length must be
+ // a ConstantInt here; cast<> asserts that instead of dereferencing a
+ // possibly-null dyn_cast<> result.
+ ConstantInt *Length = cast<ConstantInt>(MI->getLength());
+ uint64_t MemSize = Length->getZExtValue();
+ if (Offset == 0 &&
+ MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
+ RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
+ // Otherwise the intrinsic can only touch a single element and the
+ // address operand will be updated, so nothing else needs to be done.
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ const Type *LIType = LI->getType();
+ if (LIType == AI->getAllocatedType()) {
+ // Replace:
+ // %res = load { i32, i32 }* %alloc
+ // with:
+ // %load.0 = load i32* %alloc.0
+ // %insert.0 = insertvalue { i32, i32 } undef, i32 %load.0, 0
+ // %load.1 = load i32* %alloc.1
+ // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
+ // (Also works for arrays instead of structs)
+ Value *Insert = UndefValue::get(LIType);
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ Value *Load = new LoadInst(NewElts[i], "load", LI);
+ Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
+ }
+ LI->replaceAllUsesWith(Insert);
+ DeadInsts.push_back(LI);
+ } else if (LIType->isIntegerTy() &&
+ TD->getTypeAllocSize(LIType) ==
+ TD->getTypeAllocSize(AI->getAllocatedType())) {
+ // If this is a load of the entire alloca to an integer, rewrite it.
+ RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
}
- return MarkUnsafe(Info);
- } else if (isa<DbgInfoIntrinsic>(UI)) {
- // If one user is DbgInfoIntrinsic then check if all users are
- // DbgInfoIntrinsics.
- if (OnlyUsedByDbgInfoIntrinsics(BC)) {
- Info.needsCleanup = true;
- return;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ Value *Val = SI->getOperand(0);
+ const Type *SIType = Val->getType();
+ if (SIType == AI->getAllocatedType()) {
+ // Replace:
+ // store { i32, i32 } %val, { i32, i32 }* %alloc
+ // with:
+ // %val.0 = extractvalue { i32, i32 } %val, 0
+ // store i32 %val.0, i32* %alloc.0
+ // %val.1 = extractvalue { i32, i32 } %val, 1
+ // store i32 %val.1, i32* %alloc.1
+ // (Also works for arrays instead of structs)
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
+ new StoreInst(Extract, NewElts[i], SI);
+ }
+ DeadInsts.push_back(SI);
+ } else if (SIType->isIntegerTy() &&
+ TD->getTypeAllocSize(SIType) ==
+ TD->getTypeAllocSize(AI->getAllocatedType())) {
+ // If this is a store of the entire alloca from an integer, rewrite it.
+ RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
}
- else
- MarkUnsafe(Info);
- }
- else {
- return MarkUnsafe(Info);
}
- if (Info.isUnsafe) return;
}
}
-/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes
-/// to its first element. Transform users of the cast to use the new values
-/// instead.
-void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
- SmallVector<AllocaInst*, 32> &NewElts) {
- Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end();
- while (UI != UE) {
- Instruction *User = cast<Instruction>(*UI++);
- if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) {
- RewriteBitCastUserOfAlloca(BCU, AI, NewElts);
- if (BCU->use_empty()) BCU->eraseFromParent();
- continue;
- }
+/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
+/// and recursively continue updating all of its uses.
+void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ RewriteForScalarRepl(BC, AI, Offset, NewElts);
+ if (BC->getOperand(0) != AI)
+ return;
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
- // This must be memcpy/memmove/memset of the entire aggregate.
- // Split into one per element.
- RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts);
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- // If this is a store of the entire alloca from an integer, rewrite it.
- RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
- continue;
- }
+ // The bitcast references the original alloca. Replace its uses with
+ // references to the first new element alloca.
+ Instruction *Val = NewElts[0];
+ if (Val->getType() != BC->getDestTy()) {
+ Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
+ Val->takeName(BC);
+ }
+ BC->replaceAllUsesWith(Val);
+ DeadInsts.push_back(BC);
+}
- if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- // If this is a load of the entire alloca to an integer, rewrite it.
- RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
- continue;
- }
-
- // Otherwise it must be some other user of a gep of the first pointer. Just
- // leave these alone.
- continue;
+/// FindElementAndOffset - Return the index of the element containing Offset
+/// within type T, which must be a struct or an array (anything else trips the
+/// cast<ArrayType> below). On return, T is the element's type, Offset is the
+/// remaining offset inside that element, and IdxTy is the integer type to use
+/// for this index in a GEP: i32 for struct fields, i64 for array elements.
+uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
+ const Type *&IdxTy) {
+ uint64_t Idx = 0;
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ const StructLayout *Layout = TD->getStructLayout(ST);
+ Idx = Layout->getElementContainingOffset(Offset);
+ T = ST->getContainedType(Idx);
+ Offset -= Layout->getElementOffset(Idx);
+ IdxTy = Type::getInt32Ty(T->getContext());
+ return Idx;
}
+ const ArrayType *AT = cast<ArrayType>(T);
+ T = AT->getElementType();
+ uint64_t EltSize = TD->getTypeAllocSize(T);
+ Idx = Offset / EltSize;
+ Offset -= Idx * EltSize;
+ IdxTy = Type::getInt64Ty(T->getContext());
+ return Idx;
+}
+
+/// RewriteGEP - Rewrite the users of GEPI first; then, if GEPI moves the
+/// pointer to a different element of the alloca being split (or is based
+/// directly on AI), replace it with an address relative to that new element.
+void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ uint64_t OldOffset = Offset;
+ SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
+ &Indices[0], Indices.size());
+
+ RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
+
+ const Type *T = AI->getAllocatedType();
+ const Type *IdxTy;
+ uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
+ if (GEPI->getOperand(0) == AI)
+ OldIdx = ~0ULL; // Sentinel index: forces the GEP to be rewritten.
+
+ T = AI->getAllocatedType();
+ uint64_t EltOffset = Offset;
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+
+ // If this GEP does not move the pointer across elements of the alloca
+ // being split, then it does not need to be rewritten.
+ if (Idx == OldIdx)
+ return;
+
+ const Type *i32Ty = Type::getInt32Ty(AI->getContext());
+ SmallVector<Value*, 8> NewArgs;
+ NewArgs.push_back(Constant::getNullValue(i32Ty));
+ while (EltOffset != 0) {
+ uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
+ NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
+ }
+ Instruction *Val = NewElts[Idx];
+ if (NewArgs.size() > 1) {
+ Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
+ NewArgs.end(), "", GEPI);
+ Val->takeName(GEPI);
+ }
+ if (Val->getType() != GEPI->getType())
+ Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
+ GEPI->replaceAllUsesWith(Val);
+ DeadInsts.push_back(GEPI);
}
/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
-void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
- AllocationInst *AI,
+void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+ AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts) {
-
// If this is a memcpy/memmove, construct the other pointer as the
// appropriate type. The "Other" pointer is the pointer that goes to memory
// that doesn't have anything to do with the alloca that we are promoting. For
LLVMContext &Context = MI->getContext();
unsigned MemAlignment = MI->getAlignment();
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
- if (BCInst == MTI->getRawDest())
+ if (Inst == MTI->getRawDest())
OtherPtr = MTI->getRawSource();
else {
- assert(BCInst == MTI->getRawSource());
+ assert(Inst == MTI->getRawSource());
OtherPtr = MTI->getRawDest();
}
}
-
+
// If there is an other pointer, we want to convert it to the same pointer
// type as AI has, so we can GEP through it safely.
if (OtherPtr) {
- // It is likely that OtherPtr is a bitcast, if so, remove it.
- if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr))
- OtherPtr = BC->getOperand(0);
- // All zero GEPs are effectively bitcasts.
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr))
- if (GEP->hasAllZeroIndices())
- OtherPtr = GEP->getOperand(0);
+
+ // Remove bitcasts and all-zero GEPs from OtherPtr. This is an
+ // optimization, but it's also required to detect the corner case where
+ // both pointer operands are referencing the same memory, and where
+ // OtherPtr may be a bitcast or GEP that is currently being rewritten. (This
+ // function is only called for mem intrinsics that access the whole
+ // aggregate, so non-zero GEPs are not an issue here.)
+ while (1) {
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) {
+ OtherPtr = BC->getOperand(0);
+ continue;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) {
+ // All zero GEPs are effectively bitcasts.
+ if (GEP->hasAllZeroIndices()) {
+ OtherPtr = GEP->getOperand(0);
+ continue;
+ }
+ }
+ break;
+ }
+ // Copying the alloca to itself is a no-op: just delete it.
+ if (OtherPtr == AI || OtherPtr == NewElts[0]) {
+ // This code will run twice for a no-op memcpy -- once for each operand.
+ // Put only one reference to MI on the DeadInsts list.
+ for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(),
+ E = DeadInsts.end(); I != E; ++I)
+ if (*I == MI) return;
+ DeadInsts.push_back(MI);
+ return;
+ }
if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
if (BCE->getOpcode() == Instruction::BitCast)
}
// Process each element of the aggregate.
- Value *TheFn = MI->getOperand(0);
+ Value *TheFn = MI->getCalledValue();
const Type *BytePtrTy = MI->getRawDest()->getType();
- bool SROADest = MI->getRawDest() == BCInst;
+ bool SROADest = MI->getRawDest() == Inst;
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
if (OtherPtr) {
Value *Idx[2] = { Zero,
ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
- OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
- OtherPtr->getNameStr()+"."+Twine(i),
- MI);
+ OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
+ OtherPtr->getName()+"."+Twine(i),
+ MI);
uint64_t EltOffset;
const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
if (const StructType *ST =
// If the stored element is zero (common case), just store a null
// constant.
Constant *StoreVal;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(1))) {
if (CI->isZero()) {
StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
} else {
// Convert the integer value to the appropriate type.
StoreVal = ConstantInt::get(Context, TotalVal);
- if (isa<PointerType>(ValTy))
+ if (ValTy->isPointerTy())
StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
- else if (ValTy->isFloatingPoint())
+ else if (ValTy->isFloatingPointTy())
StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
assert(StoreVal->getType() == ValTy && "Type mismatch!");
// Cast the element pointer to BytePtrTy.
if (EltPtr->getType() != BytePtrTy)
- EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI);
+ EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
// Cast the other pointer (if we have one) to BytePtrTy.
- if (OtherElt && OtherElt->getType() != BytePtrTy)
- OtherElt = new BitCastInst(OtherElt, BytePtrTy,OtherElt->getNameStr(),
- MI);
+ if (OtherElt && OtherElt->getType() != BytePtrTy) {
+ // Preserve address space of OtherElt
+ const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType());
+ const PointerType* PTy = cast<PointerType>(BytePtrTy);
+ if (OtherPTy->getElementType() != PTy->getElementType()) {
+ Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
+ OtherPTy->getAddressSpace());
+ OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
+ OtherElt->getNameStr(), MI);
+ }
+ }
unsigned EltSize = TD->getTypeAllocSize(EltTy);
Value *Ops[] = {
SROADest ? EltPtr : OtherElt, // Dest ptr
SROADest ? OtherElt : EltPtr, // Src ptr
- ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+ ConstantInt::get(MI->getOperand(2)->getType(), EltSize), // Size
// Align
- ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign)
+ ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
+ MI->getVolatileCst()
};
- CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
+ // In case we fold the address space overloaded memcpy of A to B
+ // with memcpy of B to C, change the function to be a memcpy of A to C.
+ const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
+ Ops[2]->getType() };
+ Module *M = MI->getParent()->getParent()->getParent();
+ TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
+ CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
} else {
assert(isa<MemSetInst>(MI));
Value *Ops[] = {
- EltPtr, MI->getOperand(2), // Dest, Value,
- ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
- Zero // Align
+ EltPtr, MI->getOperand(1), // Dest, Value,
+ ConstantInt::get(MI->getOperand(2)->getType(), EltSize), // Size
+ Zero, // Align
+ ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
};
- CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
+ const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
+ Module *M = MI->getParent()->getParent()->getParent();
+ TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+ CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
}
}
- MI->eraseFromParent();
+ DeadInsts.push_back(MI);
}
-/// RewriteStoreUserOfWholeAlloca - We found an store of an integer that
+/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
/// overwrites the entire allocation. Extract out the pieces of the stored
/// integer and store them individually.
-void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
- AllocationInst *AI,
+void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts){
// Extract each element out of the integer according to its structure offset
// and store the element value to the individual alloca.
Value *SrcVal = SI->getOperand(0);
- const Type *AllocaEltTy = AI->getType()->getElementType();
+ const Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
- // If this isn't a store of an integer to the whole alloca, it may be a store
- // to the first element. Just ignore the store in this case and normal SROA
- // will handle it.
- if (!isa<IntegerType>(SrcVal->getType()) ||
- TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits)
- return;
// Handle tail padding by extending the operand
if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
SrcVal = new ZExtInst(SrcVal,
IntegerType::get(SI->getContext(), AllocaSizeBits),
"", SI);
- DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI;
+ DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
+ << '\n');
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
Value *DestField = NewElts[i];
if (EltVal->getType() == FieldTy) {
// Storing to an integer field of this size, just do it.
- } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) {
+ } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
} else {
Value *DestField = NewElts[i];
if (EltVal->getType() == ArrayEltTy) {
// Storing to an integer field of this size, just do it.
- } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) {
+ } else if (ArrayEltTy->isFloatingPointTy() ||
+ ArrayEltTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
} else {
}
}
- SI->eraseFromParent();
+ DeadInsts.push_back(SI);
}
-/// RewriteLoadUserOfWholeAlloca - We found an load of the entire allocation to
+/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
/// an integer. Load the individual pieces to form the aggregate value.
-void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
+void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts) {
// Extract each element out of the NewElts according to its structure offset
// and form the result value.
- const Type *AllocaEltTy = AI->getType()->getElementType();
+ const Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
- // If this isn't a load of the whole alloca to an integer, it may be a load
- // of the first element. Just ignore the load in this case and normal SROA
- // will handle it.
- if (!isa<IntegerType>(LI->getType()) ||
- TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits)
- return;
-
- DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI;
+ DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
+ << '\n');
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
FieldSizeBits);
- if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() &&
- !isa<VectorType>(FieldTy))
+ if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
+ !FieldTy->isVectorTy())
SrcField = new BitCastInst(SrcField,
PointerType::getUnqual(FieldIntTy),
"", LI);
ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
LI->replaceAllUsesWith(ResultVal);
- LI->eraseFromParent();
+ DeadInsts.push_back(LI);
}
-
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding, false otherwise.
static bool HasPadding(const Type *Ty, const TargetData &TD) {
/// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of
/// an aggregate can be broken down into elements. Return 0 if not, 3 if safe,
/// or 1 if safe after canonicalization has been performed.
-///
-int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) {
+bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
// Loop over the use list of the alloca. We can only transform it if all of
// the users are safe to transform.
AllocaInfo Info;
- for (Value::use_iterator I = AI->use_begin(), E = AI->use_end();
- I != E; ++I) {
- isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info);
- if (Info.isUnsafe) {
- DOUT << "Cannot transform: " << *AI << " due to user: " << **I;
- return 0;
- }
+ isSafeForScalarRepl(AI, AI, 0, Info);
+ if (Info.isUnsafe) {
+ DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
+ return false;
}
// Okay, we know all the users are promotable. If the aggregate is a memcpy
// types, but may actually be used. In these cases, we refuse to promote the
// struct.
if (Info.isMemCpySrc && Info.isMemCpyDst &&
- HasPadding(AI->getType()->getElementType(), *TD))
- return 0;
-
- // If we require cleanup, return 1, otherwise return 3.
- return Info.needsCleanup ? 1 : 3;
-}
-
-/// CleanupGEP - GEP is used by an Alloca, which can be prompted after the GEP
-/// is canonicalized here.
-void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
- gep_type_iterator I = gep_type_begin(GEPI);
- ++I;
-
- const ArrayType *AT = dyn_cast<ArrayType>(*I);
- if (!AT)
- return;
-
- uint64_t NumElements = AT->getNumElements();
-
- if (isa<ConstantInt>(I.getOperand()))
- return;
-
- if (NumElements == 1) {
- GEPI->setOperand(2,
- Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())));
- return;
- }
-
- assert(NumElements == 2 && "Unhandled case!");
- // All users of the GEP must be loads. At each use of the GEP, insert
- // two loads of the appropriate indexed GEP and select between them.
- Value *IsOne = new ICmpInst(GEPI, ICmpInst::ICMP_NE, I.getOperand(),
- Constant::getNullValue(I.getOperand()->getType()),
- "isone");
- // Insert the new GEP instructions, which are properly indexed.
- SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end());
- Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()));
- Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
- Indices.begin(),
- Indices.end(),
- GEPI->getName()+".0", GEPI);
- Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1);
- Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
- Indices.begin(),
- Indices.end(),
- GEPI->getName()+".1", GEPI);
- // Replace all loads of the variable index GEP with loads from both
- // indexes and a select.
- while (!GEPI->use_empty()) {
- LoadInst *LI = cast<LoadInst>(GEPI->use_back());
- Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI);
- Value *One = new LoadInst(OneIdx , LI->getName()+".1", LI);
- Value *R = SelectInst::Create(IsOne, One, Zero, LI->getName(), LI);
- LI->replaceAllUsesWith(R);
- LI->eraseFromParent();
- }
- GEPI->eraseFromParent();
-}
-
+ HasPadding(AI->getAllocatedType(), *TD))
+ return false;
-/// CleanupAllocaUsers - If SROA reported that it can promote the specified
-/// allocation, but only if cleaned up, perform the cleanups required.
-void SROA::CleanupAllocaUsers(AllocationInst *AI) {
- // At this point, we know that the end result will be SROA'd and promoted, so
- // we can insert ugly code if required so long as sroa+mem2reg will clean it
- // up.
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E; ) {
- User *U = *UI++;
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U))
- CleanupGEP(GEPI);
- else {
- Instruction *I = cast<Instruction>(U);
- SmallVector<DbgInfoIntrinsic *, 2> DbgInUses;
- if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) {
- // Safe to remove debug info uses.
- while (!DbgInUses.empty()) {
- DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back();
- DI->eraseFromParent();
- }
- I->eraseFromParent();
- }
- }
- }
+ return true;
}
/// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at
VecTy = VInTy;
return;
}
- } else if (In == Type::getFloatTy(Context) ||
- In == Type::getDoubleTy(Context) ||
- (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 &&
+ } else if (In->isFloatTy() || In->isDoubleTy() ||
+ (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
// If we're accessing something that could be an element of a vector, see
// if the implied vector agrees with what we already have and if Offset is
}
/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
-/// its accesses to use a to single vector type, return true, and set VecTy to
+/// its accesses to a single vector type, return true and set VecTy to
/// the new type. If we could convert the alloca into a single promotable
/// integer, return true but set VecTy to VoidTy. Further, if the use is not a
/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset
///
/// If we see at least one access to the value that is as a vector type, set the
/// SawVec flag.
-///
bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
bool &SawVec, uint64_t Offset,
unsigned AllocaSize) {
return false;
MergeInType(LI->getType(), Offset, VecTy,
AllocaSize, *TD, V->getContext());
- SawVec |= isa<VectorType>(LI->getType());
+ SawVec |= LI->getType()->isVectorTy();
continue;
}
if (SI->getOperand(0) == V || SI->isVolatile()) return 0;
MergeInType(SI->getOperand(0)->getType(), Offset,
VecTy, AllocaSize, *TD, V->getContext());
- SawVec |= isa<VectorType>(SI->getOperand(0)->getType());
+ SawVec |= SI->getOperand(0)->getType()->isVectorTy();
continue;
}
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
+ uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
&Indices[0], Indices.size());
// See if all uses can be converted.
if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset,
}
}
- // Ignore dbg intrinsic.
- if (isa<DbgInfoIntrinsic>(User))
- continue;
-
// Otherwise, we cannot handle this!
return false;
}
return true;
}
-
/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
/// directly. This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
+ uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
&Indices[0], Indices.size());
ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
GEP->eraseFromParent();
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
assert(SI->getOperand(0) != Ptr && "Consistency error!");
- // FIXME: Remove once builder has Twine API.
- Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str());
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
Builder);
Builder.CreateStore(New, NewAI);
SI->eraseFromParent();
+
+ // If the load we just inserted is now dead, then the inserted store
+ // overwrote the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
continue;
}
for (unsigned i = 1; i != NumBytes; ++i)
APVal |= APVal << 8;
- // FIXME: Remove once builder has Twine API.
- Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str());
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(
ConstantInt::get(User->getContext(), APVal),
Old, Offset, Builder);
Builder.CreateStore(New, NewAI);
+
+ // If the load we just inserted is now dead, then the memset overwrote
+ // the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
}
MSI->eraseFromParent();
continue;
// If the source and destination are both to the same alloca, then this is
// a noop copy-to-self, just delete it. Otherwise, emit a load and store
// as appropriate.
- AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject());
+ AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0));
- if (MTI->getSource()->getUnderlyingObject() != OrigAI) {
+ if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) {
// Dest must be OrigAI, change this to be a load from the original
// pointer (bitcasted), then a store to our new alloca.
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
SrcVal->setAlignment(MTI->getAlignment());
Builder.CreateStore(SrcVal, NewAI);
- } else if (MTI->getDest()->getUnderlyingObject() != OrigAI) {
+ } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) {
// Src must be OrigAI, change this to be a load from NewAI then a store
// through the original dest pointer (bitcasted).
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
} else {
// Noop transfer. Src == Dst
}
-
MTI->eraseFromParent();
continue;
}
- // If user is a dbg info intrinsic then it is safe to remove it.
- if (isa<DbgInfoIntrinsic>(User)) {
- User->eraseFromParent();
- continue;
- }
-
llvm_unreachable("Unsupported operation!");
}
}
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
- if (isa<VectorType>(ToType))
+ if (ToType->isVectorTy())
return Builder.CreateBitCast(FromVal, ToType, "tmp");
// Otherwise it must be an element access.
LIBitWidth), "tmp");
// If the result is an integer, this is a trunc or bitcast.
- if (isa<IntegerType>(ToType)) {
+ if (ToType->isIntegerTy()) {
// Should be done.
- } else if (ToType->isFloatingPoint() || isa<VectorType>(ToType)) {
+ } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
// Just do a bitcast, we know the sizes match up.
FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
} else {
return FromVal;
}
-
/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
/// or vector value "Old" at the offset specified by Offset.
///
unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
- if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType()))
+ if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
SV = Builder.CreateBitCast(SV,
IntegerType::get(SV->getContext(),SrcWidth), "tmp");
- else if (isa<PointerType>(SV->getType()))
+ else if (SV->getType()->isPointerTy())
SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp");
// Zero extend or truncate the value if needed.
static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
bool isOffset) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
- if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+ User *U = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U))
// Ignore non-volatile loads, they are always ok.
if (!LI->isVolatile())
continue;
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
// If uses of the bitcast are ok, we are ok.
if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
return false;
continue;
}
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
// If the GEP has all zero indices, it doesn't offset the pointer. If it
// doesn't, it does.
if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
// If this is isn't our memcpy/memmove, reject it as something we can't
// handle.
- if (!isa<MemTransferInst>(*UI))
+ if (!isa<MemTransferInst>(U))
return false;
// If we already have seen a copy, reject the second one.
if (isOffset) return false;
// If the memintrinsic isn't using the alloca as the dest, reject it.
- if (UI.getOperandNo() != 1) return false;
+ if (UI.getOperandNo() != 0) return false;
- MemIntrinsic *MI = cast<MemIntrinsic>(*UI);
+ MemIntrinsic *MI = cast<MemIntrinsic>(U);
// If the source of the memcpy/move is not a constant global, reject it.
- if (!PointsToConstantGlobal(MI->getOperand(2)))
+ if (!PointsToConstantGlobal(MI->getOperand(1)))
return false;
// Otherwise, the transform is safe. Remember the copy instruction.
/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
/// modified by a copy from a constant global. If we can prove this, we can
/// replace any uses of the alloca with uses of the global directly.
-Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocationInst *AI) {
+Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
Instruction *TheCopy = 0;
if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
return TheCopy;