X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FScalarReplAggregates.cpp;h=c58c858b960f5a2612a5ad73fa3b062640ebbdb4;hb=2ff961f66816daab8bbc58a19025161d969821c2;hp=98d5a027013fe940caa14cf11bbb527ee7d660cb;hpb=c570487d45f7426dc5f75c0309122d6f9330ecf7;p=oota-llvm.git

diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 98d5a027013..c58c858b960 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -27,19 +27,20 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 
 STATISTIC(NumReplaced, "Number of allocas broken up");
@@ -48,7 +49,7 @@ STATISTIC(NumConverted, "Number of aggregates converted to scalar");
 STATISTIC(NumGlobals, "Number of allocas copied from constant global");
 
 namespace {
-  struct VISIBILITY_HIDDEN SROA : public FunctionPass {
+  struct SROA : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
     explicit SROA(signed T = -1) : FunctionPass(&ID) {
       if (T == -1)
@@ -67,13 +68,16 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<DominatorTree>();
       AU.addRequired<DominanceFrontier>();
-      AU.addRequired<TargetData>();
       AU.setPreservesCFG();
     }
 
   private:
     TargetData *TD;
 
+    /// DeadInsts - Keep track of instructions we have made dead, so that
+    /// we can remove them after we are done working.
+    SmallVector<Value*, 32> DeadInsts;
+
     /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
     /// information about the uses.  All these fields are initialized to false
     /// and set to true when something is learned.
@@ -81,10 +85,6 @@ namespace {
       /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
       bool isUnsafe : 1;
 
-      /// needsCleanup - This is set to true if there is some use of the alloca
-      /// that requires cleanup.
-      bool needsCleanup : 1;
-
       /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
bool isMemCpySrc : 1; @@ -92,40 +92,42 @@ namespace { bool isMemCpyDst : 1; AllocaInfo() - : isUnsafe(false), needsCleanup(false), - isMemCpySrc(false), isMemCpyDst(false) {} + : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {} }; unsigned SRThreshold; void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; } - int isSafeAllocaToScalarRepl(AllocationInst *AI); - - void isSafeUseOfAllocation(Instruction *User, AllocationInst *AI, - AllocaInfo &Info); - void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI, - AllocaInfo &Info); - void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI, - unsigned OpNo, AllocaInfo &Info); - void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocationInst *AI, - AllocaInfo &Info); - - void DoScalarReplacement(AllocationInst *AI, - std::vector &WorkList); - void CleanupGEP(GetElementPtrInst *GEP); - void CleanupAllocaUsers(AllocationInst *AI); - AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base); + bool isSafeAllocaToScalarRepl(AllocaInst *AI); + + void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + AllocaInfo &Info); + void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset, + AllocaInfo &Info); + void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize, + const Type *MemOpType, bool isStore, AllocaInfo &Info); + bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); + uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset, + const Type *&IdxTy); - void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, - SmallVector &NewElts); + void DoScalarReplacement(AllocaInst *AI, + std::vector &WorkList); + void DeleteDeadInstructions(); + AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); - void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, - AllocationInst *AI, + void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts); + void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts); + void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts); + void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, + AllocaInst *AI, SmallVector &NewElts); - void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI, + void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, SmallVector &NewElts); - void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, + void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector &NewElts); bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, @@ -135,7 +137,7 @@ namespace { uint64_t Offset, IRBuilder<> &Builder); Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, uint64_t Offset, IRBuilder<> &Builder); - static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI); + static Instruction *isOnlyCopiedFromConstantGlobal(AllocaInst *AI); }; } @@ -149,9 +151,16 @@ FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { bool SROA::runOnFunction(Function &F) { - TD = &getAnalysis(); - + TD = getAnalysisIfAvailable(); + bool Changed = performPromotion(F); + + // FIXME: ScalarRepl currently depends on TargetData more than it + // theoretically needs to. It should be refactored in order to support + // target-independent IR. Until this is done, just skip the actual + // scalar-replacement portion of this pass. 
+ if (!TD) return Changed; + while (1) { bool LocalChange = performScalarRepl(F); if (!LocalChange) break; // No need to repromote if no scalarrepl @@ -193,12 +202,18 @@ bool SROA::performPromotion(Function &F) { return Changed; } -/// getNumSAElements - Return the number of elements in the specific struct or -/// array. -static uint64_t getNumSAElements(const Type *T) { +/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for +/// SROA. It must be a struct or array type with a small number of elements. +static bool ShouldAttemptScalarRepl(AllocaInst *AI) { + const Type *T = AI->getAllocatedType(); + // Do not promote any struct into more than 32 separate vars. if (const StructType *ST = dyn_cast(T)) - return ST->getNumElements(); - return cast(T)->getNumElements(); + return ST->getNumElements() <= 32; + // Arrays are much less likely to be safe for SROA; only consider + // them if they are very small. + if (const ArrayType *AT = dyn_cast(T)) + return AT->getNumElements() <= 8; + return false; } // performScalarRepl - This algorithm is a simple worklist driven algorithm, @@ -206,18 +221,18 @@ static uint64_t getNumSAElements(const Type *T) { // them if they are only used by getelementptr instructions. // bool SROA::performScalarRepl(Function &F) { - std::vector WorkList; + std::vector WorkList; // Scan the entry basic block, adding any alloca's and mallocs to the worklist BasicBlock &BB = F.getEntryBlock(); for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) - if (AllocationInst *A = dyn_cast(I)) + if (AllocaInst *A = dyn_cast(I)) WorkList.push_back(A); // Process the worklist bool Changed = false; while (!WorkList.empty()) { - AllocationInst *AI = WorkList.back(); + AllocaInst *AI = WorkList.back(); WorkList.pop_back(); // Handle dead allocas trivially. These can be formed by SROA'ing arrays @@ -237,9 +252,9 @@ bool SROA::performScalarRepl(Function &F) { // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { - DOUT << "Found alloca equal to global: " << *AI; - DOUT << " memcpy = " << *TheCopy; - Constant *TheSrc = cast(TheCopy->getOperand(2)); + DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n'); + Constant *TheSrc = cast(TheCopy->getOperand(1)); AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); TheCopy->eraseFromParent(); // Don't mutate the global. AI->eraseFromParent(); @@ -252,30 +267,22 @@ bool SROA::performScalarRepl(Function &F) { // transform the allocation instruction if it is an array allocation // (allocations OF arrays are ok though), and an allocation of a scalar // value cannot be decomposed at all. - uint64_t AllocaSize = TD->getTypePaddedSize(AI->getAllocatedType()); + uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); + + // Do not promote [0 x %struct]. + if (AllocaSize == 0) continue; + + // If the alloca looks like a good candidate for scalar replacement, and if + // all its users can be transformed, then split up the aggregate into its + // separate elements. + if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) { + DoScalarReplacement(AI, WorkList); + Changed = true; + continue; + } // Do not promote any struct whose size is too big. 
if (AllocaSize > SRThreshold) continue; - - if ((isa(AI->getAllocatedType()) || - isa(AI->getAllocatedType())) && - // Do not promote any struct into more than "32" separate vars. - getNumSAElements(AI->getAllocatedType()) <= SRThreshold/4) { - // Check that all of the users of the allocation are capable of being - // transformed. - switch (isSafeAllocaToScalarRepl(AI)) { - default: assert(0 && "Unexpected value!"); - case 0: // Not safe to scalar replace. - break; - case 1: // Safe, but requires cleanup/canonicalizations first - CleanupAllocaUsers(AI); - // FALL THROUGH. - case 3: // Safe to scalar replace. - DoScalarReplacement(AI, WorkList); - Changed = true; - continue; - } - } // If we can turn this aggregate value (potentially with casts) into a // simple scalar value that can be mem2reg'd into a register value. @@ -295,17 +302,18 @@ bool SROA::performScalarRepl(Function &F) { // random stuff that doesn't use vectors (e.g. <9 x double>) because then // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. - if (VectorTy && isa(VectorTy) && HadAVector) { - DOUT << "CONVERT TO VECTOR: " << *AI << " TYPE = " << *VectorTy <<"\n"; + if (VectorTy && VectorTy->isVectorTy() && HadAVector) { + DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " + << *VectorTy << '\n'); // Create and insert the vector alloca. - NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); + NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); } else { - DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"; + DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. - const Type *NewTy = IntegerType::get(AllocaSize*8); + const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); } @@ -324,16 +332,16 @@ bool SROA::performScalarRepl(Function &F) { /// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl /// predicate, do SROA now. -void SROA::DoScalarReplacement(AllocationInst *AI, - std::vector &WorkList) { - DOUT << "Found inst to SROA: " << *AI; +void SROA::DoScalarReplacement(AllocaInst *AI, + std::vector &WorkList) { + DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n'); SmallVector ElementAllocas; if (const StructType *ST = dyn_cast(AI->getAllocatedType())) { ElementAllocas.reserve(ST->getNumContainedTypes()); for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, AI->getAlignment(), - AI->getName() + "." + utostr(i), AI); + AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); WorkList.push_back(NA); // Add to worklist for recursive processing } @@ -343,408 +351,398 @@ void SROA::DoScalarReplacement(AllocationInst *AI, const Type *ElTy = AT->getElementType(); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), - AI->getName() + "." + utostr(i), AI); + AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); WorkList.push_back(NA); // Add to worklist for recursive processing } } - // Now that we have created the alloca instructions that we want to use, - // expand the getelementptr instructions to use them. 
- // - while (!AI->use_empty()) { - Instruction *User = cast(AI->use_back()); - if (BitCastInst *BCInst = dyn_cast(User)) { - RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); - BCInst->eraseFromParent(); - continue; - } - - // Replace: - // %res = load { i32, i32 }* %alloc - // with: - // %load.0 = load i32* %alloc.0 - // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 - // %load.1 = load i32* %alloc.1 - // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 - // (Also works for arrays instead of structs) - if (LoadInst *LI = dyn_cast(User)) { - Value *Insert = UndefValue::get(LI->getType()); - for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { - Value *Load = new LoadInst(ElementAllocas[i], "load", LI); - Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); - } - LI->replaceAllUsesWith(Insert); - LI->eraseFromParent(); - continue; - } + // Now that we have created the new alloca instructions, rewrite all the + // uses of the old alloca. + RewriteForScalarRepl(AI, AI, 0, ElementAllocas); - // Replace: - // store { i32, i32 } %val, { i32, i32 }* %alloc - // with: - // %val.0 = extractvalue { i32, i32 } %val, 0 - // store i32 %val.0, i32* %alloc.0 - // %val.1 = extractvalue { i32, i32 } %val, 1 - // store i32 %val.1, i32* %alloc.1 - // (Also works for arrays instead of structs) - if (StoreInst *SI = dyn_cast(User)) { - Value *Val = SI->getOperand(0); - for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { - Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); - new StoreInst(Extract, ElementAllocas[i], SI); - } - SI->eraseFromParent(); - continue; - } - - GetElementPtrInst *GEPI = cast(User); - // We now know that the GEP is of the form: GEP , 0, - unsigned Idx = - (unsigned)cast(GEPI->getOperand(2))->getZExtValue(); - - assert(Idx < ElementAllocas.size() && "Index out of range?"); - AllocaInst *AllocaToUse = ElementAllocas[Idx]; - - Value *RepValue; - if (GEPI->getNumOperands() == 3) { - // Do not insert a new getelementptr instruction with zero indices, only - // to have it optimized out later. - RepValue = AllocaToUse; - } else { - // We are indexing deeply into the structure, so we still need a - // getelement ptr instruction to finish the indexing. This may be - // expanded itself once the worklist is rerun. - // - SmallVector NewArgs; - NewArgs.push_back(Constant::getNullValue(Type::Int32Ty)); - NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); - RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), - NewArgs.end(), "", GEPI); - RepValue->takeName(GEPI); - } - - // If this GEP is to the start of the aggregate, check for memcpys. - if (Idx == 0 && GEPI->hasAllZeroIndices()) - RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); - - // Move all of the users over to the new GEP. - GEPI->replaceAllUsesWith(RepValue); - // Delete the old GEP - GEPI->eraseFromParent(); - } - - // Finally, delete the Alloca instruction + // Now erase any instructions that were made dead while rewriting the alloca. + DeleteDeadInstructions(); AI->eraseFromParent(); + NumReplaced++; } - -/// isSafeElementUse - Check to see if this use is an allowed use for a -/// getelementptr instruction of an array aggregate allocation. isFirstElt -/// indicates whether Ptr is known to the start of the aggregate. 
-/// -void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI, - AllocaInfo &Info) { - for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); - I != E; ++I) { - Instruction *User = cast(*I); - switch (User->getOpcode()) { - case Instruction::Load: break; - case Instruction::Store: - // Store is ok if storing INTO the pointer, not storing the pointer - if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); - break; - case Instruction::GetElementPtr: { - GetElementPtrInst *GEP = cast(User); - bool AreAllZeroIndices = isFirstElt; - if (GEP->getNumOperands() > 1) { - if (!isa(GEP->getOperand(1)) || - !cast(GEP->getOperand(1))->isZero()) - // Using pointer arithmetic to navigate the array. - return MarkUnsafe(Info); - - if (AreAllZeroIndices) - AreAllZeroIndices = GEP->hasAllZeroIndices(); +/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, +/// recursively including all their operands that become trivially dead. +void SROA::DeleteDeadInstructions() { + while (!DeadInsts.empty()) { + Instruction *I = cast(DeadInsts.pop_back_val()); + + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + if (Instruction *U = dyn_cast(*OI)) { + // Zero out the operand and see if it becomes trivially dead. + // (But, don't add allocas to the dead instruction list -- they are + // already on the worklist and will be deleted separately.) + *OI = 0; + if (isInstructionTriviallyDead(U) && !isa(U)) + DeadInsts.push_back(U); } - isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); - if (Info.isUnsafe) return; - break; - } - case Instruction::BitCast: - if (isFirstElt) { - isSafeUseOfBitCastedAllocation(cast(User), AI, Info); - if (Info.isUnsafe) return; - break; - } - DOUT << " Transformation preventing inst: " << *User; - return MarkUnsafe(Info); - case Instruction::Call: - if (MemIntrinsic *MI = dyn_cast(User)) { - if (isFirstElt) { - isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); - if (Info.isUnsafe) return; - break; - } - } - DOUT << " Transformation preventing inst: " << *User; - return MarkUnsafe(Info); - default: - DOUT << " Transformation preventing inst: " << *User; - return MarkUnsafe(Info); - } - } - return; // All users look ok :) -} -/// AllUsersAreLoads - Return true if all users of this value are loads. -static bool AllUsersAreLoads(Value *Ptr) { - for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); - I != E; ++I) - if (cast(*I)->getOpcode() != Instruction::Load) - return false; - return true; + I->eraseFromParent(); + } } + +/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to +/// performing scalar replacement of alloca AI. The results are flagged in +/// the Info parameter. Offset indicates the position within AI that is +/// referenced by this instruction. +void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + AllocaInfo &Info) { + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { + Instruction *User = cast(*UI); -/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an -/// aggregate allocation. 
-/// -void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI, - AllocaInfo &Info) { - if (BitCastInst *C = dyn_cast(User)) - return isSafeUseOfBitCastedAllocation(C, AI, Info); - - if (LoadInst *LI = dyn_cast(User)) - if (!LI->isVolatile()) - return;// Loads (returning a first class aggregrate) are always rewritable - - if (StoreInst *SI = dyn_cast(User)) - if (!SI->isVolatile() && SI->getOperand(0) != AI) - return;// Store is ok if storing INTO the pointer, not storing the pointer - - GetElementPtrInst *GEPI = dyn_cast(User); - if (GEPI == 0) - return MarkUnsafe(Info); - - gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); - - // The GEP is not safe to transform if not of the form "GEP , 0, ". - if (I == E || - I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { - return MarkUnsafe(Info); + if (BitCastInst *BC = dyn_cast(User)) { + isSafeForScalarRepl(BC, AI, Offset, Info); + } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { + uint64_t GEPOffset = Offset; + isSafeGEP(GEPI, AI, GEPOffset, Info); + if (!Info.isUnsafe) + isSafeForScalarRepl(GEPI, AI, GEPOffset, Info); + } else if (MemIntrinsic *MI = dyn_cast(User)) { + ConstantInt *Length = dyn_cast(MI->getLength()); + if (Length) + isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0, + UI.getOperandNo() == 0, Info); + else + MarkUnsafe(Info); + } else if (LoadInst *LI = dyn_cast(User)) { + if (!LI->isVolatile()) { + const Type *LIType = LI->getType(); + isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType), + LIType, false, Info); + } else + MarkUnsafe(Info); + } else if (StoreInst *SI = dyn_cast(User)) { + // Store is ok if storing INTO the pointer, not storing the pointer + if (!SI->isVolatile() && SI->getOperand(0) != I) { + const Type *SIType = SI->getOperand(0)->getType(); + isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType), + SIType, true, Info); + } else + MarkUnsafe(Info); + } else { + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + MarkUnsafe(Info); + } + if (Info.isUnsafe) return; } +} - ++I; - if (I == E) return MarkUnsafe(Info); // ran out of GEP indices?? +/// isSafeGEP - Check if a GEP instruction can be handled for scalar +/// replacement. It is safe when all the indices are constant, in-bounds +/// references, and when the resulting offset corresponds to an element within +/// the alloca type. The results are flagged in the Info parameter. Upon +/// return, Offset is adjusted as specified by the GEP indices. +void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, + uint64_t &Offset, AllocaInfo &Info) { + gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); + if (GEPIt == E) + return; - bool IsAllZeroIndices = true; - - // If the first index is a non-constant index into an array, see if we can - // handle it as a special case. - if (const ArrayType *AT = dyn_cast(*I)) { - if (!isa(I.getOperand())) { - IsAllZeroIndices = 0; - uint64_t NumElements = AT->getNumElements(); - - // If this is an array index and the index is not constant, we cannot - // promote... that is unless the array has exactly one or two elements in - // it, in which case we CAN promote it, but we have to canonicalize this - // out if this is the only problem. - if ((NumElements == 1 || NumElements == 2) && - AllUsersAreLoads(GEPI)) { - Info.needsCleanup = true; - return; // Canonicalization required! - } - return MarkUnsafe(Info); - } - } - // Walk through the GEP type indices, checking the types that this indexes // into. 
- for (; I != E; ++I) { + for (; GEPIt != E; ++GEPIt) { // Ignore struct elements, no extra checking needed for these. - if (isa(*I)) + if ((*GEPIt)->isStructTy()) continue; - - ConstantInt *IdxVal = dyn_cast(I.getOperand()); - if (!IdxVal) return MarkUnsafe(Info); - // Are all indices still zero? - IsAllZeroIndices &= IdxVal->isZero(); - - if (const ArrayType *AT = dyn_cast(*I)) { - // This GEP indexes an array. Verify that this is an in-range constant - // integer. Specifically, consider A[0][i]. We cannot know that the user - // isn't doing invalid things like allowing i to index an out-of-range - // subscript that accesses A[1]. Because of this, we have to reject SROA - // of any accesses into structs where any of the components are variables. - if (IdxVal->getZExtValue() >= AT->getNumElements()) - return MarkUnsafe(Info); - } else if (const VectorType *VT = dyn_cast(*I)) { - if (IdxVal->getZExtValue() >= VT->getNumElements()) - return MarkUnsafe(Info); + ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); + if (!IdxVal) + return MarkUnsafe(Info); + } + + // Compute the offset due to this GEP and check if the alloca has a + // component element at that offset. + SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), + &Indices[0], Indices.size()); + if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) + MarkUnsafe(Info); +} + +/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI +/// alloca or has an offset and size that corresponds to a component element +/// within it. The offset checked here may have been formed from a GEP with a +/// pointer bitcasted to a different type. +void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize, + const Type *MemOpType, bool isStore, + AllocaInfo &Info) { + // Check if this is a load/store of the entire alloca. + if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { + bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); + // This is safe for MemIntrinsics (where MemOpType is 0), integer types + // (which are essentially the same as the MemIntrinsics, especially with + // regard to copying padding between elements), or references using the + // aggregate type of the alloca. + if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) { + if (!UsesAggregateType) { + if (isStore) + Info.isMemCpyDst = true; + else + Info.isMemCpySrc = true; + } + return; } } - - // If there are any non-simple uses of this getelementptr, make sure to reject - // them. - return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); + // Check if the offset/size correspond to a component within the alloca type. + const Type *T = AI->getAllocatedType(); + if (TypeHasComponent(T, Offset, MemSize)) + return; + + return MarkUnsafe(Info); } -/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory -/// intrinsic can be promoted by SROA. At this point, we know that the operand -/// of the memintrinsic is a pointer to the beginning of the allocation. -void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI, - unsigned OpNo, AllocaInfo &Info) { - // If not constant length, give up. - ConstantInt *Length = dyn_cast(MI->getLength()); - if (!Length) return MarkUnsafe(Info); - - // If not the whole aggregate, give up. - if (Length->getZExtValue() != - TD->getTypePaddedSize(AI->getType()->getElementType())) - return MarkUnsafe(Info); - - // We only know about memcpy/memset/memmove. 
- if (!isa(MI)) - return MarkUnsafe(Info); - - // Otherwise, we can transform it. Determine whether this is a memcpy/set - // into or out of the aggregate. - if (OpNo == 1) - Info.isMemCpyDst = true; - else { - assert(OpNo == 2); - Info.isMemCpySrc = true; +/// TypeHasComponent - Return true if T has a component type with the +/// specified offset and size. If Size is zero, do not check the size. +bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { + const Type *EltTy; + uint64_t EltSize; + if (const StructType *ST = dyn_cast(T)) { + const StructLayout *Layout = TD->getStructLayout(ST); + unsigned EltIdx = Layout->getElementContainingOffset(Offset); + EltTy = ST->getContainedType(EltIdx); + EltSize = TD->getTypeAllocSize(EltTy); + Offset -= Layout->getElementOffset(EltIdx); + } else if (const ArrayType *AT = dyn_cast(T)) { + EltTy = AT->getElementType(); + EltSize = TD->getTypeAllocSize(EltTy); + if (Offset >= AT->getNumElements() * EltSize) + return false; + Offset %= EltSize; + } else { + return false; } + if (Offset == 0 && (Size == 0 || EltSize == Size)) + return true; + // Check if the component spans multiple elements. + if (Offset + Size > EltSize) + return false; + return TypeHasComponent(EltTy, Offset, Size); } -/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast -/// are -void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI, - AllocaInfo &Info) { - for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); - UI != E; ++UI) { - if (BitCastInst *BCU = dyn_cast(UI)) { - isSafeUseOfBitCastedAllocation(BCU, AI, Info); - } else if (MemIntrinsic *MI = dyn_cast(UI)) { - isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); - } else if (StoreInst *SI = dyn_cast(UI)) { - if (SI->isVolatile()) - return MarkUnsafe(Info); - - // If storing the entire alloca in one chunk through a bitcasted pointer - // to integer, we can transform it. This happens (for example) when you - // cast a {i32,i32}* to i64* and store through it. This is similar to the - // memcpy case and occurs in various "byval" cases and emulated memcpys. - if (isa(SI->getOperand(0)->getType()) && - TD->getTypePaddedSize(SI->getOperand(0)->getType()) == - TD->getTypePaddedSize(AI->getType()->getElementType())) { - Info.isMemCpyDst = true; - continue; - } - return MarkUnsafe(Info); - } else if (LoadInst *LI = dyn_cast(UI)) { - if (LI->isVolatile()) - return MarkUnsafe(Info); - - // If loading the entire alloca in one chunk through a bitcasted pointer - // to integer, we can transform it. This happens (for example) when you - // cast a {i32,i32}* to i64* and load through it. This is similar to the - // memcpy case and occurs in various "byval" cases and emulated memcpys. - if (isa(LI->getType()) && - TD->getTypePaddedSize(LI->getType()) == - TD->getTypePaddedSize(AI->getType()->getElementType())) { - Info.isMemCpySrc = true; - continue; +/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite +/// the instruction I, which references it, to use the separate elements. +/// Offset indicates the position within AI that is referenced by this +/// instruction. 
+void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts) { + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { + Instruction *User = cast(*UI); + + if (BitCastInst *BC = dyn_cast(User)) { + RewriteBitCast(BC, AI, Offset, NewElts); + } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { + RewriteGEP(GEPI, AI, Offset, NewElts); + } else if (MemIntrinsic *MI = dyn_cast(User)) { + ConstantInt *Length = dyn_cast(MI->getLength()); + uint64_t MemSize = Length->getZExtValue(); + if (Offset == 0 && + MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) + RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); + // Otherwise the intrinsic can only touch a single element and the + // address operand will be updated, so nothing else needs to be done. + } else if (LoadInst *LI = dyn_cast(User)) { + const Type *LIType = LI->getType(); + if (LIType == AI->getAllocatedType()) { + // Replace: + // %res = load { i32, i32 }* %alloc + // with: + // %load.0 = load i32* %alloc.0 + // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 + // %load.1 = load i32* %alloc.1 + // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 + // (Also works for arrays instead of structs) + Value *Insert = UndefValue::get(LIType); + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + Value *Load = new LoadInst(NewElts[i], "load", LI); + Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); + } + LI->replaceAllUsesWith(Insert); + DeadInsts.push_back(LI); + } else if (LIType->isIntegerTy() && + TD->getTypeAllocSize(LIType) == + TD->getTypeAllocSize(AI->getAllocatedType())) { + // If this is a load of the entire alloca to an integer, rewrite it. + RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); } - return MarkUnsafe(Info); - } else if (isa(UI)) { - // If one user is DbgInfoIntrinsic then check if all users are - // DbgInfoIntrinsics. - if (OnlyUsedByDbgInfoIntrinsics(BC)) { - Info.needsCleanup = true; - return; + } else if (StoreInst *SI = dyn_cast(User)) { + Value *Val = SI->getOperand(0); + const Type *SIType = Val->getType(); + if (SIType == AI->getAllocatedType()) { + // Replace: + // store { i32, i32 } %val, { i32, i32 }* %alloc + // with: + // %val.0 = extractvalue { i32, i32 } %val, 0 + // store i32 %val.0, i32* %alloc.0 + // %val.1 = extractvalue { i32, i32 } %val, 1 + // store i32 %val.1, i32* %alloc.1 + // (Also works for arrays instead of structs) + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); + new StoreInst(Extract, NewElts[i], SI); + } + DeadInsts.push_back(SI); + } else if (SIType->isIntegerTy() && + TD->getTypeAllocSize(SIType) == + TD->getTypeAllocSize(AI->getAllocatedType())) { + // If this is a store of the entire alloca from an integer, rewrite it. + RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); } - else - MarkUnsafe(Info); - } - else { - return MarkUnsafe(Info); } - if (Info.isUnsafe) return; } } -/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes -/// to its first element. Transform users of the cast to use the new values -/// instead. 
-void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, - SmallVector &NewElts) { - Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); - while (UI != UE) { - Instruction *User = cast(*UI++); - if (BitCastInst *BCU = dyn_cast(User)) { - RewriteBitCastUserOfAlloca(BCU, AI, NewElts); - if (BCU->use_empty()) BCU->eraseFromParent(); - continue; - } +/// RewriteBitCast - Update a bitcast reference to the alloca being replaced +/// and recursively continue updating all of its uses. +void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts) { + RewriteForScalarRepl(BC, AI, Offset, NewElts); + if (BC->getOperand(0) != AI) + return; - if (MemIntrinsic *MI = dyn_cast(User)) { - // This must be memcpy/memmove/memset of the entire aggregate. - // Split into one per element. - RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); - continue; - } - - if (StoreInst *SI = dyn_cast(User)) { - // If this is a store of the entire alloca from an integer, rewrite it. - RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); - continue; - } + // The bitcast references the original alloca. Replace its uses with + // references to the first new element alloca. + Instruction *Val = NewElts[0]; + if (Val->getType() != BC->getDestTy()) { + Val = new BitCastInst(Val, BC->getDestTy(), "", BC); + Val->takeName(BC); + } + BC->replaceAllUsesWith(Val); + DeadInsts.push_back(BC); +} - if (LoadInst *LI = dyn_cast(User)) { - // If this is a load of the entire alloca to an integer, rewrite it. - RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); - continue; - } - - // Otherwise it must be some other user of a gep of the first pointer. Just - // leave these alone. - continue; +/// FindElementAndOffset - Return the index of the element containing Offset +/// within the specified type, which must be either a struct or an array. +/// Sets T to the type of the element and Offset to the offset within that +/// element. IdxTy is set to the type of the index result to be used in a +/// GEP instruction. +uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset, + const Type *&IdxTy) { + uint64_t Idx = 0; + if (const StructType *ST = dyn_cast(T)) { + const StructLayout *Layout = TD->getStructLayout(ST); + Idx = Layout->getElementContainingOffset(Offset); + T = ST->getContainedType(Idx); + Offset -= Layout->getElementOffset(Idx); + IdxTy = Type::getInt32Ty(T->getContext()); + return Idx; } + const ArrayType *AT = cast(T); + T = AT->getElementType(); + uint64_t EltSize = TD->getTypeAllocSize(T); + Idx = Offset / EltSize; + Offset -= Idx * EltSize; + IdxTy = Type::getInt64Ty(T->getContext()); + return Idx; +} + +/// RewriteGEP - Check if this GEP instruction moves the pointer across +/// elements of the alloca that are being split apart, and if so, rewrite +/// the GEP to be relative to the new element. +void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts) { + uint64_t OldOffset = Offset; + SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), + &Indices[0], Indices.size()); + + RewriteForScalarRepl(GEPI, AI, Offset, NewElts); + + const Type *T = AI->getAllocatedType(); + const Type *IdxTy; + uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy); + if (GEPI->getOperand(0) == AI) + OldIdx = ~0ULL; // Force the GEP to be rewritten. 
+ + T = AI->getAllocatedType(); + uint64_t EltOffset = Offset; + uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy); + + // If this GEP does not move the pointer across elements of the alloca + // being split, then it does not needs to be rewritten. + if (Idx == OldIdx) + return; + + const Type *i32Ty = Type::getInt32Ty(AI->getContext()); + SmallVector NewArgs; + NewArgs.push_back(Constant::getNullValue(i32Ty)); + while (EltOffset != 0) { + uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy); + NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx)); + } + Instruction *Val = NewElts[Idx]; + if (NewArgs.size() > 1) { + Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), + NewArgs.end(), "", GEPI); + Val->takeName(GEPI); + } + if (Val->getType() != GEPI->getType()) + Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI); + GEPI->replaceAllUsesWith(Val); + DeadInsts.push_back(GEPI); } /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. /// Rewrite it to copy or set the elements of the scalarized memory. -void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, - AllocationInst *AI, +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, + AllocaInst *AI, SmallVector &NewElts) { - // If this is a memcpy/memmove, construct the other pointer as the // appropriate type. The "Other" pointer is the pointer that goes to memory // that doesn't have anything to do with the alloca that we are promoting. For // memset, this Value* stays null. Value *OtherPtr = 0; + LLVMContext &Context = MI->getContext(); unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy - if (BCInst == MTI->getRawDest()) + if (Inst == MTI->getRawDest()) OtherPtr = MTI->getRawSource(); else { - assert(BCInst == MTI->getRawSource()); + assert(Inst == MTI->getRawSource()); OtherPtr = MTI->getRawDest(); } } - + // If there is an other pointer, we want to convert it to the same pointer // type as AI has, so we can GEP through it safely. if (OtherPtr) { - // It is likely that OtherPtr is a bitcast, if so, remove it. - if (BitCastInst *BC = dyn_cast(OtherPtr)) - OtherPtr = BC->getOperand(0); - // All zero GEPs are effectively bitcasts. - if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) - if (GEP->hasAllZeroIndices()) - OtherPtr = GEP->getOperand(0); + + // Remove bitcasts and all-zero GEPs from OtherPtr. This is an + // optimization, but it's also required to detect the corner case where + // both pointer operands are referencing the same memory, and where + // OtherPtr may be a bitcast or GEP that currently being rewritten. (This + // function is only called for mem intrinsics that access the whole + // aggregate, so non-zero GEPs are not an issue here.) + while (1) { + if (BitCastInst *BC = dyn_cast(OtherPtr)) { + OtherPtr = BC->getOperand(0); + continue; + } + if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) { + // All zero GEPs are effectively bitcasts. + if (GEP->hasAllZeroIndices()) { + OtherPtr = GEP->getOperand(0); + continue; + } + } + break; + } + // Copying the alloca to itself is a no-op: just delete it. + if (OtherPtr == AI || OtherPtr == NewElts[0]) { + // This code will run twice for a no-op memcpy -- once for each operand. + // Put only one reference to MI on the DeadInsts list. 
+ for (SmallVector::const_iterator I = DeadInsts.begin(), + E = DeadInsts.end(); I != E; ++I) + if (*I == MI) return; + DeadInsts.push_back(MI); + return; + } if (ConstantExpr *BCE = dyn_cast(OtherPtr)) if (BCE->getOpcode() == Instruction::BitCast) @@ -758,11 +756,11 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, } // Process each element of the aggregate. - Value *TheFn = MI->getOperand(0); + Value *TheFn = MI->getCalledValue(); const Type *BytePtrTy = MI->getRawDest()->getType(); - bool SROADest = MI->getRawDest() == BCInst; + bool SROADest = MI->getRawDest() == Inst; - Constant *Zero = Constant::getNullValue(Type::Int32Ty); + Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // If this is a memcpy/memmove, emit a GEP of the other element address. @@ -770,10 +768,11 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, unsigned OtherEltAlign = MemAlignment; if (OtherPtr) { - Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) }; - OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, - OtherPtr->getNameStr()+"."+utostr(i), - MI); + Value *Idx[2] = { Zero, + ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; + OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, + OtherPtr->getName()+"."+Twine(i), + MI); uint64_t EltOffset; const PointerType *OtherPtrTy = cast(OtherPtr->getType()); if (const StructType *ST = @@ -782,7 +781,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, } else { const Type *EltTy = cast(OtherPtr->getType())->getElementType(); - EltOffset = TD->getTypePaddedSize(EltTy)*i; + EltOffset = TD->getTypeAllocSize(EltTy)*i; } // The alignment of the other pointer is the guaranteed alignment of the @@ -815,15 +814,13 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, // If the stored element is zero (common case), just store a null // constant. Constant *StoreVal; - if (ConstantInt *CI = dyn_cast(MI->getOperand(2))) { + if (ConstantInt *CI = dyn_cast(MI->getOperand(1))) { if (CI->isZero()) { StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { // If EltTy is a vector type, get the element type. - const Type *ValTy = EltTy; - if (const VectorType *VTy = dyn_cast(ValTy)) - ValTy = VTy->getElementType(); - + const Type *ValTy = EltTy->getScalarType(); + // Construct an integer with the right value. unsigned EltSize = TD->getTypeSizeInBits(ValTy); APInt OneVal(EltSize, CI->getZExtValue()); @@ -835,10 +832,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, } // Convert the integer value to the appropriate type. - StoreVal = ConstantInt::get(TotalVal); - if (isa(ValTy)) + StoreVal = ConstantInt::get(Context, TotalVal); + if (ValTy->isPointerTy()) StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); - else if (ValTy->isFloatingPoint()) + else if (ValTy->isFloatingPointTy()) StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); assert(StoreVal->getType() == ValTy && "Type mismatch!"); @@ -858,57 +855,76 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, // Cast the element pointer to BytePtrTy. if (EltPtr->getType() != BytePtrTy) - EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI); + EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI); // Cast the other pointer (if we have one) to BytePtrTy. 
- if (OtherElt && OtherElt->getType() != BytePtrTy) - OtherElt = new BitCastInst(OtherElt, BytePtrTy,OtherElt->getNameStr(), - MI); + if (OtherElt && OtherElt->getType() != BytePtrTy) { + // Preserve address space of OtherElt + const PointerType* OtherPTy = cast(OtherElt->getType()); + const PointerType* PTy = cast(BytePtrTy); + if (OtherPTy->getElementType() != PTy->getElementType()) { + Type *NewOtherPTy = PointerType::get(PTy->getElementType(), + OtherPTy->getAddressSpace()); + OtherElt = new BitCastInst(OtherElt, NewOtherPTy, + OtherElt->getNameStr(), MI); + } + } - unsigned EltSize = TD->getTypePaddedSize(EltTy); + unsigned EltSize = TD->getTypeAllocSize(EltTy); // Finally, insert the meminst for this element. if (isa(MI)) { Value *Ops[] = { SROADest ? EltPtr : OtherElt, // Dest ptr SROADest ? OtherElt : EltPtr, // Src ptr - ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size - ConstantInt::get(Type::Int32Ty, OtherEltAlign) // Align + ConstantInt::get(MI->getOperand(2)->getType(), EltSize), // Size + // Align + ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign), + MI->getVolatileCst() }; - CallInst::Create(TheFn, Ops, Ops + 4, "", MI); + // In case we fold the address space overloaded memcpy of A to B + // with memcpy of B to C, change the function to be a memcpy of A to C. + const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(), + Ops[2]->getType() }; + Module *M = MI->getParent()->getParent()->getParent(); + TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3); + CallInst::Create(TheFn, Ops, Ops + 5, "", MI); } else { assert(isa(MI)); Value *Ops[] = { - EltPtr, MI->getOperand(2), // Dest, Value, - ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size - Zero // Align + EltPtr, MI->getOperand(1), // Dest, Value, + ConstantInt::get(MI->getOperand(2)->getType(), EltSize), // Size + Zero, // Align + ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile }; - CallInst::Create(TheFn, Ops, Ops + 4, "", MI); + const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() }; + Module *M = MI->getParent()->getParent()->getParent(); + TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2); + CallInst::Create(TheFn, Ops, Ops + 5, "", MI); } } - MI->eraseFromParent(); + DeadInsts.push_back(MI); } -/// RewriteStoreUserOfWholeAlloca - We found an store of an integer that +/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that /// overwrites the entire allocation. Extract out the pieces of the stored /// integer and store them individually. -void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, - AllocationInst *AI, +void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, SmallVector &NewElts){ // Extract each element out of the integer according to its structure offset // and store the element value to the individual alloca. Value *SrcVal = SI->getOperand(0); - const Type *AllocaEltTy = AI->getType()->getElementType(); - uint64_t AllocaSizeBits = TD->getTypePaddedSizeInBits(AllocaEltTy); + const Type *AllocaEltTy = AI->getAllocatedType(); + uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); - // If this isn't a store of an integer to the whole alloca, it may be a store - // to the first element. Just ignore the store in this case and normal SROA - // will handle it. 
- if (!isa(SrcVal->getType()) || - TD->getTypePaddedSizeInBits(SrcVal->getType()) != AllocaSizeBits) - return; + // Handle tail padding by extending the operand + if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) + SrcVal = new ZExtInst(SrcVal, + IntegerType::get(SI->getContext(), AllocaSizeBits), + "", SI); - DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI; + DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI + << '\n'); // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. @@ -921,7 +937,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, uint64_t Shift = Layout->getElementOffsetInBits(i); if (TD->isBigEndian()) - Shift = AllocaSizeBits-Shift-TD->getTypePaddedSizeInBits(FieldTy); + Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy); Value *EltVal = SrcVal; if (Shift) { @@ -937,17 +953,19 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, if (FieldSizeBits == 0) continue; if (FieldSizeBits != AllocaSizeBits) - EltVal = new TruncInst(EltVal, IntegerType::get(FieldSizeBits), "", SI); + EltVal = new TruncInst(EltVal, + IntegerType::get(SI->getContext(), FieldSizeBits), + "", SI); Value *DestField = NewElts[i]; if (EltVal->getType() == FieldTy) { // Storing to an integer field of this size, just do it. - } else if (FieldTy->isFloatingPoint() || isa(FieldTy)) { + } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) { // Bitcast to the right element type (for fp/vector values). EltVal = new BitCastInst(EltVal, FieldTy, "", SI); } else { // Otherwise, bitcast the dest pointer (for aggregates). DestField = new BitCastInst(DestField, - PointerType::getUnqual(EltVal->getType()), + PointerType::getUnqual(EltVal->getType()), "", SI); } new StoreInst(EltVal, DestField, SI); @@ -956,7 +974,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, } else { const ArrayType *ATy = cast(AllocaEltTy); const Type *ArrayEltTy = ATy->getElementType(); - uint64_t ElementOffset = TD->getTypePaddedSizeInBits(ArrayEltTy); + uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); uint64_t Shift; @@ -979,17 +997,20 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, // Truncate down to an integer of the right size. if (ElementSizeBits != AllocaSizeBits) - EltVal = new TruncInst(EltVal, IntegerType::get(ElementSizeBits),"",SI); + EltVal = new TruncInst(EltVal, + IntegerType::get(SI->getContext(), + ElementSizeBits),"",SI); Value *DestField = NewElts[i]; if (EltVal->getType() == ArrayEltTy) { // Storing to an integer field of this size, just do it. - } else if (ArrayEltTy->isFloatingPoint() || isa(ArrayEltTy)) { + } else if (ArrayEltTy->isFloatingPointTy() || + ArrayEltTy->isVectorTy()) { // Bitcast to the right element type (for fp/vector values). EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI); } else { // Otherwise, bitcast the dest pointer (for aggregates). DestField = new BitCastInst(DestField, - PointerType::getUnqual(EltVal->getType()), + PointerType::getUnqual(EltVal->getType()), "", SI); } new StoreInst(EltVal, DestField, SI); @@ -1001,26 +1022,20 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, } } - SI->eraseFromParent(); + DeadInsts.push_back(SI); } -/// RewriteLoadUserOfWholeAlloca - We found an load of the entire allocation to +/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to /// an integer. 
Load the individual pieces to form the aggregate value. -void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, +void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector &NewElts) { // Extract each element out of the NewElts according to its structure offset // and form the result value. - const Type *AllocaEltTy = AI->getType()->getElementType(); - uint64_t AllocaSizeBits = TD->getTypePaddedSizeInBits(AllocaEltTy); + const Type *AllocaEltTy = AI->getAllocatedType(); + uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); - // If this isn't a load of the whole alloca to an integer, it may be a load - // of the first element. Just ignore the load in this case and normal SROA - // will handle it. - if (!isa(LI->getType()) || - TD->getTypePaddedSizeInBits(LI->getType()) != AllocaSizeBits) - return; - - DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI; + DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI + << '\n'); // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. @@ -1030,10 +1045,11 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, Layout = TD->getStructLayout(EltSTy); } else { const Type *ArrayEltTy = cast(AllocaEltTy)->getElementType(); - ArrayEltBitOffset = TD->getTypePaddedSizeInBits(ArrayEltTy); + ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); } - - Value *ResultVal = Constant::getNullValue(LI->getType()); + + Value *ResultVal = + Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits)); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Load the value from the alloca. If the NewElt is an aggregate, cast @@ -1046,10 +1062,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, // Ignore zero sized fields like {}, they obviously contain no data. if (FieldSizeBits == 0) continue; - const IntegerType *FieldIntTy = IntegerType::get(FieldSizeBits); - if (!isa(FieldTy) && !FieldTy->isFloatingPoint() && - !isa(FieldTy)) - SrcField = new BitCastInst(SrcField, PointerType::getUnqual(FieldIntTy), + const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), + FieldSizeBits); + if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() && + !FieldTy->isVectorTy()) + SrcField = new BitCastInst(SrcField, + PointerType::getUnqual(FieldIntTy), "", LI); SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); @@ -1080,12 +1098,15 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); } - + + // Handle tail padding by truncating the result + if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits) + ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); + LI->replaceAllUsesWith(ResultVal); - LI->eraseFromParent(); + DeadInsts.push_back(LI); } - /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. 
static bool HasPadding(const Type *Ty, const TargetData &TD) { @@ -1124,25 +1145,21 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) { } else if (const VectorType *VTy = dyn_cast(Ty)) { return HasPadding(VTy->getElementType(), TD); } - return TD.getTypeSizeInBits(Ty) != TD.getTypePaddedSizeInBits(Ty); + return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); } /// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of /// an aggregate can be broken down into elements. Return 0 if not, 3 if safe, /// or 1 if safe after canonicalization has been performed. -/// -int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) { +bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // Loop over the use list of the alloca. We can only transform it if all of // the users are safe to transform. AllocaInfo Info; - for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); - I != E; ++I) { - isSafeUseOfAllocation(cast(*I), AI, Info); - if (Info.isUnsafe) { - DOUT << "Cannot transform: " << *AI << " due to user: " << **I; - return 0; - } + isSafeForScalarRepl(AI, AI, 0, Info); + if (Info.isUnsafe) { + DEBUG(dbgs() << "Cannot transform: " << *AI << '\n'); + return false; } // Okay, we know all the users are promotable. If the aggregate is a memcpy @@ -1151,88 +1168,10 @@ int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) { // types, but may actually be used. In these cases, we refuse to promote the // struct. if (Info.isMemCpySrc && Info.isMemCpyDst && - HasPadding(AI->getType()->getElementType(), *TD)) - return 0; - - // If we require cleanup, return 1, otherwise return 3. - return Info.needsCleanup ? 1 : 3; -} - -/// CleanupGEP - GEP is used by an Alloca, which can be prompted after the GEP -/// is canonicalized here. -void SROA::CleanupGEP(GetElementPtrInst *GEPI) { - gep_type_iterator I = gep_type_begin(GEPI); - ++I; - - const ArrayType *AT = dyn_cast(*I); - if (!AT) - return; - - uint64_t NumElements = AT->getNumElements(); - - if (isa(I.getOperand())) - return; - - if (NumElements == 1) { - GEPI->setOperand(2, Constant::getNullValue(Type::Int32Ty)); - return; - } - - assert(NumElements == 2 && "Unhandled case!"); - // All users of the GEP must be loads. At each use of the GEP, insert - // two loads of the appropriate indexed GEP and select between them. - Value *IsOne = new ICmpInst(ICmpInst::ICMP_NE, I.getOperand(), - Constant::getNullValue(I.getOperand()->getType()), - "isone", GEPI); - // Insert the new GEP instructions, which are properly indexed. - SmallVector Indices(GEPI->op_begin()+1, GEPI->op_end()); - Indices[1] = Constant::getNullValue(Type::Int32Ty); - Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".0", GEPI); - Indices[1] = ConstantInt::get(Type::Int32Ty, 1); - Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".1", GEPI); - // Replace all loads of the variable index GEP with loads from both - // indexes and a select. 
- while (!GEPI->use_empty()) { - LoadInst *LI = cast(GEPI->use_back()); - Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI); - Value *One = new LoadInst(OneIdx , LI->getName()+".1", LI); - Value *R = SelectInst::Create(IsOne, One, Zero, LI->getName(), LI); - LI->replaceAllUsesWith(R); - LI->eraseFromParent(); - } - GEPI->eraseFromParent(); -} - + HasPadding(AI->getAllocatedType(), *TD)) + return false; -/// CleanupAllocaUsers - If SROA reported that it can promote the specified -/// allocation, but only if cleaned up, perform the cleanups required. -void SROA::CleanupAllocaUsers(AllocationInst *AI) { - // At this point, we know that the end result will be SROA'd and promoted, so - // we can insert ugly code if required so long as sroa+mem2reg will clean it - // up. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); - UI != E; ) { - User *U = *UI++; - if (GetElementPtrInst *GEPI = dyn_cast(U)) - CleanupGEP(GEPI); - else if (Instruction *I = dyn_cast(U)) { - SmallVector DbgInUses; - if (OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { - // Safe to remove debug info uses. - while (!DbgInUses.empty()) { - DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back(); - DI->eraseFromParent(); - } - I->eraseFromParent(); - } - } - } + return true; } /// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at @@ -1247,9 +1186,10 @@ void SROA::CleanupAllocaUsers(AllocationInst *AI) { /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, - unsigned AllocaSize, const TargetData &TD) { + unsigned AllocaSize, const TargetData &TD, + LLVMContext &Context) { // If this could be contributing to a vector, analyze it. - if (VecTy != Type::VoidTy) { // either null or a vector type. + if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type. // If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. @@ -1262,8 +1202,8 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, VecTy = VInTy; return; } - } else if (In == Type::FloatTy || In == Type::DoubleTy || - (isa(In) && In->getPrimitiveSizeInBits() >= 8 && + } else if (In->isFloatTy() || In->isDoubleTy() || + (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { // If we're accessing something that could be an element of a vector, see // if the implied vector agrees with what we already have and if Offset is @@ -1283,11 +1223,11 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, // Otherwise, we have a case that we can't handle with an optimized vector // form. We can still turn this into a large integer. - VecTy = Type::VoidTy; + VecTy = Type::getVoidTy(Context); } /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all -/// its accesses to use a to single vector type, return true, and set VecTy to +/// its accesses to a single vector type, return true and set VecTy to /// the new type. If we could convert the alloca into a single promotable /// integer, return true but set VecTy to VoidTy. Further, if the use is not a /// completely trivial use that mem2reg could promote, set IsNotTrivial. 
Offset @@ -1295,7 +1235,6 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, /// /// If we see at least one access to the value that is as a vector type, set the /// SawVec flag. -/// bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, bool &SawVec, uint64_t Offset, unsigned AllocaSize) { @@ -1306,16 +1245,18 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, // Don't break volatile loads. if (LI->isVolatile()) return false; - MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD); - SawVec |= isa(LI->getType()); + MergeInType(LI->getType(), Offset, VecTy, + AllocaSize, *TD, V->getContext()); + SawVec |= LI->getType()->isVectorTy(); continue; } if (StoreInst *SI = dyn_cast(User)) { // Storing the pointer, not into the value? if (SI->getOperand(0) == V || SI->isVolatile()) return 0; - MergeInType(SI->getOperand(0)->getType(), Offset, VecTy, AllocaSize, *TD); - SawVec |= isa(SI->getOperand(0)->getType()); + MergeInType(SI->getOperand(0)->getType(), Offset, + VecTy, AllocaSize, *TD, V->getContext()); + SawVec |= SI->getOperand(0)->getType()->isVectorTy(); continue; } @@ -1334,7 +1275,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, // Compute the offset that this GEP adds to the pointer. SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), &Indices[0], Indices.size()); // See if all uses can be converted. if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, @@ -1350,8 +1291,6 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, // Store of constant value and constant size. if (isa(MSI->getValue()) && isa(MSI->getLength())) { - // FIXME (!): Why reset VecTy? - VecTy = Type::VoidTy; IsNotTrivial = true; continue; } @@ -1367,10 +1306,6 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, } } - // Ignore dbg intrinsic. - if (isa(User)) - continue; - // Otherwise, we cannot handle this! return false; } @@ -1378,7 +1313,6 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, return true; } - /// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca /// directly. This happens when we are converting an "integer union" to a /// single integer scalar, or when we are converting a "vector union" to a @@ -1399,7 +1333,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { if (GetElementPtrInst *GEP = dyn_cast(User)) { // Compute the offset that this GEP adds to the pointer. 
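Illustrative aside (not part of the patch): the recursive walk above analyzes each user at the running offset plus whatever constant offset an intervening GEP contributes, so nested GEPs simply add up. A trivial standalone sketch of that bookkeeping, using a made-up stand-in type:

#include <cstdint>
#include <vector>

// ToyGEP is a hypothetical stand-in for a constant-index GEP; only the
// additive-offset idea is being illustrated here.
struct ToyGEP { uint64_t ByteOffset; };

static uint64_t totalOffset(const std::vector<ToyGEP> &Chain) {
  uint64_t Offset = 0;
  for (const ToyGEP &G : Chain)
    Offset += G.ByteOffset;          // offsets through a chain of GEPs add up
  return Offset;
}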
SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), &Indices[0], Indices.size()); ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); GEP->eraseFromParent(); @@ -1420,11 +1354,16 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { if (StoreInst *SI = dyn_cast(User)) { assert(SI->getOperand(0) != Ptr && "Consistency error!"); - Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str()); + Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, Builder); Builder.CreateStore(New, NewAI); SI->eraseFromParent(); + + // If the load we just inserted is now dead, then the inserted store + // overwrote the entire thing. + if (Old->use_empty()) + Old->eraseFromParent(); continue; } @@ -1433,20 +1372,28 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { if (MemSetInst *MSI = dyn_cast(User)) { assert(MSI->getRawDest() == Ptr && "Consistency error!"); unsigned NumBytes = cast(MSI->getLength())->getZExtValue(); - unsigned Val = cast(MSI->getValue())->getZExtValue(); - - // Compute the value replicated the right number of times. - APInt APVal(NumBytes*8, Val); + if (NumBytes != 0) { + unsigned Val = cast(MSI->getValue())->getZExtValue(); + + // Compute the value replicated the right number of times. + APInt APVal(NumBytes*8, Val); - // Splat the value if non-zero. - if (Val) - for (unsigned i = 1; i != NumBytes; ++i) - APVal |= APVal << 8; - - Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str()); - Value *New = ConvertScalar_InsertValue(ConstantInt::get(APVal), Old, - Offset, Builder); - Builder.CreateStore(New, NewAI); + // Splat the value if non-zero. + if (Val) + for (unsigned i = 1; i != NumBytes; ++i) + APVal |= APVal << 8; + + Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); + Value *New = ConvertScalar_InsertValue( + ConstantInt::get(User->getContext(), APVal), + Old, Offset, Builder); + Builder.CreateStore(New, NewAI); + + // If the load we just inserted is now dead, then the memset overwrote + // the entire thing. + if (Old->use_empty()) + Old->eraseFromParent(); + } MSI->eraseFromParent(); continue; } @@ -1459,9 +1406,9 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { // If the source and destination are both to the same alloca, then this is // a noop copy-to-self, just delete it. Otherwise, emit a load and store // as appropriate. - AllocaInst *OrigAI = cast(Ptr->getUnderlyingObject()); + AllocaInst *OrigAI = cast(Ptr->getUnderlyingObject(0)); - if (MTI->getSource()->getUnderlyingObject() != OrigAI) { + if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) { // Dest must be OrigAI, change this to be a load from the original // pointer (bitcasted), then a store to our new alloca. 
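Illustrative aside (not part of the patch): the memset handling above replicates the byte value across the store width with repeated shift-and-OR before inserting it into the scalar. A standalone sketch of that splat loop, using uint64_t in place of the arbitrary-width APInt (so it only covers NumBytes <= 8):

#include <cassert>
#include <cstdint>

// Replicate a byte value across NumBytes bytes by OR-ing in shifted copies,
// mirroring the APVal |= APVal << 8 loop in the patch.
static uint64_t splatByte(uint8_t Val, unsigned NumBytes) {
  uint64_t Splat = Val;
  if (Val)                                   // zero needs no splatting
    for (unsigned i = 1; i != NumBytes; ++i)
      Splat |= Splat << 8;                   // append one more copy of the byte
  return Splat;
}

int main() {
  assert(splatByte(0xAB, 4) == 0xABABABABull);
  assert(splatByte(0x00, 8) == 0);
  return 0;
}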
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); @@ -1471,7 +1418,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); SrcVal->setAlignment(MTI->getAlignment()); Builder.CreateStore(SrcVal, NewAI); - } else if (MTI->getDest()->getUnderlyingObject() != OrigAI) { + } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) { // Src must be OrigAI, change this to be a load from NewAI then a store // through the original dest pointer (bitcasted). assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); @@ -1483,20 +1430,12 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { } else { // Noop transfer. Src == Dst } - MTI->eraseFromParent(); continue; } - // If user is a dbg info intrinsic then it is safe to remove it. - if (isa(User)) { - User->eraseFromParent(); - continue; - } - - assert(0 && "Unsupported operation!"); - abort(); + llvm_unreachable("Unsupported operation!"); } } @@ -1519,20 +1458,19 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. if (const VectorType *VTy = dyn_cast(FromVal->getType())) { - if (isa(ToType)) + if (ToType->isVectorTy()) return Builder.CreateBitCast(FromVal, ToType, "tmp"); // Otherwise it must be an element access. unsigned Elt = 0; if (Offset) { - unsigned EltSize = TD->getTypePaddedSizeInBits(VTy->getElementType()); + unsigned EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType()); Elt = Offset/EltSize; assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); } // Return the element extracted out of it. - Value *V = Builder.CreateExtractElement(FromVal, - ConstantInt::get(Type::Int32Ty,Elt), - "tmp"); + Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( + Type::getInt32Ty(FromVal->getContext()), Elt), "tmp"); if (V->getType() != ToType) V = Builder.CreateBitCast(V, ToType, "tmp"); return V; @@ -1553,7 +1491,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, } if (const ArrayType *AT = dyn_cast(ToType)) { - uint64_t EltSize = TD->getTypePaddedSizeInBits(AT->getElementType()); + uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType()); Value *Res = UndefValue::get(AT); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), @@ -1583,23 +1521,29 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // We do this to support (f.e.) loads off the end of a structure where // only some bits are used. if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) - FromVal = Builder.CreateLShr(FromVal, ConstantInt::get(FromVal->getType(), + FromVal = Builder.CreateLShr(FromVal, + ConstantInt::get(FromVal->getType(), ShAmt), "tmp"); else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) - FromVal = Builder.CreateShl(FromVal, ConstantInt::get(FromVal->getType(), + FromVal = Builder.CreateShl(FromVal, + ConstantInt::get(FromVal->getType(), -ShAmt), "tmp"); // Finally, unconditionally truncate the integer to the right width. 
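Illustrative aside (not part of the patch): when the converted alloca is a vector, the extract path above turns a bit offset into a lane index and requires the offset to be an exact multiple of the element size. A standalone sketch with illustrative names:

#include <cassert>
#include <cstdint>

// Bit offset -> vector lane, valid only when the access starts on an
// element boundary (the "Invalid modulus" assertion in the patch).
static unsigned laneForOffset(uint64_t OffsetBits, uint64_t EltSizeBits) {
  assert(EltSizeBits != 0 && OffsetBits % EltSizeBits == 0 &&
         "access does not start on an element boundary");
  return unsigned(OffsetBits / EltSizeBits);
}

int main() {
  assert(laneForOffset(64, 32) == 2);  // e.g. the third i32 lane of a <4 x i32>
  return 0;
}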
unsigned LIBitWidth = TD->getTypeSizeInBits(ToType); if (LIBitWidth < NTy->getBitWidth()) - FromVal = Builder.CreateTrunc(FromVal, IntegerType::get(LIBitWidth), "tmp"); + FromVal = + Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), + LIBitWidth), "tmp"); else if (LIBitWidth > NTy->getBitWidth()) - FromVal = Builder.CreateZExt(FromVal, IntegerType::get(LIBitWidth), "tmp"); + FromVal = + Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), + LIBitWidth), "tmp"); // If the result is an integer, this is a trunc or bitcast. - if (isa(ToType)) { + if (ToType->isIntegerTy()) { // Should be done. - } else if (ToType->isFloatingPoint() || isa(ToType)) { + } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) { // Just do a bitcast, we know the sizes match up. FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); } else { @@ -1610,7 +1554,6 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, return FromVal; } - /// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer /// or vector value "Old" at the offset specified by Offset. /// @@ -1626,23 +1569,28 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, // Convert the stored type to the actual type, shift it left to insert // then 'or' into place. const Type *AllocaType = Old->getType(); + LLVMContext &Context = Old->getContext(); if (const VectorType *VTy = dyn_cast(AllocaType)) { - // If the result alloca is a vector type, this is either an element - // access or a bitcast to another vector type. - if (isa(SV->getType())) { - SV = Builder.CreateBitCast(SV, AllocaType, "tmp"); - } else { - // Must be an element insertion. - unsigned Elt = Offset/TD->getTypePaddedSizeInBits(VTy->getElementType()); - - if (SV->getType() != VTy->getElementType()) - SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); - - SV = Builder.CreateInsertElement(Old, SV, - ConstantInt::get(Type::Int32Ty, Elt), - "tmp"); - } + uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy); + uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType()); + + // Changing the whole vector with memset or with an access of a different + // vector type? + if (ValSize == VecSize) + return Builder.CreateBitCast(SV, AllocaType, "tmp"); + + uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType()); + + // Must be an element insertion. 
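Illustrative aside (not part of the patch): the integer extract path above shifts the wide value so the field of interest sits at bit 0 and then truncates to the destination width. A standalone sketch of the non-negative-shift (little-endian) case only, with uint64_t standing in for the arbitrary-width integers the pass really uses:

#include <cassert>
#include <cstdint>

// (Wide >> ShAmt) plays the role of CreateLShr; masking to FieldBits plays
// the role of CreateTrunc.
static uint64_t extractField(uint64_t Wide, unsigned ShAmtBits,
                             unsigned FieldBits) {
  uint64_t Shifted = Wide >> ShAmtBits;
  uint64_t Mask = FieldBits >= 64 ? ~0ull : ((1ull << FieldBits) - 1);
  return Shifted & Mask;
}

int main() {
  // Extract the second byte (bits 8..15) of 0x00BBAA.
  assert(extractField(0x00BBAAull, 8, 8) == 0xBB);
  return 0;
}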
+ unsigned Elt = Offset/EltSize; + + if (SV->getType() != VTy->getElementType()) + SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); + + SV = Builder.CreateInsertElement(Old, SV, + ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), + "tmp"); return SV; } @@ -1659,7 +1607,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, } if (const ArrayType *AT = dyn_cast(SV->getType())) { - uint64_t EltSize = TD->getTypePaddedSizeInBits(AT->getElementType()); + uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType()); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder); @@ -1673,10 +1621,11 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, unsigned DestWidth = TD->getTypeSizeInBits(AllocaType); unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType()); unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType); - if (SV->getType()->isFloatingPoint() || isa(SV->getType())) - SV = Builder.CreateBitCast(SV, IntegerType::get(SrcWidth), "tmp"); - else if (isa(SV->getType())) - SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(), "tmp"); + if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) + SV = Builder.CreateBitCast(SV, + IntegerType::get(SV->getContext(),SrcWidth), "tmp"); + else if (SV->getType()->isPointerTy()) + SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp"); // Zero extend or truncate the value if needed. if (SV->getType() != AllocaType) { @@ -1709,10 +1658,12 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, // only some bits in the structure are set. APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { - SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt), "tmp"); + SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), + ShAmt), "tmp"); Mask <<= ShAmt; } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { - SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt), "tmp"); + SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), + -ShAmt), "tmp"); Mask = Mask.lshr(-ShAmt); } @@ -1720,7 +1671,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, // in the new bits. if (SrcWidth != DestWidth) { assert(DestWidth > SrcWidth); - Old = Builder.CreateAnd(Old, ConstantInt::get(~Mask), "mask"); + Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask"); SV = Builder.CreateOr(Old, SV, "ins"); } return SV; @@ -1751,18 +1702,20 @@ static bool PointsToConstantGlobal(Value *V) { static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy, bool isOffset) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { - if (LoadInst *LI = dyn_cast(*UI)) + User *U = cast(*UI); + + if (LoadInst *LI = dyn_cast(U)) // Ignore non-volatile loads, they are always ok. if (!LI->isVolatile()) continue; - if (BitCastInst *BCI = dyn_cast(*UI)) { + if (BitCastInst *BCI = dyn_cast(U)) { // If uses of the bitcast are ok, we are ok. if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset)) return false; continue; } - if (GetElementPtrInst *GEP = dyn_cast(*UI)) { + if (GetElementPtrInst *GEP = dyn_cast(U)) { // If the GEP has all zero indices, it doesn't offset the pointer. If it // doesn't, it does. 
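Illustrative aside (not part of the patch): the integer insert path above is a read-modify-write: shift the new field into position, clear the destination bits with the inverted mask, and OR the field in. A standalone sketch of the non-negative-shift case, with uint64_t replacing the pass's APInt:

#include <cassert>
#include <cstdint>

// Mirrors the CreateShl / CreateAnd(~Mask) / CreateOr sequence in the patch.
static uint64_t insertField(uint64_t Old, uint64_t Field,
                            unsigned ShAmtBits, unsigned FieldBits) {
  uint64_t FieldMask = FieldBits >= 64 ? ~0ull : ((1ull << FieldBits) - 1);
  uint64_t Mask = FieldMask << ShAmtBits;      // bits the field will occupy
  uint64_t Shifted = (Field & FieldMask) << ShAmtBits;
  return (Old & ~Mask) | Shifted;              // clear old bits, OR in new ones
}

int main() {
  // Overwrite the second byte of 0x00BBAA with 0xCC, leaving the rest alone.
  assert(insertField(0x00BBAAull, 0xCC, 8, 8) == 0x00CCAAull);
  return 0;
}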
+      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
@@ -1773,7 +1726,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
     // If this isn't our memcpy/memmove, reject it as something we can't
     // handle.
-    if (!isa<MemTransferInst>(*UI))
+    if (!isa<MemTransferInst>(U))
       return false;
 
     // If we already have seen a copy, reject the second one.
@@ -1784,12 +1737,12 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
     if (isOffset) return false;
 
     // If the memintrinsic isn't using the alloca as the dest, reject it.
-    if (UI.getOperandNo() != 1) return false;
+    if (UI.getOperandNo() != 0) return false;
 
-    MemIntrinsic *MI = cast<MemIntrinsic>(*UI);
+    MemIntrinsic *MI = cast<MemIntrinsic>(U);
 
     // If the source of the memcpy/move is not a constant global, reject it.
-    if (!PointsToConstantGlobal(MI->getOperand(2)))
+    if (!PointsToConstantGlobal(MI->getOperand(1)))
       return false;
 
     // Otherwise, the transform is safe.  Remember the copy instruction.
@@ -1801,7 +1754,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
 /// modified by a copy from a constant global.  If we can prove this, we can
 /// replace any uses of the alloca with uses of the global directly.
-Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocationInst *AI) {
+Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
   Instruction *TheCopy = 0;
   if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
     return TheCopy;
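Illustrative aside (not part of the patch): a hypothetical source-level example of the pattern this routine looks for, namely a local aggregate whose only "store" is a single memcpy from a constant global. Once that is proven, reads of the local can be redirected to the global and both the alloca and the copy go away.

#include <cstring>

// Table and lookup are made-up names for illustration only.
static const int Table[4] = {1, 2, 3, 5};

int lookup(int Idx) {
  int Local[4];
  std::memcpy(Local, Table, sizeof(Local));  // the single copy-from-constant
  return Local[Idx & 3];                     // otherwise read-only uses
}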