X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FIPO%2FGlobalOpt.cpp;h=a77af549caa131579abb709f240966e921926781;hb=9ccaf53ada99c63737547c0235baeb8454b04e80;hp=49e9683a556f6ba86222f5e80dcf677e7ed437c5;hpb=b3d5a65d94ca017570fe86578423186c6a2f642e;p=oota-llvm.git diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 49e9683a556..a77af549caa 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -20,7 +20,6 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/ConstantFolding.h" @@ -60,7 +59,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { } static char ID; // Pass identification, replacement for typeid - GlobalOpt() : ModulePass(&ID) {} + GlobalOpt() : ModulePass(ID) {} bool runOnModule(Module &M); @@ -75,7 +74,8 @@ namespace { } char GlobalOpt::ID = 0; -static RegisterPass X("globalopt", "Global Variable Optimizer"); +INITIALIZE_PASS(GlobalOpt, "globalopt", + "Global Variable Optimizer", false, false); ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } @@ -120,7 +120,7 @@ struct GlobalStatus { /// null/false. When the first accessing function is noticed, it is recorded. /// When a second different accessing function is noticed, /// HasMultipleAccessingFunctions is set to true. - Function *AccessingFunction; + const Function *AccessingFunction; bool HasMultipleAccessingFunctions; /// HasNonInstructionUser - Set to true if this global has a user that is not @@ -141,11 +141,12 @@ struct GlobalStatus { // by constants itself. Note that constants cannot be cyclic, so this test is // pretty easy to implement recursively. // -static bool SafeToDestroyConstant(Constant *C) { +static bool SafeToDestroyConstant(const Constant *C) { if (isa(C)) return false; - for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) - if (Constant *CU = dyn_cast(*UI)) { + for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; + ++UI) + if (const Constant *CU = dyn_cast(*UI)) { if (!SafeToDestroyConstant(CU)) return false; } else return false; @@ -157,26 +158,26 @@ static bool SafeToDestroyConstant(Constant *C) { /// structure. If the global has its address taken, return true to indicate we /// can't do anything with it. /// -static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, - SmallPtrSet &PHIUsers) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (ConstantExpr *CE = dyn_cast(*UI)) { +static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, + SmallPtrSet &PHIUsers) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const User *U = *UI; + if (const ConstantExpr *CE = dyn_cast(U)) { GS.HasNonInstructionUser = true; - if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; - - } else if (Instruction *I = dyn_cast(*UI)) { + } else if (const Instruction *I = dyn_cast(U)) { if (!GS.HasMultipleAccessingFunctions) { - Function *F = I->getParent()->getParent(); + const Function *F = I->getParent()->getParent(); if (GS.AccessingFunction == 0) GS.AccessingFunction = F; else if (GS.AccessingFunction != F) GS.HasMultipleAccessingFunctions = true; } - if (LoadInst *LI = dyn_cast(I)) { + if (const LoadInst *LI = dyn_cast(I)) { GS.isLoaded = true; if (LI->isVolatile()) return true; // Don't hack on volatile loads. - } else if (StoreInst *SI = dyn_cast(I)) { + } else if (const StoreInst *SI = dyn_cast(I)) { // Don't allow a store OF the address, only stores TO the address. if (SI->getOperand(0) == V) return true; @@ -186,14 +187,14 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, // value, not an aggregate), keep more specific information about // stores. if (GS.StoredType != GlobalStatus::isStored) { - if (GlobalVariable *GV = dyn_cast(SI->getOperand(1))){ + if (const GlobalVariable *GV = dyn_cast( + SI->getOperand(1))) { Value *StoredVal = SI->getOperand(0); if (StoredVal == GV->getInitializer()) { if (GS.StoredType < GlobalStatus::isInitializerStored) GS.StoredType = GlobalStatus::isInitializerStored; } else if (isa(StoredVal) && cast(StoredVal)->getOperand(0) == GV) { - // G = G if (GS.StoredType < GlobalStatus::isInitializerStored) GS.StoredType = GlobalStatus::isInitializerStored; } else if (GS.StoredType < GlobalStatus::isStoredOnce) { @@ -213,25 +214,28 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, if (AnalyzeGlobal(I, GS, PHIUsers)) return true; } else if (isa(I)) { if (AnalyzeGlobal(I, GS, PHIUsers)) return true; - } else if (PHINode *PN = dyn_cast(I)) { + } else if (const PHINode *PN = dyn_cast(I)) { // PHI nodes we can check just like select or GEP instructions, but we // have to be careful about infinite recursion. if (PHIUsers.insert(PN)) // Not already visited. if (AnalyzeGlobal(I, GS, PHIUsers)) return true; GS.HasPHIUser = true; } else if (isa(I)) { + // Nothing to analyse. } else if (isa(I)) { - if (I->getOperand(1) == V) + const MemTransferInst *MTI = cast(I); + if (MTI->getArgOperand(0) == V) GS.StoredType = GlobalStatus::isStored; - if (I->getOperand(2) == V) + if (MTI->getArgOperand(1) == V) GS.isLoaded = true; } else if (isa(I)) { - assert(I->getOperand(1) == V && "Memset only takes one pointer!"); + assert(cast(I)->getArgOperand(0) == V && + "Memset only takes one pointer!"); GS.StoredType = GlobalStatus::isStored; } else { return true; // Any other non-load instruction might take address! } - } else if (Constant *C = dyn_cast(*UI)) { + } else if (const Constant *C = dyn_cast(U)) { GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. if (!SafeToDestroyConstant(C)) @@ -241,12 +245,12 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, // Otherwise must be some other user. return true; } + } return false; } -static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx, - LLVMContext &Context) { +static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { ConstantInt *CI = dyn_cast(Idx); if (!CI) return 0; unsigned IdxV = CI->getZExtValue(); @@ -282,8 +286,7 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx, /// users of the global, cleaning up the obvious ones. This is largely just a /// quick scan over the use list to clean up the easy and obvious cruft. This /// returns true if it made a change. -static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, - LLVMContext &Context) { +static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { bool Changed = false; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) { User *U = *UI++; @@ -304,11 +307,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, Constant *SubInit = 0; if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); - Changed |= CleanupConstantGlobalUsers(CE, SubInit, Context); + Changed |= CleanupConstantGlobalUsers(CE, SubInit); } else if (CE->getOpcode() == Instruction::BitCast && - isa(CE->getType())) { + CE->getType()->isPointerTy()) { // Pointer cast, delete any stores and memsets to the global. - Changed |= CleanupConstantGlobalUsers(CE, 0, Context); + Changed |= CleanupConstantGlobalUsers(CE, 0); } if (CE->use_empty()) { @@ -322,11 +325,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, Constant *SubInit = 0; if (!isa(GEP->getOperand(0))) { ConstantExpr *CE = - dyn_cast_or_null(ConstantFoldInstruction(GEP, Context)); + dyn_cast_or_null(ConstantFoldInstruction(GEP)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); } - Changed |= CleanupConstantGlobalUsers(GEP, SubInit, Context); + Changed |= CleanupConstantGlobalUsers(GEP, SubInit); if (GEP->use_empty()) { GEP->eraseFromParent(); @@ -344,7 +347,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, if (SafeToDestroyConstant(C)) { C->destroyConstant(); // This could have invalidated UI, start over from scratch. - CleanupConstantGlobalUsers(V, Init, Context); + CleanupConstantGlobalUsers(V, Init); return true; } } @@ -434,7 +437,7 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { else if (const VectorType *SubVectorTy = dyn_cast(*GEPI)) NumElements = SubVectorTy->getNumElements(); else { - assert(isa(*GEPI) && + assert((*GEPI)->isStructTy() && "Indexed GEP type is not array, vector, or struct!"); continue; } @@ -469,8 +472,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { /// behavior of the program in a more fine-grained way. We have determined that /// this transformation is safe already. We return the first global variable we /// insert so that the caller can reprocess it. -static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD, - LLVMContext &Context) { +static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { // Make sure this global only has simple uses that we can SRA. if (!GlobalUsersSafeToSRA(GV)) return 0; @@ -492,11 +494,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD, const StructLayout &Layout = *TD.getStructLayout(STy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Constant *In = getAggregateConstantElement(Init, - ConstantInt::get(Type::getInt32Ty(Context), i), - Context); + ConstantInt::get(Type::getInt32Ty(STy->getContext()), i)); assert(In && "Couldn't get element of initializer?"); - GlobalVariable *NGV = new GlobalVariable(Context, - STy->getElementType(i), false, + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, GlobalVariable::InternalLinkage, In, GV->getName()+"."+Twine(i), GV->isThreadLocal(), @@ -527,12 +527,10 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD, unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); for (unsigned i = 0, e = NumElements; i != e; ++i) { Constant *In = getAggregateConstantElement(Init, - ConstantInt::get(Type::getInt32Ty(Context), i), - Context); + ConstantInt::get(Type::getInt32Ty(Init->getContext()), i)); assert(In && "Couldn't get element of initializer?"); - GlobalVariable *NGV = new GlobalVariable(Context, - STy->getElementType(), false, + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, GlobalVariable::InternalLinkage, In, GV->getName()+"."+Twine(i), GV->isThreadLocal(), @@ -551,10 +549,10 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD, if (NewGlobals.empty()) return 0; + + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); - DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV); - - Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(Context)); + Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); // Loop over all of the uses of the global, replacing the constantexpr geps, // with smaller constantexpr geps or direct references. @@ -619,67 +617,73 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD, /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified /// value will trap if the value is dynamically null. PHIs keeps track of any /// phi nodes we've seen to avoid reprocessing them. -static bool AllUsesOfValueWillTrapIfNull(Value *V, - SmallPtrSet &PHIs) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (isa(*UI)) { +static bool AllUsesOfValueWillTrapIfNull(const Value *V, + SmallPtrSet &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const User *U = *UI; + + if (isa(U)) { // Will trap. - } else if (StoreInst *SI = dyn_cast(*UI)) { + } else if (const StoreInst *SI = dyn_cast(U)) { if (SI->getOperand(0) == V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Storing the value. } - } else if (CallInst *CI = dyn_cast(*UI)) { - if (CI->getOperand(0) != V) { - //cerr << "NONTRAPPING USE: " << **UI; + } else if (const CallInst *CI = dyn_cast(U)) { + if (CI->getCalledValue() != V) { + //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (InvokeInst *II = dyn_cast(*UI)) { - if (II->getOperand(0) != V) { - //cerr << "NONTRAPPING USE: " << **UI; + } else if (const InvokeInst *II = dyn_cast(U)) { + if (II->getCalledValue() != V) { + //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (BitCastInst *CI = dyn_cast(*UI)) { + } else if (const BitCastInst *CI = dyn_cast(U)) { if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; - } else if (GetElementPtrInst *GEPI = dyn_cast(*UI)) { + } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; - } else if (PHINode *PN = dyn_cast(*UI)) { + } else if (const PHINode *PN = dyn_cast(U)) { // If we've already seen this phi node, ignore it, it has already been // checked. - if (PHIs.insert(PN)) - return AllUsesOfValueWillTrapIfNull(PN, PHIs); - } else if (isa(*UI) && + if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) + return false; + } else if (isa(U) && isa(UI->getOperand(1))) { - // Ignore setcc X, null + // Ignore icmp X, null } else { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; } + } return true; } /// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads /// from GV will trap if the loaded value is null. Note that this also permits /// comparisons of the loaded value against null, as a special case. -static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI) - if (LoadInst *LI = dyn_cast(*UI)) { - SmallPtrSet PHIs; +static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) { + const User *U = *UI; + + if (const LoadInst *LI = dyn_cast(U)) { + SmallPtrSet PHIs; if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) return false; - } else if (isa(*UI)) { + } else if (isa(U)) { // Ignore stores to the global. } else { // We don't know or understand this user, bail out. - //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI; + //cerr << "UNKNOWN USER OF GLOBAL!: " << *U; return false; } - + } return true; } -static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV, - LLVMContext &Context) { +static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { bool Changed = false; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { Instruction *I = cast(*UI++); @@ -692,16 +696,17 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV, Changed = true; } } else if (isa(I) || isa(I)) { - if (I->getOperand(0) == V) { + CallSite CS(I); + if (CS.getCalledValue() == V) { // Calling through the pointer! Turn into a direct call, but be careful // that the pointer is not also being passed as an argument. - I->setOperand(0, NewV); + CS.setCalledFunction(NewV); Changed = true; bool PassedAsArg = false; - for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i) - if (I->getOperand(i) == V) { + for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) + if (CS.getArgument(i) == V) { PassedAsArg = true; - I->setOperand(i, NewV); + CS.setArgument(i, NewV); } if (PassedAsArg) { @@ -712,7 +717,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV, } else if (CastInst *CI = dyn_cast(I)) { Changed |= OptimizeAwayTrappingUsesOfValue(CI, ConstantExpr::getCast(CI->getOpcode(), - NewV, CI->getType()), Context); + NewV, CI->getType())); if (CI->use_empty()) { Changed = true; CI->eraseFromParent(); @@ -730,7 +735,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV, if (Idxs.size() == GEPI->getNumOperands()-1) Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, ConstantExpr::getGetElementPtr(NewV, &Idxs[0], - Idxs.size()), Context); + Idxs.size())); if (GEPI->use_empty()) { Changed = true; GEPI->eraseFromParent(); @@ -746,8 +751,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV, /// value stored into it. If there are uses of the loaded value that would trap /// if the loaded value is dynamically null, then we know that they cannot be /// reachable with a null optimize away the load. -static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, - LLVMContext &Context) { +static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { bool Changed = false; // Keep track of whether we are able to remove all the uses of the global @@ -758,7 +762,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ User *GlobalUser = *GUI++; if (LoadInst *LI = dyn_cast(GlobalUser)) { - Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV, Context); + Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); // If we were able to delete all uses of the loads if (LI->use_empty()) { LI->eraseFromParent(); @@ -781,15 +785,15 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, } if (Changed) { - DEBUG(errs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); + DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); ++NumGlobUses; } // If we nuked all of the loads, then none of the stores are needed either, // nor is the global. if (AllNonStoreUsesGone) { - DEBUG(errs() << " *** GLOBAL NOW DEAD!\n"); - CleanupConstantGlobalUsers(GV, 0, Context); + DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); + CleanupConstantGlobalUsers(GV, 0); if (GV->use_empty()) { GV->eraseFromParent(); ++NumDeleted; @@ -801,10 +805,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, /// ConstantPropUsersOf - Walk the use list of V, constant folding all of the /// instructions that are foldable. -static void ConstantPropUsersOf(Value *V, LLVMContext &Context) { +static void ConstantPropUsersOf(Value *V) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) if (Instruction *I = dyn_cast(*UI++)) - if (Constant *NewC = ConstantFoldInstruction(I, Context)) { + if (Constant *NewC = ConstantFoldInstruction(I)) { I->replaceAllUsesWith(NewC); // Advance UI to the next non-I use to avoid invalidating it! @@ -822,48 +826,48 @@ static void ConstantPropUsersOf(Value *V, LLVMContext &Context) { /// malloc into a global, and any loads of GV as uses of the new global. static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, - BitCastInst *BCI, - Value* NElems, - LLVMContext &Context, + const Type *AllocTy, + ConstantInt *NElements, TargetData* TD) { - DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV - << " CALL = " << *CI << " BCI = " << *BCI << '\n'); - - const Type *IntPtrTy = TD->getIntPtrType(Context); + DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - ConstantInt *NElements = cast(NElems); - if (NElements->getZExtValue() != 1) { - // If we have an array allocation, transform it to a single element - // allocation to make the code below simpler. - Type *NewTy = ArrayType::get(getMallocAllocatedType(CI), - NElements->getZExtValue()); - Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy); - Instruction* NewMI = cast(NewM); - Value* Indices[2]; - Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy); - Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, - NewMI->getName()+".el0", CI); - BCI->replaceAllUsesWith(NewGEP); - BCI->eraseFromParent(); - CI->eraseFromParent(); - BCI = cast(NewMI); - CI = extractMallocCallFromBitCast(NewMI); - } + const Type *GlobalType; + if (NElements->getZExtValue() == 1) + GlobalType = AllocTy; + else + // If we have an array allocation, the global variable is of an array. + GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue()); // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. - const Type *MAT = getMallocAllocatedType(CI); - Constant *Init = UndefValue::get(MAT); GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), - MAT, false, - GlobalValue::InternalLinkage, Init, + GlobalType, false, + GlobalValue::InternalLinkage, + UndefValue::get(GlobalType), GV->getName()+".body", GV, GV->isThreadLocal()); - // Anything that used the malloc now uses the global directly. - BCI->replaceAllUsesWith(NewGV); - + // If there are bitcast users of the malloc (which is typical, usually we have + // a malloc + bitcast) then replace them with uses of the new global. Update + // other users to use the global as well. + BitCastInst *TheBC = 0; + while (!CI->use_empty()) { + Instruction *User = cast(CI->use_back()); + if (BitCastInst *BCI = dyn_cast(User)) { + if (BCI->getType() == NewGV->getType()) { + BCI->replaceAllUsesWith(NewGV); + BCI->eraseFromParent(); + } else { + BCI->setOperand(0, NewGV); + } + } else { + if (TheBC == 0) + TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); + User->replaceUsesOfWith(CI, TheBC); + } + } + Constant *RepValue = NewGV; if (NewGV->getType() != GV->getType()->getElementType()) RepValue = ConstantExpr::getBitCast(RepValue, @@ -872,75 +876,75 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // If there is a comparison against null, we will insert a global bool to // keep track of whether the global was initialized yet or not. GlobalVariable *InitBool = - new GlobalVariable(Context, Type::getInt1Ty(Context), false, + new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, GlobalValue::InternalLinkage, - ConstantInt::getFalse(Context), GV->getName()+".init", - GV->isThreadLocal()); + ConstantInt::getFalse(GV->getContext()), + GV->getName()+".init", GV->isThreadLocal()); bool InitBoolUsed = false; // Loop over all uses of GV, processing them in turn. - std::vector Stores; - while (!GV->use_empty()) - if (LoadInst *LI = dyn_cast(GV->use_back())) { - while (!LI->use_empty()) { - Use &LoadUse = LI->use_begin().getUse(); - if (!isa(LoadUse.getUser())) - LoadUse = RepValue; - else { - ICmpInst *ICI = cast(LoadUse.getUser()); - // Replace the cmp X, 0 with a use of the bool value. - Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); - InitBoolUsed = true; - switch (ICI->getPredicate()) { - default: llvm_unreachable("Unknown ICmp Predicate!"); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - LV = ConstantInt::getFalse(Context); // X < null -> always false - break; - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: - case ICmpInst::ICMP_EQ: - LV = BinaryOperator::CreateNot(LV, "notinit", ICI); - break; - case ICmpInst::ICMP_NE: - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - break; // no change. - } - ICI->replaceAllUsesWith(LV); - ICI->eraseFromParent(); - } - } - LI->eraseFromParent(); - } else { - StoreInst *SI = cast(GV->use_back()); + while (!GV->use_empty()) { + if (StoreInst *SI = dyn_cast(GV->use_back())) { // The global is initialized when the store to it occurs. - new StoreInst(ConstantInt::getTrue(Context), InitBool, SI); + new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI); SI->eraseFromParent(); + continue; } + + LoadInst *LI = cast(GV->use_back()); + while (!LI->use_empty()) { + Use &LoadUse = LI->use_begin().getUse(); + if (!isa(LoadUse.getUser())) { + LoadUse = RepValue; + continue; + } + + ICmpInst *ICI = cast(LoadUse.getUser()); + // Replace the cmp X, 0 with a use of the bool value. + Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); + InitBoolUsed = true; + switch (ICI->getPredicate()) { + default: llvm_unreachable("Unknown ICmp Predicate!"); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: // X < null -> always false + LV = ConstantInt::getFalse(GV->getContext()); + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_EQ: + LV = BinaryOperator::CreateNot(LV, "notinit", ICI); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + break; // no change. + } + ICI->replaceAllUsesWith(LV); + ICI->eraseFromParent(); + } + LI->eraseFromParent(); + } // If the initialization boolean was used, insert it, otherwise delete it. if (!InitBoolUsed) { while (!InitBool->use_empty()) // Delete initializations - cast(InitBool->use_back())->eraseFromParent(); + cast(InitBool->use_back())->eraseFromParent(); delete InitBool; } else GV->getParent()->getGlobalList().insert(GV, InitBool); - - // Now the GV is dead, nuke it and the malloc. + // Now the GV is dead, nuke it and the malloc.. GV->eraseFromParent(); - BCI->eraseFromParent(); CI->eraseFromParent(); // To further other optimizations, loop over all users of NewGV and try to // constant prop them. This will promote GEP instructions with constant // indices into GEP constant-exprs, which will allow global-opt to hack on it. - ConstantPropUsersOf(NewGV, Context); + ConstantPropUsersOf(NewGV); if (RepValue != NewGV) - ConstantPropUsersOf(RepValue, Context); + ConstantPropUsersOf(RepValue); return NewGV; } @@ -949,29 +953,31 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, /// to make sure that there are no complex uses of V. We permit simple things /// like dereferencing the pointer, but not storing through the address, unless /// it is to the specified global. -static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, - GlobalVariable *GV, - SmallPtrSet &PHIs) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - Instruction *Inst = cast(*UI); - +static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, + const GlobalVariable *GV, + SmallPtrSet &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + const Instruction *Inst = cast(*UI); + if (isa(Inst) || isa(Inst)) { continue; // Fine, ignore. } - if (StoreInst *SI = dyn_cast(Inst)) { + if (const StoreInst *SI = dyn_cast(Inst)) { if (SI->getOperand(0) == V && SI->getOperand(1) != GV) return false; // Storing the pointer itself... bad. continue; // Otherwise, storing through it, or storing into GV... fine. } - if (isa(Inst)) { + // Must index into the array and into the struct. + if (isa(Inst) && Inst->getNumOperands() >= 3) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) return false; continue; } - if (PHINode *PN = dyn_cast(Inst)) { + if (const PHINode *PN = dyn_cast(Inst)) { // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI // cycles. if (PHIs.insert(PN)) @@ -980,7 +986,7 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, continue; } - if (BitCastInst *BCI = dyn_cast(Inst)) { + if (const BitCastInst *BCI = dyn_cast(Inst)) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) return false; continue; @@ -1039,23 +1045,24 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, /// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi /// of a load) are simple enough to perform heap SRA on. This permits GEP's /// that index through the array and struct field, icmps of null, and PHIs. -static bool LoadUsesSimpleEnoughForHeapSRA(Value *V, - SmallPtrSet &LoadUsingPHIs, - SmallPtrSet &LoadUsingPHIsPerLoad) { +static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, + SmallPtrSet &LoadUsingPHIs, + SmallPtrSet &LoadUsingPHIsPerLoad) { // We permit two users of the load: setcc comparing against the null // pointer, and a getelementptr of a specific form. - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - Instruction *User = cast(*UI); + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const Instruction *User = cast(*UI); // Comparison against null is ok. - if (ICmpInst *ICI = dyn_cast(User)) { + if (const ICmpInst *ICI = dyn_cast(User)) { if (!isa(ICI->getOperand(1))) return false; continue; } // getelementptr is also ok, but only a simple form. - if (GetElementPtrInst *GEPI = dyn_cast(User)) { + if (const GetElementPtrInst *GEPI = dyn_cast(User)) { // Must index into the array and into the struct. if (GEPI->getNumOperands() < 3) return false; @@ -1064,7 +1071,7 @@ static bool LoadUsesSimpleEnoughForHeapSRA(Value *V, continue; } - if (PHINode *PN = dyn_cast(User)) { + if (const PHINode *PN = dyn_cast(User)) { if (!LoadUsingPHIsPerLoad.insert(PN)) // This means some phi nodes are dependent on each other. // Avoid infinite looping! @@ -1091,13 +1098,13 @@ static bool LoadUsesSimpleEnoughForHeapSRA(Value *V, /// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from /// GV are simple enough to perform HeapSRA, return true. -static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, +static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, Instruction *StoredVal) { - SmallPtrSet LoadUsingPHIs; - SmallPtrSet LoadUsingPHIsPerLoad; - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; - ++UI) - if (LoadInst *LI = dyn_cast(*UI)) { + SmallPtrSet LoadUsingPHIs; + SmallPtrSet LoadUsingPHIsPerLoad; + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) + if (const LoadInst *LI = dyn_cast(*UI)) { if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, LoadUsingPHIsPerLoad)) return false; @@ -1109,16 +1116,16 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, // that all inputs the to the PHI nodes are in the same equivalence sets. // Check to verify that all operands of the PHIs are either PHIS that can be // transformed, loads from GV, or MI itself. - for (SmallPtrSet::iterator I = LoadUsingPHIs.begin(), - E = LoadUsingPHIs.end(); I != E; ++I) { - PHINode *PN = *I; + for (SmallPtrSet::const_iterator I = LoadUsingPHIs.begin() + , E = LoadUsingPHIs.end(); I != E; ++I) { + const PHINode *PN = *I; for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { Value *InVal = PN->getIncomingValue(op); // PHI of the stored value itself is ok. if (InVal == StoredVal) continue; - if (PHINode *InPN = dyn_cast(InVal)) { + if (const PHINode *InPN = dyn_cast(InVal)) { // One of the PHIs in our set is (optimistically) ok. if (LoadUsingPHIs.count(InPN)) continue; @@ -1126,7 +1133,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, } // Load from GV is ok. - if (LoadInst *LI = dyn_cast(InVal)) + if (const LoadInst *LI = dyn_cast(InVal)) if (LI->getOperand(0) == GV) continue; @@ -1142,8 +1149,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, DenseMap > &InsertedScalarizedValues, - std::vector > &PHIsToRewrite, - LLVMContext &Context) { + std::vector > &PHIsToRewrite) { std::vector &FieldVals = InsertedScalarizedValues[V]; if (FieldNo >= FieldVals.size()) @@ -1161,7 +1167,7 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, // a new Load of the scalarized global. Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo, InsertedScalarizedValues, - PHIsToRewrite, Context), + PHIsToRewrite), LI->getName()+".f"+Twine(FieldNo), LI); } else if (PHINode *PN = dyn_cast(V)) { // PN's type is pointer to struct. Make a new PHI of pointer to struct @@ -1185,16 +1191,14 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, /// the load, rewrite the derived value to use the HeapSRoA'd load. static void RewriteHeapSROALoadUser(Instruction *LoadUser, DenseMap > &InsertedScalarizedValues, - std::vector > &PHIsToRewrite, - LLVMContext &Context) { + std::vector > &PHIsToRewrite) { // If this is a comparison against null, handle it. if (ICmpInst *SCI = dyn_cast(LoadUser)) { assert(isa(SCI->getOperand(1))); // If we have a setcc of the loaded pointer, we can use a setcc of any // field. Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, - InsertedScalarizedValues, PHIsToRewrite, - Context); + InsertedScalarizedValues, PHIsToRewrite); Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, Constant::getNullValue(NPtr->getType()), @@ -1212,8 +1216,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // Load the pointer for this field. unsigned FieldNo = cast(GEPI->getOperand(2))->getZExtValue(); Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, - InsertedScalarizedValues, PHIsToRewrite, - Context); + InsertedScalarizedValues, PHIsToRewrite); // Create the new GEP idx vector. SmallVector GEPIdx; @@ -1245,8 +1248,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // users. for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { Instruction *User = cast(*UI++); - RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite, - Context); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } } @@ -1256,13 +1258,11 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, /// AllGlobalLoadUsesSimpleEnoughForHeapSRA. static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap > &InsertedScalarizedValues, - std::vector > &PHIsToRewrite, - LLVMContext &Context) { + std::vector > &PHIsToRewrite) { for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); UI != E; ) { Instruction *User = cast(*UI++); - RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite, - Context); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } if (Load->use_empty()) { @@ -1273,13 +1273,9 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. -static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, - CallInst *CI, BitCastInst* BCI, - Value* NElems, - LLVMContext &Context, - TargetData *TD) { - DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC CALL = " << *CI - << " BITCAST = " << *BCI << '\n'); +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, + Value* NElems, TargetData *TD) { + DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); const Type* MAT = getMallocAllocatedType(CI); const StructType *STy = cast(MAT); @@ -1287,8 +1283,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, // it into GV). If there are other uses, change them to be uses of // the global to simplify later code. This also deletes the store // into GV. - ReplaceUsesOfMallocWithGlobal(BCI, GV); - + ReplaceUsesOfMallocWithGlobal(CI, GV); + // Okay, at this point, there are no users of the malloc. Insert N // new mallocs at the same place as CI, and N globals. std::vector FieldGlobals; @@ -1306,11 +1302,16 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, GV->isThreadLocal()); FieldGlobals.push_back(NGV); - Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), - FieldTy, NElems, - BCI->getName() + ".f" + Twine(FieldNo)); + unsigned TypeSize = TD->getTypeAllocSize(FieldTy); + if (const StructType *ST = dyn_cast(FieldTy)) + TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); + const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, + ConstantInt::get(IntPtrTy, TypeSize), + NElems, 0, + CI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); - new StoreInst(NMI, NGV, BCI); + new StoreInst(NMI, NGV, CI); } // The tricky aspect of this transformation is handling the case when malloc @@ -1325,24 +1326,25 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, // if (F1) { free(F1); F1 = 0; } // if (F2) { free(F2); F2 = 0; } // } - Value *RunningOr = 0; + // The malloc can also fail if its argument is too large. + Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0); + Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0), + ConstantZero, "isneg"); for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { - Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i], - Constant::getNullValue(FieldMallocs[i]->getType()), - "isnull"); - if (!RunningOr) - RunningOr = Cond; // First seteq - else - RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI); + Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], + Constant::getNullValue(FieldMallocs[i]->getType()), + "isnull"); + RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI); } // Split the basic block at the old malloc. - BasicBlock *OrigBB = BCI->getParent(); - BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont"); + BasicBlock *OrigBB = CI->getParent(); + BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); // Create the block to check the first condition. Put all these blocks at the // end of the function as they are unlikely to be executed. - BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null", + BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), + "malloc_ret_null", OrigBB->getParent()); // Remove the uncond branch from OrigBB to ContBB, turning it into a cond @@ -1357,9 +1359,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, Constant::getNullValue(GVVal->getType()), "tmp"); - BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it", + BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", OrigBB->getParent()); - BasicBlock *NextBlock = BasicBlock::Create(Context, "next", + BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next", OrigBB->getParent()); Instruction *BI = BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock); @@ -1374,9 +1376,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, } BranchInst::Create(ContBB, NullPtrBlock); - - // CI and BCI are no longer needed, remove them. - BCI->eraseFromParent(); + + // CI is no longer needed, remove it. CI->eraseFromParent(); /// InsertedScalarizedLoads - As we process loads, if we can't immediately @@ -1394,8 +1395,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, Instruction *User = cast(*UI++); if (LoadInst *LI = dyn_cast(User)) { - RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite, - Context); + RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); continue; } @@ -1426,7 +1426,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *InVal = PN->getIncomingValue(i); InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, - PHIsToRewrite, Context); + PHIsToRewrite); FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); } } @@ -1463,14 +1463,12 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, /// cast of malloc. static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, - BitCastInst *BCI, + const Type *AllocTy, Module::global_iterator &GVI, - TargetData *TD, - LLVMContext &Context) { - // If we can't figure out the type being malloced, then we can't optimize. - const Type *AllocTy = getMallocAllocatedType(CI); - assert(AllocTy); - + TargetData *TD) { + if (!TD) + return false; + // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1489,66 +1487,66 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // malloc to be stored into the specified global, loaded setcc'd, and // GEP'd. These are all things we could transform to using the global // for. - { - SmallPtrSet PHIs; - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) - return false; - } + SmallPtrSet PHIs; + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) + return false; // If we have a global that is only initialized with a fixed size malloc, // transform the program to use global memory instead of malloc'd memory. // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. - Value *NElems = getMallocArraySize(CI, Context, TD); // We cannot optimize the malloc if we cannot determine malloc array size. - if (NElems) { - if (ConstantInt *NElements = dyn_cast(NElems)) - // Restrict this transformation to only working on small allocations - // (2048 bytes currently), as we don't want to introduce a 16M global or - // something. - if (TD && - NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, NElems, Context, TD); - return true; - } - - // If the allocation is an array of structures, consider transforming this - // into multiple malloc'd arrays, one for each field. This is basically - // SRoA for malloc'd memory. - - // If this is an allocation of a fixed size array of structs, analyze as a - // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1)) - if (const ArrayType *AT = dyn_cast(AllocTy)) - AllocTy = AT->getElementType(); + Value *NElems = getMallocArraySize(CI, TD, true); + if (!NElems) + return false; + + if (ConstantInt *NElements = dyn_cast(NElems)) + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. + if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); + return true; + } - if (const StructType *AllocSTy = dyn_cast(AllocTy)) { - // This the structure has an unreasonable number of fields, leave it - // alone. - if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { - - // If this is a fixed size array, transform the Malloc to be an alloc of - // structs. malloc [100 x struct],1 -> malloc struct, 100 - if (const ArrayType *AT = - dyn_cast(getMallocAllocatedType(CI))) { - Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), - AT->getNumElements()); - Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), - AllocSTy, NumElements, - BCI->getName()); - Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); - BCI->replaceAllUsesWith(Cast); - BCI->eraseFromParent(); - CI->eraseFromParent(); - BCI = cast(NewMI); - CI = extractMallocCallFromBitCast(NewMI); - } - - GVI = PerformHeapAllocSRoA(GV, CI, BCI, NElems, Context, TD); - return true; - } + // If the allocation is an array of structures, consider transforming this + // into multiple malloc'd arrays, one for each field. This is basically + // SRoA for malloc'd memory. + + // If this is an allocation of a fixed size array of structs, analyze as a + // variable size array. malloc [100 x struct],1 -> malloc struct, 100 + if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) + if (const ArrayType *AT = dyn_cast(AllocTy)) + AllocTy = AT->getElementType(); + + const StructType *AllocSTy = dyn_cast(AllocTy); + if (!AllocSTy) + return false; + + // This the structure has an unreasonable number of fields, leave it + // alone. + if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && + AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { + + // If this is a fixed size array, transform the Malloc to be an alloc of + // structs. malloc [100 x struct],1 -> malloc struct, 100 + if (const ArrayType *AT = dyn_cast(getMallocAllocatedType(CI))) { + const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); + Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); + Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); + Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, + AllocSize, NumElements, + 0, CI->getName()); + Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); + CI->replaceAllUsesWith(Cast); + CI->eraseFromParent(); + CI = dyn_cast(Malloc) ? + extractMallocCallFromBitCast(Malloc) : cast(Malloc); } + + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); + return true; } return false; @@ -1558,7 +1556,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // that only one value (besides its initializer) is ever stored to the global. static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, Module::global_iterator &GVI, - TargetData *TD, LLVMContext &Context) { + TargetData *TD) { // Ignore no-op GEPs and bitcasts. StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1566,7 +1564,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, // only has one (non-null) value stored into it, then we can optimize any // users of the loaded value (often calls and loads) that would trap if the // value was null. - if (isa(GV->getInitializer()->getType()) && + if (GV->getInitializer()->getType()->isPointerTy() && GV->getInitializer()->isNullValue()) { if (Constant *SOVC = dyn_cast(StoredOnceVal)) { if (GV->getInitializer()->getType() != SOVC->getType()) @@ -1574,18 +1572,13 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); // Optimize away any trapping uses of the loaded value. - if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context)) + if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { - if (getMallocAllocatedType(CI)) { - BitCastInst* BCI = NULL; - for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); - UI != E; ) - BCI = dyn_cast(cast(*UI++)); - if (BCI && - TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD, Context)) - return true; - } + const Type* MallocType = getMallocAllocatedType(CI); + if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, + GVI, TD)) + return true; } } @@ -1596,8 +1589,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, /// two values ever stored into GV are its initializer and OtherVal. See if we /// can shrink the global into a boolean and select between the two values /// whenever it is used. This exposes the values to other scalar optimizations. -static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal, - LLVMContext &Context) { +static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { const Type *GVElType = GV->getType()->getElementType(); // If GVElType is already i1, it is already shrunk. If the type of the GV is @@ -1605,28 +1597,32 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal, // between them is very expensive and unlikely to lead to later // simplification. In these cases, we typically end up with "cond ? v1 : v2" // where v1 and v2 both require constant pool loads, a big loss. - if (GVElType == Type::getInt1Ty(Context) || GVElType->isFloatingPoint() || - isa(GVElType) || isa(GVElType)) + if (GVElType == Type::getInt1Ty(GV->getContext()) || + GVElType->isFloatingPointTy() || + GVElType->isPointerTy() || GVElType->isVectorTy()) return false; - + // Walk the use list of the global seeing if all the uses are load or store. // If there is anything else, bail out. - for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I) - if (!isa(I) && !isa(I)) + for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ + User *U = *I; + if (!isa(U) && !isa(U)) return false; - - DEBUG(errs() << " *** SHRINKING TO BOOL: " << *GV); + } + + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); // Create the new global, initializing it to false. - GlobalVariable *NewGV = new GlobalVariable(Context, - Type::getInt1Ty(Context), false, - GlobalValue::InternalLinkage, ConstantInt::getFalse(Context), + GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), + false, + GlobalValue::InternalLinkage, + ConstantInt::getFalse(GV->getContext()), GV->getName()+".b", GV->isThreadLocal()); GV->getParent()->getGlobalList().insert(GV, NewGV); Constant *InitVal = GV->getInitializer(); - assert(InitVal->getType() != Type::getInt1Ty(Context) && + assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && "No reason to shrink to bool!"); // If initialized to zero and storing one into the global, we can use a cast @@ -1643,14 +1639,15 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal, // Only do this if we weren't storing a loaded value. Value *StoreVal; if (StoringOther || SI->getOperand(0) == InitVal) - StoreVal = ConstantInt::get(Type::getInt1Ty(Context), StoringOther); + StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), + StoringOther); else { // Otherwise, we are storing a previously loaded copy. To do this, // change the copy from copying the original value to just copying the // bool. Instruction *StoredVal = cast(SI->getOperand(0)); - // If we're already replaced the input, StoredVal will be a cast or + // If we've already replaced the input, StoredVal will be a cast or // select instruction. If not, it will be a load of the original // global. if (LoadInst *LI = dyn_cast(StoredVal)) { @@ -1689,12 +1686,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal, /// it if possible. If we make a change, return true. bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, Module::global_iterator &GVI) { - SmallPtrSet PHIUsers; + SmallPtrSet PHIUsers; GlobalStatus GS; GV->removeDeadConstantUsers(); if (GV->use_empty()) { - DEBUG(errs() << "GLOBAL DEAD: " << *GV); + DEBUG(dbgs() << "GLOBAL DEAD: " << *GV); GV->eraseFromParent(); ++NumDeleted; return true; @@ -1702,24 +1699,26 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (!AnalyzeGlobal(GV, GS, PHIUsers)) { #if 0 - cerr << "Global: " << *GV; - cerr << " isLoaded = " << GS.isLoaded << "\n"; - cerr << " StoredType = "; + DEBUG(dbgs() << "Global: " << *GV); + DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n"); + DEBUG(dbgs() << " StoredType = "); switch (GS.StoredType) { - case GlobalStatus::NotStored: cerr << "NEVER STORED\n"; break; - case GlobalStatus::isInitializerStored: cerr << "INIT STORED\n"; break; - case GlobalStatus::isStoredOnce: cerr << "STORED ONCE\n"; break; - case GlobalStatus::isStored: cerr << "stored\n"; break; + case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break; + case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n"); + break; + case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break; + case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break; } if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) - cerr << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"; + DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) - cerr << " AccessingFunction = " << GS.AccessingFunction->getName() - << "\n"; - cerr << " HasMultipleAccessingFunctions = " - << GS.HasMultipleAccessingFunctions << "\n"; - cerr << " HasNonInstructionUser = " << GS.HasNonInstructionUser<<"\n"; - cerr << "\n"; + DEBUG(dbgs() << " AccessingFunction = " + << GS.AccessingFunction->getName() << "\n"); + DEBUG(dbgs() << " HasMultipleAccessingFunctions = " + << GS.HasMultipleAccessingFunctions << "\n"); + DEBUG(dbgs() << " HasNonInstructionUser = " + << GS.HasNonInstructionUser<<"\n"); + DEBUG(dbgs() << "\n"); #endif // If this is a first class global and has only one accessing function @@ -1737,13 +1736,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GS.AccessingFunction->getName() == "main" && GS.AccessingFunction->hasExternalLinkage() && GV->getType()->getAddressSpace() == 0) { - DEBUG(errs() << "LOCALIZING GLOBAL: " << *GV); - Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin(); + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); + Instruction& FirstI = const_cast(*GS.AccessingFunction + ->getEntryBlock().begin()); const Type* ElemTy = GV->getType()->getElementType(); // FIXME: Pass Global's alignment when globals have alignment - AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), FirstI); + AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); if (!isa(GV->getInitializer())) - new StoreInst(GV->getInitializer(), Alloca, FirstI); + new StoreInst(GV->getInitializer(), Alloca, &FirstI); GV->replaceAllUsesWith(Alloca); GV->eraseFromParent(); @@ -1754,12 +1754,11 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // If the global is never loaded (but may be stored to), it is dead. // Delete it now. if (!GS.isLoaded) { - DEBUG(errs() << "GLOBAL NEVER LOADED: " << *GV); + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); // Delete any stores we can find to the global. We may not be able to // make it completely dead though. - bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), - GV->getContext()); + bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); // If the global is dead now, delete it. if (GV->use_empty()) { @@ -1770,15 +1769,15 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return Changed; } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { - DEBUG(errs() << "MARKING CONSTANT: " << *GV); + DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); GV->setConstant(true); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), GV->getContext()); + CleanupConstantGlobalUsers(GV, GV->getInitializer()); // If the global is dead now, just nuke it. if (GV->use_empty()) { - DEBUG(errs() << " *** Marking constant allowed us to simplify " + DEBUG(dbgs() << " *** Marking constant allowed us to simplify " << "all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; @@ -1788,8 +1787,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return true; } else if (!GV->getInitializer()->getType()->isSingleValueType()) { if (TargetData *TD = getAnalysisIfAvailable()) - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD, - GV->getContext())) { + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { GVI = FirstNewGV; // Don't skip the newly produced globals! return true; } @@ -1804,11 +1802,10 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GV->setInitializer(SOVConstant); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), - GV->getContext()); + CleanupConstantGlobalUsers(GV, GV->getInitializer()); if (GV->use_empty()) { - DEBUG(errs() << " *** Substituting initializer allowed us to " + DEBUG(dbgs() << " *** Substituting initializer allowed us to " << "simplify all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; @@ -1822,14 +1819,13 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, - getAnalysisIfAvailable(), - GV->getContext())) + getAnalysisIfAvailable())) return true; // Otherwise, if the global was not a boolean, we can shrink it to be a // boolean. if (Constant *SOVConstant = dyn_cast(GS.StoredOnceValue)) - if (TryToShrinkGlobalToBoolean(GV, SOVConstant, GV->getContext())) { + if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { ++NumShrunkToBool; return true; } @@ -1876,9 +1872,8 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { if (!F->hasName() && !F->isDeclaration()) F->setLinkage(GlobalValue::InternalLinkage); F->removeDeadConstantUsers(); - if (F->use_empty() && (F->hasLocalLinkage() || - F->hasLinkOnceLinkage())) { - M.getFunctionList().erase(F); + if (F->use_empty() && (F->hasLocalLinkage() || F->hasLinkOnceLinkage())) { + F->eraseFromParent(); Changed = true; ++NumFnDeleted; } else if (F->hasLocalLinkage()) { @@ -1914,6 +1909,15 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { // Global variables without names cannot be referenced outside this module. if (!GV->hasName() && !GV->isDeclaration()) GV->setLinkage(GlobalValue::InternalLinkage); + // Simplify the initializer. + if (GV->hasInitializer()) + if (ConstantExpr *CE = dyn_cast(GV->getInitializer())) { + TargetData *TD = getAnalysisIfAvailable(); + Constant *New = ConstantFoldConstantExpression(CE, TD); + if (New && New != CE) + GV->setInitializer(New); + } + // Do more involved optimizations if the global is internal. if (!GV->isConstant() && GV->hasLocalLinkage() && GV->hasInitializer()) Changed |= ProcessInternalGlobal(GV, GVI); @@ -1932,11 +1936,11 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { if (!ATy) return 0; const StructType *STy = dyn_cast(ATy->getElementType()); if (!STy || STy->getNumElements() != 2 || - STy->getElementType(0) != Type::getInt32Ty(M.getContext())) return 0; + !STy->getElementType(0)->isIntegerTy(32)) return 0; const PointerType *PFTy = dyn_cast(STy->getElementType(1)); if (!PFTy) return 0; const FunctionType *FTy = dyn_cast(PFTy->getElementType()); - if (!FTy || FTy->getReturnType() != Type::getVoidTy(M.getContext()) || + if (!FTy || !FTy->getReturnType()->isVoidTy() || FTy->isVarArg() || FTy->getNumParams() != 0) return 0; @@ -1982,11 +1986,10 @@ static std::vector ParseGlobalCtors(GlobalVariable *GV) { /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the /// specified array, returning the new global to use. static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, - const std::vector &Ctors, - LLVMContext &Context) { + const std::vector &Ctors) { // If we made a change, reassemble the initializer list. std::vector CSVals; - CSVals.push_back(ConstantInt::get(Type::getInt32Ty(Context), 65535)); + CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535)); CSVals.push_back(0); // Create the new init list. @@ -1995,12 +1998,14 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, if (Ctors[i]) { CSVals[1] = Ctors[i]; } else { - const Type *FTy = FunctionType::get(Type::getVoidTy(Context), false); + const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), + false); const PointerType *PFTy = PointerType::getUnqual(FTy); CSVals[1] = Constant::getNullValue(PFTy); - CSVals[0] = ConstantInt::get(Type::getInt32Ty(Context), 2147483647); + CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), + 2147483647); } - CAList.push_back(ConstantStruct::get(Context, CSVals, false)); + CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); } // Create the array initializer. @@ -2016,8 +2021,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, } // Create the new global and insert it next to the existing list. - GlobalVariable *NGV = new GlobalVariable(Context, CA->getType(), - GCL->isConstant(), + GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), CA, "", GCL->isThreadLocal()); GCL->getParent()->getGlobalList().insert(GCL, NGV); @@ -2051,7 +2055,7 @@ static Constant *getVal(DenseMap &ComputedValues, /// enough for us to understand. In particular, if it is a cast of something, /// we punt. We basically just support direct accesses to globals and GEP's of /// globals. This should be kept up to date with CommitValueTo. -static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) { +static bool isSimpleEnoughPointerToCommit(Constant *C) { // Conservatively, avoid aggregate types. This is because we don't // want to worry about them partially overlapping other stores. if (!cast(C->getType())->getElementType()->isSingleValueType()) @@ -2074,7 +2078,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) { return false; // The first index must be zero. - ConstantInt *CI = dyn_cast(*next(CE->op_begin())); + ConstantInt *CI = dyn_cast(*llvm::next(CE->op_begin())); if (!CI || !CI->isZero()) return false; // The remaining indices must be compile-time known integers within the @@ -2091,16 +2095,15 @@ static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) { /// initializer. This returns 'Init' modified to reflect 'Val' stored into it. /// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, - ConstantExpr *Addr, unsigned OpNo, - LLVMContext &Context) { + ConstantExpr *Addr, unsigned OpNo) { // Base case of the recursion. if (OpNo == Addr->getNumOperands()) { assert(Val->getType() == Init->getType() && "Type mismatch!"); return Val; } + std::vector Elts; if (const StructType *STy = dyn_cast(Init->getType())) { - std::vector Elts; // Break up the constant into its elements. if (ConstantStruct *CS = dyn_cast(Init)) { @@ -2121,61 +2124,67 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, ConstantInt *CU = cast(Addr->getOperand(OpNo)); unsigned Idx = CU->getZExtValue(); assert(Idx < STy->getNumElements() && "Struct index out of range!"); - Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1, Context); + Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); // Return the modified struct. - return ConstantStruct::get(Context, &Elts[0], Elts.size(), STy->isPacked()); + return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(), + STy->isPacked()); } else { ConstantInt *CI = cast(Addr->getOperand(OpNo)); - const ArrayType *ATy = cast(Init->getType()); + const SequentialType *InitTy = cast(Init->getType()); + uint64_t NumElts; + if (const ArrayType *ATy = dyn_cast(InitTy)) + NumElts = ATy->getNumElements(); + else + NumElts = cast(InitTy)->getNumElements(); + + // Break up the array into elements. - std::vector Elts; if (ConstantArray *CA = dyn_cast(Init)) { for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) Elts.push_back(cast(*i)); + } else if (ConstantVector *CV = dyn_cast(Init)) { + for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) + Elts.push_back(cast(*i)); } else if (isa(Init)) { - Constant *Elt = Constant::getNullValue(ATy->getElementType()); - Elts.assign(ATy->getNumElements(), Elt); - } else if (isa(Init)) { - Constant *Elt = UndefValue::get(ATy->getElementType()); - Elts.assign(ATy->getNumElements(), Elt); + Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); } else { - llvm_unreachable("This code is out of sync with " + assert(isa(Init) && "This code is out of sync with " " ConstantFoldLoadThroughGEPConstantExpr"); + Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); } - assert(CI->getZExtValue() < ATy->getNumElements()); + assert(CI->getZExtValue() < NumElts); Elts[CI->getZExtValue()] = - EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1, Context); - return ConstantArray::get(ATy, Elts); + EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); + + if (Init->getType()->isArrayTy()) + return ConstantArray::get(cast(InitTy), Elts); + else + return ConstantVector::get(&Elts[0], Elts.size()); } } /// CommitValueTo - We have decided that Addr (which satisfies the predicate /// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. -static void CommitValueTo(Constant *Val, Constant *Addr, - LLVMContext &Context) { +static void CommitValueTo(Constant *Val, Constant *Addr) { if (GlobalVariable *GV = dyn_cast(Addr)) { assert(GV->hasInitializer()); GV->setInitializer(Val); return; } - + ConstantExpr *CE = cast(Addr); GlobalVariable *GV = cast(CE->getOperand(0)); - - Constant *Init = GV->getInitializer(); - Init = EvaluateStoreInto(Init, Val, CE, 2, Context); - GV->setInitializer(Init); + GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); } /// ComputeLoadResult - Return the value that would be computed by a load from /// P after the stores reflected by 'memory' have been performed. If we can't /// decide, return null. static Constant *ComputeLoadResult(Constant *P, - const DenseMap &Memory, - LLVMContext &Context) { + const DenseMap &Memory) { // If this memory location has been recently stored, use the stored value: it // is the most up-to-date. DenseMap::const_iterator I = Memory.find(P); @@ -2213,8 +2222,6 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) return false; - LLVMContext &Context = F->getContext(); - CallStack.push_back(F); /// Values - As we compute SSA register values, we store their contents here. @@ -2241,7 +2248,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (StoreInst *SI = dyn_cast(CurInst)) { if (SI->isVolatile()) return false; // no volatile accesses. Constant *Ptr = getVal(Values, SI->getOperand(1)); - if (!isSimpleEnoughPointerToCommit(Ptr, Context)) + if (!isSimpleEnoughPointerToCommit(Ptr)) // If this is too complex for us to commit, reject it. return false; Constant *Val = getVal(Values, SI->getOperand(0)); @@ -2259,8 +2266,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, getVal(Values, CI->getOperand(0)), CI->getType()); } else if (SelectInst *SI = dyn_cast(CurInst)) { - InstResult = - ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), + InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), getVal(Values, SI->getOperand(1)), getVal(Values, SI->getOperand(2))); } else if (GetElementPtrInst *GEP = dyn_cast(CurInst)) { @@ -2275,12 +2281,12 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, } else if (LoadInst *LI = dyn_cast(CurInst)) { if (LI->isVolatile()) return false; // no volatile accesses. InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), - MutatedMemory, Context); + MutatedMemory); if (InstResult == 0) return false; // Could not evaluate load. } else if (AllocaInst *AI = dyn_cast(CurInst)) { if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. const Type *Ty = AI->getType()->getElementType(); - AllocaTmps.push_back(new GlobalVariable(Context, Ty, false, + AllocaTmps.push_back(new GlobalVariable(Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName())); @@ -2294,14 +2300,16 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, } // Cannot handle inline asm. - if (isa(CI->getOperand(0))) return false; + if (isa(CI->getCalledValue())) return false; // Resolve function pointers. - Function *Callee = dyn_cast(getVal(Values, CI->getOperand(0))); + Function *Callee = dyn_cast(getVal(Values, + CI->getCalledValue())); if (!Callee) return false; // Cannot resolve. SmallVector Formals; - for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end(); + CallSite CS(CI); + for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) Formals.push_back(getVal(Values, *i)); @@ -2413,12 +2421,12 @@ static bool EvaluateStaticConstructor(Function *F) { MutatedMemory, AllocaTmps); if (EvalSuccess) { // We succeeded at evaluation: commit the result. - DEBUG(errs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" + DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" << F->getName() << "' to " << MutatedMemory.size() << " stores.\n"); for (DenseMap::iterator I = MutatedMemory.begin(), E = MutatedMemory.end(); I != E; ++I) - CommitValueTo(I->second, I->first, F->getContext()); + CommitValueTo(I->second, I->first); } // At this point, we are done interpreting. If we created any 'alloca' @@ -2475,7 +2483,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { if (!MadeChange) return false; - GCL = InstallGlobalCtors(GCL, Ctors, GCL->getContext()); + GCL = InstallGlobalCtors(GCL, Ctors); return true; } @@ -2504,29 +2512,28 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { Changed = true; } - // If the aliasee has internal linkage, give it the name and linkage - // of the alias, and delete the alias. This turns: - // define internal ... @f(...) - // @a = alias ... @f - // into: - // define ... @a(...) - if (!Target->hasLocalLinkage()) - continue; - - // The transform is only useful if the alias does not have internal linkage. - if (J->hasLocalLinkage()) - continue; + // If the alias is externally visible, we may still be able to simplify it. + if (!J->hasLocalLinkage()) { + // If the aliasee has internal linkage, give it the name and linkage + // of the alias, and delete the alias. This turns: + // define internal ... @f(...) + // @a = alias ... @f + // into: + // define ... @a(...) + if (!Target->hasLocalLinkage()) + continue; - // Do not perform the transform if multiple aliases potentially target the - // aliasee. This check also ensures that it is safe to replace the section - // and other attributes of the aliasee with those of the alias. - if (!hasOneUse) - continue; + // Do not perform the transform if multiple aliases potentially target the + // aliasee. This check also ensures that it is safe to replace the section + // and other attributes of the aliasee with those of the alias. + if (!hasOneUse) + continue; - // Give the aliasee the name, linkage and other attributes of the alias. - Target->takeName(J); - Target->setLinkage(J->getLinkage()); - Target->GlobalValue::copyAttributesFrom(J); + // Give the aliasee the name, linkage and other attributes of the alias. + Target->takeName(J); + Target->setLinkage(J->getLinkage()); + Target->GlobalValue::copyAttributesFrom(J); + } // Delete the alias. M.getAliasList().erase(J);