X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FInstCombine%2FInstCombineLoadStoreAlloca.cpp;h=dd2889de405e0561fdde4c04ca70bf0788c9c193;hp=5b6cf4a4a83f3daa85443a80f62d53732074aa2c;hb=7a0ec464f16e761602ac9c4e1f610029c0346745;hpb=d7cc8b839cba201b552698646b624da7a79ede8e diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 5b6cf4a4a83..dd2889de405 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -11,15 +11,20 @@ // //===----------------------------------------------------------------------===// -#include "InstCombine.h" -#include "llvm/IntrinsicInst.h" +#include "InstCombineInternal.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Target/TargetData.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; +#define DEBUG_TYPE "instcombine" + STATISTIC(NumDeadStore, "Number of dead stores eliminated"); STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); @@ -29,10 +34,13 @@ STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); static bool pointsToConstantGlobal(Value *V) { if (GlobalVariable *GV = dyn_cast(V)) return GV->isConstant(); - if (ConstantExpr *CE = dyn_cast(V)) + + if (ConstantExpr *CE = dyn_cast(V)) { if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::AddrSpaceCast || CE->getOpcode() == Instruction::GetElementPtr) return pointsToConstantGlobal(CE->getOperand(0)); + } return false; } @@ -45,95 +53,104 @@ static bool pointsToConstantGlobal(Value *V) { /// can optimize this. static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, - SmallVectorImpl &ToDelete, - bool IsOffset = false) { + SmallVectorImpl &ToDelete) { // We track lifetime intrinsics as we encounter them. If we decide to go // ahead and replace the value with the global, this lets the caller quickly // eliminate the markers. - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { - User *U = cast(*UI); + SmallVector, 35> ValuesToInspect; + ValuesToInspect.push_back(std::make_pair(V, false)); + while (!ValuesToInspect.empty()) { + auto ValuePair = ValuesToInspect.pop_back_val(); + const bool IsOffset = ValuePair.second; + for (auto &U : ValuePair.first->uses()) { + Instruction *I = cast(U.getUser()); + + if (LoadInst *LI = dyn_cast(I)) { + // Ignore non-volatile loads, they are always ok. + if (!LI->isSimple()) return false; + continue; + } - if (LoadInst *LI = dyn_cast(U)) { - // Ignore non-volatile loads, they are always ok. - if (!LI->isSimple()) return false; - continue; - } + if (isa(I) || isa(I)) { + // If uses of the bitcast are ok, we are ok. + ValuesToInspect.push_back(std::make_pair(I, IsOffset)); + continue; + } + if (GetElementPtrInst *GEP = dyn_cast(I)) { + // If the GEP has all zero indices, it doesn't offset the pointer. If it + // doesn't, it does. + ValuesToInspect.push_back( + std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices())); + continue; + } - if (BitCastInst *BCI = dyn_cast(U)) { - // If uses of the bitcast are ok, we are ok. - if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset)) - return false; - continue; - } - if (GetElementPtrInst *GEP = dyn_cast(U)) { - // If the GEP has all zero indices, it doesn't offset the pointer. If it - // doesn't, it does. - if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete, - IsOffset || !GEP->hasAllZeroIndices())) - return false; - continue; - } + if (auto CS = CallSite(I)) { + // If this is the function being called then we treat it like a load and + // ignore it. + if (CS.isCallee(&U)) + continue; - if (CallSite CS = U) { - // If this is the function being called then we treat it like a load and - // ignore it. - if (CS.isCallee(UI)) - continue; + unsigned DataOpNo = CS.getDataOperandNo(&U); + bool IsArgOperand = CS.isArgOperand(&U); - // If this is a readonly/readnone call site, then we know it is just a - // load (but one that potentially returns the value itself), so we can - // ignore it if we know that the value isn't captured. - unsigned ArgNo = CS.getArgumentNo(UI); - if (CS.onlyReadsMemory() && - (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) - continue; + // Inalloca arguments are clobbered by the call. + if (IsArgOperand && CS.isInAllocaArgument(DataOpNo)) + return false; - // If this is being passed as a byval argument, the caller is making a - // copy, so it is only a read of the alloca. - if (CS.isByValArgument(ArgNo)) - continue; - } + // If this is a readonly/readnone call site, then we know it is just a + // load (but one that potentially returns the value itself), so we can + // ignore it if we know that the value isn't captured. + if (CS.onlyReadsMemory() && + (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo))) + continue; + + // If this is being passed as a byval argument, the caller is making a + // copy, so it is only a read of the alloca. + if (IsArgOperand && CS.isByValArgument(DataOpNo)) + continue; + } - // Lifetime intrinsics can be handled by the caller. - if (IntrinsicInst *II = dyn_cast(U)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - assert(II->use_empty() && "Lifetime markers have no result to use!"); - ToDelete.push_back(II); - continue; + // Lifetime intrinsics can be handled by the caller. + if (IntrinsicInst *II = dyn_cast(I)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + assert(II->use_empty() && "Lifetime markers have no result to use!"); + ToDelete.push_back(II); + continue; + } } - } - // If this is isn't our memcpy/memmove, reject it as something we can't - // handle. - MemTransferInst *MI = dyn_cast(U); - if (MI == 0) - return false; + // If this is isn't our memcpy/memmove, reject it as something we can't + // handle. + MemTransferInst *MI = dyn_cast(I); + if (!MI) + return false; - // If the transfer is using the alloca as a source of the transfer, then - // ignore it since it is a load (unless the transfer is volatile). - if (UI.getOperandNo() == 1) { - if (MI->isVolatile()) return false; - continue; - } + // If the transfer is using the alloca as a source of the transfer, then + // ignore it since it is a load (unless the transfer is volatile). + if (U.getOperandNo() == 1) { + if (MI->isVolatile()) return false; + continue; + } - // If we already have seen a copy, reject the second one. - if (TheCopy) return false; + // If we already have seen a copy, reject the second one. + if (TheCopy) return false; - // If the pointer has been offset from the start of the alloca, we can't - // safely handle this. - if (IsOffset) return false; + // If the pointer has been offset from the start of the alloca, we can't + // safely handle this. + if (IsOffset) return false; - // If the memintrinsic isn't using the alloca as the dest, reject it. - if (UI.getOperandNo() != 0) return false; + // If the memintrinsic isn't using the alloca as the dest, reject it. + if (U.getOperandNo() != 0) return false; - // If the source of the memcpy/move is not a constant global, reject it. - if (!pointsToConstantGlobal(MI->getSource())) - return false; + // If the source of the memcpy/move is not a constant global, reject it. + if (!pointsToConstantGlobal(MI->getSource())) + return false; - // Otherwise, the transform is safe. Remember the copy instruction. - TheCopy = MI; + // Otherwise, the transform is safe. Remember the copy instruction. + TheCopy = MI; + } } return true; } @@ -144,86 +161,81 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, static MemTransferInst * isOnlyCopiedFromConstantGlobal(AllocaInst *AI, SmallVectorImpl &ToDelete) { - MemTransferInst *TheCopy = 0; + MemTransferInst *TheCopy = nullptr; if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete)) return TheCopy; - return 0; + return nullptr; } -/// getPointeeAlignment - Compute the minimum alignment of the value pointed -/// to by the given pointer. -static unsigned getPointeeAlignment(Value *V, const TargetData &TD) { - if (ConstantExpr *CE = dyn_cast(V)) - if (CE->getOpcode() == Instruction::BitCast || - (CE->getOpcode() == Instruction::GetElementPtr && - cast(CE)->hasAllZeroIndices())) - return getPointeeAlignment(CE->getOperand(0), TD); +static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { + // Check for array size of 1 (scalar allocation). + if (!AI.isArrayAllocation()) { + // i32 1 is the canonical array size for scalar allocations. + if (AI.getArraySize()->getType()->isIntegerTy(32)) + return nullptr; + + // Canonicalize it. + Value *V = IC.Builder->getInt32(1); + AI.setOperand(0, V); + return &AI; + } - if (GlobalVariable *GV = dyn_cast(V)) - if (!GV->isDeclaration()) - return TD.getPreferredAlignment(GV); + // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 + if (const ConstantInt *C = dyn_cast(AI.getArraySize())) { + Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); + AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName()); + New->setAlignment(AI.getAlignment()); - if (PointerType *PT = dyn_cast(V->getType())) - return TD.getABITypeAlignment(PT->getElementType()); + // Scan to the end of the allocation instructions, to skip over a block of + // allocas if possible...also skip interleaved debug info + // + BasicBlock::iterator It(New); + while (isa(*It) || isa(*It)) + ++It; - return 0; -} + // Now that I is pointing to the first non-allocation-inst in the block, + // insert our getelementptr instruction... + // + Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType()); + Value *NullIdx = Constant::getNullValue(IdxTy); + Value *Idx[2] = {NullIdx, NullIdx}; + Instruction *GEP = + GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub"); + IC.InsertNewInstBefore(GEP, *It); + + // Now make everything use the getelementptr instead of the original + // allocation. + return IC.ReplaceInstUsesWith(AI, GEP); + } + + if (isa(AI.getArraySize())) + return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); -Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. - if (TD) { - Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); - if (AI.getArraySize()->getType() != IntPtrTy) { - Value *V = Builder->CreateIntCast(AI.getArraySize(), - IntPtrTy, false); - AI.setOperand(0, V); - return &AI; - } + Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType()); + if (AI.getArraySize()->getType() != IntPtrTy) { + Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); + AI.setOperand(0, V); + return &AI; } - // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 - if (AI.isArrayAllocation()) { // Check C != 1 - if (const ConstantInt *C = dyn_cast(AI.getArraySize())) { - Type *NewTy = - ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); - New->setAlignment(AI.getAlignment()); - - // Scan to the end of the allocation instructions, to skip over a block of - // allocas if possible...also skip interleaved debug info - // - BasicBlock::iterator It = New; - while (isa(*It) || isa(*It)) ++It; - - // Now that I is pointing to the first non-allocation-inst in the block, - // insert our getelementptr instruction... - // - Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext())); - Value *Idx[2]; - Idx[0] = NullIdx; - Idx[1] = NullIdx; - Instruction *GEP = - GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub"); - InsertNewInstBefore(GEP, *It); - - // Now make everything use the getelementptr instead of the original - // allocation. - return ReplaceInstUsesWith(AI, GEP); - } else if (isa(AI.getArraySize())) { - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - } - } + return nullptr; +} + +Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { + if (auto *I = simplifyAllocaArraySize(*this, AI)) + return I; - if (TD && AI.getAllocatedType()->isSized()) { + if (AI.getAllocatedType()->isSized()) { // If the alignment is 0 (unspecified), assign it the preferred alignment. if (AI.getAlignment() == 0) - AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); + AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType())); // Move all alloca's of zero byte objects to the entry block and merge them // together. Note that we only do this for alloca's, because malloc should // allocate and return a unique pointer, even for a zero byte allocation. - if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) { + if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) { // For a zero sized alloca there is no point in doing an array allocation. // This is helpful if the array size is a complicated expression not used // elsewhere. @@ -241,7 +253,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // dominance as the array size was forced to a constant earlier already. AllocaInst *EntryAI = dyn_cast(FirstInst); if (!EntryAI || !EntryAI->getAllocatedType()->isSized() || - TD->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) { + DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) { AI.moveBefore(FirstInst); return &AI; } @@ -250,7 +262,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // assign it the preferred alignment. if (EntryAI->getAlignment() == 0) EntryAI->setAlignment( - TD->getPrefTypeAlignment(EntryAI->getAllocatedType())); + DL.getPrefTypeAlignment(EntryAI->getAllocatedType())); // Replace this zero-sized alloca with the one at the start of the entry // block after ensuring that the address will be aligned enough for both // types. @@ -264,26 +276,30 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } - // Check to see if this allocation is only modified by a memcpy/memmove from - // a constant global whose alignment is equal to or exceeds that of the - // allocation. If this is the case, we can change all users to use - // the constant global instead. This is commonly produced by the CFE by - // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' - // is only subsequently read. - SmallVector ToDelete; - if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { - if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { - DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); - for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) - EraseInstFromFunction(*ToDelete[i]); - Constant *TheSrc = cast(Copy->getSource()); - Instruction *NewI - = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, - AI.getType())); - EraseInstFromFunction(*Copy); - ++NumGlobalCopies; - return NewI; + if (AI.getAlignment()) { + // Check to see if this allocation is only modified by a memcpy/memmove from + // a constant global whose alignment is equal to or exceeds that of the + // allocation. If this is the case, we can change all users to use + // the constant global instead. This is commonly produced by the CFE by + // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' + // is only subsequently read. + SmallVector ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { + unsigned SourceAlign = getOrEnforceKnownAlignment( + Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT); + if (AI.getAlignment() <= SourceAlign) { + DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + EraseInstFromFunction(*ToDelete[i]); + Constant *TheSrc = cast(Copy->getSource()); + Constant *Cast + = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType()); + Instruction *NewI = ReplaceInstUsesWith(AI, Cast); + EraseInstFromFunction(*Copy); + ++NumGlobalCopies; + return NewI; + } } } @@ -292,95 +308,509 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { return visitAllocSite(AI); } +/// \brief Helper to combine a load to a new type. +/// +/// This just does the work of combining a load to a new type. It handles +/// metadata, etc., and returns the new instruction. The \c NewTy should be the +/// loaded *value* type. This will convert it to a pointer, cast the operand to +/// that pointer type, load it, etc. +/// +/// Note that this will create all of the instructions with whatever insert +/// point the \c InstCombiner currently is using. +static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy, + const Twine &Suffix = "") { + Value *Ptr = LI.getPointerOperand(); + unsigned AS = LI.getPointerAddressSpace(); + SmallVector, 8> MD; + LI.getAllMetadata(MD); + + LoadInst *NewLoad = IC.Builder->CreateAlignedLoad( + IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)), + LI.getAlignment(), LI.getName() + Suffix); + MDBuilder MDB(NewLoad->getContext()); + for (const auto &MDPair : MD) { + unsigned ID = MDPair.first; + MDNode *N = MDPair.second; + // Note, essentially every kind of metadata should be preserved here! This + // routine is supposed to clone a load instruction changing *only its type*. + // The only metadata it makes sense to drop is metadata which is invalidated + // when the pointer type changes. This should essentially never be the case + // in LLVM, but we explicitly switch over only known metadata to be + // conservatively correct. If you are adding metadata to LLVM which pertains + // to loads, you almost certainly want to add it here. + switch (ID) { + case LLVMContext::MD_dbg: + case LLVMContext::MD_tbaa: + case LLVMContext::MD_prof: + case LLVMContext::MD_fpmath: + case LLVMContext::MD_tbaa_struct: + case LLVMContext::MD_invariant_load: + case LLVMContext::MD_alias_scope: + case LLVMContext::MD_noalias: + case LLVMContext::MD_nontemporal: + case LLVMContext::MD_mem_parallel_loop_access: + // All of these directly apply. + NewLoad->setMetadata(ID, N); + break; + + case LLVMContext::MD_nonnull: + // This only directly applies if the new type is also a pointer. + if (NewTy->isPointerTy()) { + NewLoad->setMetadata(ID, N); + break; + } + // If it's integral now, translate it to !range metadata. + if (NewTy->isIntegerTy()) { + auto *ITy = cast(NewTy); + auto *NullInt = ConstantExpr::getPtrToInt( + ConstantPointerNull::get(cast(Ptr->getType())), ITy); + auto *NonNullInt = + ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1)); + NewLoad->setMetadata(LLVMContext::MD_range, + MDB.createRange(NonNullInt, NullInt)); + } + break; + case LLVMContext::MD_align: + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: + // These only directly apply if the new type is also a pointer. + if (NewTy->isPointerTy()) + NewLoad->setMetadata(ID, N); + break; + case LLVMContext::MD_range: + // FIXME: It would be nice to propagate this in some way, but the type + // conversions make it hard. If the new type is a pointer, we could + // translate it to !nonnull metadata. + break; + } + } + return NewLoad; +} + +/// \brief Combine a store to a new type. +/// +/// Returns the newly created store instruction. +static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) { + Value *Ptr = SI.getPointerOperand(); + unsigned AS = SI.getPointerAddressSpace(); + SmallVector, 8> MD; + SI.getAllMetadata(MD); + + StoreInst *NewStore = IC.Builder->CreateAlignedStore( + V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), + SI.getAlignment()); + for (const auto &MDPair : MD) { + unsigned ID = MDPair.first; + MDNode *N = MDPair.second; + // Note, essentially every kind of metadata should be preserved here! This + // routine is supposed to clone a store instruction changing *only its + // type*. The only metadata it makes sense to drop is metadata which is + // invalidated when the pointer type changes. This should essentially + // never be the case in LLVM, but we explicitly switch over only known + // metadata to be conservatively correct. If you are adding metadata to + // LLVM which pertains to stores, you almost certainly want to add it + // here. + switch (ID) { + case LLVMContext::MD_dbg: + case LLVMContext::MD_tbaa: + case LLVMContext::MD_prof: + case LLVMContext::MD_fpmath: + case LLVMContext::MD_tbaa_struct: + case LLVMContext::MD_alias_scope: + case LLVMContext::MD_noalias: + case LLVMContext::MD_nontemporal: + case LLVMContext::MD_mem_parallel_loop_access: + // All of these directly apply. + NewStore->setMetadata(ID, N); + break; + + case LLVMContext::MD_invariant_load: + case LLVMContext::MD_nonnull: + case LLVMContext::MD_range: + case LLVMContext::MD_align: + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: + // These don't apply for stores. + break; + } + } + + return NewStore; +} + +/// \brief Combine loads to match the type of value their uses after looking +/// through intervening bitcasts. +/// +/// The core idea here is that if the result of a load is used in an operation, +/// we should load the type most conducive to that operation. For example, when +/// loading an integer and converting that immediately to a pointer, we should +/// instead directly load a pointer. +/// +/// However, this routine must never change the width of a load or the number of +/// loads as that would introduce a semantic change. This combine is expected to +/// be a semantic no-op which just allows loads to more closely model the types +/// of their consuming operations. +/// +/// Currently, we also refuse to change the precise type used for an atomic load +/// or a volatile load. This is debatable, and might be reasonable to change +/// later. However, it is risky in case some backend or other part of LLVM is +/// relying on the exact type loaded to select appropriate atomic operations. +static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) { + // FIXME: We could probably with some care handle both volatile and atomic + // loads here but it isn't clear that this is important. + if (!LI.isSimple()) + return nullptr; + + if (LI.use_empty()) + return nullptr; + + Type *Ty = LI.getType(); + const DataLayout &DL = IC.getDataLayout(); + + // Try to canonicalize loads which are only ever stored to operate over + // integers instead of any other type. We only do this when the loaded type + // is sized and has a size exactly the same as its store size and the store + // size is a legal integer type. + if (!Ty->isIntegerTy() && Ty->isSized() && + DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) && + DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) { + if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) { + auto *SI = dyn_cast(U); + return SI && SI->getPointerOperand() != &LI; + })) { + LoadInst *NewLoad = combineLoadToNewType( + IC, LI, + Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty))); + // Replace all the stores with stores of the newly loaded value. + for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) { + auto *SI = cast(*UI++); + IC.Builder->SetInsertPoint(SI); + combineStoreToNewValue(IC, *SI, NewLoad); + IC.EraseInstFromFunction(*SI); + } + assert(LI.use_empty() && "Failed to remove all users of the load!"); + // Return the old load so the combiner can delete it safely. + return &LI; + } + } -/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible. -static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, - const TargetData *TD) { - User *CI = cast(LI.getOperand(0)); - Value *CastOp = CI->getOperand(0); - - PointerType *DestTy = cast(CI->getType()); - Type *DestPTy = DestTy->getElementType(); - if (PointerType *SrcTy = dyn_cast(CastOp->getType())) { - - // If the address spaces don't match, don't eliminate the cast. - if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) - return 0; - - Type *SrcPTy = SrcTy->getElementType(); - - if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || - DestPTy->isVectorTy()) { - // If the source is an array, the code below will not succeed. Check to - // see if a trivial 'gep P, 0, 0' will help matters. Only do this for - // constants. - if (ArrayType *ASrcTy = dyn_cast(SrcPTy)) - if (Constant *CSrc = dyn_cast(CastOp)) - if (ASrcTy->getNumElements() != 0) { - Value *Idxs[2]; - Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext())); - Idxs[1] = Idxs[0]; - CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs); - SrcTy = cast(CastOp->getType()); - SrcPTy = SrcTy->getElementType(); - } - - if (IC.getTargetData() && - (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || - SrcPTy->isVectorTy()) && - // Do not allow turning this into a load of an integer, which is then - // casted to a pointer, this pessimizes pointer analysis a lot. - (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) && - IC.getTargetData()->getTypeSizeInBits(SrcPTy) == - IC.getTargetData()->getTypeSizeInBits(DestPTy)) { - - // Okay, we are casting from one integer or pointer type to another of - // the same size. Instead of casting the pointer before the load, cast - // the result of the loaded value. - LoadInst *NewLoad = - IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); - NewLoad->setAlignment(LI.getAlignment()); - NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); - // Now cast the result of the load. - return new BitCastInst(NewLoad, LI.getType()); + // Fold away bit casts of the loaded value by loading the desired type. + // We can do this for BitCastInsts as well as casts from and to pointer types, + // as long as those are noops (i.e., the source or dest type have the same + // bitwidth as the target's pointers). + if (LI.hasOneUse()) + if (auto* CI = dyn_cast(LI.user_back())) { + if (CI->isNoopCast(DL)) { + LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy()); + CI->replaceAllUsesWith(NewLoad); + IC.EraseInstFromFunction(*CI); + return &LI; } } + + // FIXME: We should also canonicalize loads of vectors when their elements are + // cast to other types. + return nullptr; +} + +static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) { + // FIXME: We could probably with some care handle both volatile and atomic + // stores here but it isn't clear that this is important. + if (!LI.isSimple()) + return nullptr; + + Type *T = LI.getType(); + if (!T->isAggregateType()) + return nullptr; + + assert(LI.getAlignment() && "Alignment must be set at this point"); + + if (auto *ST = dyn_cast(T)) { + // If the struct only have one element, we unpack. + unsigned Count = ST->getNumElements(); + if (Count == 1) { + LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U), + ".unpack"); + return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue( + UndefValue::get(T), NewLoad, 0, LI.getName())); + } + + // We don't want to break loads with padding here as we'd loose + // the knowledge that padding exists for the rest of the pipeline. + const DataLayout &DL = IC.getDataLayout(); + auto *SL = DL.getStructLayout(ST); + if (SL->hasPadding()) + return nullptr; + + auto Name = LI.getName(); + SmallString<16> LoadName = Name; + LoadName += ".unpack"; + SmallString<16> EltName = Name; + EltName += ".elt"; + auto *Addr = LI.getPointerOperand(); + Value *V = UndefValue::get(T); + auto *IdxType = Type::getInt32Ty(ST->getContext()); + auto *Zero = ConstantInt::get(IdxType, 0); + for (unsigned i = 0; i < Count; i++) { + Value *Indices[2] = { + Zero, + ConstantInt::get(IdxType, i), + }; + auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), EltName); + auto *L = IC.Builder->CreateAlignedLoad(Ptr, LI.getAlignment(), + LoadName); + V = IC.Builder->CreateInsertValue(V, L, i); + } + + V->setName(Name); + return IC.ReplaceInstUsesWith(LI, V); + } + + if (auto *AT = dyn_cast(T)) { + // If the array only have one element, we unpack. + if (AT->getNumElements() == 1) { + LoadInst *NewLoad = combineLoadToNewType(IC, LI, AT->getElementType(), + ".unpack"); + return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue( + UndefValue::get(T), NewLoad, 0, LI.getName())); + } + } + + return nullptr; +} + +// If we can determine that all possible objects pointed to by the provided +// pointer value are, not only dereferenceable, but also definitively less than +// or equal to the provided maximum size, then return true. Otherwise, return +// false (constant global values and allocas fall into this category). +// +// FIXME: This should probably live in ValueTracking (or similar). +static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, + const DataLayout &DL) { + SmallPtrSet Visited; + SmallVector Worklist(1, V); + + do { + Value *P = Worklist.pop_back_val(); + P = P->stripPointerCasts(); + + if (!Visited.insert(P).second) + continue; + + if (SelectInst *SI = dyn_cast(P)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + if (PHINode *PN = dyn_cast(P)) { + for (Value *IncValue : PN->incoming_values()) + Worklist.push_back(IncValue); + continue; + } + + if (GlobalAlias *GA = dyn_cast(P)) { + if (GA->mayBeOverridden()) + return false; + Worklist.push_back(GA->getAliasee()); + continue; + } + + // If we know how big this object is, and it is less than MaxSize, continue + // searching. Otherwise, return false. + if (AllocaInst *AI = dyn_cast(P)) { + if (!AI->getAllocatedType()->isSized()) + return false; + + ConstantInt *CS = dyn_cast(AI->getArraySize()); + if (!CS) + return false; + + uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType()); + // Make sure that, even if the multiplication below would wrap as an + // uint64_t, we still do the right thing. + if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize)) + return false; + continue; + } + + if (GlobalVariable *GV = dyn_cast(P)) { + if (!GV->hasDefinitiveInitializer() || !GV->isConstant()) + return false; + + uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType()); + if (InitSize > MaxSize) + return false; + continue; + } + + return false; + } while (!Worklist.empty()); + + return true; +} + +// If we're indexing into an object of a known size, and the outer index is +// not a constant, but having any value but zero would lead to undefined +// behavior, replace it with zero. +// +// For example, if we have: +// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4 +// ... +// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x +// ... = load i32* %arrayidx, align 4 +// Then we know that we can replace %x in the GEP with i64 0. +// +// FIXME: We could fold any GEP index to zero that would cause UB if it were +// not zero. Currently, we only handle the first such index. Also, we could +// also search through non-zero constant indices if we kept track of the +// offsets those indices implied. +static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI, + Instruction *MemI, unsigned &Idx) { + if (GEPI->getNumOperands() < 2) + return false; + + // Find the first non-zero index of a GEP. If all indices are zero, return + // one past the last index. + auto FirstNZIdx = [](const GetElementPtrInst *GEPI) { + unsigned I = 1; + for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) { + Value *V = GEPI->getOperand(I); + if (const ConstantInt *CI = dyn_cast(V)) + if (CI->isZero()) + continue; + + break; + } + + return I; + }; + + // Skip through initial 'zero' indices, and find the corresponding pointer + // type. See if the next index is not a constant. + Idx = FirstNZIdx(GEPI); + if (Idx == GEPI->getNumOperands()) + return false; + if (isa(GEPI->getOperand(Idx))) + return false; + + SmallVector Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx); + Type *AllocTy = GetElementPtrInst::getIndexedType( + cast(GEPI->getOperand(0)->getType()->getScalarType()) + ->getElementType(), + Ops); + if (!AllocTy || !AllocTy->isSized()) + return false; + const DataLayout &DL = IC.getDataLayout(); + uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy); + + // If there are more indices after the one we might replace with a zero, make + // sure they're all non-negative. If any of them are negative, the overall + // address being computed might be before the base address determined by the + // first non-zero index. + auto IsAllNonNegative = [&]() { + for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) { + bool KnownNonNegative, KnownNegative; + IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative, + KnownNegative, 0, MemI); + if (KnownNonNegative) + continue; + return false; + } + + return true; + }; + + // FIXME: If the GEP is not inbounds, and there are extra indices after the + // one we'll replace, those could cause the address computation to wrap + // (rendering the IsAllNonNegative() check below insufficient). We can do + // better, ignoring zero indices (and other indices we can prove small + // enough not to wrap). + if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds()) + return false; + + // Note that isObjectSizeLessThanOrEq will return true only if the pointer is + // also known to be dereferenceable. + return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) && + IsAllNonNegative(); +} + +// If we're indexing into an object with a variable index for the memory +// access, but the object has only one element, we can assume that the index +// will always be zero. If we replace the GEP, return it. +template +static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr, + T &MemI) { + if (GetElementPtrInst *GEPI = dyn_cast(Ptr)) { + unsigned Idx; + if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) { + Instruction *NewGEPI = GEPI->clone(); + NewGEPI->setOperand(Idx, + ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0)); + NewGEPI->insertBefore(GEPI); + MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI); + return NewGEPI; + } } - return 0; + + return nullptr; } Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); + // Try to canonicalize the loaded type. + if (Instruction *Res = combineLoadToOperationType(*this, LI)) + return Res; + // Attempt to improve the alignment. - if (TD) { - unsigned KnownAlign = - getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD); - unsigned LoadAlign = LI.getAlignment(); - unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign : - TD->getABITypeAlignment(LI.getType()); - - if (KnownAlign > EffectiveLoadAlign) - LI.setAlignment(KnownAlign); - else if (LoadAlign == 0) - LI.setAlignment(EffectiveLoadAlign); + unsigned KnownAlign = getOrEnforceKnownAlignment( + Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT); + unsigned LoadAlign = LI.getAlignment(); + unsigned EffectiveLoadAlign = + LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType()); + + if (KnownAlign > EffectiveLoadAlign) + LI.setAlignment(KnownAlign); + else if (LoadAlign == 0) + LI.setAlignment(EffectiveLoadAlign); + + // Replace GEP indices if possible. + if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) { + Worklist.Add(NewGEPI); + return &LI; } - // load (cast X) --> cast (load X) iff safe. - if (isa(Op)) - if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) - return Res; - // None of the following transforms are legal for volatile/atomic loads. // FIXME: Some of it is okay for atomic loads; needs refactoring. - if (!LI.isSimple()) return 0; - + if (!LI.isSimple()) return nullptr; + + if (Instruction *Res = unpackLoadToAggregate(*this, LI)) + return Res; + // Do really simple store-to-load forwarding and load CSE, to catch cases // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. - BasicBlock::iterator BBI = &LI; - if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) - return ReplaceInstUsesWith(LI, AvailableVal); + BasicBlock::iterator BBI(LI); + AAMDNodes AATags; + if (Value *AvailableVal = + FindAvailableLoadedValue(Op, LI.getParent(), BBI, + DefMaxInstsToScan, AA, &AATags)) { + if (LoadInst *NLI = dyn_cast(AvailableVal)) { + unsigned KnownIDs[] = { + LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_range, + LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull, + LLVMContext::MD_invariant_group, LLVMContext::MD_align, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null}; + combineMetadata(NLI, &LI, KnownIDs); + }; + + return ReplaceInstUsesWith( + LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(), + LI.getName() + ".cast")); + } // load(gep null, ...) -> unreachable if (GetElementPtrInst *GEPI = dyn_cast(Op)) { @@ -395,7 +825,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Constant::getNullValue(Op->getType()), &LI); return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } - } + } // load null/undef -> unreachable // TODO: Consider a target hook for valid address spaces for this xform. @@ -409,12 +839,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } - // Instcombine load (constantexpr_cast global) -> cast (load global) - if (ConstantExpr *CE = dyn_cast(Op)) - if (CE->isCast()) - if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) - return Res; - if (Op->hasOneUse()) { // Change select and PHI nodes to select values instead of addresses: this // helps alias analysis out a lot, allows many others simplifications, and @@ -429,8 +853,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (SelectInst *SI = dyn_cast(Op)) { // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). unsigned Align = LI.getAlignment(); - if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) && - isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) { + if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) && + isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) { LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1), SI->getOperand(1)->getName()+".val"); LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2), @@ -441,108 +865,121 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { } // load (select (cond, null, P)) -> load P - if (Constant *C = dyn_cast(SI->getOperand(1))) - if (C->isNullValue()) { - LI.setOperand(0, SI->getOperand(2)); - return &LI; - } + if (isa(SI->getOperand(1)) && + LI.getPointerAddressSpace() == 0) { + LI.setOperand(0, SI->getOperand(2)); + return &LI; + } // load (select (cond, P, null)) -> load P - if (Constant *C = dyn_cast(SI->getOperand(2))) - if (C->isNullValue()) { - LI.setOperand(0, SI->getOperand(1)); - return &LI; - } + if (isa(SI->getOperand(2)) && + LI.getPointerAddressSpace() == 0) { + LI.setOperand(0, SI->getOperand(1)); + return &LI; + } } } - return 0; + return nullptr; } -/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P -/// when possible. This makes it generally easy to do alias analysis and/or -/// SROA/mem2reg of the memory object. -static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { - User *CI = cast(SI.getOperand(1)); - Value *CastOp = CI->getOperand(0); - - Type *DestPTy = cast(CI->getType())->getElementType(); - PointerType *SrcTy = dyn_cast(CastOp->getType()); - if (SrcTy == 0) return 0; - - Type *SrcPTy = SrcTy->getElementType(); - - if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) - return 0; - - /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" - /// to its first element. This allows us to handle things like: - /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) - /// on 32-bit hosts. - SmallVector NewGEPIndices; - - // If the source is an array, the code below will not succeed. Check to - // see if a trivial 'gep P, 0, 0' will help matters. Only do this for - // constants. - if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) { - // Index through pointer. - Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); - NewGEPIndices.push_back(Zero); - - while (1) { - if (StructType *STy = dyn_cast(SrcPTy)) { - if (!STy->getNumElements()) /* Struct can be empty {} */ - break; - NewGEPIndices.push_back(Zero); - SrcPTy = STy->getElementType(0); - } else if (ArrayType *ATy = dyn_cast(SrcPTy)) { - NewGEPIndices.push_back(Zero); - SrcPTy = ATy->getElementType(); - } else { - break; - } +/// \brief Combine stores to match the type of value being stored. +/// +/// The core idea here is that the memory does not have any intrinsic type and +/// where we can we should match the type of a store to the type of value being +/// stored. +/// +/// However, this routine must never change the width of a store or the number of +/// stores as that would introduce a semantic change. This combine is expected to +/// be a semantic no-op which just allows stores to more closely model the types +/// of their incoming values. +/// +/// Currently, we also refuse to change the precise type used for an atomic or +/// volatile store. This is debatable, and might be reasonable to change later. +/// However, it is risky in case some backend or other part of LLVM is relying +/// on the exact type stored to select appropriate atomic operations. +/// +/// \returns true if the store was successfully combined away. This indicates +/// the caller must erase the store instruction. We have to let the caller erase +/// the store instruction as otherwise there is no way to signal whether it was +/// combined or not: IC.EraseInstFromFunction returns a null pointer. +static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) { + // FIXME: We could probably with some care handle both volatile and atomic + // stores here but it isn't clear that this is important. + if (!SI.isSimple()) + return false; + + Value *V = SI.getValueOperand(); + + // Fold away bit casts of the stored value by storing the original type. + if (auto *BC = dyn_cast(V)) { + V = BC->getOperand(0); + combineStoreToNewValue(IC, SI, V); + return true; + } + + // FIXME: We should also canonicalize loads of vectors when their elements are + // cast to other types. + return false; +} + +static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) { + // FIXME: We could probably with some care handle both volatile and atomic + // stores here but it isn't clear that this is important. + if (!SI.isSimple()) + return false; + + Value *V = SI.getValueOperand(); + Type *T = V->getType(); + + if (!T->isAggregateType()) + return false; + + if (auto *ST = dyn_cast(T)) { + // If the struct only have one element, we unpack. + unsigned Count = ST->getNumElements(); + if (Count == 1) { + V = IC.Builder->CreateExtractValue(V, 0); + combineStoreToNewValue(IC, SI, V); + return true; + } + + // We don't want to break loads with padding here as we'd loose + // the knowledge that padding exists for the rest of the pipeline. + const DataLayout &DL = IC.getDataLayout(); + auto *SL = DL.getStructLayout(ST); + if (SL->hasPadding()) + return false; + + SmallString<16> EltName = V->getName(); + EltName += ".elt"; + auto *Addr = SI.getPointerOperand(); + SmallString<16> AddrName = Addr->getName(); + AddrName += ".repack"; + auto *IdxType = Type::getInt32Ty(ST->getContext()); + auto *Zero = ConstantInt::get(IdxType, 0); + for (unsigned i = 0; i < Count; i++) { + Value *Indices[2] = { + Zero, + ConstantInt::get(IdxType, i), + }; + auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), AddrName); + auto *Val = IC.Builder->CreateExtractValue(V, i, EltName); + IC.Builder->CreateStore(Val, Ptr); } - - SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); + + return true; } - if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) - return 0; - - // If the pointers point into different address spaces or if they point to - // values with different sizes, we can't do the transformation. - if (!IC.getTargetData() || - SrcTy->getAddressSpace() != - cast(CI->getType())->getAddressSpace() || - IC.getTargetData()->getTypeSizeInBits(SrcPTy) != - IC.getTargetData()->getTypeSizeInBits(DestPTy)) - return 0; - - // Okay, we are casting from one integer or pointer type to another of - // the same size. Instead of casting the pointer before - // the store, cast the value to be stored. - Value *NewCast; - Value *SIOp0 = SI.getOperand(0); - Instruction::CastOps opcode = Instruction::BitCast; - Type* CastSrcTy = SIOp0->getType(); - Type* CastDstTy = SrcPTy; - if (CastDstTy->isPointerTy()) { - if (CastSrcTy->isIntegerTy()) - opcode = Instruction::IntToPtr; - } else if (CastDstTy->isIntegerTy()) { - if (SIOp0->getType()->isPointerTy()) - opcode = Instruction::PtrToInt; + if (auto *AT = dyn_cast(T)) { + // If the array only have one element, we unpack. + if (AT->getNumElements() == 1) { + V = IC.Builder->CreateExtractValue(V, 0); + combineStoreToNewValue(IC, SI, V); + return true; + } } - - // SIOp0 is a pointer to aggregate and this is a store to the first field, - // emit a GEP to index into its first field. - if (!NewGEPIndices.empty()) - CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); - - NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, - SIOp0->getName()+".c"); - SI.setOperand(0, NewCast); - SI.setOperand(1, CastOp); - return &SI; + + return false; } /// equivalentAddressValues - Test if A and B will obviously have the same @@ -556,7 +993,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { static bool equivalentAddressValues(Value *A, Value *B) { // Test if the values are trivially equivalent. if (A == B) return true; - + // Test if the values come form identical arithmetic instructions. // This uses isIdenticalToWhenDefined instead of isIdenticalTo because // its only used to compare two uses within the same basic block, which @@ -569,7 +1006,7 @@ static bool equivalentAddressValues(Value *A, Value *B) { if (Instruction *BI = dyn_cast(B)) if (cast(A)->isIdenticalToWhenDefined(BI)) return true; - + // Otherwise they may not be equivalent. return false; } @@ -578,29 +1015,40 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); + // Try to canonicalize the stored type. + if (combineStoreToValueType(*this, SI)) + return EraseInstFromFunction(SI); + // Attempt to improve the alignment. - if (TD) { - unsigned KnownAlign = - getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()), - TD); - unsigned StoreAlign = SI.getAlignment(); - unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign : - TD->getABITypeAlignment(Val->getType()); - - if (KnownAlign > EffectiveStoreAlign) - SI.setAlignment(KnownAlign); - else if (StoreAlign == 0) - SI.setAlignment(EffectiveStoreAlign); + unsigned KnownAlign = getOrEnforceKnownAlignment( + Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT); + unsigned StoreAlign = SI.getAlignment(); + unsigned EffectiveStoreAlign = + StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType()); + + if (KnownAlign > EffectiveStoreAlign) + SI.setAlignment(KnownAlign); + else if (StoreAlign == 0) + SI.setAlignment(EffectiveStoreAlign); + + // Try to canonicalize the stored type. + if (unpackStoreToAggregate(*this, SI)) + return EraseInstFromFunction(SI); + + // Replace GEP indices if possible. + if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) { + Worklist.Add(NewGEPI); + return &SI; } - // Don't hack volatile/atomic stores. - // FIXME: Some bits are legal for atomic stores; needs refactoring. - if (!SI.isSimple()) return 0; + // Don't hack volatile/ordered stores. + // FIXME: Some bits are legal for ordered atomic stores; needs refactoring. + if (!SI.isUnordered()) return nullptr; // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. if (Ptr->hasOneUse()) { - if (isa(Ptr)) + if (isa(Ptr)) return EraseInstFromFunction(SI); if (GetElementPtrInst *GEP = dyn_cast(Ptr)) { if (isa(GEP->getOperand(0))) { @@ -613,7 +1061,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // Do really simple DSE, to catch cases where there are several consecutive // stores to the same location, separated by a few arithmetic operations. This // situation often occurs with bitfield accesses. - BasicBlock::iterator BBI = &SI; + BasicBlock::iterator BBI(SI); for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; --ScanInsts) { --BBI; @@ -623,11 +1071,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { (isa(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; - } - + } + if (StoreInst *PrevSI = dyn_cast(BBI)) { // Prev store isn't volatile, and stores to the same location? - if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), + if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1))) { ++NumDeadStore; ++BBI; @@ -636,20 +1084,21 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { } break; } - + // If this is a load, we have to stop. However, if the loaded value is from // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). if (LoadInst *LI = dyn_cast(BBI)) { - if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && - LI->isSimple()) + if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) { + assert(SI.isUnordered() && "can't eliminate ordering operation"); return EraseInstFromFunction(SI); - + } + // Otherwise, this is a load from some other location. Stores before it // may not be dead. break; } - + // Don't skip over loads or things that can modify memory. if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) break; @@ -662,28 +1111,21 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (Instruction *U = dyn_cast(Val)) Worklist.Add(U); // Dropped a use. } - return 0; // Do not modify these! + return nullptr; // Do not modify these! } // store undef, Ptr -> noop if (isa(Val)) return EraseInstFromFunction(SI); - // If the pointer destination is a cast, see if we can fold the cast into the - // source instead. - if (isa(Ptr)) - if (Instruction *Res = InstCombineStoreToCast(*this, SI)) - return Res; - if (ConstantExpr *CE = dyn_cast(Ptr)) - if (CE->isCast()) - if (Instruction *Res = InstCombineStoreToCast(*this, SI)) - return Res; - - + // The code below needs to be audited and adjusted for unordered atomics + if (!SI.isSimple()) + return nullptr; + // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. - BBI = &SI; + BBI = SI.getIterator(); do { ++BBI; } while (isa(BBI) || @@ -691,9 +1133,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (BranchInst *BI = dyn_cast(BBI)) if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) - return 0; // xform done! - - return 0; + return nullptr; // xform done! + + return nullptr; } /// SimplifyStoreAtEndOfBlock - Turn things like: @@ -706,24 +1148,24 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { /// bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BasicBlock *StoreBB = SI.getParent(); - + // Check to see if the successor block has exactly two incoming edges. If // so, see if the other predecessor contains a store to the same location. // if so, insert a PHI node (if needed) and move the stores down. BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); - + // Determine whether Dest has exactly two predecessors and, if so, compute // the other predecessor. pred_iterator PI = pred_begin(DestBB); BasicBlock *P = *PI; - BasicBlock *OtherBB = 0; + BasicBlock *OtherBB = nullptr; if (P != StoreBB) OtherBB = P; if (++PI == pred_end(DestBB)) return false; - + P = *PI; if (P != StoreBB) { if (OtherBB) @@ -739,14 +1181,14 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { return false; // Verify that the other block ends in a branch and is not otherwise empty. - BasicBlock::iterator BBI = OtherBB->getTerminator(); + BasicBlock::iterator BBI(OtherBB->getTerminator()); BranchInst *OtherBr = dyn_cast(BBI); if (!OtherBr || BBI == OtherBB->begin()) return false; - + // If the other block ends in an unconditional branch, check for the 'if then // else' case. there is an instruction before the branch. - StoreInst *OtherStore = 0; + StoreInst *OtherStore = nullptr; if (OtherBr->isUnconditional()) { --BBI; // Skip over debugging info. @@ -765,10 +1207,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { } else { // Otherwise, the other block ended with a conditional branch. If one of the // destinations is StoreBB, then we have the if/then case. - if (OtherBr->getSuccessor(0) != StoreBB && + if (OtherBr->getSuccessor(0) != StoreBB && OtherBr->getSuccessor(1) != StoreBB) return false; - + // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an // if/then triangle. See if there is a store to the same ptr as SI that // lives in OtherBB. @@ -786,7 +1228,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BBI == OtherBB->begin()) return false; } - + // In order to eliminate the store in OtherBr, we have to // make sure nothing reads or overwrites the stored value in // StoreBB. @@ -796,7 +1238,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { return false; } } - + // Insert a PHI node now if we need it. Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { @@ -805,7 +1247,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); } - + // Advance to a place where it is safe to insert the new store and // insert it. BBI = DestBB->getFirstInsertionPt(); @@ -815,7 +1257,15 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { SI.getOrdering(), SI.getSynchScope()); InsertNewInstBefore(NewSI, *BBI); - NewSI->setDebugLoc(OtherStore->getDebugLoc()); + NewSI->setDebugLoc(OtherStore->getDebugLoc()); + + // If the two stores had AA tags, merge them. + AAMDNodes AATags; + SI.getAAMetadata(AATags); + if (AATags) { + OtherStore->getAAMetadata(AATags, /* Merge = */ true); + NewSI->setAAMetadata(AATags); + } // Nuke the old stores. EraseInstFromFunction(SI);