X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FInstCombine%2FInstCombineLoadStoreAlloca.cpp;h=47406b9a1632086d6e30e35b3a7c99da061002a5;hp=3c70f442647a9a05d69167cbf8fec110cba304c9;hb=ec185d074d50cc9e8af23605862d5fa0a87851dc;hpb=2b5c8d67d2b20599eda9d1770f3ccce0e6417533

diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 3c70f442647..47406b9a163 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombineInternal.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/IR/DataLayout.h"
@@ -90,21 +91,23 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
         if (CS.isCallee(&U))
           continue;
 
+        unsigned DataOpNo = CS.getDataOperandNo(&U);
+        bool IsArgOperand = CS.isArgOperand(&U);
+
         // Inalloca arguments are clobbered by the call.
-        unsigned ArgNo = CS.getArgumentNo(&U);
-        if (CS.isInAllocaArgument(ArgNo))
+        if (IsArgOperand && CS.isInAllocaArgument(DataOpNo))
           return false;
 
         // If this is a readonly/readnone call site, then we know it is just a
         // load (but one that potentially returns the value itself), so we can
         // ignore it if we know that the value isn't captured.
         if (CS.onlyReadsMemory() &&
-            (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
+            (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo)))
           continue;
 
         // If this is being passed as a byval argument, the caller is making a
         // copy, so it is only a read of the alloca.
-        if (CS.isByValArgument(ArgNo))
+        if (IsArgOperand && CS.isByValArgument(DataOpNo))
           continue;
       }
 
@@ -186,7 +189,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
     // Scan to the end of the allocation instructions, to skip over a block of
     // allocas if possible...also skip interleaved debug info
     //
-    BasicBlock::iterator It = New;
+    BasicBlock::iterator It(New);
     while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
       ++It;
 
@@ -367,7 +370,13 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
                              MDB.createRange(NonNullInt, NullInt));
       }
       break;
-
+    case LLVMContext::MD_align:
+    case LLVMContext::MD_dereferenceable:
+    case LLVMContext::MD_dereferenceable_or_null:
+      // These only directly apply if the new type is also a pointer.
+      if (NewTy->isPointerTy())
+        NewLoad->setMetadata(ID, N);
+      break;
     case LLVMContext::MD_range:
       // FIXME: It would be nice to propagate this in some way, but the type
       // conversions make it hard. If the new type is a pointer, we could
@@ -418,6 +427,9 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value
     case LLVMContext::MD_invariant_load:
     case LLVMContext::MD_nonnull:
     case LLVMContext::MD_range:
+    case LLVMContext::MD_align:
+    case LLVMContext::MD_dereferenceable:
+    case LLVMContext::MD_dereferenceable_or_null:
       // These don't apply for stores.
       break;
     }
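For context on the two metadata hunks above (a hypothetical illustration, not part of the patch): !align, !dereferenceable and !dereferenceable_or_null describe the pointer value a load produces, so combineLoadToNewType copies them only when the rebuilt load still has pointer type, and combineStoreToNewValue always drops them when rewriting the stored value. A minimal source-level sketch, assuming the usual load-type rewrite InstCombine performs when a load's only use is a no-op pointer/integer cast:

#include <cstdint>

// Hypothetical example: if the pointer-typed load of *Slot is rebuilt as an
// integer load of the same bits, any !align/!dereferenceable metadata that
// described the loaded pointer has no meaning on the integer load and must be
// dropped, which is exactly what the new switch cases implement.
std::uintptr_t loadAsInteger(void **Slot) {
  void *P = *Slot;                            // pointer-typed load in IR
  return reinterpret_cast<std::uintptr_t>(P); // may be folded into an integer load
}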
@@ -515,12 +527,42 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
 
   if (auto *ST = dyn_cast<StructType>(T)) {
     // If the struct only has one element, we unpack.
-    if (ST->getNumElements() == 1) {
+    unsigned Count = ST->getNumElements();
+    if (Count == 1) {
       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
                                                ".unpack");
       return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
         UndefValue::get(T), NewLoad, 0, LI.getName()));
     }
+
+    // We don't want to break loads with padding here as we'd lose
+    // the knowledge that padding exists for the rest of the pipeline.
+    const DataLayout &DL = IC.getDataLayout();
+    auto *SL = DL.getStructLayout(ST);
+    if (SL->hasPadding())
+      return nullptr;
+
+    auto Name = LI.getName();
+    SmallString<16> LoadName = Name;
+    LoadName += ".unpack";
+    SmallString<16> EltName = Name;
+    EltName += ".elt";
+    auto *Addr = LI.getPointerOperand();
+    Value *V = UndefValue::get(T);
+    auto *IdxType = Type::getInt32Ty(ST->getContext());
+    auto *Zero = ConstantInt::get(IdxType, 0);
+    for (unsigned i = 0; i < Count; i++) {
+      Value *Indices[2] = {
+        Zero,
+        ConstantInt::get(IdxType, i),
+      };
+      auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), EltName);
+      auto *L = IC.Builder->CreateLoad(ST->getTypeAtIndex(i), Ptr, LoadName);
+      V = IC.Builder->CreateInsertValue(V, L, i);
+    }
+
+    V->setName(Name);
+    return IC.ReplaceInstUsesWith(LI, V);
   }
 
   if (auto *AT = dyn_cast<ArrayType>(T)) {
@@ -748,19 +790,19 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   // Do really simple store-to-load forwarding and load CSE, to catch cases
   // where there are several consecutive memory accesses to the same location,
   // separated by a few arithmetic operations.
-  BasicBlock::iterator BBI = &LI;
+  BasicBlock::iterator BBI(LI);
   AAMDNodes AATags;
-  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,
-                                                     6, AA, &AATags)) {
+  if (Value *AvailableVal =
+      FindAvailableLoadedValue(Op, LI.getParent(), BBI,
+                               DefMaxInstsToScan, AA, &AATags)) {
     if (LoadInst *NLI = dyn_cast<LoadInst>(AvailableVal)) {
       unsigned KnownIDs[] = {
-        LLVMContext::MD_tbaa,
-        LLVMContext::MD_alias_scope,
-        LLVMContext::MD_noalias,
-        LLVMContext::MD_range,
-        LLVMContext::MD_invariant_load,
-        LLVMContext::MD_nonnull,
-      };
+          LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+          LLVMContext::MD_noalias, LLVMContext::MD_range,
+          LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
+          LLVMContext::MD_invariant_group, LLVMContext::MD_align,
+          LLVMContext::MD_dereferenceable,
+          LLVMContext::MD_dereferenceable_or_null};
       combineMetadata(NLI, &LI, KnownIDs);
     };
 
@@ -822,7 +864,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
     }
     // load (select (cond, null, P)) -> load P
-    if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
+    if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
         LI.getPointerAddressSpace() == 0) {
       LI.setOperand(0, SI->getOperand(2));
       return &LI;
     }
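For context, a hypothetical source-level sketch (not taken from the patch or its tests) of the pattern the unpackLoadToAggregate hunk above targets: a load of a multi-element struct with no padding is split into one scalar load per element plus insertvalues, using the .elt/.unpack name suffixes introduced above, while structs whose layout has padding are deliberately left alone.

#include <cstdio>

// Hypothetical example: Pair has two int elements and, on common targets, no
// padding, so DataLayout::getStructLayout(ST)->hasPadding() is false and an
// aggregate load of *P can be rewritten into per-element GEP + load +
// insertvalue sequences instead of a single load of the whole struct.
struct Pair {
  int First;
  int Second;
};

Pair copyPair(const Pair *P) {
  return *P; // an aggregate load in straightforward IR
}

int main() {
  Pair Src = {1, 2};
  Pair Dst = copyPair(&Src);
  std::printf("%d %d\n", Dst.First, Dst.Second);
  return 0;
}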
@@ -893,11 +935,38 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
 
   if (auto *ST = dyn_cast<StructType>(T)) {
     // If the struct only has one element, we unpack.
-    if (ST->getNumElements() == 1) {
+    unsigned Count = ST->getNumElements();
+    if (Count == 1) {
       V = IC.Builder->CreateExtractValue(V, 0);
       combineStoreToNewValue(IC, SI, V);
       return true;
     }
+
+    // We don't want to break loads with padding here as we'd lose
+    // the knowledge that padding exists for the rest of the pipeline.
+    const DataLayout &DL = IC.getDataLayout();
+    auto *SL = DL.getStructLayout(ST);
+    if (SL->hasPadding())
+      return false;
+
+    SmallString<16> EltName = V->getName();
+    EltName += ".elt";
+    auto *Addr = SI.getPointerOperand();
+    SmallString<16> AddrName = Addr->getName();
+    AddrName += ".repack";
+    auto *IdxType = Type::getInt32Ty(ST->getContext());
+    auto *Zero = ConstantInt::get(IdxType, 0);
+    for (unsigned i = 0; i < Count; i++) {
+      Value *Indices[2] = {
+        Zero,
+        ConstantInt::get(IdxType, i),
+      };
+      auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), AddrName);
+      auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+      IC.Builder->CreateStore(Val, Ptr);
+    }
+
+    return true;
   }
 
   if (auto *AT = dyn_cast<ArrayType>(T)) {
@@ -971,9 +1040,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
     return &SI;
   }
 
-  // Don't hack volatile/atomic stores.
-  // FIXME: Some bits are legal for atomic stores; needs refactoring.
-  if (!SI.isSimple()) return nullptr;
+  // Don't hack volatile/ordered stores.
+  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
+  if (!SI.isUnordered()) return nullptr;
 
   // If the RHS is an alloca with a single use, zapify the store, making the
   // alloca dead.
@@ -991,7 +1060,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   // Do really simple DSE, to catch cases where there are several consecutive
   // stores to the same location, separated by a few arithmetic operations. This
   // situation often occurs with bitfield accesses.
-  BasicBlock::iterator BBI = &SI;
+  BasicBlock::iterator BBI(SI);
   for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
        --ScanInsts) {
     --BBI;
@@ -1005,7 +1074,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
 
     if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
       // Prev store isn't volatile, and stores to the same location?
-      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
+      if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
                                                          SI.getOperand(1))) {
         ++NumDeadStore;
         ++BBI;
@@ -1019,9 +1088,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
     // the pointer we're loading and is producing the pointer we're storing,
     // then *this* store is dead (X = load P; store X -> P).
     if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
-      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
-          LI->isSimple())
+      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
+        assert(SI.isUnordered() && "can't eliminate ordering operation");
         return EraseInstFromFunction(SI);
+      }
 
       // Otherwise, this is a load from some other location.  Stores before it
       // may not be dead.
@@ -1047,10 +1117,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   if (isa<UndefValue>(Val))
     return EraseInstFromFunction(SI);
 
+  // The code below needs to be audited and adjusted for unordered atomics
+  if (!SI.isSimple())
+    return nullptr;
+
   // If this store is the last instruction in the basic block (possibly
   // excepting debug info instructions), and if the block ends with an
   // unconditional branch, try to move it to the successor block.
-  BBI = &SI;
+  BBI = SI.getIterator();
   do {
     ++BBI;
   } while (isa<DbgInfoIntrinsic>(BBI) ||
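The visitStoreInst hunks above relax several combines from simple stores (non-volatile, non-atomic) to unordered ones: unordered is LLVM's weakest atomic ordering, below the monotonic ordering that C++'s memory_order_relaxed lowers to, and the early store-of-load and adjacent-dead-store combines are applied to it here, while the later block-sinking code still bails out unless the store is simple. A rough C++ reference point (hypothetical illustration, not from the patch):

#include <atomic>

// Hypothetical example: memory_order_relaxed corresponds to LLVM's "monotonic"
// ordering; "unordered" is weaker still (it mostly comes from frontends for
// Java-like languages) and is the weakest ordering the combines above now accept.
void publishRelaxed(std::atomic<int> &Slot, int NewVal) {
  Slot.store(NewVal, std::memory_order_relaxed); // "store atomic ... monotonic" in IR
}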
@@ -1106,7 +1180,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
     return false;
 
   // Verify that the other block ends in a branch and is not otherwise empty.
-  BasicBlock::iterator BBI = OtherBB->getTerminator();
+  BasicBlock::iterator BBI(OtherBB->getTerminator());
   BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
   if (!OtherBr || BBI == OtherBB->begin())
     return false;
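The last hunk only changes how the iterator at the other predecessor's terminator is constructed; for context, the shape SimplifyStoreAtEndOfBlock handles is two predecessors that each end in a store to the same location, which it merges into a single store (of a phi of the stored values) in the common successor. A hypothetical source-level example (not part of the patch):

// Hypothetical example of the pattern: both arms of the branch end in a store
// to *P, so the two stores can be sunk into the join block as one store of a
// phi of the two values.
void selectValue(bool Cond, int *P) {
  if (Cond)
    *P = 1;
  else
    *P = 2;
}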