From 06511264f806cf2a55a090a555dc91a2a2e10a36 Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Sat, 8 Jan 2011 20:54:51 +0000
Subject: [PATCH] enhance memcpyopt to merge a store and a subsequent memset
 into a single larger memset.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123086 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/MemCpyOptimizer.cpp | 136 +++++++++++++---------
 test/Transforms/MemCpyOpt/form-memset.ll  |  18 +++
 2 files changed, 101 insertions(+), 53 deletions(-)

diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9f8e860daab..a4f60878e6f 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -121,7 +121,7 @@ struct MemsetRange {
   unsigned Alignment;
 
   /// TheStores - The actual stores that make up this range.
-  SmallVector<StoreInst*, 16> TheStores;
+  SmallVector<Instruction*, 16> TheStores;
 
   bool isProfitableToUseMemset(const TargetData &TD) const;
 
@@ -131,10 +131,19 @@ struct MemsetRange {
 bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
   // If we found more than 8 stores to merge or 64 bytes, use memset.
   if (TheStores.size() >= 8 || End-Start >= 64) return true;
+
+  // If there is nothing to merge, don't do anything.
+  if (TheStores.size() < 2) return false;
+
+  // If any of the stores are a memset, then it is always good to extend the
+  // memset.
+  for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
+    if (!isa<StoreInst>(TheStores[i]))
+      return true;
 
   // Assume that the code generator is capable of merging pairs of stores
   // together if it wants to.
-  if (TheStores.size() <= 2) return false;
+  if (TheStores.size() == 2) return false;
 
   // If we have fewer than 8 stores, it can still be worthwhile to do this.
   // For example, merging 4 i8 stores into an i32 store is useful almost always.
@@ -174,26 +183,44 @@ public:
   const_iterator end() const { return Ranges.end(); }
   bool empty() const { return Ranges.empty(); }
 
-  void addStore(int64_t OffsetFromFirst, StoreInst *SI);
-
   void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
-    addStore(OffsetFromFirst, cast<StoreInst>(Inst));
+    if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+      addStore(OffsetFromFirst, SI);
+    else
+      addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
+  }
+
+  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
+    int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
+
+    addRange(OffsetFromFirst, StoreSize,
+             SI->getPointerOperand(), SI->getAlignment(), SI);
+  }
+
+  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
+    int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
   }
+
+  void addRange(int64_t Start, int64_t Size, Value *Ptr,
+                unsigned Alignment, Instruction *Inst);
+
 };
 
 } // end anon namespace
 
 
-/// addStore - Add a new store to the MemsetRanges data structure.  This adds a
+/// addRange - Add a new store to the MemsetRanges data structure.  This adds a
 /// new range for the specified store at the specified offset, merging into
 /// existing ranges as appropriate.
-void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
-  int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
-
-  // Do a linear search of the ranges to see if this can be joined and/or to
-  // find the insertion point in the list.  We keep the ranges sorted for
-  // simplicity here.  This is a linear search of a linked list, which is ugly,
-  // however the number of ranges is limited, so this won't get crazy slow.
+///
+/// Do a linear search of the ranges to see if this can be joined and/or to
+/// find the insertion point in the list.  We keep the ranges sorted for
+/// simplicity here.  This is a linear search of a linked list, which is ugly,
+/// however the number of ranges is limited, so this won't get crazy slow.
+void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
+                            unsigned Alignment, Instruction *Inst) {
+  int64_t End = Start+Size;
   range_iterator I = Ranges.begin(), E = Ranges.end();
 
   while (I != E && Start > I->End)
@@ -206,14 +233,14 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
     MemsetRange &R = *Ranges.insert(I, MemsetRange());
     R.Start = Start;
     R.End = End;
-    R.StartPtr = SI->getPointerOperand();
-    R.Alignment = SI->getAlignment();
-    R.TheStores.push_back(SI);
+    R.StartPtr = Ptr;
+    R.Alignment = Alignment;
+    R.TheStores.push_back(Inst);
     return;
   }
-
+
   // This store overlaps with I, add it.
-  I->TheStores.push_back(SI);
+  I->TheStores.push_back(Inst);
 
   // At this point, we may have an interval that completely contains our store.
   // If so, just add it to the interval and return.
@@ -228,8 +255,8 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
   // stopped on *it*.
   if (Start < I->Start) {
     I->Start = Start;
-    I->StartPtr = SI->getPointerOperand();
-    I->Alignment = SI->getAlignment();
+    I->StartPtr = Ptr;
+    I->Alignment = Alignment;
   }
 
   // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
@@ -314,8 +341,6 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
                                              Value *StartPtr, Value *ByteVal) {
   if (TD == 0) return 0;
 
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
   // Okay, so we now have a single store that can be splatable.  Scan to find
   // all subsequent stores of the same value to offset from the same pointer.
   // Join these together into ranges, so we can decide whether contiguous blocks
@@ -324,37 +349,43 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
 
   BasicBlock::iterator BI = StartInst;
   for (++BI; !isa<TerminatorInst>(BI); ++BI) {
-    if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
-      // If the call is readnone, ignore it, otherwise bail out.  We don't even
-      // allow readonly here because we don't want something like:
+    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
+      // If the instruction is readnone, ignore it, otherwise bail out.  We
+      // don't even allow readonly here because we don't want something like:
       // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
-      if (AA.getModRefBehavior(CallSite(BI)) ==
-            AliasAnalysis::DoesNotAccessMemory)
-        continue;
-
-      // TODO: If this is a memset, try to join it in.
-
-      break;
-    } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
-      break;
-
-    // If this is a non-store instruction it is fine, ignore it.
-    StoreInst *NextStore = dyn_cast<StoreInst>(BI);
-    if (NextStore == 0) continue;
-
-    // If this is a store, see if we can merge it in.
-    if (NextStore->isVolatile()) break;
-
-    // Check to see if this stored value is of the same byte-splattable value.
-    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
-      break;
+      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
+        break;
+      continue;
+    }
 
-    // Check to see if this store is to a constant offset from the start ptr.
-    int64_t Offset;
-    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
-      break;
+    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
+      // If this is a store, see if we can merge it in.
+      if (NextStore->isVolatile()) break;
 
-    Ranges.addStore(Offset, NextStore);
+      // Check to see if this stored value is of the same byte-splattable value.
+      if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+        break;
+
+      // Check to see if this store is to a constant offset from the start ptr.
+      int64_t Offset;
+      if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
+        break;
+
+      Ranges.addStore(Offset, NextStore);
+    } else {
+      MemSetInst *MSI = cast<MemSetInst>(BI);
+
+      if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
+          !isa<ConstantInt>(MSI->getLength()))
+        break;
+
+      // Check to see if this store is to a constant offset from the start ptr.
+      int64_t Offset;
+      if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
+        break;
+
+      Ranges.addMemSet(Offset, MSI);
+    }
   }
 
   // If we have no ranges, then we just had a single store with nothing that
@@ -406,7 +437,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
           dbgs() << "With: " << *AMemSet << '\n');
 
     // Zap all the stores.
-    for (SmallVector<StoreInst*, 16>::const_iterator
+    for (SmallVector<Instruction*, 16>::const_iterator
          SI = Range.TheStores.begin(),
          SE = Range.TheStores.end(); SI != SE; ++SI)
       (*SI)->eraseFromParent();
@@ -573,8 +604,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
   // the use analysis, we also need to know that it does not sneakily
   // access dest.  We rely on AA to figure this out for us.
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  if (AA.getModRefInfo(C, cpyDest, srcSize) !=
-      AliasAnalysis::NoModRef)
+  if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef)
     return false;
 
   // All the checks have passed, so do the transformation.
diff --git a/test/Transforms/MemCpyOpt/form-memset.ll b/test/Transforms/MemCpyOpt/form-memset.ll
index 2760e731c91..04bd6742efe 100644
--- a/test/Transforms/MemCpyOpt/form-memset.ll
+++ b/test/Transforms/MemCpyOpt/form-memset.ll
@@ -162,3 +162,21 @@ entry:
 }
 
 declare void @foo(%struct.MV*, %struct.MV*, i8*)
+
+
+define void @test3(i32* nocapture %P) nounwind ssp {
+entry:
+  %arrayidx = getelementptr inbounds i32* %P, i64 1
+  store i32 0, i32* %arrayidx, align 4
+  %add.ptr = getelementptr inbounds i32* %P, i64 2
+  %0 = bitcast i32* %add.ptr to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
+  ret void
+; CHECK: @test3
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+
-- 
2.34.1
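Note (not part of the patch): a minimal C sketch of the source pattern that the new @test3 case corresponds to, assuming the usual lowering of an array store plus a libc memset call. It is included here only to illustrate the merge the patch enables; the function name and shape mirror the test, nothing else is implied.

/* Illustrative only: P[1] = 0 stores 4 bytes at offset 4, and the memset
 * zeroes 11 bytes starting at offset 8.  With this patch, -memcpyopt folds
 * both into a single 15-byte memset starting at &P[1], which is what the
 * CHECK line in @test3 above expects. */
#include <string.h>

void test3(int *P) {
  P[1] = 0;             /* store i32 0, i32* %arrayidx, align 4            */
  memset(P + 2, 0, 11); /* llvm.memset.p0i8.i64(..., i64 11, ...)          */
}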