X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FMemCpyOptimizer.cpp;h=a87cce3f9d3eda7dee5573979a3c19754c13c98a;hp=6de5ef1d770a1ef79911d3ae5758f3f855f53e3a;hb=b83a67e1e3fe210bd99a82eccd3dc5b1b44f1503;hpb=f4afaa81f2d1736a4115a22c96d8188178f11cfc diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 6de5ef1d770..a87cce3f9d3 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -23,11 +23,13 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include using namespace llvm; @@ -52,7 +54,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, if (OpC->isZero()) continue; // No offset. // Handle struct indices, which add their field offset to the pointer. - if (const StructType *STy = dyn_cast(*GTI)) { + if (StructType *STy = dyn_cast(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); continue; } @@ -71,14 +73,13 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, /// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8. static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, const TargetData &TD) { - //Ptr1 = Ptr1->stripPointerCasts(); - //Ptr2 = Ptr2->stripPointerCasts(); + Ptr1 = Ptr1->stripPointerCasts(); + Ptr2 = Ptr2->stripPointerCasts(); GetElementPtrInst *GEP1 = dyn_cast(Ptr1); GetElementPtrInst *GEP2 = dyn_cast(Ptr2); bool VariableIdxFound = false; -#if 0 // If one pointer is a GEP and the other isn't, then see if the GEP is a // constant offset from the base, as in "P" and "gep P, 1". if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { @@ -90,7 +91,6 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); return !VariableIdxFound; } -#endif // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical // base. After that base, they may have some number of common (and @@ -147,8 +147,8 @@ struct MemsetRange { } // end anon namespace bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const { - // If we found more than 8 stores to merge or 64 bytes, use memset. - if (TheStores.size() >= 8 || End-Start >= 64) return true; + // If we found more than 4 stores to merge or 16 bytes, use memset. + if (TheStores.size() >= 4 || End-Start >= 16) return true; // If there is nothing to merge, don't do anything. if (TheStores.size() < 2) return false; @@ -301,12 +301,15 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, namespace { class MemCpyOpt : public FunctionPass { MemoryDependenceAnalysis *MD; + TargetLibraryInfo *TLI; const TargetData *TD; public: static char ID; // Pass identification, replacement for typeid MemCpyOpt() : FunctionPass(ID) { initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); MD = 0; + TLI = 0; + TD = 0; } bool runOnFunction(Function &F); @@ -318,6 +321,7 @@ namespace { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); } @@ -348,13 +352,14 @@ INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false) /// tryMergingIntoMemset - When scanning forward over instructions, we look for /// some other patterns to fold away. In particular, this looks for stores to -/// neighboring locations of memory. If it sees enough consequtive ones, it +/// neighboring locations of memory. If it sees enough consecutive ones, it /// attempts to merge them together into a memcpy/memset. Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, Value *StartPtr, Value *ByteVal) { @@ -379,7 +384,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, if (StoreInst *NextStore = dyn_cast(BI)) { // If this is a store, see if we can merge it in. - if (NextStore->isVolatile()) break; + if (!NextStore->isSimple()) break; // Check to see if this stored value is of the same byte-splattable value. if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) @@ -393,8 +398,6 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, Ranges.addStore(Offset, NextStore); } else { - break; - MemSetInst *MSI = cast(BI); if (MSI->isVolatile() || ByteVal != MSI->getValue() || @@ -445,7 +448,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { - const Type *EltType = + Type *EltType = cast(StartPtr->getType())->getElementType(); Alignment = TD->getABITypeAlignment(EltType); } @@ -457,7 +460,10 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; dbgs() << "With: " << *AMemSet << '\n'); - + + if (!Range.TheStores.empty()) + AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); + // Zap all the stores. for (SmallVector::const_iterator SI = Range.TheStores.begin(), @@ -473,7 +479,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { - if (SI->isVolatile()) return false; + if (!SI->isSimple()) return false; if (TD == 0) return false; @@ -481,12 +487,27 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // happen to be using a load-store pair to implement it, rather than // a memcpy. if (LoadInst *LI = dyn_cast(SI->getOperand(0))) { - if (!LI->isVolatile() && LI->hasOneUse()) { - MemDepResult dep = MD->getDependency(LI); + if (LI->isSimple() && LI->hasOneUse() && + LI->getParent() == SI->getParent()) { + MemDepResult ldep = MD->getDependency(LI); CallInst *C = 0; - if (dep.isClobber() && !isa(dep.getInst())) - C = dyn_cast(dep.getInst()); - + if (ldep.isClobber() && !isa(ldep.getInst())) + C = dyn_cast(ldep.getInst()); + + if (C) { + // Check that nothing touches the dest of the "copy" between + // the call and the store. + AliasAnalysis &AA = getAnalysis(); + AliasAnalysis::Location StoreLoc = AA.getLocation(SI); + for (BasicBlock::iterator I = --BasicBlock::iterator(SI), + E = C; I != E; --I) { + if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) { + C = 0; + break; + } + } + } + if (C) { bool changed = performCallSlotOptzn(LI, SI->getPointerOperand()->stripPointerCasts(), @@ -521,9 +542,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { } bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { - // Temporarily disable this. - return false; - // See if there is another memset or store neighboring this memset which // allows us to widen out the memset to do a single larger store. if (isa(MSI->getLength()) && !MSI->isVolatile()) @@ -598,7 +616,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, if (!A->hasStructRetAttr()) return false; - const Type *StructTy = cast(A->getType())->getElementType(); + Type *StructTy = cast(A->getType())->getElementType(); uint64_t destSize = TD->getTypeAllocSize(StructTy); if (destSize < srcSize) @@ -695,10 +713,12 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, if (M->getSource() == MDep->getSource()) return false; - // Second, the length of the memcpy's must be the same, or the preceeding one + // Second, the length of the memcpy's must be the same, or the preceding one // must be larger than the following one. - ConstantInt *C1 = dyn_cast(MDep->getLength()); - if (!C1) return false; + ConstantInt *MDepLen = dyn_cast(MDep->getLength()); + ConstantInt *MLen = dyn_cast(M->getLength()); + if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue()) + return false; AliasAnalysis &AA = getAnalysis(); @@ -786,21 +806,25 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { // a) memcpy-memcpy xform which exposes redundance for DSE. // b) call-memcpy xform for return slot optimization. MemDepResult DepInfo = MD->getDependency(M); - if (!DepInfo.isClobber()) - return false; - - if (MemCpyInst *MDep = dyn_cast(DepInfo.getInst())) - return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue()); - - if (CallInst *C = dyn_cast(DepInfo.getInst())) { - if (performCallSlotOptzn(M, M->getDest(), M->getSource(), - CopySize->getZExtValue(), C)) { - MD->removeInstruction(M); - M->eraseFromParent(); - return true; + if (DepInfo.isClobber()) { + if (CallInst *C = dyn_cast(DepInfo.getInst())) { + if (performCallSlotOptzn(M, M->getDest(), M->getSource(), + CopySize->getZExtValue(), C)) { + MD->removeInstruction(M); + M->eraseFromParent(); + return true; + } } } - + + AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M); + MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true, + M, M->getParent()); + if (SrcDepInfo.isClobber()) { + if (MemCpyInst *MDep = dyn_cast(SrcDepInfo.getInst())) + return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue()); + } + return false; } @@ -809,6 +833,9 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { bool MemCpyOpt::processMemMove(MemMoveInst *M) { AliasAnalysis &AA = getAnalysis(); + if (!TLI->has(LibFunc::memmove)) + return false; + // See if the pointers alias. if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) return false; @@ -817,11 +844,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { // If not, then we know we can transform this. Module *Mod = M->getParent()->getParent()->getParent(); - const Type *ArgTys[3] = { M->getRawDest()->getType(), - M->getRawSource()->getType(), - M->getLength()->getType() }; + Type *ArgTys[3] = { M->getRawDest()->getType(), + M->getRawSource()->getType(), + M->getLength()->getType() }; M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, - ArgTys, 3)); + ArgTys)); // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. @@ -837,7 +864,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { // Find out what feeds this byval argument. Value *ByValArg = CS.getArgument(ArgNo); - const Type *ByValTy =cast(ByValArg->getType())->getElementType(); + Type *ByValTy = cast(ByValArg->getType())->getElementType(); uint64_t ByValSize = TD->getTypeAllocSize(ByValTy); MemDepResult DepInfo = MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize), @@ -859,12 +886,16 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize) return false; - // Get the alignment of the byval. If it is greater than the memcpy, then we - // can't do the substitution. If the call doesn't specify the alignment, then - // it is some target specific value that we can't know. + // Get the alignment of the byval. If the call doesn't specify the alignment, + // then it is some target specific value that we can't know. unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); - if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign) - return false; + if (ByValAlign == 0) return false; + + // If it is greater than the memcpy, then we check to see if we can force the + // source of the memcpy to the alignment we need. If we fail, we bail out. + if (MDep->getAlignment() < ByValAlign && + getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign) + return false; // Verify that the copied-from memory doesn't change in between the memcpy and // the byval call. @@ -918,7 +949,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) { RepeatInstruction = processMemMove(M); else if (CallSite CS = (Value*)I) { for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) - if (CS.paramHasAttr(i+1, Attribute::ByVal)) + if (CS.isByValArgument(i)) MadeChange |= processByValArgument(CS, i); } @@ -940,6 +971,14 @@ bool MemCpyOpt::runOnFunction(Function &F) { bool MadeChange = false; MD = &getAnalysis(); TD = getAnalysisIfAvailable(); + TLI = &getAnalysis(); + + // If we don't have at least memset and memcpy, there is little point of doing + // anything here. These are required by a freestanding implementation, so if + // even they are disabled, there is no point in trying hard. + if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy)) + return false; + while (1) { if (!iterateOnFunction(F)) break;