From 6549121c660dfd18361cd3daf6c766bee80d3097 Mon Sep 17 00:00:00 2001
From: Owen Anderson <resistor@mac.com>
Date: Fri, 15 Oct 2010 22:52:12 +0000
Subject: [PATCH] Generalize MemCpyOpt's handling of call slot forwarding to
 function properly when the call slot forwarding is implemented with a
 load/store pair rather than a memcpy.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116637 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/MemCpyOptimizer.cpp   | 66 +++++++++++++++------
 test/Transforms/MemCpyOpt/loadstore-sret.ll | 25 ++++++++
 2 files changed, 73 insertions(+), 18 deletions(-)
 create mode 100644 test/Transforms/MemCpyOpt/loadstore-sret.ll

diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index d4d4b576924..e4f329fdad7 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -321,7 +321,8 @@ namespace {
     bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
     bool processMemCpy(MemCpyInst *M);
     bool processMemMove(MemMoveInst *M);
-    bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
+    bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
+                              uint64_t cpyLen, CallInst *C);
     bool iterateOnFunction(Function &F);
   };
 
@@ -339,7 +340,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
                     false, false)
 
-
 /// processStore - When GVN is scanning forward over instructions, we look for
 /// some other patterns to fold away.  In particular, this looks for stores to
 /// neighboring locations of memory.  If it sees enough consecutive ones
@@ -347,6 +347,37 @@
 bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (SI->isVolatile()) return false;
 
+  TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  if (!TD) return false;
+
+  // Detect cases where we're performing call slot forwarding, but
+  // happen to be using a load-store pair to implement it, rather than
+  // a memcpy.
+  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+    if (!LI->isVolatile() && LI->hasOneUse()) {
+      MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
+
+      MemDepResult dep = MD.getDependency(LI);
+      CallInst *C = 0;
+      if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
+        C = dyn_cast<CallInst>(dep.getInst());
+
+      if (C) {
+        bool changed = performCallSlotOptzn(LI,
+                        SI->getPointerOperand()->stripPointerCasts(),
+                        LI->getPointerOperand()->stripPointerCasts(),
+                        TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
+        if (changed) {
+          MD.removeInstruction(SI);
+          SI->eraseFromParent();
+          LI->eraseFromParent();
+          ++NumMemCpyInstr;
+          return true;
+        }
+      }
+    }
+  }
+
   LLVMContext &Context = SI->getContext();
 
   // There are two cases that are interesting for this code to handle: memcpy
@@ -359,8 +390,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (!ByteVal)
     return false;
 
-  TargetData *TD = getAnalysisIfAvailable<TargetData>();
-  if (!TD) return false;
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
   Module *M = SI->getParent()->getParent()->getParent();
 
@@ -494,7 +523,9 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
 /// performCallSlotOptzn - takes a memcpy and a call that it depends on,
 /// and checks for the possibility of a call slot optimization by having
 /// the call write its result directly into the destination of the memcpy.
-bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
+bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
+                                     Value *cpyDest, Value *cpySrc,
+                                     uint64_t cpyLen, CallInst *C) {
   // The general transformation to keep in mind is
   //
   //   call @func(..., src, ...)
   //   memcpy(dest, src, ...)
   //
   // ->
   //
   //   memcpy(dest, src, ...)
   //   call @func(..., dest, ...)
@@ -511,16 +542,8 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 
   // Deliberately get the source and destination with bitcasts stripped away,
   // because we'll need to do type comparisons based on the underlying type.
-  Value *cpyDest = cpy->getDest();
-  Value *cpySrc = cpy->getSource();
   CallSite CS(C);
 
-  // We need to be able to reason about the size of the memcpy, so we require
-  // that it be a constant.
-  ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
-  if (!cpyLength)
-    return false;
-
   // Require that src be an alloca.  This simplifies the reasoning considerably.
   AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
   if (!srcAlloca)
     return false;
@@ -537,7 +560,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
   uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
     srcArraySize->getZExtValue();
 
-  if (cpyLength->getZExtValue() < srcSize)
+  if (cpyLen < srcSize)
     return false;
 
   // Check that accessing the first srcSize bytes of dest will not cause a
@@ -606,7 +629,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
   // the use analysis, we also need to know that it does not sneakily
   // access dest.  We rely on AA to figure this out for us.
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
+  if (AA.getModRefInfo(C, cpyDest, srcSize) !=
         AliasAnalysis::NoModRef)
     return false;
 
@@ -635,7 +658,6 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 
   // Remove the memcpy
   MD.removeInstruction(cpy);
-  cpy->eraseFromParent();
   ++NumMemCpyInstr;
 
   return true;
@@ -649,6 +671,10 @@
 bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
   MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
 
+  // We can only optimize statically-sized memcpys.
+  ConstantInt *cpyLen = dyn_cast<ConstantInt>(M->getLength());
+  if (!cpyLen) return false;
+
   // There are two possible optimizations we can do for memcpy:
   //   a) memcpy-memcpy xform which exposes redundancy for DSE.
   //   b) call-memcpy xform for return slot optimization.
@@ -656,8 +682,12 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
   if (!dep.isClobber())
     return false;
   if (!isa<MemCpyInst>(dep.getInst())) {
-    if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
-      return performCallSlotOptzn(M, C);
+    if (CallInst *C = dyn_cast<CallInst>(dep.getInst())) {
+      bool changed = performCallSlotOptzn(M, M->getDest(), M->getSource(),
+                                          cpyLen->getZExtValue(), C);
+      if (changed) M->eraseFromParent();
+      return changed;
+    }
     return false;
   }
 
diff --git a/test/Transforms/MemCpyOpt/loadstore-sret.ll b/test/Transforms/MemCpyOpt/loadstore-sret.ll
new file mode 100644
index 00000000000..ebc11fc0fbe
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/loadstore-sret.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S < %s -memcpyopt | FileCheck %s
+;
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%"class.std::auto_ptr" = type { i32* }
+
+; CHECK: @_Z3foov
+define void @_Z3foov(%"class.std::auto_ptr"* noalias nocapture sret %agg.result) ssp {
+_ZNSt8auto_ptrIiED1Ev.exit:
+  %temp.lvalue = alloca %"class.std::auto_ptr", align 8
+; CHECK: call void @_Z3barv(%"class.std::auto_ptr"* sret %agg.result)
+  call void @_Z3barv(%"class.std::auto_ptr"* sret %temp.lvalue)
+  %tmp.i.i = getelementptr inbounds %"class.std::auto_ptr"* %temp.lvalue, i64 0, i32 0
+; CHECK-NOT: load
+  %tmp2.i.i = load i32** %tmp.i.i, align 8
+  %tmp.i.i4 = getelementptr inbounds %"class.std::auto_ptr"* %agg.result, i64 0, i32 0
+; CHECK-NOT: store
+  store i32* %tmp2.i.i, i32** %tmp.i.i4, align 8
+; CHECK: ret void
+  ret void
+}
+
+declare void @_Z3barv(%"class.std::auto_ptr"* sret)
-- 
2.34.1
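A rough before/after sketch of the transformation this patch enables, in the typed-pointer IR syntax of the period (%T, @f, %tmp, and %dst are illustrative stand-ins for the test's std::auto_ptr, _Z3barv, %temp.lvalue, and %agg.result, not names from the patch). Given a non-volatile load whose only use is the store:

    %T = type { i32* }

    %tmp = alloca %T, align 8
    call void @f(%T* sret %tmp)                         ; callee returns via %tmp
    %s = getelementptr inbounds %T* %tmp, i64 0, i32 0
    %v = load i32** %s                                  ; whole-object copy done
    %d = getelementptr inbounds %T* %dst, i64 0, i32 0
    store i32* %v, i32** %d                             ; as a load/store pair

the pass can now produce:

    call void @f(%T* sret %dst)                         ; call writes %dst directly;
                                                        ; alloca, load, store removed

as long as the existing call slot checks in performCallSlotOptzn pass: the source is an alloca no smaller than the copy, the destination is safe to write before the call, and AA reports NoModRef for the call against the destination. The all-zero GEPs are looked through by stripPointerCasts, and the copy size is taken from getTypeStoreSize of the loaded value's type, which is why processStore now requires TargetData up front.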