From 6eaa62af77fbf5f950741b284672b676665ba07f Mon Sep 17 00:00:00 2001 From: Bjorn Steinbrink Date: Thu, 16 Oct 2014 19:43:08 +0000 Subject: [PATCH] Allow call-slot optzn for destinations with a suitable dereferenceable attribute Summary: Currently, call slot optimization requires that if the destination is an argument, the argument has the sret attribute. This is to ensure that the memory access won't trap. In addition to sret, we can also allow the optimization to happen for arguments that have the new dereferenceable attribute, which gives the same guarantee. Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D5832 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219950 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 30 +++++++++++---------- test/Transforms/MemCpyOpt/callslot_deref.ll | 29 ++++++++++++++++++++ 2 files changed, 45 insertions(+), 14 deletions(-) create mode 100644 test/Transforms/MemCpyOpt/callslot_deref.ll diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 9709dfcc1f6..be524be0082 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -634,22 +634,24 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, if (destSize < srcSize) return false; } else if (Argument *A = dyn_cast<Argument>(cpyDest)) { - // If the destination is an sret parameter then only accesses that are - // outside of the returned struct type can trap. - if (!A->hasStructRetAttr()) - return false; + if (A->getDereferenceableBytes() < srcSize) { + // If the destination is an sret parameter then only accesses that are + // outside of the returned struct type can trap. 
+ if (!A->hasStructRetAttr()) + return false; - Type *StructTy = cast<PointerType>(A->getType())->getElementType(); - if (!StructTy->isSized()) { - // The call may never return and hence the copy-instruction may never - // be executed, and therefore it's not safe to say "the destination - // has at least <cpyLen> bytes, as implied by the copy-instruction", - return false; - } + Type *StructTy = cast<PointerType>(A->getType())->getElementType(); + if (!StructTy->isSized()) { + // The call may never return and hence the copy-instruction may never + // be executed, and therefore it's not safe to say "the destination + // has at least <cpyLen> bytes, as implied by the copy-instruction", + return false; + } - uint64_t destSize = DL->getTypeAllocSize(StructTy); - if (destSize < srcSize) - return false; + uint64_t destSize = DL->getTypeAllocSize(StructTy); + if (destSize < srcSize) + return false; + } } else { return false; } diff --git a/test/Transforms/MemCpyOpt/callslot_deref.ll b/test/Transforms/MemCpyOpt/callslot_deref.ll new file mode 100644 index 00000000000..4d51552d015 --- /dev/null +++ b/test/Transforms/MemCpyOpt/callslot_deref.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) unnamed_addr nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +; all bytes of %dst that are touched by the memset are dereferenceable +define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) { +; CHECK-LABEL: @must_remove_memcpy( +; CHECK: call void @llvm.memset.p0i8.i64 +; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 + %src = alloca [4096 x i8], align 1 + %p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0 + call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2 + ret void +} + +; 
memset touches more bytes than those guaranteed to be dereferenceable +define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) { +; CHECK-LABEL: @must_not_remove_memcpy( +; CHECK: call void @llvm.memset.p0i8.i64 +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 + %src = alloca [4096 x i8], align 1 + %p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0 + call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2 + ret void +} -- 2.34.1