From bd5603c16ef1953813ffef6d4643699c992d6595 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 14 Jul 2014 18:52:02 +0000 Subject: [PATCH] Don't eliminate memcpy's when the address of the pointer may itself be relevant. Fixes PR18304. Patch by David Wiberg! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212970 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 6 +++++ .../MemCpyOpt/2008-02-24-MultipleUseofSRet.ll | 4 ++-- .../MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll | 2 +- test/Transforms/MemCpyOpt/capturing-func.ll | 22 +++++++++++++++++++ test/Transforms/MemCpyOpt/loadstore-sret.ll | 2 +- test/Transforms/MemCpyOpt/memcpy.ll | 4 ++-- test/Transforms/MemCpyOpt/sret.ll | 2 +- 7 files changed, 35 insertions(+), 7 deletions(-) create mode 100644 test/Transforms/MemCpyOpt/capturing-func.ll diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index b6bc7922882..7c184a4ad2c 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -684,6 +684,12 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, } } + // Check that src isn't captured by the called function since the + // transformation can cause aliasing issues in that case. + for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) + if (CS.getArgument(i) == cpySrc && !CS.doesNotCapture(i)) + return false; + // Since we're changing the parameter to the callsite, we need to make sure // that what would be the new parameter dominates the callsite. 
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); diff --git a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll index d124be5f902..00ac34d93e7 100644 --- a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll +++ b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll @@ -6,7 +6,7 @@ target triple = "i386-pc-linux-gnu" %0 = type { x86_fp80, x86_fp80 } -define internal fastcc void @initialize(%0* noalias sret %agg.result) nounwind { +define internal fastcc void @initialize(%0* noalias nocapture sret %agg.result) nounwind { entry: %agg.result.03 = getelementptr %0* %agg.result, i32 0, i32 0 store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03 @@ -15,7 +15,7 @@ entry: ret void } -declare fastcc x86_fp80 @passed_uninitialized(%0*) nounwind +declare fastcc x86_fp80 @passed_uninitialized(%0* nocapture) nounwind define fastcc void @badly_optimized() nounwind { entry: diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll index 597b69dee3d..6982c8bf2f8 100644 --- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll +++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll @@ -4,7 +4,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 %a = type { i32 } %b = type { float } -declare void @g(%a*) +declare void @g(%a* nocapture) define float @f() { entry: diff --git a/test/Transforms/MemCpyOpt/capturing-func.ll b/test/Transforms/MemCpyOpt/capturing-func.ll new file mode 100644 index 00000000000..17614fd181d --- /dev/null +++ b/test/Transforms/MemCpyOpt/capturing-func.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s + +target datalayout = "e" + +declare void @foo(i8*) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +define void @test() { + %ptr1 = alloca i8 + %ptr2 = alloca i8 + call void @foo(i8* 
%ptr2) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i32 1, i1 false) + call void @foo(i8* %ptr1) + ret void + + ; Check that the transformation isn't applied if the called function can + ; capture the pointer argument (i.e. the nocapture attribute isn't present) + ; CHECK-LABEL: @test( + ; CHECK: call void @foo(i8* %ptr2) + ; CHECK-NEXT: call void @llvm.memcpy + ; CHECK-NEXT: call void @foo(i8* %ptr1) +} diff --git a/test/Transforms/MemCpyOpt/loadstore-sret.ll b/test/Transforms/MemCpyOpt/loadstore-sret.ll index 89eabca21bf..d4a700d0311 100644 --- a/test/Transforms/MemCpyOpt/loadstore-sret.ll +++ b/test/Transforms/MemCpyOpt/loadstore-sret.ll @@ -22,4 +22,4 @@ _ZNSt8auto_ptrIiED1Ev.exit: ret void } -declare void @_Z3barv(%"class.std::auto_ptr"* sret) +declare void @_Z3barv(%"class.std::auto_ptr"* nocapture sret) diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll index 492c453932c..ee04f195116 100644 --- a/test/Transforms/MemCpyOpt/memcpy.ll +++ b/test/Transforms/MemCpyOpt/memcpy.ll @@ -29,7 +29,7 @@ entry: ; CHECK: ret void } -declare void @ccoshl(%0* sret , x86_fp80, x86_fp80) nounwind +declare void @ccoshl(%0* nocapture sret, x86_fp80, x86_fp80) nounwind ; The intermediate alloca and one of the memcpy's should be eliminated, the @@ -202,7 +202,7 @@ define void @test10(%opaque* noalias nocapture sret %x, i32 %y) { ret void } -declare void @f1(%struct.big* sret) +declare void @f1(%struct.big* nocapture sret) declare void @f2(%struct.big*) ; CHECK: attributes [[NUW]] = { nounwind } diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll index 1bbb5fe8651..bfe5e0fbb99 100644 --- a/test/Transforms/MemCpyOpt/sret.ll +++ b/test/Transforms/MemCpyOpt/sret.ll @@ -25,6 +25,6 @@ entry: ret void } -declare void @ccoshl(%0* noalias sret, %0* byval) nounwind +declare void @ccoshl(%0* noalias nocapture sret, %0* byval) nounwind declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, 
i32, i32, i1) nounwind -- 2.34.1