From 5a7aeaa01904b9b0adf256108f302f8961295754 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Thu, 18 Nov 2010 08:00:57 +0000 Subject: [PATCH] remove a pointless restriction from memcpyopt. It was refusing to optimize two memcpy's like this: copy A <- B copy C <- A if it couldn't prove that noalias(B,C). We can eliminate the copy by producing a memmove instead of memcpy. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119694 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 11 ++++-- .../MemCpyOpt/2008-04-29-SRetRemoval.ll | 17 --------- test/Transforms/MemCpyOpt/memcpy.ll | 37 ++++++++++++++++++- 3 files changed, 42 insertions(+), 23 deletions(-) delete mode 100644 test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index ea29fca346a..9c16ae417c4 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -688,11 +688,14 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, if (DepSize < MSize) return false; - // Finally, we have to make sure that the dest of the second does not - // alias the source of the first. + Intrinsic::ID ResultFn = Intrinsic::memcpy; + + // If the dest of the second might alias the source of the first, then the + // source and dest might overlap. We still want to eliminate the intermediate + // value, but we have to generate a memmove instead of memcpy. 
AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); if (!AA.isNoAlias(M->getRawDest(), MSize, MDep->getRawSource(), DepSize)) - return false; + ResultFn = Intrinsic::memmove; // If all checks passed, then we can transform these memcpy's const Type *ArgTys[3] = { @@ -702,7 +705,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, }; Function *MemCpyFun = Intrinsic::getDeclaration(M->getParent()->getParent()->getParent(), - M->getIntrinsicID(), ArgTys, 3); + ResultFn, ArgTys, 3); // Make sure to use the lesser of the alignment of the source and the dest // since we're changing where we're reading from, but don't want to increase diff --git a/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll b/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll deleted file mode 100644 index 4fec169fd06..00000000000 --- a/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: opt < %s -memcpyopt -S | grep {call.*memcpy.*agg.result} - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin8" -@x = external global { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1] - -define void @foo({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind { -entry: - %x.0 = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1] - %x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8* ; <i8*> [#uses=2] - call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 ) - %agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; <i8*> [#uses=1] - call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 ) - ret void -} - -declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll index 7d69287280b..303c2fcceef 100644 --- a/test/Transforms/MemCpyOpt/memcpy.ll +++ 
b/test/Transforms/MemCpyOpt/memcpy.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -memcpyopt -dse -S | grep {call.*memcpy} | count 1 +; RUN: opt < %s -basicaa -memcpyopt -dse -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9" @@ -20,7 +20,7 @@ entry: ; CHECK: @test1 ; CHECK: call void @ccoshl -; CHECK: call @llvm.memcpy +; CHECK: call void @llvm.memcpy ; CHECK-NOT: llvm.memcpy ; CHECK: ret void ret void @@ -29,3 +29,36 @@ entry: declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind + + +; The intermediate alloca and one of the memcpy's should be eliminated, the +; other should be replaced with a memmove. +define void @test2(i8* %P, i8* %Q) nounwind { + %memtmp = alloca { x86_fp80, x86_fp80 }, align 16 + %R = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* + call void @llvm.memcpy.i32( i8* %R, i8* %P, i32 32, i32 16 ) + call void @llvm.memcpy.i32( i8* %Q, i8* %R, i32 32, i32 16 ) + ret void + +; CHECK: @test2 +; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P +; CHECK-NEXT: ret void +} + + + + +@x = external global { x86_fp80, x86_fp80 } + +define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind { + %x.0 = alloca { x86_fp80, x86_fp80 } + %x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8* + call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 ) + %agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* + call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 ) + ret void +; CHECK: @test3 +; CHECK-NEXT: %agg.result2 = bitcast +; CHECK-NEXT: call void @llvm.memcpy +; CHECK-NEXT: ret void +} -- 2.34.1