From: Benjamin Kramer Date: Fri, 24 Dec 2010 21:17:12 +0000 (+0000) Subject: MemCpyOpt: Turn memcpys from a constant into a memset if possible. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=a112087e4298ca8ec1bc8aef8a2b272e49faa7ac;p=oota-llvm.git MemCpyOpt: Turn memcpys from a constant into a memset if possible. This allows us to compile "int cst[] = {-1, -1, -1};" into movl $-1, 16(%rsp) movq $-1, 8(%rsp) instead of movl _cst+8(%rip), %eax movl %eax, 16(%rsp) movq _cst(%rip), %rax movq %rax, 8(%rsp) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122548 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 6dcb49f9358..c6381205c89 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "memcpyopt" #include "llvm/Transforms/Scalar.h" +#include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" #include "llvm/ADT/SmallVector.h" @@ -31,6 +32,7 @@ using namespace llvm; STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); +STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); /// isBytewiseValue - If the specified value can be set by repeating the same /// byte in memory, return the i8 value that it is represented with. This is @@ -38,6 +40,13 @@ STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); /// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated /// byte store (e.g. i16 0x1234), return null. static Value *isBytewiseValue(Value *V) { + // Look through constant globals. 
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + if (GV->mayBeOverridden() || !GV->isConstant() || !GV->hasInitializer()) + return 0; + V = GV->getInitializer(); + } + // All byte-wide stores are splatable, even of arbitrary variables. if (V->getType()->isIntegerTy(8)) return V; @@ -73,7 +82,24 @@ static Value *isBytewiseValue(Value *V) { return ConstantInt::get(V->getContext(), Val); } } - + + // A ConstantArray is splatable if all its members are equal and also + // splatable. + if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) { + if (CA->getNumOperands() == 0) + return 0; + + Value *Val = isBytewiseValue(CA->getOperand(0)); + if (!Val) + return 0; + + for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I) + if (CA->getOperand(I-1) != CA->getOperand(I)) + return 0; + + return Val; + } + // Conceptually, we could handle things like: // %a = zext i8 %X to i16 // %b = shl i16 %a, 8 @@ -765,8 +791,24 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { M->eraseFromParent(); return false; } - - + + // If copying from a constant, try to turn the memcpy into a memset. + if (Value *ByteVal = isBytewiseValue(M->getSource())) { + Value *Ops[] = { + M->getRawDest(), ByteVal, // Start, value + CopySize, // Size + M->getAlignmentCst(), // Alignment + ConstantInt::getFalse(M->getContext()), // volatile + }; + const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() }; + Module *Mod = M->getParent()->getParent()->getParent(); + Function *MemSetF = Intrinsic::getDeclaration(Mod, Intrinsic::memset, Tys, 2); + CallInst::Create(MemSetF, Ops, Ops+5, "", M); + M->eraseFromParent(); + ++NumCpyToSet; + return true; + } + // The are two possible optimizations we can do for memcpy: // a) memcpy-memcpy xform which exposes redundance for DSE. // b) call-memcpy xform for return slot optimization. 
diff --git a/test/Transforms/MemCpyOpt/memcpy-to-memset.ll b/test/Transforms/MemCpyOpt/memcpy-to-memset.ll new file mode 100644 index 00000000000..b18d176f003 --- /dev/null +++ b/test/Transforms/MemCpyOpt/memcpy-to-memset.ll @@ -0,0 +1,19 @@ +; RUN: opt -memcpyopt -S < %s | FileCheck %s + +@cst = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4 + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @foo(i32*) nounwind + +define void @test1() nounwind { + %arr = alloca [3 x i32], align 4 + %arr_i8 = bitcast [3 x i32]* %arr to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arr_i8, i8* bitcast ([3 x i32]* @cst to i8*), i64 12, i32 4, i1 false) + %arraydecay = getelementptr inbounds [3 x i32]* %arr, i64 0, i64 0 + call void @foo(i32* %arraydecay) nounwind + ret void +; CHECK: @test1 +; CHECK: call void @llvm.memset +; CHECK-NOT: call void @llvm.memcpy +; CHECK: ret void +}