From: David Majnemer Date: Sun, 3 Nov 2013 12:22:13 +0000 (+0000) Subject: Revert "Inliner: Handle readonly attribute per argument when adding memcpy" X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=16d10987184281aff35c80542a3c02e7dcb7b59b;hp=42864070b016976b87c64830395e96ae0e49279c;p=oota-llvm.git Revert "Inliner: Handle readonly attribute per argument when adding memcpy" This reverts commit r193356, it caused PR17781. A reduced test case covering this regression has been added to the test suite. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193955 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 56429117f77..d021bcef402 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -338,35 +338,33 @@ static void UpdateCallGraphAfterInlining(CallSite CS, /// HandleByValArgument - When inlining a call site that has a byval argument, /// we have to make the implicit memcpy explicit by adding it. -static Value *HandleByValArgument(Value *PassedValue, - const Argument *ArgumentSignature, - Instruction *TheCall, +static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, unsigned ByValAlignment) { - Type *AggTy = cast(PassedValue->getType())->getElementType(); + Type *AggTy = cast(Arg->getType())->getElementType(); // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and // temporary. - if (CalledFunc->onlyReadsMemory() || ArgumentSignature->onlyReadsMemory()) { + if (CalledFunc->onlyReadsMemory()) { // If the byval argument has a specified alignment that is greater than the // passed in pointer, then we either have to round up the input pointer or // give up on this transformation. if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. - return PassedValue; + return Arg; // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. - if (getOrEnforceKnownAlignment(PassedValue, ByValAlignment, + if (getOrEnforceKnownAlignment(Arg, ByValAlignment, IFI.TD) >= ByValAlignment) - return PassedValue; + return Arg; // Otherwise, we have to make a memcpy to get a safe alignment. This is bad // for code quality, but rarely happens and is required for correctness. } - LLVMContext &Context = PassedValue->getContext(); + LLVMContext &Context = Arg->getContext(); Type *VoidPtrTy = Type::getInt8PtrTy(Context); @@ -382,7 +380,7 @@ static Value *HandleByValArgument(Value *PassedValue, Function *Caller = TheCall->getParent()->getParent(); - Value *NewAlloca = new AllocaInst(AggTy, 0, Align, PassedValue->getName(), + Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), &*Caller->begin()->begin()); // Emit a memcpy. Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; @@ -390,7 +388,7 @@ static Value *HandleByValArgument(Value *PassedValue, Intrinsic::memcpy, Tys); Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); - Value *SrcCast = new BitCastInst(PassedValue, VoidPtrTy, "tmp", TheCall); + Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); Value *Size; if (IFI.TD == 0) @@ -591,14 +589,13 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, for (Function::const_arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; - const Argument *Arg = I; // When byval arguments actually inlined, we need to make the copy implied // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. if (CS.isByValArgument(ArgNo)) { - ActualArg = HandleByValArgument(ActualArg, Arg, TheCall, CalledFunc, IFI, + ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); // Calls that we inline may use the new alloca, so we need to clear diff --git a/test/Transforms/Inline/byval.ll b/test/Transforms/Inline/byval.ll index 3d41dc5a794..d7597adaf84 100644 --- a/test/Transforms/Inline/byval.ll +++ b/test/Transforms/Inline/byval.ll @@ -25,7 +25,7 @@ entry: store i64 2, i64* %tmp4, align 4 call void @f( %struct.ss* byval %S ) nounwind ret i32 0 -; CHECK-LABEL: @test1() +; CHECK: @test1() ; CHECK: %S1 = alloca %struct.ss ; CHECK: %S = alloca %struct.ss ; CHECK: call void @llvm.memcpy @@ -52,7 +52,7 @@ entry: store i64 2, i64* %tmp4, align 4 %X = call i32 @f2( %struct.ss* byval %S ) nounwind ret i32 %X -; CHECK-LABEL: @test2() +; CHECK: @test2() ; CHECK: %S = alloca %struct.ss ; CHECK-NOT: call void @llvm.memcpy ; CHECK: ret i32 @@ -74,7 +74,7 @@ entry: %S = alloca %struct.ss, align 1 ;; May not be aligned. call void @f3( %struct.ss* byval align 64 %S) nounwind ret void -; CHECK-LABEL: @test3() +; CHECK: @test3() ; CHECK: %S1 = alloca %struct.ss, align 64 ; CHECK: %S = alloca %struct.ss ; CHECK: call void @llvm.memcpy @@ -97,35 +97,33 @@ entry: %S = alloca %struct.ss, align 2 ; <%struct.ss*> [#uses=4] %X = call i32 @f4( %struct.ss* byval align 64 %S ) nounwind ret i32 %X -; CHECK-LABEL: @test4() +; CHECK: @test4() ; CHECK: %S = alloca %struct.ss, align 64 ; CHECK-NOT: call void @llvm.memcpy ; CHECK: call void @g3 ; CHECK: ret i32 4 } -; Inlining a byval struct should NOT cause an explicit copy -; into an alloca if the parameter is readonly +%struct.S0 = type { i32 } -define internal i32 @f5(%struct.ss* byval readonly %b) nounwind { +@b = global %struct.S0 { i32 1 }, align 4 +@a = common global i32 0, align 4 + +define internal void @f5(%struct.S0* byval nocapture readonly align 4 %p) { entry: - %tmp = getelementptr %struct.ss* %b, i32 0, i32 0 ; [#uses=2] - %tmp1 = load i32* %tmp, align 4 ; [#uses=1] - %tmp2 = add i32 %tmp1, 1 ; [#uses=1] - ret i32 %tmp2 + store i32 0, i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4 + %f2 = getelementptr inbounds %struct.S0* %p, i64 0, i32 0 + %0 = load i32* %f2, align 4 + store i32 %0, i32* @a, align 4 + ret void } -define i32 @test5() nounwind { +define i32 @test5() { entry: - %S = alloca %struct.ss ; <%struct.ss*> [#uses=4] - %tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0 ; [#uses=1] - store i32 1, i32* %tmp1, align 8 - %tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1 ; [#uses=1] - store i64 2, i64* %tmp4, align 4 - %X = call i32 @f5( %struct.ss* byval %S ) nounwind - ret i32 %X -; CHECK-LABEL: @test5() -; CHECK: %S = alloca %struct.ss -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret i32 + tail call void @f5(%struct.S0* byval align 4 @b) + %0 = load i32* @a, align 4 + ret i32 %0 +; CHECK: @test5() +; CHECK: store i32 0, i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4 +; CHECK-NOT: load i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4 }