Inliner: Handle readonly attribute per argument when adding memcpy

author Tom Stellard <thomas.stellard@amd.com>

Thu, 24 Oct 2013 16:38:33 +0000 (16:38 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Thu, 24 Oct 2013 16:38:33 +0000 (16:38 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Thu, 24 Oct 2013 16:38:33 +0000 (16:38 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Thu, 24 Oct 2013 16:38:33 +0000 (16:38 +0000)
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp

index dabb67b921980a9db7a80de45801d41565f3628b..585658a881169643ee711f464978ba79efa4d926 100644 (file)
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -337,33 +337,35 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
  
  /// HandleByValArgument - When inlining a call site that has a byval argument,
  /// we have to make the implicit memcpy explicit by adding it.
-static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+static Value *HandleByValArgument(Value *PassedValue,
+                                  const Argument *ArgumentSignature,
+                                  Instruction *TheCall,
                                    const Function *CalledFunc,
                                    InlineFunctionInfo &IFI,
                                    unsigned ByValAlignment) {
-  Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+  Type *AggTy = cast<PointerType>(PassedValue->getType())->getElementType();
  
    // If the called function is readonly, then it could not mutate the caller's
    // copy of the byval'd memory.  In this case, it is safe to elide the copy and
    // temporary.
-  if (CalledFunc->onlyReadsMemory()) {
+  if (CalledFunc->onlyReadsMemory() || ArgumentSignature->onlyReadsMemory()) {
      // If the byval argument has a specified alignment that is greater than the
      // passed in pointer, then we either have to round up the input pointer or
      // give up on this transformation.
      if (ByValAlignment <= 1)  // 0 = unspecified, 1 = no particular alignment.
-      return Arg;
+      return PassedValue;
  
      // If the pointer is already known to be sufficiently aligned, or if we can
      // round it up to a larger alignment, then we don't need a temporary.
-    if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
+    if (getOrEnforceKnownAlignment(PassedValue, ByValAlignment,
                                     IFI.TD) >= ByValAlignment)
-      return Arg;
+      return PassedValue;
      
      // Otherwise, we have to make a memcpy to get a safe alignment.  This is bad
      // for code quality, but rarely happens and is required for correctness.
    }
    
-  LLVMContext &Context = Arg->getContext();
+  LLVMContext &Context = PassedValue->getContext();
  
    Type *VoidPtrTy = Type::getInt8PtrTy(Context);
    
@@ -379,7 +381,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
    
    Function *Caller = TheCall->getParent()->getParent(); 
    
-  Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), 
+  Value *NewAlloca = new AllocaInst(AggTy, 0, Align, PassedValue->getName(),
                                      &*Caller->begin()->begin());
    // Emit a memcpy.
    Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
@@ -387,7 +389,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
                                                   Intrinsic::memcpy, 
                                                   Tys);
    Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
-  Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
+  Value *SrcCast = new BitCastInst(PassedValue, VoidPtrTy, "tmp", TheCall);
    
    Value *Size;
    if (IFI.TD == 0)
@@ -588,13 +590,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
      for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
           E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
        Value *ActualArg = *AI;
+      const Argument *Arg = I;
  
        // When byval arguments actually inlined, we need to make the copy implied
        // by them explicit.  However, we don't do this if the callee is readonly
        // or readnone, because the copy would be unneeded: the callee doesn't
        // modify the struct.
        if (CS.isByValArgument(ArgNo)) {
-        ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+        ActualArg = HandleByValArgument(ActualArg, Arg, TheCall, CalledFunc, IFI,
                                          CalledFunc->getParamAlignment(ArgNo+1));
   
          // Calls that we inline may use the new alloca, so we need to clear
diff --git a/test/Transforms/Inline/byval.ll b/test/Transforms/Inline/byval.ll

index e601faf2bb3733f1356aca33cf9f4c5c14989dc3..3d41dc5a794d84b8baba4fa05cdcad0a8dcfa17c 100644 (file)
--- a/test/Transforms/Inline/byval.ll
+++ b/test/Transforms/Inline/byval.ll
@@ -25,7 +25,7 @@ entry:
         store i64 2, i64* %tmp4, align 4
         call void @f( %struct.ss* byval  %S ) nounwind 
         ret i32 0
-; CHECK: @test1()
+; CHECK-LABEL: @test1()
  ; CHECK: %S1 = alloca %struct.ss
  ; CHECK: %S = alloca %struct.ss
  ; CHECK: call void @llvm.memcpy
@@ -52,7 +52,7 @@ entry:
         store i64 2, i64* %tmp4, align 4
         %X = call i32 @f2( %struct.ss* byval  %S ) nounwind 
         ret i32 %X
-; CHECK: @test2()
+; CHECK-LABEL: @test2()
  ; CHECK: %S = alloca %struct.ss
  ; CHECK-NOT: call void @llvm.memcpy
  ; CHECK: ret i32
@@ -74,7 +74,7 @@ entry:
         %S = alloca %struct.ss, align 1  ;; May not be aligned.
         call void @f3( %struct.ss* byval align 64 %S) nounwind 
         ret void
-; CHECK: @test3()
+; CHECK-LABEL: @test3()
  ; CHECK: %S1 = alloca %struct.ss, align 64
  ; CHECK: %S = alloca %struct.ss
  ; CHECK: call void @llvm.memcpy
@@ -97,10 +97,35 @@ entry:
         %S = alloca %struct.ss, align 2         ; <%struct.ss*> [#uses=4]
         %X = call i32 @f4( %struct.ss* byval align 64 %S ) nounwind 
         ret i32 %X
-; CHECK: @test4()
+; CHECK-LABEL: @test4()
  ; CHECK: %S = alloca %struct.ss, align 64
  ; CHECK-NOT: call void @llvm.memcpy
  ; CHECK: call void @g3
  ; CHECK: ret i32 4
  }
  
+; Inlining a call with a byval struct argument should NOT introduce an explicit
+; copy into an alloca when the callee's parameter is marked readonly.
+
+define internal i32 @f5(%struct.ss* byval readonly %b) nounwind { ; readonly is on %b only, not on the function
+entry:
+       %tmp = getelementptr %struct.ss* %b, i32 0, i32 0               ; <i32*> [#uses=1]
+       %tmp1 = load i32* %tmp, align 4         ; <i32> [#uses=1]
+       %tmp2 = add i32 %tmp1, 1                ; <i32> [#uses=1]
+       ret i32 %tmp2
+}
+
+define i32 @test5() nounwind  {
+entry:
+       %S = alloca %struct.ss          ; <%struct.ss*> [#uses=3]
+       %tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0              ; <i32*> [#uses=1]
+       store i32 1, i32* %tmp1, align 8
+       %tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1              ; <i64*> [#uses=1]
+       store i64 2, i64* %tmp4, align 4
+       %X = call i32 @f5( %struct.ss* byval  %S ) nounwind ; call site has byval only; readonly comes from @f5's parameter
+       ret i32 %X
+; CHECK-LABEL: @test5()
+; CHECK: %S = alloca %struct.ss
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret i32
+}
author	Tom Stellard <thomas.stellard@amd.com>
	Thu, 24 Oct 2013 16:38:33 +0000 (16:38 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Thu, 24 Oct 2013 16:38:33 +0000 (16:38 +0000)
lib/Transforms/Utils/InlineFunction.cpp		patch \| blob \| history
test/Transforms/Inline/byval.ll		patch \| blob \| history