when eliding a byval copy due to inlining a readonly function, we have to make sure that the reused alloca has sufficient alignment.

author Chris Lattner <sabre@nondot.org>

Mon, 20 Dec 2010 08:10:40 +0000 (08:10 +0000)

committer Chris Lattner <sabre@nondot.org>

Mon, 20 Dec 2010 08:10:40 +0000 (08:10 +0000)
author Chris Lattner <sabre@nondot.org>
Mon, 20 Dec 2010 08:10:40 +0000 (08:10 +0000)
committer Chris Lattner <sabre@nondot.org>
Mon, 20 Dec 2010 08:10:40 +0000 (08:10 +0000)
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp

index bca9fc4549af4f2759cd953053c5141869df9fb4..76fdd097c07ac901a8ed7d6ddaf78d0d7b704684 100644 (file)
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -229,17 +229,56 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
    CallerNode->removeCallEdgeFor(CS);
  }
  
+/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it.
  static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
                                    const Function *CalledFunc,
                                    InlineFunctionInfo &IFI,
                                    unsigned ByValAlignment) {
-  if (CalledFunc->onlyReadsMemory())
-    return Arg;
+  const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+
+  // If the called function is readonly, then it could not mutate the caller's
+  // copy of the byval'd memory.  In this case, it is safe to elide the copy and
+  // temporary.
+  if (CalledFunc->onlyReadsMemory()) {
+    // If the byval argument has a specified alignment that is greater than the
+    // alignment of the passed-in pointer, then we either have to round up the
+    // input pointer or give up on this transformation.
+    if (ByValAlignment <= 1)  // 0 = unspecified, 1 = no particular alignment.
+      return Arg;
+
+    // See if the argument is a (bitcasted) pointer to an alloca.  If so, we can
+    // round up the alloca if needed.
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts())) {
+      unsigned AIAlign = AI->getAlignment();
+      
+      // If the alloca is known to be aligned at least as much as the byval, we
+      // can do this optimization.
+      if (AIAlign >= ByValAlignment)
+        return Arg;
+      
+      // If the alloca has a specified alignment that is less than the byval,
+      // then we can safely bump it up.
+      if (AIAlign) {
+        AI->setAlignment(ByValAlignment);
+        return Arg;
+      }
+      
+      // If the alloca has an unspecified alignment, then we can only modify
+      // it if we have TD information.  Doing so without TD info could end up
+      // with us rounding the alignment *down* accidentally, which is badness.
+      if (IFI.TD) {
+        AIAlign = std::max(ByValAlignment, IFI.TD->getPrefTypeAlignment(AggTy));
+        AI->setAlignment(AIAlign);
+        return Arg;
+      }
+    }
+    
+    // Otherwise, we have to make a memcpy to get a safe alignment, pretty lame.
+  }
    
    LLVMContext &Context = Arg->getContext();
  
-  
-  const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
    const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
    
    // Create the alloca.  If we have TargetData, use nice alignment.
diff --git a/test/Transforms/Inline/byval.ll b/test/Transforms/Inline/byval.ll

index 08219fcec8d14732d83a70cc9912fc97457af3ed..e601faf2bb3733f1356aca33cf9f4c5c14989dc3 100644 (file)
--- a/test/Transforms/Inline/byval.ll
+++ b/test/Transforms/Inline/byval.ll
@@ -81,3 +81,26 @@ entry:
  ; CHECK: call void @g3(%struct.ss* %S1)
  ; CHECK: ret void
  }
+
+
+; Inlining a byval struct should NOT cause an explicit copy 
+; into an alloca if the function is readonly, but should increase an alloca's
+; alignment to satisfy an explicit alignment request.
+
+define internal i32 @f4(%struct.ss* byval align 64 %b) nounwind readonly {
+        call void @g3(%struct.ss* %b)
+       ret i32 4
+}
+
+define i32 @test4() nounwind  {
+entry:
+       %S = alloca %struct.ss, align 2         ; <%struct.ss*> [#uses=4]
+       %X = call i32 @f4( %struct.ss* byval align 64 %S ) nounwind 
+       ret i32 %X
+; CHECK: @test4()
+; CHECK: %S = alloca %struct.ss, align 64
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: call void @g3
+; CHECK: ret i32 4
+}
+
author	Chris Lattner <sabre@nondot.org>
	Mon, 20 Dec 2010 08:10:40 +0000 (08:10 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Mon, 20 Dec 2010 08:10:40 +0000 (08:10 +0000)
lib/Transforms/Utils/InlineFunction.cpp		patch \| blob \| history
test/Transforms/Inline/byval.ll		patch \| blob \| history