Add support to GVN for performing sret return slot optimization. This means that...

author Owen Anderson <resistor@mac.com>

Mon, 18 Feb 2008 09:24:53 +0000 (09:24 +0000)

committer Owen Anderson <resistor@mac.com>

Mon, 18 Feb 2008 09:24:53 +0000 (09:24 +0000)
author Owen Anderson <resistor@mac.com>
Mon, 18 Feb 2008 09:24:53 +0000 (09:24 +0000)
committer Owen Anderson <resistor@mac.com>
Mon, 18 Feb 2008 09:24:53 +0000 (09:24 +0000)
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp

index 41a23ed6c7cf2123cc470e41d354b1d309862408..a4f78fe4573c4291a960d2490e887b64039295f8 100644 (file)
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -21,6 +21,7 @@
  #include "llvm/Function.h"
  #include "llvm/IntrinsicInst.h"
  #include "llvm/Instructions.h"
+#include "llvm/ParameterAttributes.h"
  #include "llvm/Value.h"
  #include "llvm/ADT/BitVector.h"
  #include "llvm/ADT/DenseMap.h"
@@ -738,6 +739,8 @@ namespace {
      bool processNonLocalLoad(LoadInst* L,
                               SmallVector<Instruction*, 4>& toErase);
      bool processMemCpy(MemCpyInst* M, SmallVector<Instruction*, 4>& toErase);
+    bool performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
+                                SmallVector<Instruction*, 4>& toErase);
      Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
                              DenseMap<BasicBlock*, Value*> &Phis,
                              bool top_level = false);
@@ -1048,6 +1051,62 @@ bool GVN::processLoad(LoadInst* L,
    return deletedLoad;
  }
  
+/// performReturnSlotOptzn - takes a memcpy and a call that it depends on,
+/// and checks for the possibility of a return slot optimization by having
+/// the call write its result directly into the callees return parameter
+/// rather than using memcpy
+bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
+                                 SmallVector<Instruction*, 4>& toErase) {
+  // Check that we're copying to an argument...
+  Value* cpyDest = cpy->getDest();
+  if (!isa<Argument>(cpyDest))
+    return false;
+  
+  // And that the argument is the return slot
+  Argument* sretArg = cast<Argument>(cpyDest);
+  if (!sretArg->hasStructRetAttr())
+    return false;
+  
+  // Make sure the return slot is otherwise dead
+  std::set<User*> useList(sretArg->use_begin(), sretArg->use_end());
+  while (!useList.empty()) {
+    User* UI = *useList.begin();
+    
+    if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
+      useList.insert(UI->use_begin(), UI->use_end());
+      useList.erase(UI);
+    } else if (UI == cpy)
+      useList.erase(UI);
+    else
+      return false;
+  }
+  
+  // Make sure the call cannot modify the return slot in some unpredicted way
+  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+  if (AA.getModRefInfo(C, cpy->getRawDest(), ~0UL) != AliasAnalysis::NoModRef)
+    return false;
+  
+  // If all checks passed, then we can perform the transformation
+  CallSite CS = CallSite::get(C);
+  for (unsigned i = 0; i < CS.arg_size(); ++i) {
+    if (CS.paramHasAttr(i+1, ParamAttr::StructRet)) {
+      if (CS.getArgument(i)->getType() != cpyDest->getType())
+        return false;
+      
+      CS.setArgument(i, cpyDest);
+      break;
+    }
+  }
+  
+  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+  MD.dropInstruction(C);
+  
+  // Remove the memcpy
+  toErase.push_back(cpy);
+  
+  return true;
+}
+
  /// processMemCpy - perform simplification of memcpy's.  If we have memcpy A which
  /// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
  /// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
@@ -1059,9 +1118,14 @@ bool GVN::processMemCpy(MemCpyInst* M,
    // First, we have to check that the dependency is another memcpy
    Instruction* dep = MD.getDependency(M);
    if  (dep == MemoryDependenceAnalysis::None ||
-       dep == MemoryDependenceAnalysis::NonLocal ||
-       !isa<MemCpyInst>(dep))
+       dep == MemoryDependenceAnalysis::NonLocal)
      return false;
+  else if (!isa<MemCpyInst>(dep)) {
+    if (CallInst* C = dyn_cast<CallInst>(dep))
+      return performReturnSlotOptzn(M, C, toErase);
+    else
+      return false;
+  }
    
    // We can only transform memcpy's where the dest of one is the source of the
    // other
diff --git a/test/Transforms/GVN/sret.ll b/test/Transforms/GVN/sret.ll

new file mode 100644 (file)

index 0000000..9ae73ef
--- /dev/null
+++ b/test/Transforms/GVN/sret.ll
@@ -0,0 +1,28 @@
+; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep memcpy | count 1
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin9"
+
+define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval  %z) nounwind  {
+entry:
+       %iz = alloca { x86_fp80, x86_fp80 }             ; <{ x86_fp80, x86_fp80 }*> [#uses=3]
+       %memtmp = alloca { x86_fp80, x86_fp80 }, align 16               ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+       %tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1          ; <x86_fp80*> [#uses=1]
+       %tmp2 = load x86_fp80* %tmp1, align 16          ; <x86_fp80> [#uses=1]
+       %tmp3 = sub x86_fp80 0xK80000000000000000000, %tmp2             ; <x86_fp80> [#uses=1]
+       %tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1         ; <x86_fp80*> [#uses=1]
+       %real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0         ; <x86_fp80*> [#uses=1]
+       %tmp7 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0          ; <x86_fp80*> [#uses=1]
+       %tmp8 = load x86_fp80* %tmp7, align 16          ; <x86_fp80> [#uses=1]
+       store x86_fp80 %tmp3, x86_fp80* %real, align 16
+       store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
+       call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret  %memtmp, { x86_fp80, x86_fp80 }* byval  %iz ) nounwind 
+       %memtmp14 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*              ; <i8*> [#uses=1]
+       %agg.result15 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*              ; <i8*> [#uses=1]
+       call void @llvm.memcpy.i32( i8* %agg.result15, i8* %memtmp14, i32 32, i32 16 )
+       ret void
+}
+
+declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval ) nounwind 
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
author	Owen Anderson <resistor@mac.com>
	Mon, 18 Feb 2008 09:24:53 +0000 (09:24 +0000)
committer	Owen Anderson <resistor@mac.com>
	Mon, 18 Feb 2008 09:24:53 +0000 (09:24 +0000)
lib/Transforms/Scalar/GVN.cpp		patch \| blob \| history
test/Transforms/GVN/sret.ll	[new file with mode: 0644]	patch \| blob