R600: Make sure to inline all internal functions

author Tom Stellard <thomas.stellard@amd.com>

Fri, 31 Oct 2014 20:52:02 +0000 (20:52 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Fri, 31 Oct 2014 20:52:02 +0000 (20:52 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Fri, 31 Oct 2014 20:52:02 +0000 (20:52 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Fri, 31 Oct 2014 20:52:02 +0000 (20:52 +0000)
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h

index 5bc1276ffecc1d7046657d71483a9ddea261f77e..261075e1e95c55ac97c999435db823224d5cd09a 100644 (file)
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -57,6 +57,7 @@ extern char &SILoadStoreOptimizerID;
  FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
  Pass *createAMDGPUStructurizeCFGPass();
  FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
+ModulePass *createAMDGPUAlwaysInlinePass();
  
  /// \brief Creates an AMDGPU-specific Target Transformation Info pass.
  ImmutablePass *
diff --git a/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp

new file mode 100644 (file)

index 0000000..b545b45
--- /dev/null
+++ b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
@@ -0,0 +1,66 @@
+//===-- AMDGPUAlwaysInlinePass.cpp - Always Inline ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass marks all internal functions as always_inline and creates
+/// duplicates of all other functions and marks the duplicates as always_inline.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUAlwaysInline : public ModulePass {
+
+  static char ID;
+
+public:
+  AMDGPUAlwaysInline() : ModulePass(ID) { }
+  bool runOnModule(Module &M) override;
+  const char *getPassName() const override { return "AMDGPU Always Inline Pass"; }
+};
+
+} // End anonymous namespace
+
+char AMDGPUAlwaysInline::ID = 0;
+
+// Internalize a clone of every used, non-local definition, then mark all
+// local-linkage functions always_inline. Returns true if the IR changed.
+bool AMDGPUAlwaysInline::runOnModule(Module &M) {
+  bool Changed = false;
+  std::vector<Function*> FuncsToClone;
+  for (Function &F : M)
+    if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty())
+      FuncsToClone.push_back(&F);
+
+  for (Function *F : FuncsToClone) {
+    ValueToValueMapTy VMap;
+    Function *NewFunc = CloneFunction(F, VMap, false);
+    NewFunc->setLinkage(GlobalValue::InternalLinkage);
+    F->getParent()->getFunctionList().push_back(NewFunc);
+    F->replaceAllUsesWith(NewFunc);
+  }
+
+  // The clones have internal linkage, so this loop covers them as well.
+  for (Function &F : M) {
+    if (!F.hasLocalLinkage())
+      continue;
+    F.addFnAttr(Attribute::AlwaysInline);
+    Changed = true;
+  }
+  return Changed;
+}
+
+ModulePass *llvm::createAMDGPUAlwaysInlinePass() {
+  return new AMDGPUAlwaysInline();
+}
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp

index 1b4fe832f20b80d1867a02160a8398233db0b366..ca4d0cae9b25cd70498bb59d47476e5314e8fb14 100644 (file)
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -80,6 +80,7 @@ public:
      return nullptr;
    }
  
+  void addIRPasses() override;
    void addCodeGenPrepare() override;
    bool addPreISel() override;
    bool addInstSelector() override;
@@ -106,6 +107,19 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
    PM.add(createAMDGPUTargetTransformInfoPass(this));
  }
  
+void AMDGPUPassConfig::addIRPasses() {
+  // Function calls are not supported, so make sure we inline everything.
+  addPass(createAMDGPUAlwaysInlinePass());
+  addPass(createAlwaysInlinerPass());
+  // We need to add the barrier noop pass, otherwise adding the function
+  // inlining pass will cause all of the PassConfig's passes to be run
+  // one function at a time, which means if we have a module with two
+  // functions, then we will generate code for the first function
+  // without ever running any passes on the second.
+  addPass(createBarrierNoopPass());
+  TargetPassConfig::addIRPasses();
+}
+
  void AMDGPUPassConfig::addCodeGenPrepare() {
    const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
    if (ST.isPromoteAllocaEnabled()) {
diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/R600/call.ll

index d80347490b3981b52928c3eddbc9cf5296c4f6e8..1448f04ab91f0723a51667968f6e1709d27fe748 100644 (file)
--- a/test/CodeGen/R600/call.ll
+++ b/test/CodeGen/R600/call.ll
@@ -1,7 +1,7 @@
  ; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s
  ; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
  
-; CHECK: error: unsupported call to function defined_function in test_call
+; CHECK: error: unsupported call to function external_function in test_call_external
  
  
  declare i32 @external_function(i32) nounwind
diff --git a/test/CodeGen/R600/inline-calls.ll b/test/CodeGen/R600/inline-calls.ll

new file mode 100644 (file)

index 0000000..3bceeca
--- /dev/null
+++ b/test/CodeGen/R600/inline-calls.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck  %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-NOT: {{^}}func:
+define internal fastcc i32 @func(i32 %a) {
+entry:
+  %tmp0 = add i32 %a, 1
+  ret i32 %tmp0
+}
+
+; CHECK: {{^}}kernel:
+define void @kernel(i32 addrspace(1)* %out) {
+entry:
+  %tmp0 = call i32 @func(i32 1)
+  store i32 %tmp0, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: {{^}}kernel2:
+define void @kernel2(i32 addrspace(1)* %out) {
+entry:
+  call void @kernel(i32 addrspace(1)* %out)
+  ret void
+}
author	Tom Stellard <thomas.stellard@amd.com>
	Fri, 31 Oct 2014 20:52:02 +0000 (20:52 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Fri, 31 Oct 2014 20:52:02 +0000 (20:52 +0000)
lib/Target/R600/AMDGPU.h		patch \| blob \| history
lib/Target/R600/AMDGPUAlwaysInlinePass.cpp	[new file with mode: 0644]	patch \| blob
lib/Target/R600/AMDGPUTargetMachine.cpp		patch \| blob \| history
test/CodeGen/R600/call.ll		patch \| blob \| history
test/CodeGen/R600/inline-calls.ll	[new file with mode: 0644]	patch \| blob