From 5d6cee5e65d60ad73d067254a7773368e32619f4 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 31 Oct 2014 20:52:02 +0000 Subject: [PATCH] R600: Make sure to inline all internal functions Function calls aren't supported yet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220996 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.h | 1 + lib/Target/R600/AMDGPUAlwaysInlinePass.cpp | 66 ++++++++++++++++++++++ lib/Target/R600/AMDGPUTargetMachine.cpp | 14 +++++ test/CodeGen/R600/call.ll | 2 +- test/CodeGen/R600/inline-calls.ll | 24 ++++++++ 5 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 lib/Target/R600/AMDGPUAlwaysInlinePass.cpp create mode 100644 test/CodeGen/R600/inline-calls.ll diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 5bc1276ffec..261075e1e95 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -57,6 +57,7 @@ extern char &SILoadStoreOptimizerID; FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST); Pass *createAMDGPUStructurizeCFGPass(); FunctionPass *createAMDGPUISelDag(TargetMachine &tm); +ModulePass *createAMDGPUAlwaysInlinePass(); /// \brief Creates an AMDGPU-specific Target Transformation Info pass. ImmutablePass * diff --git a/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp new file mode 100644 index 00000000000..b545b456161 --- /dev/null +++ b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp @@ -0,0 +1,66 @@ +//===-- AMDGPUAlwaysInlinePass.cpp - Always Inline ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass marks all internal functions as always_inline and creates +/// duplicates of all other functions and marks the duplicates as always_inline. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Cloning.h" + +using namespace llvm; + +namespace { + +class AMDGPUAlwaysInline : public ModulePass { + + static char ID; + +public: + AMDGPUAlwaysInline() : ModulePass(ID) { } + bool runOnModule(Module &M) override; + const char *getPassName() const override { return "AMDGPU Always Inline Pass"; } +}; + +} // End anonymous namespace + +char AMDGPUAlwaysInline::ID = 0; + +bool AMDGPUAlwaysInline::runOnModule(Module &M) { + + std::vector<Function*> FuncsToClone; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + Function &F = *I; + if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty()) + FuncsToClone.push_back(&F); + } + + for (Function *F : FuncsToClone) { + ValueToValueMapTy VMap; + Function *NewFunc = CloneFunction(F, VMap, false); + NewFunc->setLinkage(GlobalValue::InternalLinkage); + F->getParent()->getFunctionList().push_back(NewFunc); + F->replaceAllUsesWith(NewFunc); + } + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + Function &F = *I; + if (F.hasLocalLinkage()) { + F.addFnAttr(Attribute::AlwaysInline); + } + } + return false; +} + +ModulePass *llvm::createAMDGPUAlwaysInlinePass() { + return new AMDGPUAlwaysInline(); +} diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 1b4fe832f20..ca4d0cae9b2 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -80,6 +80,7 @@ public: return nullptr; } + void addIRPasses() override; void addCodeGenPrepare() override; bool addPreISel() override; bool addInstSelector() override; @@ -106,6 +107,19 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) { PM.add(createAMDGPUTargetTransformInfoPass(this)); } +void AMDGPUPassConfig::addIRPasses() { + // Function calls are not supported, so make sure we inline 
everything. + addPass(createAMDGPUAlwaysInlinePass()); + addPass(createAlwaysInlinerPass()); + // We need to add the barrier noop pass, otherwise adding the function + // inlining pass will cause all of the PassConfigs passes to be run + // one function at a time, which means if we have a module with two + // functions, then we will generate code for the first function + // without ever running any passes on the second. + addPass(createBarrierNoopPass()); + TargetPassConfig::addIRPasses(); +} + void AMDGPUPassConfig::addCodeGenPrepare() { const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); if (ST.isPromoteAllocaEnabled()) { diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/R600/call.ll index d80347490b3..1448f04ab91 100644 --- a/test/CodeGen/R600/call.ll +++ b/test/CodeGen/R600/call.ll @@ -1,7 +1,7 @@ ; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s ; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s -; CHECK: error: unsupported call to function defined_function in test_call +; CHECK: error: unsupported call to function external_function in test_call_external declare i32 @external_function(i32) nounwind diff --git a/test/CodeGen/R600/inline-calls.ll b/test/CodeGen/R600/inline-calls.ll new file mode 100644 index 00000000000..3bceeca34b4 --- /dev/null +++ b/test/CodeGen/R600/inline-calls.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s + +; CHECK-NOT: {{^}}func: +define internal fastcc i32 @func(i32 %a) { +entry: + %tmp0 = add i32 %a, 1 + ret i32 %tmp0 +} + +; CHECK: {{^}}kernel: +define void @kernel(i32 addrspace(1)* %out) { +entry: + %tmp0 = call i32 @func(i32 1) + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; CHECK: {{^}}kernel2: +define void @kernel2(i32 addrspace(1)* %out) { +entry: + call void @kernel(i32 addrspace(1)* %out) + ret void +} -- 2.34.1