From c1e784cb78d115ed9dcfcfc90900c5f698c12370 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 21 Aug 2015 02:48:20 +0000 Subject: [PATCH] TransformUtils: Introduce module splitter. The module splitter splits a module into linkable partitions. It will be used to implement parallel LTO code generation. This initial version of the splitter does not attempt to deal with the somewhat subtle symbol visibility issues around module splitting. These will be dealt with in a future change. Differential Revision: http://reviews.llvm.org/D12132 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245662 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/Utils/Cloning.h | 9 +++ include/llvm/Transforms/Utils/SplitModule.h | 43 +++++++++++ lib/Transforms/Utils/CMakeLists.txt | 1 + lib/Transforms/Utils/CloneModule.cpp | 39 ++++++++++ lib/Transforms/Utils/SplitModule.cpp | 85 +++++++++++++++++++++ test/CMakeLists.txt | 1 + test/lit.cfg | 1 + test/tools/llvm-split/alias.ll | 19 +++++ test/tools/llvm-split/comdat.ll | 19 +++++ test/tools/llvm-split/function.ll | 17 +++++ test/tools/llvm-split/global.ll | 11 +++ test/tools/llvm-split/internal.ll | 17 +++++ test/tools/llvm-split/unnamed.ll | 31 ++++++++ tools/LLVMBuild.txt | 1 + tools/Makefile | 3 +- tools/llvm-split/CMakeLists.txt | 11 +++ tools/llvm-split/LLVMBuild.txt | 22 ++++++ tools/llvm-split/Makefile | 17 +++++ tools/llvm-split/llvm-split.cpp | 67 ++++++++++++++++ 19 files changed, 413 insertions(+), 1 deletion(-) create mode 100644 include/llvm/Transforms/Utils/SplitModule.h create mode 100644 lib/Transforms/Utils/SplitModule.cpp create mode 100644 test/tools/llvm-split/alias.ll create mode 100644 test/tools/llvm-split/comdat.ll create mode 100644 test/tools/llvm-split/function.ll create mode 100644 test/tools/llvm-split/global.ll create mode 100644 test/tools/llvm-split/internal.ll create mode 100644 test/tools/llvm-split/unnamed.ll create mode 100644 tools/llvm-split/CMakeLists.txt create mode 
100644 tools/llvm-split/LLVMBuild.txt create mode 100644 tools/llvm-split/Makefile create mode 100644 tools/llvm-split/llvm-split.cpp diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h index 2caa9a2462d..7dcae2d5885 100644 --- a/include/llvm/Transforms/Utils/Cloning.h +++ b/include/llvm/Transforms/Utils/Cloning.h @@ -23,6 +23,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include <functional> namespace llvm { @@ -52,6 +53,14 @@ class DominatorTree; Module *CloneModule(const Module *M); Module *CloneModule(const Module *M, ValueToValueMapTy &VMap); +/// Return a copy of the specified module. The ShouldCloneDefinition function +/// controls whether a specific GlobalValue's definition is cloned. If the +/// function returns false, the module copy will contain an external reference +/// in place of the global definition. +Module * +CloneModule(const Module *M, ValueToValueMapTy &VMap, + std::function<bool(const GlobalValue *)> ShouldCloneDefinition); + /// ClonedCodeInfo - This struct can be used to capture information about code /// being cloned, while it is being cloned. struct ClonedCodeInfo { diff --git a/include/llvm/Transforms/Utils/SplitModule.h b/include/llvm/Transforms/Utils/SplitModule.h new file mode 100644 index 00000000000..7d896d1993d --- /dev/null +++ b/include/llvm/Transforms/Utils/SplitModule.h @@ -0,0 +1,43 @@ +//===- SplitModule.h - Split a module into partitions -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the function llvm::SplitModule, which splits a module +// into multiple linkable partitions. It can be used to implement parallel code +// generation for link-time optimization. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULE_H +#define LLVM_TRANSFORMS_UTILS_SPLITMODULE_H + +#include <functional> +#include <memory> + +namespace llvm { + +class Module; +class StringRef; + +/// Splits the module M into N linkable partitions. The function ModuleCallback +/// is called N times passing each individual partition as the MPart argument. +/// +/// FIXME: This function does not deal with the somewhat subtle symbol +/// visibility issues around module splitting, including (but not limited to): +/// +/// - Internal symbols should not collide with symbols defined outside the +/// module. +/// - Internal symbols defined in module-level inline asm should be visible to +/// each partition. +void SplitModule( + std::unique_ptr<Module> M, unsigned N, + std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback); + +} // End llvm namespace + +#endif diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 716e655affb..8308a9b6914 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -34,6 +34,7 @@ add_llvm_library(LLVMTransformUtils SimplifyIndVar.cpp SimplifyInstructions.cpp SimplifyLibCalls.cpp + SplitModule.cpp SymbolRewriter.cpp UnifyFunctionExitNodes.cpp Utils.cpp diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 61f1811e7b4..5e7cae5364a 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -33,6 +33,12 @@ Module *llvm::CloneModule(const Module *M) { } Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { + return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; }); +} + +Module *llvm::CloneModule( + const Module *M, ValueToValueMapTy &VMap, + std::function<bool(const GlobalValue *)> ShouldCloneDefinition) { // First off, we need to create the new module. 
Module *New = new Module(M->getModuleIdentifier(), M->getContext()); New->setDataLayout(M->getDataLayout()); @@ -68,6 +74,26 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // Loop over the aliases in the module for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { + if (!ShouldCloneDefinition(I)) { + // An alias cannot act as an external reference, so we need to create + // either a function or a global variable depending on the value type. + // FIXME: Once pointee types are gone we can probably pick one or the + // other. + GlobalValue *GV; + if (I->getValueType()->isFunctionTy()) + GV = Function::Create(cast<FunctionType>(I->getValueType()), + GlobalValue::ExternalLinkage, I->getName(), New); + else + GV = new GlobalVariable( + *New, I->getValueType(), false, GlobalValue::ExternalLinkage, + (Constant *)nullptr, I->getName(), (GlobalVariable *)nullptr, + I->getThreadLocalMode(), I->getType()->getAddressSpace()); + VMap[I] = GV; + // We do not copy attributes (mainly because copying between different + // kinds of globals is forbidden), but this is generally not required for + // correctness. + continue; + } auto *PTy = cast<PointerType>(I->getType()); auto *GA = GlobalAlias::create(PTy, I->getLinkage(), I->getName(), New); GA->copyAttributesFrom(I); @@ -81,6 +107,11 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) { GlobalVariable *GV = cast<GlobalVariable>(VMap[I]); + if (!ShouldCloneDefinition(I)) { + // Skip after setting the correct linkage for an external reference. 
+ GV->setLinkage(GlobalValue::ExternalLinkage); + continue; + } if (I->hasInitializer()) GV->setInitializer(MapValue(I->getInitializer(), VMap)); } @@ -89,6 +120,11 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { Function *F = cast<Function>(VMap[I]); + if (!ShouldCloneDefinition(I)) { + // Skip after setting the correct linkage for an external reference. + F->setLinkage(GlobalValue::ExternalLinkage); + continue; + } if (!I->isDeclaration()) { Function::arg_iterator DestI = F->arg_begin(); for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end(); @@ -109,6 +145,9 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // And aliases for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { + // We already dealt with undefined aliases above. + if (!ShouldCloneDefinition(I)) + continue; GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); if (const Constant *C = I->getAliasee()) GA->setAliasee(MapValue(C, VMap)); diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp new file mode 100644 index 00000000000..ad6b782caf8 --- /dev/null +++ b/lib/Transforms/Utils/SplitModule.cpp @@ -0,0 +1,85 @@ +//===- SplitModule.cpp - Split a module into partitions -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the function llvm::SplitModule, which splits a module +// into multiple linkable partitions. It can be used to implement parallel code +// generation for link-time optimization. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SplitModule.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Cloning.h" + +using namespace llvm; + +static void externalize(GlobalValue *GV) { + if (GV->hasLocalLinkage()) { + GV->setLinkage(GlobalValue::ExternalLinkage); + GV->setVisibility(GlobalValue::HiddenVisibility); + } + + // Unnamed entities must be named consistently between modules. setName will + // give a distinct name to each such entity. + if (!GV->hasName()) + GV->setName("__llvmsplit_unnamed"); +} + +// Returns whether GV should be in partition (0-based) I of N. +static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) { + if (auto GA = dyn_cast<GlobalAlias>(GV)) + if (const GlobalObject *Base = GA->getBaseObject()) + GV = Base; + + StringRef Name; + if (const Comdat *C = GV->getComdat()) + Name = C->getName(); + else + Name = GV->getName(); + + // Partition by MD5 hash. We only need a few bits for evenness as the number + // of partitions will generally be in the 1-2 figure range; the low 16 bits + // are enough. + MD5 H; + MD5::MD5Result R; + H.update(Name); + H.final(R); + return (R[0] | (R[1] << 8)) % N == I; +} + +void llvm::SplitModule( + std::unique_ptr<Module> M, unsigned N, + std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback) { + for (Function &F : *M) + externalize(&F); + for (GlobalVariable &GV : M->globals()) + externalize(&GV); + for (GlobalAlias &GA : M->aliases()) + externalize(&GA); + + // FIXME: We should be able to reuse M as the last partition instead of + // cloning it. 
+ for (unsigned I = 0; I != N; ++I) { + ValueToValueMapTy VMap; + std::unique_ptr<Module> MPart( + CloneModule(M.get(), VMap, [=](const GlobalValue *GV) { + return isInPartition(GV, I, N); + })); + if (I != 0) + MPart->setModuleInlineAsm(""); + ModuleCallback(std::move(MPart)); + } +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f49df542f4e..1f417c2a0fc 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -46,6 +46,7 @@ set(LLVM_TEST_DEPENDS llvm-readobj llvm-rtdyld llvm-size + llvm-split llvm-symbolizer llvm-tblgen macho-dump diff --git a/test/lit.cfg b/test/lit.cfg index 627c8a19801..167a81afad1 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -248,6 +248,7 @@ for pattern in [r"\bbugpoint\b(?!-)", r"\bllvm-readobj\b", r"\bllvm-rtdyld\b", r"\bllvm-size\b", + r"\bllvm-split\b", r"\bllvm-tblgen\b", r"\bllvm-c-test\b", r"\bmacho-dump\b", diff --git a/test/tools/llvm-split/alias.ll b/test/tools/llvm-split/alias.ll new file mode 100644 index 00000000000..3294b8d9aff --- /dev/null +++ b/test/tools/llvm-split/alias.ll @@ -0,0 +1,19 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0-DAG: @afoo = alias [2 x i8*]* @foo +; CHECK1-DAG: @afoo = external global [2 x i8*] +@afoo = alias [2 x i8*]* @foo + +; CHECK0-DAG: declare void @abar() +; CHECK1-DAG: @abar = alias void ()* @bar +@abar = alias void ()* @bar + +@foo = global [2 x i8*] [i8* bitcast (void ()* @bar to i8*), i8* bitcast (void ()* @abar to i8*)] + +define void @bar() { + store [2 x i8*] zeroinitializer, [2 x i8*]* @foo + store [2 x i8*] zeroinitializer, [2 x i8*]* @afoo + ret void +} diff --git a/test/tools/llvm-split/comdat.ll b/test/tools/llvm-split/comdat.ll new file mode 100644 index 00000000000..45faf4bfe26 --- /dev/null +++ b/test/tools/llvm-split/comdat.ll @@ -0,0 +1,19 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: 
llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +$foo = comdat any + +; CHECK0: define void @foo() +; CHECK1: declare void @foo() +define void @foo() comdat { + call void @bar() + ret void +} + +; CHECK0: define void @bar() +; CHECK1: declare void @bar() +define void @bar() comdat($foo) { + call void @foo() + ret void +} diff --git a/test/tools/llvm-split/function.ll b/test/tools/llvm-split/function.ll new file mode 100644 index 00000000000..37272dbbcee --- /dev/null +++ b/test/tools/llvm-split/function.ll @@ -0,0 +1,17 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0: define void @foo() +; CHECK1: declare void @foo() +define void @foo() { + call void @bar() + ret void +} + +; CHECK0: declare void @bar() +; CHECK1: define void @bar() +define void @bar() { + call void @foo() + ret void +} diff --git a/test/tools/llvm-split/global.ll b/test/tools/llvm-split/global.ll new file mode 100644 index 00000000000..6d2425691e1 --- /dev/null +++ b/test/tools/llvm-split/global.ll @@ -0,0 +1,11 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0: @foo = global i8* bitcast +; CHECK1: @foo = external global i8* +@foo = global i8* bitcast (i8** @bar to i8*) + +; CHECK0: @bar = external global i8* +; CHECK1: @bar = global i8* bitcast +@bar = global i8* bitcast (i8** @foo to i8*) diff --git a/test/tools/llvm-split/internal.ll b/test/tools/llvm-split/internal.ll new file mode 100644 index 00000000000..ce4272c5f0d --- /dev/null +++ b/test/tools/llvm-split/internal.ll @@ -0,0 +1,17 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0: define hidden void @foo() +; CHECK1: declare hidden void @foo() +define internal void @foo() { + 
call void @bar() + ret void +} + +; CHECK0: declare void @bar() +; CHECK1: define void @bar() +define void @bar() { + call void @foo() + ret void +} diff --git a/test/tools/llvm-split/unnamed.ll b/test/tools/llvm-split/unnamed.ll new file mode 100644 index 00000000000..96a7fe4e1b7 --- /dev/null +++ b/test/tools/llvm-split/unnamed.ll @@ -0,0 +1,31 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0: declare hidden void @__llvmsplit_unnamed() +; CHECK1: define hidden void @__llvmsplit_unnamed() +define internal void @0() { + ; CHECK1: call void @foo() + call void @foo() + ret void +} + +; CHECK0: declare hidden void @__llvmsplit_unnamed1() +; CHECK1: define hidden void @__llvmsplit_unnamed1() +define internal void @1() { + ; CHECK1: call void @foo() + ; CHECK1: call void @foo() + call void @foo() + call void @foo() + ret void +} + +; CHECK0: define void @foo() +; CHECK1: declare void @foo() +define void @foo() { + ; CHECK0: call void @__llvmsplit_unnamed1() + ; CHECK0: call void @__llvmsplit_unnamed() + call void @1() + call void @0() + ret void +} diff --git a/tools/LLVMBuild.txt b/tools/LLVMBuild.txt index acf61b0268c..13f47e3b59c 100644 --- a/tools/LLVMBuild.txt +++ b/tools/LLVMBuild.txt @@ -40,6 +40,7 @@ subdirectories = llvm-profdata llvm-rtdyld llvm-size + llvm-split macho-dump opt verify-uselistorder diff --git a/tools/Makefile b/tools/Makefile index a47710f111a..f5020365f36 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -32,7 +32,8 @@ PARALLEL_DIRS := opt llvm-as llvm-dis llc llvm-ar llvm-nm llvm-link \ macho-dump llvm-objdump llvm-readobj llvm-rtdyld \ llvm-dwarfdump llvm-cov llvm-size llvm-stress llvm-mcmarkup \ llvm-profdata llvm-symbolizer obj2yaml yaml2obj llvm-c-test \ - llvm-cxxdump verify-uselistorder dsymutil llvm-pdbdump + llvm-cxxdump verify-uselistorder dsymutil llvm-pdbdump \ + llvm-split # If Intel JIT Events support is 
configured, build an extra tool to test it. ifeq ($(USE_INTEL_JITEVENTS), 1) diff --git a/tools/llvm-split/CMakeLists.txt b/tools/llvm-split/CMakeLists.txt new file mode 100644 index 00000000000..6e22a723671 --- /dev/null +++ b/tools/llvm-split/CMakeLists.txt @@ -0,0 +1,11 @@ +set(LLVM_LINK_COMPONENTS + TransformUtils + BitWriter + Core + IRReader + Support + ) + +add_llvm_tool(llvm-split + llvm-split.cpp + ) diff --git a/tools/llvm-split/LLVMBuild.txt b/tools/llvm-split/LLVMBuild.txt new file mode 100644 index 00000000000..780c76afa41 --- /dev/null +++ b/tools/llvm-split/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/llvm-split/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = llvm-split +parent = Tools +required_libraries = TransformUtils BitWriter Core IRReader Support diff --git a/tools/llvm-split/Makefile b/tools/llvm-split/Makefile new file mode 100644 index 00000000000..ef1243dbf1f --- /dev/null +++ b/tools/llvm-split/Makefile @@ -0,0 +1,17 @@ +##===- tools/llvm-split/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../.. 
+TOOLNAME := llvm-split +LINK_COMPONENTS := transformutils bitwriter core irreader support + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS := 1 + +include $(LEVEL)/Makefile.common diff --git a/tools/llvm-split/llvm-split.cpp b/tools/llvm-split/llvm-split.cpp new file mode 100644 index 00000000000..059770fbf4a --- /dev/null +++ b/tools/llvm-split/llvm-split.cpp @@ -0,0 +1,67 @@ +//===-- llvm-split: command line tool for testing module splitter ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This program can be used to test the llvm::SplitModule function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringExtras.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SplitModule.h" + +using namespace llvm; + +static cl::opt<std::string> +InputFilename(cl::Positional, cl::desc("<input bitcode file>"), + cl::init("-"), cl::value_desc("filename")); + +static cl::opt<std::string> +OutputFilename("o", cl::desc("Override output filename"), + cl::value_desc("filename")); + +static cl::opt<unsigned> NumOutputs("j", cl::Prefix, cl::init(2), + cl::desc("Number of output files")); + +int main(int argc, char **argv) { + LLVMContext &Context = getGlobalContext(); + SMDiagnostic Err; + cl::ParseCommandLineOptions(argc, argv, "LLVM module splitter\n"); + + std::unique_ptr<Module> M = parseIRFile(InputFilename, Err, Context); + + if (!M) { + Err.print(argv[0], errs()); + return 1; + } + + unsigned I = 0; + SplitModule(std::move(M), NumOutputs, [&](std::unique_ptr<Module> 
MPart) { + std::error_code EC; + std::unique_ptr<tool_output_file> Out(new tool_output_file( + OutputFilename + utostr(I++), EC, sys::fs::F_None)); + if (EC) { + errs() << EC.message() << '\n'; + exit(1); + } + + WriteBitcodeToFile(MPart.get(), Out->os()); + + // Declare success. + Out->keep(); + }); + + return 0; +} -- 2.34.1