CodeGen: Introduce splitCodeGen and teach LTOCodeGenerator to use it.

author Peter Collingbourne <peter@pcc.me.uk>

Thu, 27 Aug 2015 23:37:36 +0000 (23:37 +0000)

committer Peter Collingbourne <peter@pcc.me.uk>

Thu, 27 Aug 2015 23:37:36 +0000 (23:37 +0000)
author Peter Collingbourne <peter@pcc.me.uk>
Thu, 27 Aug 2015 23:37:36 +0000 (23:37 +0000)
committer Peter Collingbourne <peter@pcc.me.uk>
Thu, 27 Aug 2015 23:37:36 +0000 (23:37 +0000)
diff --git a/include/llvm/CodeGen/ParallelCG.h b/include/llvm/CodeGen/ParallelCG.h

new file mode 100644 (file)

index 0000000..73ac461
--- /dev/null
+++ b/include/llvm/CodeGen/ParallelCG.h
@@ -0,0 +1,41 @@
+//===-- llvm/CodeGen/ParallelCG.h - Parallel code generation ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header declares functions that can be used for parallel code generation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PARALLELCG_H
+#define LLVM_CODEGEN_PARALLELCG_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/CodeGen.h"
+
+namespace llvm {
+
+class Module;
+class TargetOptions;
+class raw_pwrite_stream;
+
+/// Generate object code for M, optionally splitting it into partitions that
+/// are compiled in parallel.
+///
+/// Split M into OSs.size() partitions, and generate code for each. Writes
+/// OSs.size() object files to the output streams in OSs. The resulting object
+/// files if linked together are intended to be equivalent to the single object
+/// file that would have been code generated from M.
+///
+/// If OSs.size() == 1, M is compiled directly on the calling thread without
+/// being split. Otherwise one thread is started per partition, and all threads
+/// are joined before this function returns.
+///
+/// The target is looked up from M's target triple; a fatal error is reported
+/// if no matching target is found.
+///
+/// \param M        Module to compile. Ownership is taken by this function.
+/// \param OSs      Output streams, one per object file to produce.
+/// \param CPU      CPU name used when creating each target machine.
+/// \param Features Feature string used when creating each target machine.
+/// \param Options  Target options used when creating each target machine.
+/// \param RM       Relocation model used when creating each target machine.
+/// \param CM       Code model used when creating each target machine.
+/// \param OL       Optimization level used when creating each target machine.
+///
+/// \returns M if OSs.size() == 1, otherwise returns std::unique_ptr<Module>().
+std::unique_ptr<Module>
+splitCodeGen(std::unique_ptr<Module> M, ArrayRef<raw_pwrite_stream *> OSs,
+             StringRef CPU, StringRef Features, const TargetOptions &Options,
+             Reloc::Model RM = Reloc::Default,
+             CodeModel::Model CM = CodeModel::Default,
+             CodeGenOpt::Level OL = CodeGenOpt::Default);
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/LTO/LTOCodeGenerator.h b/include/llvm/LTO/LTOCodeGenerator.h

index ca0578fdc7d2c3e847184b5563fa6f93dd1ebd51..0047a10247a3e7c4b9109d67335a0fb3144bf294 100644 (file)
--- a/include/llvm/LTO/LTOCodeGenerator.h
+++ b/include/llvm/LTO/LTOCodeGenerator.h
@@ -133,6 +133,12 @@ struct LTOCodeGenerator {
    // if the compilation was not successful.
    std::unique_ptr<MemoryBuffer> compileOptimized(std::string &errMsg);
  
+  // Compile the merged optimized module into out.size() object files each
+  // representing a linkable partition of the module. If out contains more than
+  // one element, code generation is done in parallel with out.size() threads.
+  // Object files will be written to members of out. Returns true on success.
+  bool compileOptimized(ArrayRef<raw_pwrite_stream *> out, std::string &errMsg);
+
    void setDiagnosticHandler(lto_diagnostic_handler_t, void *);
  
    LLVMContext &getContext() { return Context; }
@@ -140,7 +146,6 @@ struct LTOCodeGenerator {
  private:
    void initializeLTOPasses();
  
-  bool compileOptimized(raw_pwrite_stream &out, std::string &errMsg);
    bool compileOptimizedToFile(const char **name, std::string &errMsg);
    void applyScopeRestrictions();
    void applyRestriction(GlobalValue &GV, ArrayRef<StringRef> Libcalls,
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt

index eb7552970d3f564f2da1091ecd326fb5bf18c8a6..f77141b3f30723000f4c68bd243528a5819d172e 100644 (file)
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -80,6 +80,7 @@ add_llvm_library(LLVMCodeGen
    OptimizePHIs.cpp
    PHIElimination.cpp
    PHIEliminationUtils.cpp
+  ParallelCG.cpp
    Passes.cpp
    PeepholeOptimizer.cpp
    PostRASchedulerList.cpp
diff --git a/lib/CodeGen/ParallelCG.cpp b/lib/CodeGen/ParallelCG.cpp

new file mode 100644 (file)

index 0000000..3e6a71d
--- /dev/null
+++ b/lib/CodeGen/ParallelCG.cpp
@@ -0,0 +1,95 @@
+//===-- ParallelCG.cpp ----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions that can be used for parallel code generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ParallelCG.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/thread.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SplitModule.h"
+
+using namespace llvm;
+
+/// Emit an object file for M to OS.
+///
+/// Creates a TargetMachine for M's target triple from TheTarget, using the
+/// given CPU, feature string, target options, relocation model, code model and
+/// optimization level, then runs the resulting codegen pass pipeline over M.
+///
+/// Reports a fatal error if the target machine cannot be created or if the
+/// object-file emission passes cannot be set up.
+static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
+                    const Target *TheTarget, StringRef CPU, StringRef Features,
+                    const TargetOptions &Options, Reloc::Model RM,
+                    CodeModel::Model CM, CodeGenOpt::Level OL) {
+  std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine(
+      M->getTargetTriple(), CPU, Features, Options, RM, CM, OL));
+  // createTargetMachine() may return null (e.g. a target registered without a
+  // TargetMachine implementation); fail loudly instead of dereferencing null.
+  if (!TM)
+    report_fatal_error("Failed to create target machine");
+
+  legacy::PassManager CodeGenPasses;
+  // addPassesToEmitFile() returns true when emission cannot be set up.
+  if (TM->addPassesToEmitFile(CodeGenPasses, OS,
+                              TargetMachine::CGFT_ObjectFile))
+    report_fatal_error("Failed to setup codegen");
+  CodeGenPasses.run(*M);
+}
+
+// Compile M into OSs.size() object files, one per output stream.
+//
+// With a single output stream the module is compiled directly on the calling
+// thread and returned to the caller. Otherwise the module is handed to
+// SplitModule, each partition is compiled on its own thread, and an empty
+// pointer is returned once every thread has been joined.
+//
+// Reports a fatal error if no target matches M's target triple.
+std::unique_ptr<Module>
+llvm::splitCodeGen(std::unique_ptr<Module> M,
+                   ArrayRef<llvm::raw_pwrite_stream *> OSs, StringRef CPU,
+                   StringRef Features, const TargetOptions &Options,
+                   Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
+  StringRef TripleStr = M->getTargetTriple();
+  std::string ErrMsg;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg);
+  if (!TheTarget)
+    report_fatal_error(Twine("Target not found: ") + ErrMsg);
+
+  // Single output: no splitting and no threads; the caller gets M back.
+  if (OSs.size() == 1) {
+    codegen(M.get(), *OSs[0], TheTarget, CPU, Features, Options, RM, CM,
+            OL);
+    return M;
+  }
+
+  std::vector<std::thread> Threads;
+  SplitModule(std::move(M), OSs.size(), [&](std::unique_ptr<Module> MPart) {
+    // We want to clone the module in a new context to multi-thread the codegen.
+    // We do it by serializing partition modules to bitcode (while still on the
+    // main thread, in order to avoid data races) and spinning up new threads
+    // which deserialize the partitions into separate contexts.
+    // FIXME: Provide a more direct way to do this in LLVM.
+    SmallVector<char, 0> BC;
+    raw_svector_ostream BCOS(BC);
+    WriteBitcodeToFile(MPart.get(), BCOS);
+
+    // Output streams are assigned in callback order: the number of threads
+    // started so far is the index of the stream for this partition.
+    llvm::raw_pwrite_stream *ThreadOS = OSs[Threads.size()];
+    // Everything is captured by value, so the thread never refers to this
+    // callback's locals. The captured StringRefs and pointers still refer to
+    // caller-owned data, which is only used until the threads are joined below.
+    Threads.emplace_back(
+        [TheTarget, CPU, Features, Options, RM, CM, OL,
+         ThreadOS](const SmallVector<char, 0> &BC) {
+          // Each thread parses its partition into a context of its own.
+          LLVMContext Ctx;
+          ErrorOr<std::unique_ptr<Module>> MOrErr =
+              parseBitcodeFile(MemoryBufferRef(StringRef(BC.data(), BC.size()),
+                                               "<split-module>"),
+                               Ctx);
+          if (!MOrErr)
+            report_fatal_error("Failed to read bitcode");
+          std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
+
+          codegen(MPartInCtx.get(), *ThreadOS, TheTarget, CPU, Features,
+                  Options, RM, CM, OL);
+        },
+        // Pass BC using std::move to ensure that it gets moved rather than
+        // copied into the thread's context.
+        std::move(BC));
+  });
+
+  // Wait for every partition to finish before returning to the caller.
+  for (std::thread &T : Threads)
+    T.join();
+
+  // M was moved into SplitModule above, so there is no module to hand back.
+  return {};
+}
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp

index 60ce4f67e034d29e7781004bfe47a4ba819acc8d..eaec47ee682e0730da45ac2b4abbe4f3aa6100e7 100644 (file)
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -18,6 +18,7 @@
  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/CodeGen/ParallelCG.h"
  #include "llvm/CodeGen/RuntimeLibcalls.h"
  #include "llvm/Config/config.h"
  #include "llvm/IR/Constants.h"
@@ -218,7 +219,7 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **name,
    // generate object file
    tool_output_file objFile(Filename.c_str(), FD);
  
-  bool genResult = compileOptimized(objFile.os(), errMsg);
+  bool genResult = compileOptimized(&objFile.os(), errMsg);
    objFile.os().close();
    if (objFile.os().has_error()) {
      objFile.os().clear_error();
@@ -495,25 +496,26 @@ bool LTOCodeGenerator::optimize(bool DisableInline,
    return true;
  }
  
-bool LTOCodeGenerator::compileOptimized(raw_pwrite_stream &out,
+bool LTOCodeGenerator::compileOptimized(ArrayRef<raw_pwrite_stream *> out,
                                          std::string &errMsg) {
    if (!this->determineTarget(errMsg))
      return false;
  
-  legacy::PassManager codeGenPasses;
+  legacy::PassManager preCodeGenPasses;
  
    // If the bitcode files contain ARC code and were compiled with optimization,
    // the ObjCARCContractPass must be run, so do it unconditionally here.
-  codeGenPasses.add(createObjCARCContractPass());
-
-  if (TargetMach->addPassesToEmitFile(codeGenPasses, out,
-                                      TargetMachine::CGFT_ObjectFile)) {
-    errMsg = "target file type not supported";
-    return false;
-  }
-
-  // Run the code generator, and write object file
-  codeGenPasses.run(*MergedModule);
+  preCodeGenPasses.add(createObjCARCContractPass());
+  preCodeGenPasses.run(*MergedModule);
+
+  // Do code generation. We need to preserve the module in case the client calls
+  // writeMergedModules() after compilation, but we only need to allow this at
+  // parallelism level 1. This is achieved by having splitCodeGen return the
+  // original module at parallelism level 1 which we then assign back to
+  // MergedModule.
+  MergedModule =
+      splitCodeGen(std::move(MergedModule), out, MCpu, FeatureStr, Options,
+                   RelocModel, CodeModel::Default, CGOptLevel);
  
    return true;
  }
diff --git a/test/LTO/X86/parallel.ll b/test/LTO/X86/parallel.ll

new file mode 100644 (file)

index 0000000..15e89ea
--- /dev/null
+++ b/test/LTO/X86/parallel.ll
@@ -0,0 +1,22 @@
+; Test parallel code generation in llvm-lto. With -j2 and -o %t.o the output
+; is expected as two object files, %t.o.0 and %t.o.1. The llvm-nm listing of
+; the first must show foo as a defined text symbol and never mention bar; the
+; listing of the second must show bar and never mention foo.
+; RUN: llvm-as -o %t.bc %s
+; RUN: llvm-lto -exported-symbol=foo -exported-symbol=bar -j2 -o %t.o %t.bc
+; RUN: llvm-nm %t.o.0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-nm %t.o.1 | FileCheck --check-prefix=CHECK1 %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK0-NOT: bar
+; CHECK0: T foo
+; CHECK0-NOT: bar
+define void @foo() {
+  call void @bar()
+  ret void
+}
+
+; CHECK1-NOT: foo
+; CHECK1: T bar
+; CHECK1-NOT: foo
+define void @bar() {
+  call void @foo()
+  ret void
+}
diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp

index 52e45296126475d1fd5eb273a01e998f89d2edda..ddde23175a3bc44b003e23fdcca5a1a309e214c7 100644 (file)
--- a/tools/llvm-lto/llvm-lto.cpp
+++ b/tools/llvm-lto/llvm-lto.cpp
@@ -22,7 +22,9 @@
  #include "llvm/Support/PrettyStackTrace.h"
  #include "llvm/Support/Signals.h"
  #include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/ToolOutputFile.h"
  #include "llvm/Support/raw_ostream.h"
+#include <list>
  
  using namespace llvm;
  
@@ -77,6 +79,9 @@ static cl::opt<bool> SetMergedModule(
      "set-merged-module", cl::init(false),
      cl::desc("Use the first input module as the merged module"));
  
+static cl::opt<unsigned> Parallelism("j", cl::Prefix, cl::init(1),
+                                     cl::desc("Number of backend threads"));
+
  namespace {
  struct ModuleInfo {
    std::vector<bool> CanBeHidden;
@@ -240,24 +245,41 @@ int main(int argc, char **argv) {
  
    if (!OutputFilename.empty()) {
      std::string ErrorInfo;
-    std::unique_ptr<MemoryBuffer> Code = CodeGen.compile(
-        DisableInline, DisableGVNLoadPRE, DisableLTOVectorization, ErrorInfo);
-    if (!Code) {
-      errs() << argv[0]
-             << ": error compiling the code: " << ErrorInfo << "\n";
+    if (!CodeGen.optimize(DisableInline, DisableGVNLoadPRE,
+                          DisableLTOVectorization, ErrorInfo)) {
+      errs() << argv[0] << ": error optimizing the code: " << ErrorInfo << "\n";
        return 1;
      }
  
-    std::error_code EC;
-    raw_fd_ostream FileStream(OutputFilename, EC, sys::fs::F_None);
-    if (EC) {
-      errs() << argv[0] << ": error opening the file '" << OutputFilename
-             << "': " << EC.message() << "\n";
+    std::list<tool_output_file> OSs;
+    std::vector<raw_pwrite_stream *> OSPtrs;
+    for (unsigned I = 0; I != Parallelism; ++I) {
+      std::string PartFilename = OutputFilename;
+      if (Parallelism != 1)
+        PartFilename += "." + utostr(I);
+      std::error_code EC;
+      OSs.emplace_back(PartFilename, EC, sys::fs::F_None);
+      if (EC) {
+        errs() << argv[0] << ": error opening the file '" << PartFilename
+               << "': " << EC.message() << "\n";
+        return 1;
+      }
+      OSPtrs.push_back(&OSs.back().os());
+    }
+
+    if (!CodeGen.compileOptimized(OSPtrs, ErrorInfo)) {
+      errs() << argv[0] << ": error compiling the code: " << ErrorInfo << "\n";
        return 1;
      }
  
-    FileStream.write(Code->getBufferStart(), Code->getBufferSize());
+    for (tool_output_file &OS : OSs)
+      OS.keep();
    } else {
+    if (Parallelism != 1) {
+      errs() << argv[0] << ": -j must be specified together with -o\n";
+      return 1;
+    }
+
      std::string ErrorInfo;
      const char *OutputName = nullptr;
      if (!CodeGen.compile_to_file(&OutputName, DisableInline,
author	Peter Collingbourne <peter@pcc.me.uk>
	Thu, 27 Aug 2015 23:37:36 +0000 (23:37 +0000)
committer	Peter Collingbourne <peter@pcc.me.uk>
	Thu, 27 Aug 2015 23:37:36 +0000 (23:37 +0000)
include/llvm/CodeGen/ParallelCG.h	[new file with mode: 0644]	patch \| blob
include/llvm/LTO/LTOCodeGenerator.h		patch \| blob \| history
lib/CodeGen/CMakeLists.txt		patch \| blob \| history
lib/CodeGen/ParallelCG.cpp	[new file with mode: 0644]	patch \| blob
lib/LTO/LTOCodeGenerator.cpp		patch \| blob \| history
test/LTO/X86/parallel.ll	[new file with mode: 0644]	patch \| blob
tools/llvm-lto/llvm-lto.cpp		patch \| blob \| history