From 394be6c159d16417edd71dd97531d7dfe2f1e32c Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 18 Sep 2014 21:28:49 +0000 Subject: [PATCH] LTO: introduce object file-based on-disk module format. This format is simply a regular object file with the bitcode stored in a section named ".llvmbc", plus any number of other (non-allocated) sections. One immediate use case for this is to accommodate compilation processes which expect the object file to contain metadata in non-allocated sections, such as the ".go_export" section used by some Go compilers [1], although I imagine that in the future we could consider compiling parts of the module (such as large non-inlinable functions) directly into the object file to improve LTO efficiency. [1] http://golang.org/doc/install/gccgo#Imports Differential Revision: http://reviews.llvm.org/D4371 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218078 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/BitCodeFormat.rst | 18 +++++++++++-- docs/ReleaseNotes.rst | 3 +++ include/llvm/Object/Error.h | 3 ++- include/llvm/Object/IRObjectFile.h | 12 +++++++++ lib/LTO/LTOModule.cpp | 43 +++++++++++++++++++++--------- lib/Object/Error.cpp | 2 ++ lib/Object/IRObjectFile.cpp | 42 ++++++++++++++++++++++++++++- lib/Object/SymbolicFile.cpp | 20 +++++++++++--- test/LTO/Inputs/bcsection.macho.s | 2 ++ test/LTO/Inputs/bcsection.s | 2 ++ test/LTO/bcsection.ll | 21 +++++++++++++++ test/tools/gold/Inputs/bcsection.s | 2 ++ test/tools/gold/bcsection.ll | 11 ++++++++ tools/gold/gold-plugin.cpp | 15 ++++++++--- tools/llvm-nm/llvm-nm.cpp | 7 +++-- 15 files changed, 178 insertions(+), 25 deletions(-) create mode 100644 test/LTO/Inputs/bcsection.macho.s create mode 100644 test/LTO/Inputs/bcsection.s create mode 100644 test/LTO/bcsection.ll create mode 100644 test/tools/gold/Inputs/bcsection.s create mode 100644 test/tools/gold/bcsection.ll diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst index fce1e37cf51..2e487b32131 100644 --- 
a/docs/BitCodeFormat.rst +++ b/docs/BitCodeFormat.rst @@ -28,8 +28,9 @@ Unlike XML, the bitstream format is a binary encoding, and unlike XML it provides a mechanism for the file to self-describe "abbreviations", which are effectively size optimizations for the content. -LLVM IR files may be optionally embedded into a `wrapper`_ structure that makes -it easy to embed extra data along with LLVM IR files. +LLVM IR files may be optionally embedded into a `wrapper`_ structure, or in a +`native object file`_. Both of these mechanisms make it easy to embed extra +data along with LLVM IR files. This document first describes the LLVM bitstream format, describes the wrapper format, then describes the record structure used by LLVM IR files. @@ -460,6 +461,19 @@ to the start of the bitcode stream in the file, and the Size field is the size in bytes of the stream. CPUType is a target-specific value that can be used to encode the CPU of the target. +.. _native object file: + +Native Object File Wrapper Format +================================= + +Bitcode files for LLVM IR may also be wrapped in a native object file +(i.e. ELF, COFF, Mach-O). The bitcode must be stored in a section of the +object file named ``.llvmbc``. This wrapper format is useful for accommodating +LTO in compilation pipelines where intermediate objects must be native object +files which contain metadata in other sections. + +Not all tools support this format. + .. _encoding of LLVM IR: LLVM IR Encoding diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index dad7e7ed842..be2954ce3c4 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -43,6 +43,9 @@ Non-comprehensive list of changes in this release * Support for AuroraUX has been removed. +* Added support for a `native object file-based bitcode wrapper format + <BitCodeFormat.html#native-object-file>`_. + * ... next change ... .. 
NOTE diff --git a/include/llvm/Object/Error.h b/include/llvm/Object/Error.h index 701da1272cd..90c2bd74b43 100644 --- a/include/llvm/Object/Error.h +++ b/include/llvm/Object/Error.h @@ -26,7 +26,8 @@ enum class object_error { arch_not_found, invalid_file_type, parse_failed, - unexpected_eof + unexpected_eof, + bitcode_section_not_found, }; inline std::error_code make_error_code(object_error e) { diff --git a/include/llvm/Object/IRObjectFile.h b/include/llvm/Object/IRObjectFile.h index 2b6fa2c779f..b650d5d3293 100644 --- a/include/llvm/Object/IRObjectFile.h +++ b/include/llvm/Object/IRObjectFile.h @@ -22,6 +22,8 @@ class Module; class GlobalValue; namespace object { +class ObjectFile; + class IRObjectFile : public SymbolicFile { std::unique_ptr M; std::unique_ptr Mang; @@ -49,6 +51,16 @@ public: return v->isIR(); } + /// \brief Finds and returns bitcode embedded in the given object file, or an + /// error code if not found. + static ErrorOr findBitcodeInObject(const ObjectFile &Obj); + + /// \brief Finds and returns bitcode in the given memory buffer (which may + /// be either a bitcode file or a native object file with embedded bitcode), + /// or an error code if not found. 
+ static ErrorOr + findBitcodeInMemBuffer(MemoryBufferRef Object); + static ErrorOr> createIRObjectFile(MemoryBufferRef Object, LLVMContext &Context); }; diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 88f82f0722a..b9320d24cc3 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -29,6 +29,8 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCTargetAsmParser.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" @@ -44,6 +46,7 @@ #include "llvm/Transforms/Utils/GlobalStatus.h" #include using namespace llvm; +using namespace llvm::object; LTOModule::LTOModule(std::unique_ptr Obj, llvm::TargetMachine *TM) @@ -51,23 +54,31 @@ LTOModule::LTOModule(std::unique_ptr Obj, /// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM /// bitcode. -bool LTOModule::isBitcodeFile(const void *mem, size_t length) { - return sys::fs::identify_magic(StringRef((const char *)mem, length)) == - sys::fs::file_magic::bitcode; +bool LTOModule::isBitcodeFile(const void *Mem, size_t Length) { + ErrorOr BCData = IRObjectFile::findBitcodeInMemBuffer( + MemoryBufferRef(StringRef((const char *)Mem, Length), "")); + return bool(BCData); } -bool LTOModule::isBitcodeFile(const char *path) { - sys::fs::file_magic type; - if (sys::fs::identify_magic(path, type)) +bool LTOModule::isBitcodeFile(const char *Path) { + ErrorOr> BufferOrErr = + MemoryBuffer::getFile(Path); + if (!BufferOrErr) return false; - return type == sys::fs::file_magic::bitcode; + + ErrorOr BCData = IRObjectFile::findBitcodeInMemBuffer( + BufferOrErr.get()->getMemBufferRef()); + return bool(BCData); } -bool LTOModule::isBitcodeForTarget(MemoryBuffer *buffer, - StringRef triplePrefix) { - std::string Triple = - getBitcodeTargetTriple(buffer->getMemBufferRef(), getGlobalContext()); - return 
StringRef(Triple).startswith(triplePrefix); +bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer, + StringRef TriplePrefix) { + ErrorOr BCOrErr = + IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef()); + if (!BCOrErr) + return false; + std::string Triple = getBitcodeTargetTriple(*BCOrErr, getGlobalContext()); + return StringRef(Triple).startswith(TriplePrefix); } LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options, @@ -113,7 +124,13 @@ LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length, LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, TargetOptions options, std::string &errMsg) { - ErrorOr MOrErr = parseBitcodeFile(Buffer, getGlobalContext()); + ErrorOr MBOrErr = + IRObjectFile::findBitcodeInMemBuffer(Buffer); + if (std::error_code EC = MBOrErr.getError()) { + errMsg = EC.message(); + return nullptr; + } + ErrorOr MOrErr = parseBitcodeFile(*MBOrErr, getGlobalContext()); if (std::error_code EC = MOrErr.getError()) { errMsg = EC.message(); return nullptr; diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp index e7431d78074..44667c247b1 100644 --- a/lib/Object/Error.cpp +++ b/lib/Object/Error.cpp @@ -41,6 +41,8 @@ std::string _object_error_category::message(int EV) const { return "Invalid data was encountered while parsing the file"; case object_error::unexpected_eof: return "The end of the file was unexpectedly encountered"; + case object_error::bitcode_section_not_found: + return "Bitcode section not found in object file"; } llvm_unreachable("An enumerator of object_error does not have a message " "defined."); diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp index 856f4c6c278..f3bea51be97 100644 --- a/lib/Object/IRObjectFile.cpp +++ b/lib/Object/IRObjectFile.cpp @@ -25,6 +25,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCTargetAsmParser.h" #include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/Object/ObjectFile.h" #include 
"llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" @@ -264,11 +265,50 @@ basic_symbol_iterator IRObjectFile::symbol_end_impl() const { return basic_symbol_iterator(BasicSymbolRef(Ret, this)); } +ErrorOr IRObjectFile::findBitcodeInObject(const ObjectFile &Obj) { + for (const SectionRef &Sec : Obj.sections()) { + StringRef SecName; + if (std::error_code EC = Sec.getName(SecName)) + return EC; + if (SecName == ".llvmbc") { + StringRef SecContents; + if (std::error_code EC = Sec.getContents(SecContents)) + return EC; + return MemoryBufferRef(SecContents, Obj.getFileName()); + } + } + + return object_error::bitcode_section_not_found; +} + +ErrorOr IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { + sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); + switch (Type) { + case sys::fs::file_magic::bitcode: + return Object; + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::coff_object: { + ErrorOr> ObjFile = + ObjectFile::createObjectFile(Object, Type); + if (!ObjFile) + return ObjFile.getError(); + return findBitcodeInObject(*ObjFile->get()); + } + default: + return object_error::invalid_file_type; + } +} + ErrorOr> llvm::object::IRObjectFile::createIRObjectFile(MemoryBufferRef Object, LLVMContext &Context) { + ErrorOr BCOrErr = findBitcodeInMemBuffer(Object); + if (!BCOrErr) + return BCOrErr.getError(); - std::unique_ptr Buff(MemoryBuffer::getMemBuffer(Object, false)); + std::unique_ptr Buff( + MemoryBuffer::getMemBuffer(BCOrErr.get(), false)); ErrorOr MOrErr = getLazyBitcodeModule(std::move(Buff), Context); if (std::error_code EC = MOrErr.getError()) diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp index f8dd4b33a39..9dfb09dbeca 100644 --- a/lib/Object/SymbolicFile.cpp +++ b/lib/Object/SymbolicFile.cpp @@ -40,11 +40,9 @@ ErrorOr> SymbolicFile::createSymbolicFile( case 
sys::fs::file_magic::macho_universal_binary: case sys::fs::file_magic::windows_resource: return object_error::invalid_file_type; - case sys::fs::file_magic::elf_relocatable: case sys::fs::file_magic::elf_executable: case sys::fs::file_magic::elf_shared_object: case sys::fs::file_magic::elf_core: - case sys::fs::file_magic::macho_object: case sys::fs::file_magic::macho_executable: case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: case sys::fs::file_magic::macho_core: @@ -54,10 +52,26 @@ ErrorOr> SymbolicFile::createSymbolicFile( case sys::fs::file_magic::macho_bundle: case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: case sys::fs::file_magic::macho_dsym_companion: - case sys::fs::file_magic::coff_object: case sys::fs::file_magic::coff_import_library: case sys::fs::file_magic::pecoff_executable: return ObjectFile::createObjectFile(Object, Type); + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::coff_object: { + ErrorOr> Obj = + ObjectFile::createObjectFile(Object, Type); + if (!Obj || !Context) + return std::move(Obj); + + ErrorOr BCData = + IRObjectFile::findBitcodeInObject(*Obj->get()); + if (!BCData) + return std::move(Obj); + + return IRObjectFile::createIRObjectFile( + MemoryBufferRef(BCData->getBuffer(), Object.getBufferIdentifier()), + *Context); + } } llvm_unreachable("Unexpected Binary File Type"); } diff --git a/test/LTO/Inputs/bcsection.macho.s b/test/LTO/Inputs/bcsection.macho.s new file mode 100644 index 00000000000..cb7fe03b3e7 --- /dev/null +++ b/test/LTO/Inputs/bcsection.macho.s @@ -0,0 +1,2 @@ +.section .llvmbc,.llvmbc +.incbin "bcsection.bc" diff --git a/test/LTO/Inputs/bcsection.s b/test/LTO/Inputs/bcsection.s new file mode 100644 index 00000000000..ede1e5c532d --- /dev/null +++ b/test/LTO/Inputs/bcsection.s @@ -0,0 +1,2 @@ +.section .llvmbc +.incbin "bcsection.bc" diff --git a/test/LTO/bcsection.ll b/test/LTO/bcsection.ll new file mode 100644 index 
00000000000..da8da8c6e83 --- /dev/null +++ b/test/LTO/bcsection.ll @@ -0,0 +1,21 @@ +; RUN: llvm-as -o %T/bcsection.bc %s + +; RUN: llvm-mc -I=%T -filetype=obj -triple=x86_64-pc-win32 -o %T/bcsection.coff.bco %p/Inputs/bcsection.s +; RUN: llvm-nm %T/bcsection.coff.bco | FileCheck %s +; RUN: llvm-lto -exported-symbol=main -o %T/bcsection.coff.o %T/bcsection.coff.bco +; RUN: llvm-nm %T/bcsection.coff.o | FileCheck %s + +; RUN: llvm-mc -I=%T -filetype=obj -triple=x86_64-unknown-linux-gnu -o %T/bcsection.elf.bco %p/Inputs/bcsection.s +; RUN: llvm-nm %T/bcsection.elf.bco | FileCheck %s +; RUN: llvm-lto -exported-symbol=main -o %T/bcsection.elf.o %T/bcsection.elf.bco +; RUN: llvm-nm %T/bcsection.elf.o | FileCheck %s + +; RUN: llvm-mc -I=%T -filetype=obj -triple=x86_64-apple-darwin11 -o %T/bcsection.macho.bco %p/Inputs/bcsection.macho.s +; RUN: llvm-nm %T/bcsection.macho.bco | FileCheck %s +; RUN: llvm-lto -exported-symbol=main -o %T/bcsection.macho.o %T/bcsection.macho.bco +; RUN: llvm-nm %T/bcsection.macho.o | FileCheck %s + +; CHECK: main +define i32 @main() { + ret i32 0 +} diff --git a/test/tools/gold/Inputs/bcsection.s b/test/tools/gold/Inputs/bcsection.s new file mode 100644 index 00000000000..ede1e5c532d --- /dev/null +++ b/test/tools/gold/Inputs/bcsection.s @@ -0,0 +1,2 @@ +.section .llvmbc +.incbin "bcsection.bc" diff --git a/test/tools/gold/bcsection.ll b/test/tools/gold/bcsection.ll new file mode 100644 index 00000000000..8565d9ddc4c --- /dev/null +++ b/test/tools/gold/bcsection.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as -o %T/bcsection.bc %s + +; RUN: llvm-mc -I=%T -filetype=obj -o %T/bcsection.bco %p/Inputs/bcsection.s +; RUN: llvm-nm -no-llvm-bc %T/bcsection.bco | count 0 +; RUN: ld -r -o %T/bcsection.o -plugin %llvmshlibdir/LLVMgold.so %T/bcsection.bco +; RUN: llvm-nm -no-llvm-bc %T/bcsection.o | FileCheck %s + +; CHECK: main +define i32 @main() { + ret i32 0 +} diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 95f7e32a611..a0c678796fa 
100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -300,7 +300,9 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, ErrorOr> ObjOrErr = object::IRObjectFile::createIRObjectFile(BufferRef, Context); std::error_code EC = ObjOrErr.getError(); - if (EC == BitcodeError::InvalidBitcodeSignature) + if (EC == BitcodeError::InvalidBitcodeSignature || + EC == object::object_error::invalid_file_type || + EC == object::object_error::bitcode_section_not_found) return LDPS_OK; *claimed = 1; @@ -548,8 +550,15 @@ getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile, if (get_view(F.handle, &View) != LDPS_OK) message(LDPL_FATAL, "Failed to get a view of file"); - std::unique_ptr Buffer = MemoryBuffer::getMemBuffer( - StringRef((char *)View, File.filesize), "", false); + llvm::ErrorOr MBOrErr = + object::IRObjectFile::findBitcodeInMemBuffer( + MemoryBufferRef(StringRef((const char *)View, File.filesize), "")); + if (std::error_code EC = MBOrErr.getError()) + message(LDPL_FATAL, "Could not read bitcode from file : %s", + EC.message().c_str()); + + std::unique_ptr Buffer = + MemoryBuffer::getMemBuffer(MBOrErr->getBuffer(), "", false); if (release_input_file(F.handle) != LDPS_OK) message(LDPL_FATAL, "Failed to release file information"); diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index 3002119cc51..241ef26facc 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -149,6 +149,9 @@ cl::list SegSect("s", cl::Positional, cl::ZeroOrMore, cl::opt FormatMachOasHex("x", cl::desc("Print symbol entry in hex, " "Mach-O only")); +cl::opt NoLLVMBitcode("no-llvm-bc", + cl::desc("Disable LLVM bitcode reader")); + bool PrintAddress = true; bool MultipleFiles = false; @@ -1009,8 +1012,8 @@ static void dumpSymbolNamesFromFile(std::string &Filename) { return; LLVMContext &Context = getGlobalContext(); - ErrorOr> BinaryOrErr = - createBinary(BufferOrErr.get()->getMemBufferRef(), 
&Context); + ErrorOr> BinaryOrErr = createBinary( + BufferOrErr.get()->getMemBufferRef(), NoLLVMBitcode ? nullptr : &Context); if (error(BinaryOrErr.getError(), Filename)) return; Binary &Bin = *BinaryOrErr.get(); -- 2.34.1