Add a little wrapper header that is put around bc files when emitting

author Chris Lattner <sabre@nondot.org>

Wed, 9 Jul 2008 05:14:23 +0000 (05:14 +0000)

committer Chris Lattner <sabre@nondot.org>

Wed, 9 Jul 2008 05:14:23 +0000 (05:14 +0000)
author Chris Lattner <sabre@nondot.org>
Wed, 9 Jul 2008 05:14:23 +0000 (05:14 +0000)
committer Chris Lattner <sabre@nondot.org>
Wed, 9 Jul 2008 05:14:23 +0000 (05:14 +0000)
diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html

index 4adf75e91b56f39649cf38f1b2bd57abf8c1385d..ed9bd082b56ad351a320c4d5831ef05ec8e98a97 100644 (file)
--- a/docs/BitCodeFormat.html
+++ b/docs/BitCodeFormat.html
@@ -22,6 +22,8 @@
      <li><a href="#stdblocks">Standard Blocks</a></li>
      </ol>
    </li>
+  <li><a href="#wrapper">Bitcode Wrapper Format</a>
+  </li>
    <li><a href="#llvmir">LLVM IR Encoding</a>
      <ol>
      <li><a href="#basics">Basics</a></li>
@@ -65,8 +67,12 @@ Unlike XML, the bitstream format is a binary encoding, and unlike XML it
  provides a mechanism for the file to self-describe "abbreviations", which are
  effectively size optimizations for the content.</p>
  
-<p>This document first describes the LLVM bitstream format, then describes the
-record structure used by LLVM IR files.
+<p>LLVM IR files may be optionally embedded into a <a 
+href="#wrapper">wrapper</a> structure that makes it easy to embed extra data
+along with LLVM IR files.</p>
+
+<p>This document first describes the LLVM bitstream format, then the wrapper
+format, and finally the record structure used by LLVM IR files.
  </p>
  
  </div>
@@ -544,6 +550,36 @@ corresponding blocks.  It is not safe to skip them.
  
  </div>
  
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="wrapper">Bitcode Wrapper Format</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Bitcode files for LLVM IR may optionally be wrapped in a simple wrapper
+structure.  This structure contains a simple header that indicates the offset
+and size of the embedded BC file.  This allows additional information to be
+stored alongside the BC file.  The structure of this file header is:
+</p>
+
+<p>
+<pre>
+[Magic<sub>32</sub>,
+ Version<sub>32</sub>,
+ Offset<sub>32</sub>,
+ Size<sub>32</sub>,
+ CPUType<sub>32</sub>]
+</pre></p>
+
+<p>Each of the fields is a 32-bit field stored in little endian form (as with
+the rest of the bitcode file fields).  The Magic number is always
+<tt>0x0B17C0DE</tt> and the version is currently always <tt>0</tt>.  The Offset
+field is the offset in bytes to the start of the bitcode stream in the file, and
+the Size field is the size in bytes of the stream. CPUType is a target-specific
+value that can be used to encode the CPU of the target.</p>
+</div>
+
+
  <!-- *********************************************************************** -->
  <div class="doc_section"> <a name="llvmir">LLVM IR Encoding</a></div>
  <!-- *********************************************************************** -->
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h

index 3b7e40541472dce65a9e587f35a61db8c6591ef3..f76bb88dee8c8214850e9e189074872636109125 100644 (file)
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -157,6 +157,15 @@ public:
      Emit(Val, CurCodeSize);
    }
    
+  // BackpatchWord - Backpatch a 32-bit word in the output with the specified
+  // value.  The four bytes Out[ByteNo..ByteNo+3] are overwritten with NewWord, least-significant byte first; no bounds check is done here, so those bytes must already exist (NOTE(review): presumably guaranteed by callers - confirm).
+  void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
+    Out[ByteNo++] = (unsigned char)(NewWord >>  0);  // Bits 0-7.
+    Out[ByteNo++] = (unsigned char)(NewWord >>  8);  // Bits 8-15.
+    Out[ByteNo++] = (unsigned char)(NewWord >> 16);  // Bits 16-23.
+    Out[ByteNo  ] = (unsigned char)(NewWord >> 24);  // Bits 24-31.
+  }
+  
    //===--------------------------------------------------------------------===//
    // Block Manipulation
    //===--------------------------------------------------------------------===//
@@ -227,10 +236,7 @@ public:
      unsigned ByteNo = B.StartSizeWord*4;
      
      // Update the block size field in the header of this sub-block.
-    Out[ByteNo++] = (unsigned char)(SizeInWords >>  0);
-    Out[ByteNo++] = (unsigned char)(SizeInWords >>  8);
-    Out[ByteNo++] = (unsigned char)(SizeInWords >> 16);
-    Out[ByteNo++] = (unsigned char)(SizeInWords >> 24);
+    BackpatchWord(ByteNo, SizeInWords);
      
      // Restore the inner block's code size and abbrev table.
      CurCodeSize = B.PrevCodeSize;
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp

index 3fc6b17521710f35aa20a458a214681ac00cff9f..a8c62be88fc87bcc1d4d54f7b8f4034d2fca2235 100644 (file)
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1184,6 +1184,47 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) {
    return Error("Premature end of bitstream");
  }
  
+/// SkipWrapperHeader - Some systems wrap bc files with a special header for
+/// padding or other reasons.  The format of this header is:
+///
+/// struct bc_header {
+///   uint32_t Magic;         // 0x0B17C0DE
+///   uint32_t Version;       // Version, currently always 0.
+///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
+///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
+///   ... potentially other gunk ...
+/// };
+/// 
+/// This function is called when we find a file with a matching magic number.
+/// In this case, skip down to the subsection of the file that is actually a BC
+/// file.  Only the BitcodeOffset and BitcodeSize fields are read here; Magic and Version are not examined by this function.  Returns true on failure (buffer shorter than the 16 header bytes read, or Offset+Size larger than the buffer); on success BufPtr/BufEnd are narrowed to the embedded stream and false is returned.
+static bool SkipWrapperHeader(unsigned char *&BufPtr, unsigned char *&BufEnd) {
+  enum {
+    KnownHeaderSize = 4*4,  // Size of header we read.
+    OffsetField = 2*4,      // Offset in bytes to Offset field.
+    SizeField = 3*4         // Offset in bytes to Size field.
+  };
+  
+  
+  // Must contain the header!
+  if (BufEnd-BufPtr < KnownHeaderSize) return true;
+  
+  unsigned Offset = ( BufPtr[OffsetField  ]        |  // Assemble the field from 4 bytes, low byte first.
+                     (BufPtr[OffsetField+1] << 8)  |
+                     (BufPtr[OffsetField+2] << 16) |
+                     (BufPtr[OffsetField+3] << 24));
+  unsigned Size   = ( BufPtr[SizeField    ]        |  // Same little-endian assembly for the size.
+                     (BufPtr[SizeField  +1] << 8)  |
+                     (BufPtr[SizeField  +2] << 16) |
+                     (BufPtr[SizeField  +3] << 24));
+  
+  // Verify that Offset+Size fits in the file.  NOTE(review): the sum is computed in `unsigned` arithmetic and can wrap around, so a very large Offset paired with a suitable Size passes this check and BufPtr is then advanced out of bounds - consider a wider sum or checking Offset on its own first.
+  if (Offset+Size > unsigned(BufEnd-BufPtr))
+    return true;
+  BufPtr += Offset;       // Start of the embedded bitcode stream.
+  BufEnd = BufPtr+Size;   // One past its last byte.
+  return false;
+}
  
  bool BitcodeReader::ParseBitcode() {
    TheModule = 0;
@@ -1192,7 +1233,16 @@ bool BitcodeReader::ParseBitcode() {
      return Error("Bitcode stream should be a multiple of 4 bytes in length");
    
    unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
-  Stream.init(BufPtr, BufPtr+Buffer->getBufferSize());
+  unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+  
+  // If we have a wrapper header, parse it and ignore the non-bc file contents.
+  // The magic number is 0x0B17C0DE stored in little endian.
+  if (BufPtr != BufEnd && BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 && 
+      BufPtr[2] == 0x17 && BufPtr[3] == 0x0B)
+    if (SkipWrapperHeader(BufPtr, BufEnd))
+      return Error("Invalid bitcode wrapper header");
+  
+  Stream.init(BufPtr, BufEnd);
    
    // Sniff for the signature.
    if (Stream.Read(8) != 'B' ||
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp

index 0030aca3bc5a6d97b44f00734921fe4ee6bf9350..9794fac009c25c2ca0e8414ce8ea92bb705364fd 100644 (file)
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1273,6 +1273,70 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
    Stream.ExitBlock();
  }
  
+/// EmitDarwinBCHeader - If generating a bc file on darwin, we have to emit a
+/// header and trailer to make it compatible with the system archiver.  To do
+/// this we emit the following header, and then emit a trailer that pads the
+/// file out to be a multiple of 16 bytes.  TT is the target triple string (the caller passes M->getTargetTriple()); it selects the CPUType value written into the header.
+/// 
+/// struct bc_header {
+///   uint32_t Magic;         // 0x0B17C0DE
+///   uint32_t Version;       // Version, currently always 0.
+///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
+///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
+///   uint32_t CPUType;       // CPU specifier.
+///   ... potentially more later ...
+/// };
+enum {
+  DarwinBCSizeFieldOffset = 3*4, // Offset to bitcode_size.
+  DarwinBCHeaderSize = 5*4       // Size in bytes of the five 32-bit fields emitted below.
+};
+
+static void EmitDarwinBCHeader(BitstreamWriter &Stream,
+                               const std::string &TT) {
+  unsigned CPUType = ~0U;  // Left as ~0U when TT matches none of the patterns below.
+  
+  // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*.  The CPUType is a
+  // magic number from /usr/include/mach/machine.h.  It is ok to reproduce the
+  // specific constants here because they are implicitly part of the Darwin ABI.
+  enum {
+    DARWIN_CPU_ARCH_ABI64      = 0x01000000,
+    DARWIN_CPU_TYPE_X86        = 7,
+    DARWIN_CPU_TYPE_POWERPC    = 18
+  };
+  
+  if (TT.find("x86_64-") == 0)
+    CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64;
+  else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
+           TT[4] == '-' && TT[1] - '3' < 6)  // NOTE(review): the subtraction is done in int, so any TT[1] below '3' gives a negative value that also satisfies "< 6", while '9' gives 6 and is rejected - this does not match the i[3-9]86 pattern stated above.
+    CPUType = DARWIN_CPU_TYPE_X86;
+  else if (TT.find("powerpc-") == 0)
+    CPUType = DARWIN_CPU_TYPE_POWERPC;
+  else if (TT.find("powerpc64-") == 0)
+    CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
+  
+  // Traditional Bitcode starts after header.
+  unsigned BCOffset = DarwinBCHeaderSize;
+  
+  Stream.Emit(0x0B17C0DE, 32);  // Magic.
+  Stream.Emit(0         , 32);  // Version.
+  Stream.Emit(BCOffset  , 32);  // BitcodeOffset.
+  Stream.Emit(0         , 32);  // BitcodeSize placeholder; EmitDarwinBCTrailer backpatches it at DarwinBCSizeFieldOffset.
+  Stream.Emit(CPUType   , 32);
+}
+
+/// EmitDarwinBCTrailer - Emit the darwin epilog after the bitcode file and
+/// finalize the header.  BufferSize is the number of bytes in the output so far, wrapper header included (the caller passes Buffer.size()).
+static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) {
+  // Update the size field in the header.  The recorded size excludes the wrapper header and, because it is patched before the padding below is emitted, the padding as well.  The patch offset is absolute, so this assumes the wrapper header starts at byte 0 of the output.
+  Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize);
+  
+  // If the file is not a multiple of 16 bytes, insert dummy padding.
+  while (BufferSize & 15) {
+    Stream.Emit(0, 8);  // One zero byte per iteration.
+    ++BufferSize;       // Local count only; tracks the padded length.
+  }
+}
+
  
  /// WriteBitcodeToFile - Write the specified module to the specified output
  /// stream.
@@ -1282,6 +1346,11 @@ void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) {
    
    Buffer.reserve(256*1024);
    
+  // If this is darwin, emit a file header and trailer if needed.
+  bool isDarwin = M->getTargetTriple().find("-darwin") != std::string::npos;
+  if (isDarwin)
+    EmitDarwinBCHeader(Stream, M->getTargetTriple());
+  
    // Emit the file header.
    Stream.Emit((unsigned)'B', 8);
    Stream.Emit((unsigned)'C', 8);
@@ -1292,10 +1361,14 @@ void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) {
  
    // Emit the module.
    WriteModule(M, Stream);
+
+  if (isDarwin)
+    EmitDarwinBCTrailer(Stream, Buffer.size());
+
    
    // If writing to stdout, set binary mode.
    if (llvm::cout == Out)
-      sys::Program::ChangeStdoutToBinary();
+    sys::Program::ChangeStdoutToBinary();
  
    // Write the generated bitstream to "Out".
    Out.write((char*)&Buffer.front(), Buffer.size());
diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp

index fbb6b6629ce831c4a8fe8f41d170e14eb17797e0..88479fe5083fff8040daac7c396fe093a35604d9 100644 (file)
--- a/lib/System/Path.cpp
+++ b/lib/System/Path.cpp
@@ -52,10 +52,15 @@ Path::GetLLVMConfigDir() {
  }
  
  LLVMFileType
-sys::IdentifyFileType(const char*magic, unsigned length) {
+sys::IdentifyFileType(const char *magic, unsigned length) {
    assert(magic && "Invalid magic number string");
    assert(length >=4 && "Invalid magic number length");
    switch (magic[0]) {
+    case 0xDE:  // 0x0B17C0DE = BC wrapper
+      if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+          magic[3] == (char)0x0B)
+        return Bitcode_FileType;
+      break;
      case 'B':
        if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
          return Bitcode_FileType;
author	Chris Lattner <sabre@nondot.org>
	Wed, 9 Jul 2008 05:14:23 +0000 (05:14 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Wed, 9 Jul 2008 05:14:23 +0000 (05:14 +0000)
docs/BitCodeFormat.html		patch \| blob \| history
include/llvm/Bitcode/BitstreamWriter.h		patch \| blob \| history
lib/Bitcode/Reader/BitcodeReader.cpp		patch \| blob \| history
lib/Bitcode/Writer/BitcodeWriter.cpp		patch \| blob \| history
lib/System/Path.cpp		patch \| blob \| history