Add support for compressed bytecode

author Reid Spencer <rspencer@reidspencer.com>

Sat, 6 Nov 2004 23:17:23 +0000 (23:17 +0000)

committer Reid Spencer <rspencer@reidspencer.com>

Sat, 6 Nov 2004 23:17:23 +0000 (23:17 +0000)
author Reid Spencer <rspencer@reidspencer.com>
Sat, 6 Nov 2004 23:17:23 +0000 (23:17 +0000)
committer Reid Spencer <rspencer@reidspencer.com>
Sat, 6 Nov 2004 23:17:23 +0000 (23:17 +0000)
diff --git a/include/llvm/Bytecode/Writer.h b/include/llvm/Bytecode/Writer.h

index ae762b4a8456690b045c1ad08adfd6d39fbfc806..4a6f5f0f413a9f9beb69ca051c5af21b58b5f1d6 100644 (file)
--- a/include/llvm/Bytecode/Writer.h
+++ b/include/llvm/Bytecode/Writer.h
@@ -28,7 +28,8 @@
  
  namespace llvm {
    class Module;
-  void WriteBytecodeToFile(const Module *M, std::ostream &Out);
+  void WriteBytecodeToFile(const Module *M, std::ostream &Out, 
+                           bool compress = false); // true => write the alternate magic number (0xEC 0xEC 0xF6 0xED) and a Compressor-compressed body; false => write the bytecode as-is
  } // End llvm namespace
  
  #endif
diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp

index 3501d8775860dd1713d2a547729d56cbe02da683..bedfa7eea7e1e460bef7698e755329585b47b13e 100644 (file)
--- a/lib/Bytecode/Reader/Reader.cpp
+++ b/lib/Bytecode/Reader/Reader.cpp
@@ -24,6 +24,7 @@
  #include "llvm/SymbolTable.h"
  #include "llvm/Bytecode/Format.h"
  #include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/Compressor.h"
  #include "llvm/ADT/StringExtras.h"
  #include <sstream>
  #include <algorithm>
@@ -2152,6 +2153,22 @@ void BytecodeReader::ParseModule() {
      error("Function declared, but bytecode stream ended before definition");
  }
  
+static unsigned GetUncompressionBuffer(char*&buff, unsigned& sz, void* ctxt){
+  BytecodeReader::BufferInfo* bi = 
+    reinterpret_cast<BytecodeReader::BufferInfo*>(ctxt);
+  unsigned new_size = bi->size * 2;
+  if (bi->buff == 0 ) {
+    buff = bi->buff = (char*) malloc(new_size);
+    sz = new_size;
+  } else {
+    bi->buff = (char*) ::realloc(bi->buff, new_size);
+    buff = bi->buff + bi->size;
+    sz = bi->size;
+  }
+  bi->size = new_size;
+  return (bi->buff == 0 ? 1 : 0);
+}
+
  /// This function completely parses a bytecode buffer given by the \p Buf
  /// and \p Length parameters.
  void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length, 
@@ -2167,9 +2184,25 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
      if (Handler) Handler->handleStart(TheModule, Length);
  
      // Read and check signature...
-    unsigned Sig = read_uint();
-    if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) {
-      error("Invalid bytecode signature: " + utostr(Sig));
+    bool compressed = 
+      (Buf[0] == 0xEC && Buf[1] == 0xEC && Buf[2] == 0xF6 && Buf[3] == 0xED);
+
+    if (compressed) {
+      bi.size = Length * 2;;
+      // Bytecode is compressed, have to decompress it first.
+      unsigned uncompressedLength = Compressor::decompress((char*)Buf+4,Length-4,
+        GetUncompressionBuffer, (void*) &bi);
+
+      At = MemStart = BlockStart = Buf = (BufPtr) bi.buff;
+      MemEnd = BlockEnd = Buf + uncompressedLength;
+
+    } else {
+      if (!(Buf[0] == 'l' && Buf[1] == 'l' && Buf[2] == 'v' && Buf[3] == 'm'))
+        error("Invalid bytecode signature: " + 
+            utohexstr(Buf[0]) + utohexstr(Buf[1]) + utohexstr(Buf[2]) +
+            utohexstr(Buf[3]));
+      else
+        At += 4; // skip the bytes
      }
  
      // Tell the handler we're starting a module
@@ -2215,6 +2248,8 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
      freeState();
      delete TheModule;
      TheModule = 0;
+    if (bi.buff != 0 )
+      ::free(bi.buff);
      throw;
    } catch (...) {
      std::string msg("Unknown Exception Occurred");
@@ -2222,6 +2257,8 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
      freeState();
      delete TheModule;
      TheModule = 0;
+    if (bi.buff != 0 )
+      ::free(bi.buff);
      throw msg;
    }
  }
diff --git a/lib/Bytecode/Reader/Reader.h b/lib/Bytecode/Reader/Reader.h

index 89d079d3f67029d4336bd139b6278c9c6d8bdcae..49d81733b8a12a41fd1109882b929232f676abfb 100644 (file)
--- a/lib/Bytecode/Reader/Reader.h
+++ b/lib/Bytecode/Reader/Reader.h
@@ -47,10 +47,14 @@ public:
    BytecodeReader( 
      BytecodeHandler* h = 0
    ) { 
-    Handler = h; 
+    Handler = h;
    }
  
-  ~BytecodeReader() { freeState(); }
+  ~BytecodeReader() {  // releases the parser state and the decompression buffer, if any
+    freeState(); 
+    if (bi.buff != 0)  // stays 0 (see BufferInfo's constructor) until GetUncompressionBuffer in Reader.cpp allocates it
+      ::free(bi.buff); // NOTE(review): ParseBytecode's catch handlers also free bi.buff without resetting it to 0 -- confirm this cannot free the same block twice
+  }
  
  /// @}
  /// @name Types
@@ -63,6 +67,13 @@ public:
    /// @brief The type used for a vector of potentially abstract types
    typedef std::vector<PATypeHolder> TypeListTy;
  
+  /// @brief Growable buffer that receives decompressed bytecode; its address is passed as the context pointer to the decompression callback
+  struct BufferInfo {
+    char* buff;     ///< Start of the malloc/realloc'd buffer, or 0 while nothing has been allocated
+    unsigned size;  ///< Size in bytes that the callback doubles on each call; ParseBytecode seeds it with Length * 2
+    BufferInfo() { buff = 0; size = 0; } ///< Starts empty: no buffer and a size of zero
+  };
+
    /// This type provides a vector of Value* via the User class for
    /// storage of Values that have been constructed when reading the
    /// bytecode. Because of forward referencing, constant replacement
@@ -235,6 +246,8 @@ protected:
  /// @name Data
  /// @{
  private:
+  BufferInfo bi;      ///< Buffer info for decompression
+
    BufPtr MemStart;     ///< Start of the memory buffer
    BufPtr MemEnd;       ///< End of the memory buffer
    BufPtr BlockStart;   ///< Start of current block being parsed
diff --git a/lib/Bytecode/Writer/Writer.cpp b/lib/Bytecode/Writer/Writer.cpp

index 4d988f969b6b155796aed6491371678d3a16caae..15d605111e8689275c262159416b688f72086a45 100644 (file)
--- a/lib/Bytecode/Writer/Writer.cpp
+++ b/lib/Bytecode/Writer/Writer.cpp
@@ -25,6 +25,7 @@
  #include "llvm/Module.h"
  #include "llvm/SymbolTable.h"
  #include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/Compressor.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/Statistic.h"
  #include <cstring>
@@ -1085,36 +1086,92 @@ void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) {
    }
  }
  
-void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out) {
+struct CompressionContext { // state shared by WriteBytecodeToFile and WriteCompressedData while compressed output is streamed
+  char* chunk;       // current output chunk, allocated with new[] by WriteCompressedData; 0 before the first one
+  unsigned sz;       // size in bytes of 'chunk'
+  unsigned written;  // total bytes of earlier chunks already written to 'Out'
+  std::ostream* Out; // stream that receives the compressed data
+};
+
+static unsigned WriteCompressedData(char*&buffer, unsigned& size, void* context) { // callback given to Compressor::compress: writes out the previous chunk, then hands back a fresh one
+  CompressionContext* ctxt = reinterpret_cast<CompressionContext*>(context); // 'context' is the CompressionContext set up by WriteBytecodeToFile
+  if (ctxt->chunk != 0 && ctxt->sz > 0 ) { // a previous chunk exists; writes all of it, so presumably it is completely full -- TODO confirm against Compressor
+    ctxt->Out->write(ctxt->chunk,ctxt->sz);
+    delete [] ctxt->chunk;
+    ctxt->written += ctxt->sz; // running total; WriteBytecodeToFile subtracts it from the compressed size to write the final chunk
+  }
+  size = ctxt->sz = 1024*1024; // every chunk handed out is 1 MiB
+  buffer = ctxt->chunk = new char [ctxt->sz];
+  return (ctxt->chunk == 0 ? 1 : 0); // 0 = success, 1 = no chunk; NOTE(review): plain new[] normally throws instead of returning 0 -- confirm this check is reachable
+}
+
+void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out,
+                               bool compress ) {
    assert(M && "You can't write a null module!!");
  
+  // Create a vector of unsigned char for the bytecode output. We
+  // reserve 256KBytes of space in the vector so that we avoid doing
+  // lots of little allocations. 256KBytes is sufficient for a large
+  // proportion of the bytecode files we will encounter. Larger files
+  // will be automatically doubled in size as needed (std::vector
+  // behavior).
    std::vector<unsigned char> Buffer;
-  Buffer.reserve(64 * 1024); // avoid lots of little reallocs
+  Buffer.reserve(256 * 1024);
  
-  // This object populates buffer for us...
+  // The BytecodeWriter populates Buffer for us.
    BytecodeWriter BCW(Buffer, M);
  
-  // Keep track of how much we've written...
+  // Keep track of how much we've written
    BytesWritten += Buffer.size();
  
-  // Okay, write the deque out to the ostream now... the deque is not
-  // sequential in memory, however, so write out as much as possible in big
-  // chunks, until we're done.
-  //
-  for (std::vector<unsigned char>::const_iterator I = Buffer.begin(),
-         E = Buffer.end(); I != E; ) {
-    // Scan to see how big this chunk is...
-    const unsigned char *ChunkPtr = &*I;
-    const unsigned char *LastPtr = ChunkPtr;
-    while (I != E) {
-      const unsigned char *ThisPtr = &*++I;
-      if (++LastPtr != ThisPtr) // Advanced by more than a byte of memory?
-        break;
+  // Determine start and end points of the Buffer
+  std::vector<unsigned char>::iterator I = Buffer.begin();
+  const unsigned char *FirstByte = &(*I);
+  const unsigned char *LastByte = FirstByte + Buffer.size();
+
+  // If we're supposed to compress this mess ...
+  if (compress) {
+
+    // We signal compression by using an alternate magic number for the
+    // file. The compressed bytecode file's magic number is the same as
+    // the uncompressed one but with the high bits set. So, "llvm", which
+    // is 0x6C 0x6C 0x76 0x6D becomes 0xEC 0xEC 0xF6 0xED
+    unsigned char compressed_magic[4];
+    compressed_magic[0] = 0xEC; // 'l' + 0x80
+    compressed_magic[1] = 0xEC; // 'l' + 0x80
+    compressed_magic[2] = 0xF6; // 'v' + 0x80
+    compressed_magic[3] = 0xED; // 'm' + 0x80
+
+    Out.write((char*)compressed_magic,4);
+
+    // Do the compression, writing as we go.
+    CompressionContext ctxt;
+    ctxt.chunk = 0;
+    ctxt.sz = 0;
+    ctxt.written = 0;
+    ctxt.Out = &Out;
+
+    // Compress everything after the magic number (which we'll alter)
+    uint64_t zipSize = Compressor::compress(
+      (char*)(FirstByte+4),        // Skip the magic number
+      Buffer.size()-4,             // Skip the magic number
+      WriteCompressedData,         // use this function to allocate / write
+      Compressor::COMP_TYPE_BZIP2, // Try bzip2 compression first
+      (void*)&ctxt                 // Keep track of allocated memory
+    );
+
+    if (ctxt.chunk && ctxt.sz > 0) {
+      Out.write(ctxt.chunk, zipSize - ctxt.written);
+      delete [] ctxt.chunk;
      }
-    
-    // Write out the chunk...
-    Out.write((char*)ChunkPtr, unsigned(LastPtr-ChunkPtr));
+  } else {
+
+    // We're not compressing, so just write the entire block.
+    Out.write((char*)FirstByte, LastByte-FirstByte);
+
    }
+
+  // make sure it hits disk now
    Out.flush();
  }
  
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp

index 1d48f7a7fa12c680f27c6cb7648f20330999da05..edefe69c613abb0eed7a2a33a449804ce3e3f062 100644 (file)
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -40,6 +40,9 @@ Force("f", cl::desc("Overwrite output files"));
  static cl::opt<bool>
  DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden);
  
+static cl::opt<bool> Compress("compress", cl::Optional,
+       cl::desc("Compress the generated bytecode")); // forwarded to WriteBytecodeToFile as its 'compress' argument
+
  static cl::opt<bool>
  DisableVerify("disable-verify", cl::Hidden,
                cl::desc("Do not run verifier on input LLVM (dangerous!)"));
@@ -119,7 +122,7 @@ int main(int argc, char **argv) {
        return 1;
      }
     
-    WriteBytecodeToFile(M.get(), *Out);
+    WriteBytecodeToFile(M.get(), *Out, Compress);
    } catch (const ParseException &E) {
      std::cerr << argv[0] << ": " << E.getMessage() << "\n";
      return 1;
author	Reid Spencer <rspencer@reidspencer.com>
	Sat, 6 Nov 2004 23:17:23 +0000 (23:17 +0000)
committer	Reid Spencer <rspencer@reidspencer.com>
	Sat, 6 Nov 2004 23:17:23 +0000 (23:17 +0000)
include/llvm/Bytecode/Writer.h		patch \| blob \| history
lib/Bytecode/Reader/Reader.cpp		patch \| blob \| history
lib/Bytecode/Reader/Reader.h		patch \| blob \| history
lib/Bytecode/Writer/Writer.cpp		patch \| blob \| history
tools/llvm-as/llvm-as.cpp		patch \| blob \| history