lib/Support/Compressor.cpp

   1 //===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by Reid Spencer and is distributed under the
   6 // University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the llvm::Compressor class, an abstraction for memory
  11 // block compression.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
#include "llvm/Config/config.h"
#include "llvm/Support/Compressor.h"
#include "llvm/ADT/StringExtras.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <ostream>
#include <string>
#include "bzip2/bzlib.h"
  22 using namespace llvm;
  23
  24 enum CompressionTypes {
  25   COMP_TYPE_NONE  = '0',
  26   COMP_TYPE_BZIP2 = '2',
  27 };
  28
  29 static int getdata(char*& buffer, size_t &size,
  30                    llvm::Compressor::OutputDataCallback* cb, void* context) {
  31   buffer = 0;
  32   size = 0;
  33   int result = (*cb)(buffer, size, context);
  34   assert(buffer != 0 && "Invalid result from Compressor callback");
  35   assert(size != 0 && "Invalid result from Compressor callback");
  36   return result;
  37 }
  38
  39 static int getdata_uns(char*& buffer, unsigned &size,
  40                        llvm::Compressor::OutputDataCallback* cb, void* context) {
  41   size_t SizeOut;
  42   int Res = getdata(buffer, SizeOut, cb, context);
  43   size = SizeOut;
  44   return Res;
  45 }
  46
  47 //===----------------------------------------------------------------------===//
  48 //=== NULLCOMP - a compression like set of routines that just copies data
  49 //===            without doing any compression. This is provided so that if the
  50 //===            configured environment doesn't have a compression library the
  51 //===            program can still work, albeit using more data/memory.
  52 //===----------------------------------------------------------------------===//
  53
  54 struct NULLCOMP_stream {
  55   // User provided fields
  56   char*  next_in;
  57   size_t avail_in;
  58   char*  next_out;
  59   size_t avail_out;
  60
  61   // Information fields
  62   size_t output_count; // Total count of output bytes
  63 };
  64
  65 static void NULLCOMP_init(NULLCOMP_stream* s) {
  66   s->output_count = 0;
  67 }
  68
  69 static bool NULLCOMP_compress(NULLCOMP_stream* s) {
  70   assert(s && "Invalid NULLCOMP_stream");
  71   assert(s->next_in != 0);
  72   assert(s->next_out != 0);
  73   assert(s->avail_in >= 1);
  74   assert(s->avail_out >= 1);
  75
  76   if (s->avail_out >= s->avail_in) {
  77     ::memcpy(s->next_out, s->next_in, s->avail_in);
  78     s->output_count += s->avail_in;
  79     s->avail_out -= s->avail_in;
  80     s->next_in += s->avail_in;
  81     s->avail_in = 0;
  82     return true;
  83   } else {
  84     ::memcpy(s->next_out, s->next_in, s->avail_out);
  85     s->output_count += s->avail_out;
  86     s->avail_in -= s->avail_out;
  87     s->next_in += s->avail_out;
  88     s->avail_out = 0;
  89     return false;
  90   }
  91 }
  92
  93 static bool NULLCOMP_decompress(NULLCOMP_stream* s) {
  94   assert(s && "Invalid NULLCOMP_stream");
  95   assert(s->next_in != 0);
  96   assert(s->next_out != 0);
  97   assert(s->avail_in >= 1);
  98   assert(s->avail_out >= 1);
  99
 100   if (s->avail_out >= s->avail_in) {
 101     ::memcpy(s->next_out, s->next_in, s->avail_in);
 102     s->output_count += s->avail_in;
 103     s->avail_out -= s->avail_in;
 104     s->next_in += s->avail_in;
 105     s->avail_in = 0;
 106     return true;
 107   } else {
 108     ::memcpy(s->next_out, s->next_in, s->avail_out);
 109     s->output_count += s->avail_out;
 110     s->avail_in -= s->avail_out;
 111     s->next_in += s->avail_out;
 112     s->avail_out = 0;
 113     return false;
 114   }
 115 }
 116
 117 static void NULLCOMP_end(NULLCOMP_stream* strm) {
 118 }
 119
 120 namespace {
 121
 122 /// This structure is only used when a bytecode file is compressed.
 123 /// As bytecode is being decompressed, the memory buffer might need
 124 /// to be reallocated. The buffer allocation is handled in a callback
 125 /// and this structure is needed to retain information across calls
 126 /// to the callback.
 127 /// @brief An internal buffer object used for handling decompression
 128 struct BufferContext {
 129   char* buff;
 130   size_t size;
 131   BufferContext(size_t compressedSize) {
 132     // Null to indicate malloc of a new block
 133     buff = 0;
 134
 135     // Compute the initial length of the uncompression buffer. Note that this
 136     // is twice the length of the compressed buffer and will be doubled again
 137     // in the callback for an initial allocation of 4x compressedSize.  This
 138     // calculation is based on the typical compression ratio of bzip2 on LLVM
 139     // bytecode files which typically ranges in the 50%-75% range.   Since we
 140     // typically get at least 50%, doubling is insufficient. By using a 4x
 141     // multiplier on the first allocation, we minimize the impact of having to
 142     // copy the buffer on reallocation.
 143     size = compressedSize*2;
 144   }
 145
 146   /// trimTo - Reduce the size of the buffer down to the specified amount.  This
 147   /// is useful after have read in the bytecode file to discard extra unused
 148   /// memory.
 149   ///
 150   void trimTo(size_t NewSize) {
 151     buff = (char*)::realloc(buff, NewSize);
 152     size = NewSize;
 153   }
 154
 155   /// This function handles allocation of the buffer used for decompression of
 156   /// compressed bytecode files. It is called by Compressor::decompress which is
 157   /// called by BytecodeReader::ParseBytecode.
 158   static size_t callback(char*&buff, size_t &sz, void* ctxt){
 159     // Case the context variable to our BufferContext
 160     BufferContext* bc = reinterpret_cast<BufferContext*>(ctxt);
 161
 162     // Compute the new, doubled, size of the block
 163     size_t new_size = bc->size * 2;
 164
 165     // Extend or allocate the block (realloc(0,n) == malloc(n))
 166     char* new_buff = (char*) ::realloc(bc->buff, new_size);
 167
 168     // Figure out what to return to the Compressor. If this is the first call,
 169     // then bc->buff will be null. In this case we want to return the entire
 170     // buffer because there was no previous allocation.  Otherwise, when the
 171     // buffer is reallocated, we save the new base pointer in the
 172     // BufferContext.buff field but return the address of only the extension,
 173     // mid-way through the buffer (since its size was doubled). Furthermore,
 174     // the sz result must be 1/2 the total size of the buffer.
 175     if (bc->buff == 0 ) {
 176       buff = bc->buff = new_buff;
 177       sz = new_size;
 178     } else {
 179       bc->buff = new_buff;
 180       buff = new_buff + bc->size;
 181       sz = bc->size;
 182     }
 183
 184     // Retain the size of the allocated block
 185     bc->size = new_size;
 186
 187     // Make sure we fail (return 1) if we didn't get any memory.
 188     return (bc->buff == 0 ? 1 : 0);
 189   }
 190 };
 191
 192 } // end anonymous namespace
 193
 194
 195 namespace {
 196
 197 // This structure retains the context when compressing the bytecode file. The
 198 // WriteCompressedData function below uses it to keep track of the previously
 199 // filled chunk of memory (which it writes) and how many bytes have been
 200 // written.
 201 struct WriterContext {
 202   // Initialize the context
 203   WriterContext(std::ostream*OS, size_t CS)
 204     : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {}
 205
 206   // Make sure we clean up memory
 207   ~WriterContext() {
 208     if (chunk)
 209       delete [] chunk;
 210   }
 211
 212   // Write the chunk
 213   void write(size_t size = 0) {
 214     size_t write_size = (size == 0 ? sz : size);
 215     Out->write(chunk,write_size);
 216     written += write_size;
 217     delete [] chunk;
 218     chunk = 0;
 219     sz = 0;
 220   }
 221
 222   // This function is a callback used by the Compressor::compress function to
 223   // allocate memory for the compression buffer. This function fulfills that
 224   // responsibility but also writes the previous (now filled) buffer out to the
 225   // stream.
 226   static size_t callback(char*& buffer, size_t &size, void* context) {
 227     // Cast the context to the structure it must point to.
 228     WriterContext* ctxt = reinterpret_cast<WriterContext*>(context);
 229
 230     // If there's a previously allocated chunk, it must now be filled with
 231     // compressed data, so we write it out and deallocate it.
 232     if (ctxt->chunk != 0 && ctxt->sz > 0 ) {
 233       ctxt->write();
 234     }
 235
 236     // Compute the size of the next chunk to allocate. We attempt to allocate
 237     // enough memory to handle the compression in a single memory allocation. In
 238     // general, the worst we do on compression of bytecode is about 50% so we
 239     // conservatively estimate compSize / 2 as the size needed for the
 240     // compression buffer. compSize is the size of the compressed data, provided
 241     // by WriteBytecodeToFile.
 242     size = ctxt->sz = ctxt->compSize / 2;
 243
 244     // Allocate the chunks
 245     buffer = ctxt->chunk = new char [size];
 246
 247     // We must return 1 if the allocation failed so that the Compressor knows
 248     // not to use the buffer pointer.
 249     return (ctxt->chunk == 0 ? 1 : 0);
 250   }
 251
 252   char* chunk;       // pointer to the chunk of memory filled by compression
 253   size_t sz;         // size of chunk
 254   size_t written;    // aggregate total of bytes written in all chunks
 255   size_t compSize;   // size of the uncompressed buffer
 256   std::ostream* Out; // The stream we write the data to.
 257 };
 258
 259 }  // end anonymous namespace
 260
 261 // Compress in one of three ways
 262 size_t Compressor::compress(const char* in, size_t size,
 263                             OutputDataCallback* cb, void* context) {
 264   assert(in && "Can't compress null buffer");
 265   assert(size && "Can't compress empty buffer");
 266   assert(cb && "Can't compress without a callback function");
 267
 268   size_t result = 0;
 269
 270   // For small files, we just don't bother compressing. bzip2 isn't very good
 271   // with tiny files and can actually make the file larger, so we just avoid
 272   // it altogether.
 273   if (size > 64*1024) {
 274     // Set up the bz_stream
 275     bz_stream bzdata;
 276     bzdata.bzalloc = 0;
 277     bzdata.bzfree = 0;
 278     bzdata.opaque = 0;
 279     bzdata.next_in = (char*)in;
 280     bzdata.avail_in = size;
 281     bzdata.next_out = 0;
 282     bzdata.avail_out = 0;
 283     switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) {
 284       case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
 285       case BZ_PARAM_ERROR:  throw std::string("Compressor internal error");
 286       case BZ_MEM_ERROR:    throw std::string("Out of memory");
 287       case BZ_OK:
 288       default:
 289         break;
 290     }
 291
 292     // Get a block of memory
 293     if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
 294       BZ2_bzCompressEnd(&bzdata);
 295       throw std::string("Can't allocate output buffer");
 296     }
 297
 298     // Put compression code in first byte
 299     (*bzdata.next_out++) = COMP_TYPE_BZIP2;
 300     bzdata.avail_out--;
 301
 302     // Compress it
 303     int bzerr = BZ_FINISH_OK;
 304     while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) {
 305       if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
 306         BZ2_bzCompressEnd(&bzdata);
 307         throw std::string("Can't allocate output buffer");
 308       }
 309     }
 310     switch (bzerr) {
 311       case BZ_SEQUENCE_ERROR:
 312       case BZ_PARAM_ERROR: throw std::string("Param/Sequence error");
 313       case BZ_FINISH_OK:
 314       case BZ_STREAM_END: break;
 315       default: throw std::string("Oops: ") + utostr(unsigned(bzerr));
 316     }
 317
 318     // Finish
 319     result = bzdata.total_out_lo32 + 1;
 320     if (sizeof(size_t) == sizeof(uint64_t))
 321       result |= static_cast<uint64_t>(bzdata.total_out_hi32) << 32;
 322
 323     BZ2_bzCompressEnd(&bzdata);
 324   } else {
 325     // Do null compression, for small files
 326     NULLCOMP_stream sdata;
 327     sdata.next_in = (char*)in;
 328     sdata.avail_in = size;
 329     NULLCOMP_init(&sdata);
 330
 331     if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
 332       throw std::string("Can't allocate output buffer");
 333     }
 334
 335     *(sdata.next_out++) = COMP_TYPE_NONE;
 336     sdata.avail_out--;
 337
 338     while (!NULLCOMP_compress(&sdata)) {
 339       if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
 340         throw std::string("Can't allocate output buffer");
 341       }
 342     }
 343
 344     result = sdata.output_count + 1;
 345     NULLCOMP_end(&sdata);
 346   }
 347   return result;
 348 }
 349
 350 size_t Compressor::compressToNewBuffer(const char* in, size_t size, char*&out) {
 351   BufferContext bc(size);
 352   size_t result = compress(in,size,BufferContext::callback,(void*)&bc);
 353   bc.trimTo(result);
 354   out = bc.buff;
 355   return result;
 356 }
 357
 358 size_t
 359 Compressor::compressToStream(const char*in, size_t size, std::ostream& out) {
 360   // Set up the context and writer
 361   WriterContext ctxt(&out, size / 2);
 362
 363   // Compress everything after the magic number (which we'll alter).
 364   size_t zipSize = Compressor::compress(in,size,
 365     WriterContext::callback, (void*)&ctxt);
 366
 367   if (ctxt.chunk) {
 368     ctxt.write(zipSize - ctxt.written);
 369   }
 370   return zipSize;
 371 }
 372
 373 // Decompress in one of three ways
 374 size_t Compressor::decompress(const char *in, size_t size,
 375                               OutputDataCallback* cb, void* context) {
 376   assert(in && "Can't decompress null buffer");
 377   assert(size > 1 && "Can't decompress empty buffer");
 378   assert(cb && "Can't decompress without a callback function");
 379
 380   size_t result = 0;
 381
 382   switch (*in++) {
 383     case COMP_TYPE_BZIP2: {
 384       // Set up the bz_stream
 385       bz_stream bzdata;
 386       bzdata.bzalloc = 0;
 387       bzdata.bzfree = 0;
 388       bzdata.opaque = 0;
 389       bzdata.next_in = (char*)in;
 390       bzdata.avail_in = size - 1;
 391       bzdata.next_out = 0;
 392       bzdata.avail_out = 0;
 393       switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) {
 394         case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
 395         case BZ_PARAM_ERROR:  throw std::string("Compressor internal error");
 396         case BZ_MEM_ERROR:    throw std::string("Out of memory");
 397         case BZ_OK:
 398         default:
 399           break;
 400       }
 401
 402       // Get a block of memory
 403       if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
 404         BZ2_bzDecompressEnd(&bzdata);
 405         throw std::string("Can't allocate output buffer");
 406       }
 407
 408       // Decompress it
 409       int bzerr = BZ_OK;
 410       while (BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata))) {
 411         if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
 412           BZ2_bzDecompressEnd(&bzdata);
 413           throw std::string("Can't allocate output buffer");
 414         }
 415       }
 416
 417       switch (bzerr) {
 418         case BZ_PARAM_ERROR:  throw std::string("Compressor internal error");
 419         case BZ_MEM_ERROR:    throw std::string("Out of memory");
 420         case BZ_DATA_ERROR:   throw std::string("Data integrity error");
 421         case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2");
 422         default: throw("Ooops");
 423         case BZ_STREAM_END:
 424           break;
 425       }
 426
 427       // Finish
 428       result = bzdata.total_out_lo32;
 429       if (sizeof(size_t) == sizeof(uint64_t))
 430         result |= (static_cast<uint64_t>(bzdata.total_out_hi32) << 32);
 431       BZ2_bzDecompressEnd(&bzdata);
 432       break;
 433     }
 434
 435     case COMP_TYPE_NONE: {
 436       NULLCOMP_stream sdata;
 437       sdata.next_in = (char*)in;
 438       sdata.avail_in = size - 1;
 439       NULLCOMP_init(&sdata);
 440
 441       if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
 442         throw std::string("Can't allocate output buffer");
 443       }
 444
 445       while (!NULLCOMP_decompress(&sdata)) {
 446         if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
 447           throw std::string("Can't allocate output buffer");
 448         }
 449       }
 450
 451       result = sdata.output_count;
 452       NULLCOMP_end(&sdata);
 453       break;
 454     }
 455
 456     default:
 457       throw std::string("Unknown type of compressed data");
 458   }
 459
 460   return result;
 461 }
 462
 463 size_t
 464 Compressor::decompressToNewBuffer(const char* in, size_t size, char*&out) {
 465   BufferContext bc(size);
 466   size_t result = decompress(in,size,BufferContext::callback,(void*)&bc);
 467   out = bc.buff;
 468   return result;
 469 }
 470
 471 size_t
 472 Compressor::decompressToStream(const char*in, size_t size, std::ostream& out){
 473   // Set up the context and writer
 474   WriterContext ctxt(&out,size / 2);
 475
 476   // Compress everything after the magic number (which we'll alter)
 477   size_t zipSize = Compressor::decompress(in,size,
 478     WriterContext::callback, (void*)&ctxt);
 479
 480   if (ctxt.chunk) {
 481     ctxt.write(zipSize - ctxt.written);
 482   }
 483   return zipSize;
 484 }
 485
 486 // vim: sw=2 ai