include/llvm/Bitcode/BitstreamWriter.h

   1 //===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This header defines the BitstreamWriter class.  This class can be used to
  11 // write an arbitrary bitstream, regardless of its contents.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef BITSTREAM_WRITER_H
  16 #define BITSTREAM_WRITER_H
  17
  18 #include "llvm/Bitcode/BitCodes.h"
  19 #include <vector>
  20
  21 namespace llvm {
  22
  23 class BitstreamWriter {
  24   std::vector<unsigned char> &Out;
  25
  26   /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
  27   unsigned CurBit;
  28
  29   /// CurValue - The current value.  Only bits < CurBit are valid.
  30   uint32_t CurValue;
  31
  32   /// CurCodeSize - This is the declared size of code values used for the
  33   /// current block, in bits.
  34   unsigned CurCodeSize;
  35
  36   /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
  37   /// selected BLOCK ID.
  38   unsigned BlockInfoCurBID;
  39
  40   /// CurAbbrevs - Abbrevs installed at in this block.
  41   std::vector<BitCodeAbbrev*> CurAbbrevs;
  42
  43   struct Block {
  44     unsigned PrevCodeSize;
  45     unsigned StartSizeWord;
  46     std::vector<BitCodeAbbrev*> PrevAbbrevs;
  47     Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
  48   };
  49
  50   /// BlockScope - This tracks the current blocks that we have entered.
  51   std::vector<Block> BlockScope;
  52
  53   /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
  54   /// These describe abbreviations that all blocks of the specified ID inherit.
  55   struct BlockInfo {
  56     unsigned BlockID;
  57     std::vector<BitCodeAbbrev*> Abbrevs;
  58   };
  59   std::vector<BlockInfo> BlockInfoRecords;
  60
  61 public:
  62   explicit BitstreamWriter(std::vector<unsigned char> &O)
  63     : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
  64
  65   ~BitstreamWriter() {
  66     assert(CurBit == 0 && "Unflused data remaining");
  67     assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance");
  68
  69     // Free the BlockInfoRecords.
  70     while (!BlockInfoRecords.empty()) {
  71       BlockInfo &Info = BlockInfoRecords.back();
  72       // Free blockinfo abbrev info.
  73       for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
  74            i != e; ++i)
  75         Info.Abbrevs[i]->dropRef();
  76       BlockInfoRecords.pop_back();
  77     }
  78   }
  79
  80   std::vector<unsigned char> &getBuffer() { return Out; }
  81
  82   //===--------------------------------------------------------------------===//
  83   // Basic Primitives for emitting bits to the stream.
  84   //===--------------------------------------------------------------------===//
  85
  86   void Emit(uint32_t Val, unsigned NumBits) {
  87     assert(NumBits <= 32 && "Invalid value size!");
  88     assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
  89     CurValue |= Val << CurBit;
  90     if (CurBit + NumBits < 32) {
  91       CurBit += NumBits;
  92       return;
  93     }
  94
  95     // Add the current word.
  96     unsigned V = CurValue;
  97     Out.push_back((unsigned char)(V >>  0));
  98     Out.push_back((unsigned char)(V >>  8));
  99     Out.push_back((unsigned char)(V >> 16));
 100     Out.push_back((unsigned char)(V >> 24));
 101
 102     if (CurBit)
 103       CurValue = Val >> (32-CurBit);
 104     else
 105       CurValue = 0;
 106     CurBit = (CurBit+NumBits) & 31;
 107   }
 108
 109   void Emit64(uint64_t Val, unsigned NumBits) {
 110     if (NumBits <= 32)
 111       Emit((uint32_t)Val, NumBits);
 112     else {
 113       Emit((uint32_t)Val, 32);
 114       Emit((uint32_t)(Val >> 32), NumBits-32);
 115     }
 116   }
 117
 118   void FlushToWord() {
 119     if (CurBit) {
 120       unsigned V = CurValue;
 121       Out.push_back((unsigned char)(V >>  0));
 122       Out.push_back((unsigned char)(V >>  8));
 123       Out.push_back((unsigned char)(V >> 16));
 124       Out.push_back((unsigned char)(V >> 24));
 125       CurBit = 0;
 126       CurValue = 0;
 127     }
 128   }
 129
 130   void EmitVBR(uint32_t Val, unsigned NumBits) {
 131     uint32_t Threshold = 1U << (NumBits-1);
 132
 133     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
 134     while (Val >= Threshold) {
 135       Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits);
 136       Val >>= NumBits-1;
 137     }
 138
 139     Emit(Val, NumBits);
 140   }
 141
 142   void EmitVBR64(uint64_t Val, unsigned NumBits) {
 143     if ((uint32_t)Val == Val)
 144       return EmitVBR((uint32_t)Val, NumBits);
 145
 146     uint64_t Threshold = 1U << (NumBits-1);
 147
 148     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
 149     while (Val >= Threshold) {
 150       Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) |
 151            (1 << (NumBits-1)), NumBits);
 152       Val >>= NumBits-1;
 153     }
 154
 155     Emit((uint32_t)Val, NumBits);
 156   }
 157
 158   /// EmitCode - Emit the specified code.
 159   void EmitCode(unsigned Val) {
 160     Emit(Val, CurCodeSize);
 161   }
 162
 163   // BackpatchWord - Backpatch a 32-bit word in the output with the specified
 164   // value.
 165   void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
 166     Out[ByteNo++] = (unsigned char)(NewWord >>  0);
 167     Out[ByteNo++] = (unsigned char)(NewWord >>  8);
 168     Out[ByteNo++] = (unsigned char)(NewWord >> 16);
 169     Out[ByteNo  ] = (unsigned char)(NewWord >> 24);
 170   }
 171
 172   //===--------------------------------------------------------------------===//
 173   // Block Manipulation
 174   //===--------------------------------------------------------------------===//
 175
 176   /// getBlockInfo - If there is block info for the specified ID, return it,
 177   /// otherwise return null.
 178   BlockInfo *getBlockInfo(unsigned BlockID) {
 179     // Common case, the most recent entry matches BlockID.
 180     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
 181       return &BlockInfoRecords.back();
 182
 183     for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
 184          i != e; ++i)
 185       if (BlockInfoRecords[i].BlockID == BlockID)
 186         return &BlockInfoRecords[i];
 187     return 0;
 188   }
 189
 190   void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
 191     // Block header:
 192     //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
 193     EmitCode(bitc::ENTER_SUBBLOCK);
 194     EmitVBR(BlockID, bitc::BlockIDWidth);
 195     EmitVBR(CodeLen, bitc::CodeLenWidth);
 196     FlushToWord();
 197
 198     unsigned BlockSizeWordLoc = static_cast<unsigned>(Out.size());
 199     unsigned OldCodeSize = CurCodeSize;
 200
 201     // Emit a placeholder, which will be replaced when the block is popped.
 202     Emit(0, bitc::BlockSizeWidth);
 203
 204     CurCodeSize = CodeLen;
 205
 206     // Push the outer block's abbrev set onto the stack, start out with an
 207     // empty abbrev set.
 208     BlockScope.push_back(Block(OldCodeSize, BlockSizeWordLoc/4));
 209     BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
 210
 211     // If there is a blockinfo for this BlockID, add all the predefined abbrevs
 212     // to the abbrev list.
 213     if (BlockInfo *Info = getBlockInfo(BlockID)) {
 214       for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size());
 215            i != e; ++i) {
 216         CurAbbrevs.push_back(Info->Abbrevs[i]);
 217         Info->Abbrevs[i]->addRef();
 218       }
 219     }
 220   }
 221
 222   void ExitBlock() {
 223     assert(!BlockScope.empty() && "Block scope imbalance!");
 224
 225     // Delete all abbrevs.
 226     for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
 227          i != e; ++i)
 228       CurAbbrevs[i]->dropRef();
 229
 230     const Block &B = BlockScope.back();
 231
 232     // Block tail:
 233     //    [END_BLOCK, <align4bytes>]
 234     EmitCode(bitc::END_BLOCK);
 235     FlushToWord();
 236
 237     // Compute the size of the block, in words, not counting the size field.
 238     unsigned SizeInWords= static_cast<unsigned>(Out.size())/4-B.StartSizeWord-1;
 239     unsigned ByteNo = B.StartSizeWord*4;
 240
 241     // Update the block size field in the header of this sub-block.
 242     BackpatchWord(ByteNo, SizeInWords);
 243
 244     // Restore the inner block's code size and abbrev table.
 245     CurCodeSize = B.PrevCodeSize;
 246     BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
 247     BlockScope.pop_back();
 248   }
 249
 250   //===--------------------------------------------------------------------===//
 251   // Record Emission
 252   //===--------------------------------------------------------------------===//
 253
 254 private:
 255   /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev
 256   /// record.  This is a no-op, since the abbrev specifies the literal to use.
 257   template<typename uintty>
 258   void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) {
 259     assert(Op.isLiteral() && "Not a literal");
 260     // If the abbrev specifies the literal value to use, don't emit
 261     // anything.
 262     assert(V == Op.getLiteralValue() &&
 263            "Invalid abbrev for record!");
 264   }
 265
 266   /// EmitAbbreviatedField - Emit a single scalar field value with the specified
 267   /// encoding.
 268   template<typename uintty>
 269   void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) {
 270     assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!");
 271
 272     // Encode the value as we are commanded.
 273     switch (Op.getEncoding()) {
 274     default: assert(0 && "Unknown encoding!");
 275     case BitCodeAbbrevOp::Fixed:
 276       Emit((unsigned)V, (unsigned)Op.getEncodingData());
 277       break;
 278     case BitCodeAbbrevOp::VBR:
 279       EmitVBR64(V, (unsigned)Op.getEncodingData());
 280       break;
 281     case BitCodeAbbrevOp::Char6:
 282       Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6);
 283       break;
 284     }
 285   }
 286
 287   /// EmitRecordWithAbbrevImpl - This is the core implementation of the record
 288   /// emission code.  If BlobData is non-null, then it specifies an array of
 289   /// data that should be emitted as part of the Blob or Array operand that is
 290   /// known to exist at the end of the the record.
 291   template<typename uintty>
 292   void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 293                                 const char *BlobData, unsigned BlobLen) {
 294     unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV;
 295     assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
 296     BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo];
 297
 298     EmitCode(Abbrev);
 299
 300     unsigned RecordIdx = 0;
 301     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
 302          i != e; ++i) {
 303       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
 304       if (Op.isLiteral()) {
 305         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
 306         EmitAbbreviatedLiteral(Op, Vals[RecordIdx]);
 307         ++RecordIdx;
 308       } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
 309         // Array case.
 310         assert(i+2 == e && "array op not second to last?");
 311         const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
 312
 313         // If this record has blob data, emit it, otherwise we must have record
 314         // entries to encode this way.
 315         if (BlobData) {
 316           assert(RecordIdx == Vals.size() &&
 317                  "Blob data and record entries specified for array!");
 318           // Emit a vbr6 to indicate the number of elements present.
 319           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
 320
 321           // Emit each field.
 322           for (unsigned i = 0; i != BlobLen; ++i)
 323             EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]);
 324
 325           // Know that blob data is consumed for assertion below.
 326           BlobData = 0;
 327         } else {
 328           // Emit a vbr6 to indicate the number of elements present.
 329           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
 330
 331           // Emit each field.
 332           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx)
 333             EmitAbbreviatedField(EltEnc, Vals[RecordIdx]);
 334         }
 335       } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
 336         // If this record has blob data, emit it, otherwise we must have record
 337         // entries to encode this way.
 338
 339         // Emit a vbr6 to indicate the number of elements present.
 340         if (BlobData) {
 341           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
 342           assert(RecordIdx == Vals.size() &&
 343                  "Blob data and record entries specified for blob operand!");
 344         } else {
 345           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
 346         }
 347
 348         // Flush to a 32-bit alignment boundary.
 349         FlushToWord();
 350         assert((Out.size() & 3) == 0 && "Not 32-bit aligned");
 351
 352         // Emit each field as a literal byte.
 353         if (BlobData) {
 354           for (unsigned i = 0; i != BlobLen; ++i)
 355             Out.push_back((unsigned char)BlobData[i]);
 356
 357           // Know that blob data is consumed for assertion below.
 358           BlobData = 0;
 359         } else {
 360           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) {
 361             assert(Vals[RecordIdx] < 256 && "Value too large to emit as blob");
 362             Out.push_back((unsigned char)Vals[RecordIdx]);
 363           }
 364         }
 365         // Align end to 32-bits.
 366         while (Out.size() & 3)
 367           Out.push_back(0);
 368
 369       } else {  // Single scalar field.
 370         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
 371         EmitAbbreviatedField(Op, Vals[RecordIdx]);
 372         ++RecordIdx;
 373       }
 374     }
 375     assert(RecordIdx == Vals.size() && "Not all record operands emitted!");
 376     assert(BlobData == 0 &&
 377            "Blob data specified for record that doesn't use it!");
 378   }
 379
 380 public:
 381
 382   /// EmitRecord - Emit the specified record to the stream, using an abbrev if
 383   /// we have one to compress the output.
 384   template<typename uintty>
 385   void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals,
 386                   unsigned Abbrev = 0) {
 387     if (!Abbrev) {
 388       // If we don't have an abbrev to use, emit this in its fully unabbreviated
 389       // form.
 390       EmitCode(bitc::UNABBREV_RECORD);
 391       EmitVBR(Code, 6);
 392       EmitVBR(static_cast<uint32_t>(Vals.size()), 6);
 393       for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i)
 394         EmitVBR64(Vals[i], 6);
 395       return;
 396     }
 397
 398     // Insert the code into Vals to treat it uniformly.
 399     Vals.insert(Vals.begin(), Code);
 400
 401     EmitRecordWithAbbrev(Abbrev, Vals);
 402   }
 403
 404   /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
 405   /// Unlike EmitRecord, the code for the record should be included in Vals as
 406   /// the first entry.
 407   template<typename uintty>
 408   void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) {
 409     EmitRecordWithAbbrevImpl(Abbrev, Vals, 0, 0);
 410   }
 411
 412   /// EmitRecordWithBlob - Emit the specified record to the stream, using an
 413   /// abbrev that includes a blob at the end.  The blob data to emit is
 414   /// specified by the pointer and length specified at the end.  In contrast to
 415   /// EmitRecord, this routine expects that the first entry in Vals is the code
 416   /// of the record.
 417   template<typename uintty>
 418   void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 419                           const char *BlobData, unsigned BlobLen) {
 420     EmitRecordWithAbbrevImpl(Abbrev, Vals, BlobData, BlobLen);
 421   }
 422
 423   /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records
 424   /// that end with an array.
 425   template<typename uintty>
 426   void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 427                           const char *ArrayData, unsigned ArrayLen) {
 428     EmitRecordWithAbbrevImpl(Abbrev, Vals, ArrayData, ArrayLen);
 429   }
 430
 431   //===--------------------------------------------------------------------===//
 432   // Abbrev Emission
 433   //===--------------------------------------------------------------------===//
 434
 435 private:
 436   // Emit the abbreviation as a DEFINE_ABBREV record.
 437   void EncodeAbbrev(BitCodeAbbrev *Abbv) {
 438     EmitCode(bitc::DEFINE_ABBREV);
 439     EmitVBR(Abbv->getNumOperandInfos(), 5);
 440     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
 441          i != e; ++i) {
 442       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
 443       Emit(Op.isLiteral(), 1);
 444       if (Op.isLiteral()) {
 445         EmitVBR64(Op.getLiteralValue(), 8);
 446       } else {
 447         Emit(Op.getEncoding(), 3);
 448         if (Op.hasEncodingData())
 449           EmitVBR64(Op.getEncodingData(), 5);
 450       }
 451     }
 452   }
 453 public:
 454
 455   /// EmitAbbrev - This emits an abbreviation to the stream.  Note that this
 456   /// method takes ownership of the specified abbrev.
 457   unsigned EmitAbbrev(BitCodeAbbrev *Abbv) {
 458     // Emit the abbreviation as a record.
 459     EncodeAbbrev(Abbv);
 460     CurAbbrevs.push_back(Abbv);
 461     return static_cast<unsigned>(CurAbbrevs.size())-1 +
 462       bitc::FIRST_APPLICATION_ABBREV;
 463   }
 464
 465   //===--------------------------------------------------------------------===//
 466   // BlockInfo Block Emission
 467   //===--------------------------------------------------------------------===//
 468
 469   /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
 470   void EnterBlockInfoBlock(unsigned CodeWidth) {
 471     EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth);
 472     BlockInfoCurBID = -1U;
 473   }
 474 private:
 475   /// SwitchToBlockID - If we aren't already talking about the specified block
 476   /// ID, emit a BLOCKINFO_CODE_SETBID record.
 477   void SwitchToBlockID(unsigned BlockID) {
 478     if (BlockInfoCurBID == BlockID) return;
 479     SmallVector<unsigned, 2> V;
 480     V.push_back(BlockID);
 481     EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V);
 482     BlockInfoCurBID = BlockID;
 483   }
 484
 485   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
 486     if (BlockInfo *BI = getBlockInfo(BlockID))
 487       return *BI;
 488
 489     // Otherwise, add a new record.
 490     BlockInfoRecords.push_back(BlockInfo());
 491     BlockInfoRecords.back().BlockID = BlockID;
 492     return BlockInfoRecords.back();
 493   }
 494
 495 public:
 496
 497   /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified
 498   /// BlockID.
 499   unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) {
 500     SwitchToBlockID(BlockID);
 501     EncodeAbbrev(Abbv);
 502
 503     // Add the abbrev to the specified block record.
 504     BlockInfo &Info = getOrCreateBlockInfo(BlockID);
 505     Info.Abbrevs.push_back(Abbv);
 506
 507     return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV;
 508   }
 509 };
 510
 511
 512 } // End llvm namespace
 513
 514 #endif