include/llvm/Bitcode/BitstreamWriter.h

   1 //===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This header defines the BitstreamWriter class.  This class can be used to
  11 // write an arbitrary bitstream, regardless of its contents.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef BITSTREAM_WRITER_H
  16 #define BITSTREAM_WRITER_H
  17
  18 #include "llvm/Bitcode/BitCodes.h"
  19 #include <vector>
  20
  21 namespace llvm {
  22
  23 class BitstreamWriter {
  24   std::vector<unsigned char> &Out;
  25
  26   /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
  27   unsigned CurBit;
  28
  29   /// CurValue - The current value.  Only bits < CurBit are valid.
  30   uint32_t CurValue;
  31
  32   /// CurCodeSize - This is the declared size of code values used for the
  33   /// current block, in bits.
  34   unsigned CurCodeSize;
  35
  36   /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
  37   /// selected BLOCK ID.
  38   unsigned BlockInfoCurBID;
  39
  40   /// CurAbbrevs - Abbrevs installed at in this block.
  41   std::vector<BitCodeAbbrev*> CurAbbrevs;
  42
  43   struct Block {
  44     unsigned PrevCodeSize;
  45     unsigned StartSizeWord;
  46     std::vector<BitCodeAbbrev*> PrevAbbrevs;
  47     Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
  48   };
  49
  50   /// BlockScope - This tracks the current blocks that we have entered.
  51   std::vector<Block> BlockScope;
  52
  53   /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
  54   /// These describe abbreviations that all blocks of the specified ID inherit.
  55   struct BlockInfo {
  56     unsigned BlockID;
  57     std::vector<BitCodeAbbrev*> Abbrevs;
  58   };
  59   std::vector<BlockInfo> BlockInfoRecords;
  60
  61 public:
  62   explicit BitstreamWriter(std::vector<unsigned char> &O)
  63     : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
  64
  65   ~BitstreamWriter() {
  66     assert(CurBit == 0 && "Unflused data remaining");
  67     assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance");
  68
  69     // Free the BlockInfoRecords.
  70     while (!BlockInfoRecords.empty()) {
  71       BlockInfo &Info = BlockInfoRecords.back();
  72       // Free blockinfo abbrev info.
  73       for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
  74            i != e; ++i)
  75         Info.Abbrevs[i]->dropRef();
  76       BlockInfoRecords.pop_back();
  77     }
  78   }
  79
  80   std::vector<unsigned char> &getBuffer() { return Out; }
  81
  82   /// \brief Retrieve the current position in the stream, in bits.
  83   uint64_t GetCurrentBitNo() const { return Out.size() * 8 + CurBit; }
  84
  85   //===--------------------------------------------------------------------===//
  86   // Basic Primitives for emitting bits to the stream.
  87   //===--------------------------------------------------------------------===//
  88
  89   void Emit(uint32_t Val, unsigned NumBits) {
  90     assert(NumBits <= 32 && "Invalid value size!");
  91     assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
  92     CurValue |= Val << CurBit;
  93     if (CurBit + NumBits < 32) {
  94       CurBit += NumBits;
  95       return;
  96     }
  97
  98     // Add the current word.
  99     unsigned V = CurValue;
 100     Out.push_back((unsigned char)(V >>  0));
 101     Out.push_back((unsigned char)(V >>  8));
 102     Out.push_back((unsigned char)(V >> 16));
 103     Out.push_back((unsigned char)(V >> 24));
 104
 105     if (CurBit)
 106       CurValue = Val >> (32-CurBit);
 107     else
 108       CurValue = 0;
 109     CurBit = (CurBit+NumBits) & 31;
 110   }
 111
 112   void Emit64(uint64_t Val, unsigned NumBits) {
 113     if (NumBits <= 32)
 114       Emit((uint32_t)Val, NumBits);
 115     else {
 116       Emit((uint32_t)Val, 32);
 117       Emit((uint32_t)(Val >> 32), NumBits-32);
 118     }
 119   }
 120
 121   void FlushToWord() {
 122     if (CurBit) {
 123       unsigned V = CurValue;
 124       Out.push_back((unsigned char)(V >>  0));
 125       Out.push_back((unsigned char)(V >>  8));
 126       Out.push_back((unsigned char)(V >> 16));
 127       Out.push_back((unsigned char)(V >> 24));
 128       CurBit = 0;
 129       CurValue = 0;
 130     }
 131   }
 132
 133   void EmitVBR(uint32_t Val, unsigned NumBits) {
 134     uint32_t Threshold = 1U << (NumBits-1);
 135
 136     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
 137     while (Val >= Threshold) {
 138       Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits);
 139       Val >>= NumBits-1;
 140     }
 141
 142     Emit(Val, NumBits);
 143   }
 144
 145   void EmitVBR64(uint64_t Val, unsigned NumBits) {
 146     if ((uint32_t)Val == Val)
 147       return EmitVBR((uint32_t)Val, NumBits);
 148
 149     uint64_t Threshold = 1U << (NumBits-1);
 150
 151     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
 152     while (Val >= Threshold) {
 153       Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) |
 154            (1 << (NumBits-1)), NumBits);
 155       Val >>= NumBits-1;
 156     }
 157
 158     Emit((uint32_t)Val, NumBits);
 159   }
 160
 161   /// EmitCode - Emit the specified code.
 162   void EmitCode(unsigned Val) {
 163     Emit(Val, CurCodeSize);
 164   }
 165
 166   // BackpatchWord - Backpatch a 32-bit word in the output with the specified
 167   // value.
 168   void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
 169     Out[ByteNo++] = (unsigned char)(NewWord >>  0);
 170     Out[ByteNo++] = (unsigned char)(NewWord >>  8);
 171     Out[ByteNo++] = (unsigned char)(NewWord >> 16);
 172     Out[ByteNo  ] = (unsigned char)(NewWord >> 24);
 173   }
 174
 175   //===--------------------------------------------------------------------===//
 176   // Block Manipulation
 177   //===--------------------------------------------------------------------===//
 178
 179   /// getBlockInfo - If there is block info for the specified ID, return it,
 180   /// otherwise return null.
 181   BlockInfo *getBlockInfo(unsigned BlockID) {
 182     // Common case, the most recent entry matches BlockID.
 183     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
 184       return &BlockInfoRecords.back();
 185
 186     for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
 187          i != e; ++i)
 188       if (BlockInfoRecords[i].BlockID == BlockID)
 189         return &BlockInfoRecords[i];
 190     return 0;
 191   }
 192
 193   void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
 194     // Block header:
 195     //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
 196     EmitCode(bitc::ENTER_SUBBLOCK);
 197     EmitVBR(BlockID, bitc::BlockIDWidth);
 198     EmitVBR(CodeLen, bitc::CodeLenWidth);
 199     FlushToWord();
 200
 201     unsigned BlockSizeWordLoc = static_cast<unsigned>(Out.size());
 202     unsigned OldCodeSize = CurCodeSize;
 203
 204     // Emit a placeholder, which will be replaced when the block is popped.
 205     Emit(0, bitc::BlockSizeWidth);
 206
 207     CurCodeSize = CodeLen;
 208
 209     // Push the outer block's abbrev set onto the stack, start out with an
 210     // empty abbrev set.
 211     BlockScope.push_back(Block(OldCodeSize, BlockSizeWordLoc/4));
 212     BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
 213
 214     // If there is a blockinfo for this BlockID, add all the predefined abbrevs
 215     // to the abbrev list.
 216     if (BlockInfo *Info = getBlockInfo(BlockID)) {
 217       for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size());
 218            i != e; ++i) {
 219         CurAbbrevs.push_back(Info->Abbrevs[i]);
 220         Info->Abbrevs[i]->addRef();
 221       }
 222     }
 223   }
 224
 225   void ExitBlock() {
 226     assert(!BlockScope.empty() && "Block scope imbalance!");
 227
 228     // Delete all abbrevs.
 229     for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
 230          i != e; ++i)
 231       CurAbbrevs[i]->dropRef();
 232
 233     const Block &B = BlockScope.back();
 234
 235     // Block tail:
 236     //    [END_BLOCK, <align4bytes>]
 237     EmitCode(bitc::END_BLOCK);
 238     FlushToWord();
 239
 240     // Compute the size of the block, in words, not counting the size field.
 241     unsigned SizeInWords= static_cast<unsigned>(Out.size())/4-B.StartSizeWord-1;
 242     unsigned ByteNo = B.StartSizeWord*4;
 243
 244     // Update the block size field in the header of this sub-block.
 245     BackpatchWord(ByteNo, SizeInWords);
 246
 247     // Restore the inner block's code size and abbrev table.
 248     CurCodeSize = B.PrevCodeSize;
 249     BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
 250     BlockScope.pop_back();
 251   }
 252
 253   //===--------------------------------------------------------------------===//
 254   // Record Emission
 255   //===--------------------------------------------------------------------===//
 256
 257 private:
 258   /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev
 259   /// record.  This is a no-op, since the abbrev specifies the literal to use.
 260   template<typename uintty>
 261   void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) {
 262     assert(Op.isLiteral() && "Not a literal");
 263     // If the abbrev specifies the literal value to use, don't emit
 264     // anything.
 265     assert(V == Op.getLiteralValue() &&
 266            "Invalid abbrev for record!");
 267   }
 268
 269   /// EmitAbbreviatedField - Emit a single scalar field value with the specified
 270   /// encoding.
 271   template<typename uintty>
 272   void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) {
 273     assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!");
 274
 275     // Encode the value as we are commanded.
 276     switch (Op.getEncoding()) {
 277     default: assert(0 && "Unknown encoding!");
 278     case BitCodeAbbrevOp::Fixed:
 279       Emit((unsigned)V, (unsigned)Op.getEncodingData());
 280       break;
 281     case BitCodeAbbrevOp::VBR:
 282       EmitVBR64(V, (unsigned)Op.getEncodingData());
 283       break;
 284     case BitCodeAbbrevOp::Char6:
 285       Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6);
 286       break;
 287     }
 288   }
 289
 290   /// EmitRecordWithAbbrevImpl - This is the core implementation of the record
 291   /// emission code.  If BlobData is non-null, then it specifies an array of
 292   /// data that should be emitted as part of the Blob or Array operand that is
 293   /// known to exist at the end of the the record.
 294   template<typename uintty>
 295   void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 296                                 const char *BlobData, unsigned BlobLen) {
 297     unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV;
 298     assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
 299     BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo];
 300
 301     EmitCode(Abbrev);
 302
 303     unsigned RecordIdx = 0;
 304     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
 305          i != e; ++i) {
 306       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
 307       if (Op.isLiteral()) {
 308         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
 309         EmitAbbreviatedLiteral(Op, Vals[RecordIdx]);
 310         ++RecordIdx;
 311       } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
 312         // Array case.
 313         assert(i+2 == e && "array op not second to last?");
 314         const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
 315
 316         // If this record has blob data, emit it, otherwise we must have record
 317         // entries to encode this way.
 318         if (BlobData) {
 319           assert(RecordIdx == Vals.size() &&
 320                  "Blob data and record entries specified for array!");
 321           // Emit a vbr6 to indicate the number of elements present.
 322           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
 323
 324           // Emit each field.
 325           for (unsigned i = 0; i != BlobLen; ++i)
 326             EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]);
 327
 328           // Know that blob data is consumed for assertion below.
 329           BlobData = 0;
 330         } else {
 331           // Emit a vbr6 to indicate the number of elements present.
 332           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
 333
 334           // Emit each field.
 335           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx)
 336             EmitAbbreviatedField(EltEnc, Vals[RecordIdx]);
 337         }
 338       } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
 339         // If this record has blob data, emit it, otherwise we must have record
 340         // entries to encode this way.
 341
 342         // Emit a vbr6 to indicate the number of elements present.
 343         if (BlobData) {
 344           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
 345           assert(RecordIdx == Vals.size() &&
 346                  "Blob data and record entries specified for blob operand!");
 347         } else {
 348           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
 349         }
 350
 351         // Flush to a 32-bit alignment boundary.
 352         FlushToWord();
 353         assert((Out.size() & 3) == 0 && "Not 32-bit aligned");
 354
 355         // Emit each field as a literal byte.
 356         if (BlobData) {
 357           for (unsigned i = 0; i != BlobLen; ++i)
 358             Out.push_back((unsigned char)BlobData[i]);
 359
 360           // Know that blob data is consumed for assertion below.
 361           BlobData = 0;
 362         } else {
 363           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) {
 364             assert(Vals[RecordIdx] < 256 && "Value too large to emit as blob");
 365             Out.push_back((unsigned char)Vals[RecordIdx]);
 366           }
 367         }
 368         // Align end to 32-bits.
 369         while (Out.size() & 3)
 370           Out.push_back(0);
 371
 372       } else {  // Single scalar field.
 373         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
 374         EmitAbbreviatedField(Op, Vals[RecordIdx]);
 375         ++RecordIdx;
 376       }
 377     }
 378     assert(RecordIdx == Vals.size() && "Not all record operands emitted!");
 379     assert(BlobData == 0 &&
 380            "Blob data specified for record that doesn't use it!");
 381   }
 382
 383 public:
 384
 385   /// EmitRecord - Emit the specified record to the stream, using an abbrev if
 386   /// we have one to compress the output.
 387   template<typename uintty>
 388   void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals,
 389                   unsigned Abbrev = 0) {
 390     if (!Abbrev) {
 391       // If we don't have an abbrev to use, emit this in its fully unabbreviated
 392       // form.
 393       EmitCode(bitc::UNABBREV_RECORD);
 394       EmitVBR(Code, 6);
 395       EmitVBR(static_cast<uint32_t>(Vals.size()), 6);
 396       for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i)
 397         EmitVBR64(Vals[i], 6);
 398       return;
 399     }
 400
 401     // Insert the code into Vals to treat it uniformly.
 402     Vals.insert(Vals.begin(), Code);
 403
 404     EmitRecordWithAbbrev(Abbrev, Vals);
 405   }
 406
 407   /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
 408   /// Unlike EmitRecord, the code for the record should be included in Vals as
 409   /// the first entry.
 410   template<typename uintty>
 411   void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) {
 412     EmitRecordWithAbbrevImpl(Abbrev, Vals, 0, 0);
 413   }
 414
 415   /// EmitRecordWithBlob - Emit the specified record to the stream, using an
 416   /// abbrev that includes a blob at the end.  The blob data to emit is
 417   /// specified by the pointer and length specified at the end.  In contrast to
 418   /// EmitRecord, this routine expects that the first entry in Vals is the code
 419   /// of the record.
 420   template<typename uintty>
 421   void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 422                           const char *BlobData, unsigned BlobLen) {
 423     EmitRecordWithAbbrevImpl(Abbrev, Vals, BlobData, BlobLen);
 424   }
 425
 426   /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records
 427   /// that end with an array.
 428   template<typename uintty>
 429   void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 430                           const char *ArrayData, unsigned ArrayLen) {
 431     EmitRecordWithAbbrevImpl(Abbrev, Vals, ArrayData, ArrayLen);
 432   }
 433
 434   //===--------------------------------------------------------------------===//
 435   // Abbrev Emission
 436   //===--------------------------------------------------------------------===//
 437
 438 private:
 439   // Emit the abbreviation as a DEFINE_ABBREV record.
 440   void EncodeAbbrev(BitCodeAbbrev *Abbv) {
 441     EmitCode(bitc::DEFINE_ABBREV);
 442     EmitVBR(Abbv->getNumOperandInfos(), 5);
 443     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
 444          i != e; ++i) {
 445       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
 446       Emit(Op.isLiteral(), 1);
 447       if (Op.isLiteral()) {
 448         EmitVBR64(Op.getLiteralValue(), 8);
 449       } else {
 450         Emit(Op.getEncoding(), 3);
 451         if (Op.hasEncodingData())
 452           EmitVBR64(Op.getEncodingData(), 5);
 453       }
 454     }
 455   }
 456 public:
 457
 458   /// EmitAbbrev - This emits an abbreviation to the stream.  Note that this
 459   /// method takes ownership of the specified abbrev.
 460   unsigned EmitAbbrev(BitCodeAbbrev *Abbv) {
 461     // Emit the abbreviation as a record.
 462     EncodeAbbrev(Abbv);
 463     CurAbbrevs.push_back(Abbv);
 464     return static_cast<unsigned>(CurAbbrevs.size())-1 +
 465       bitc::FIRST_APPLICATION_ABBREV;
 466   }
 467
 468   //===--------------------------------------------------------------------===//
 469   // BlockInfo Block Emission
 470   //===--------------------------------------------------------------------===//
 471
 472   /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
 473   void EnterBlockInfoBlock(unsigned CodeWidth) {
 474     EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth);
 475     BlockInfoCurBID = -1U;
 476   }
 477 private:
 478   /// SwitchToBlockID - If we aren't already talking about the specified block
 479   /// ID, emit a BLOCKINFO_CODE_SETBID record.
 480   void SwitchToBlockID(unsigned BlockID) {
 481     if (BlockInfoCurBID == BlockID) return;
 482     SmallVector<unsigned, 2> V;
 483     V.push_back(BlockID);
 484     EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V);
 485     BlockInfoCurBID = BlockID;
 486   }
 487
 488   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
 489     if (BlockInfo *BI = getBlockInfo(BlockID))
 490       return *BI;
 491
 492     // Otherwise, add a new record.
 493     BlockInfoRecords.push_back(BlockInfo());
 494     BlockInfoRecords.back().BlockID = BlockID;
 495     return BlockInfoRecords.back();
 496   }
 497
 498 public:
 499
 500   /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified
 501   /// BlockID.
 502   unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) {
 503     SwitchToBlockID(BlockID);
 504     EncodeAbbrev(Abbv);
 505
 506     // Add the abbrev to the specified block record.
 507     BlockInfo &Info = getOrCreateBlockInfo(BlockID);
 508     Info.Abbrevs.push_back(Abbv);
 509
 510     return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV;
 511   }
 512 };
 513
 514
 515 } // End llvm namespace
 516
 517 #endif