include/llvm/Bitcode/BitstreamWriter.h

   1 //===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This header defines the BitstreamWriter class.  This class can be used to
  11 // write an arbitrary bitstream, regardless of its contents.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef BITSTREAM_WRITER_H
  16 #define BITSTREAM_WRITER_H
  17
  18 #include "llvm/ADT/StringRef.h"
  19 #include "llvm/Bitcode/BitCodes.h"
  20 #include <vector>
  21
  22 namespace llvm {
  23
  24 class BitstreamWriter {
  25   std::vector<unsigned char> &Out;
  26
  27   /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
  28   unsigned CurBit;
  29
  30   /// CurValue - The current value.  Only bits < CurBit are valid.
  31   uint32_t CurValue;
  32
  33   /// CurCodeSize - This is the declared size of code values used for the
  34   /// current block, in bits.
  35   unsigned CurCodeSize;
  36
  37   /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
  38   /// selected BLOCK ID.
  39   unsigned BlockInfoCurBID;
  40
  41   /// CurAbbrevs - Abbrevs installed at in this block.
  42   std::vector<BitCodeAbbrev*> CurAbbrevs;
  43
  44   struct Block {
  45     unsigned PrevCodeSize;
  46     unsigned StartSizeWord;
  47     std::vector<BitCodeAbbrev*> PrevAbbrevs;
  48     Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
  49   };
  50
  51   /// BlockScope - This tracks the current blocks that we have entered.
  52   std::vector<Block> BlockScope;
  53
  54   /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
  55   /// These describe abbreviations that all blocks of the specified ID inherit.
  56   struct BlockInfo {
  57     unsigned BlockID;
  58     std::vector<BitCodeAbbrev*> Abbrevs;
  59   };
  60   std::vector<BlockInfo> BlockInfoRecords;
  61
  62 public:
  63   explicit BitstreamWriter(std::vector<unsigned char> &O)
  64     : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
  65
  66   ~BitstreamWriter() {
  67     assert(CurBit == 0 && "Unflused data remaining");
  68     assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance");
  69
  70     // Free the BlockInfoRecords.
  71     while (!BlockInfoRecords.empty()) {
  72       BlockInfo &Info = BlockInfoRecords.back();
  73       // Free blockinfo abbrev info.
  74       for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
  75            i != e; ++i)
  76         Info.Abbrevs[i]->dropRef();
  77       BlockInfoRecords.pop_back();
  78     }
  79   }
  80
  81   std::vector<unsigned char> &getBuffer() { return Out; }
  82
  83   /// \brief Retrieve the current position in the stream, in bits.
  84   uint64_t GetCurrentBitNo() const { return Out.size() * 8 + CurBit; }
  85
  86   //===--------------------------------------------------------------------===//
  87   // Basic Primitives for emitting bits to the stream.
  88   //===--------------------------------------------------------------------===//
  89
  90   void Emit(uint32_t Val, unsigned NumBits) {
  91     assert(NumBits && NumBits <= 32 && "Invalid value size!");
  92     assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
  93     CurValue |= Val << CurBit;
  94     if (CurBit + NumBits < 32) {
  95       CurBit += NumBits;
  96       return;
  97     }
  98
  99     // Add the current word.
 100     unsigned V = CurValue;
 101     Out.push_back((unsigned char)(V >>  0));
 102     Out.push_back((unsigned char)(V >>  8));
 103     Out.push_back((unsigned char)(V >> 16));
 104     Out.push_back((unsigned char)(V >> 24));
 105
 106     if (CurBit)
 107       CurValue = Val >> (32-CurBit);
 108     else
 109       CurValue = 0;
 110     CurBit = (CurBit+NumBits) & 31;
 111   }
 112
 113   void Emit64(uint64_t Val, unsigned NumBits) {
 114     if (NumBits <= 32)
 115       Emit((uint32_t)Val, NumBits);
 116     else {
 117       Emit((uint32_t)Val, 32);
 118       Emit((uint32_t)(Val >> 32), NumBits-32);
 119     }
 120   }
 121
 122   void FlushToWord() {
 123     if (CurBit) {
 124       unsigned V = CurValue;
 125       Out.push_back((unsigned char)(V >>  0));
 126       Out.push_back((unsigned char)(V >>  8));
 127       Out.push_back((unsigned char)(V >> 16));
 128       Out.push_back((unsigned char)(V >> 24));
 129       CurBit = 0;
 130       CurValue = 0;
 131     }
 132   }
 133
 134   void EmitVBR(uint32_t Val, unsigned NumBits) {
 135     uint32_t Threshold = 1U << (NumBits-1);
 136
 137     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
 138     while (Val >= Threshold) {
 139       Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits);
 140       Val >>= NumBits-1;
 141     }
 142
 143     Emit(Val, NumBits);
 144   }
 145
 146   void EmitVBR64(uint64_t Val, unsigned NumBits) {
 147     if ((uint32_t)Val == Val)
 148       return EmitVBR((uint32_t)Val, NumBits);
 149
 150     uint64_t Threshold = 1U << (NumBits-1);
 151
 152     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
 153     while (Val >= Threshold) {
 154       Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) |
 155            (1 << (NumBits-1)), NumBits);
 156       Val >>= NumBits-1;
 157     }
 158
 159     Emit((uint32_t)Val, NumBits);
 160   }
 161
 162   /// EmitCode - Emit the specified code.
 163   void EmitCode(unsigned Val) {
 164     Emit(Val, CurCodeSize);
 165   }
 166
 167   // BackpatchWord - Backpatch a 32-bit word in the output with the specified
 168   // value.
 169   void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
 170     Out[ByteNo++] = (unsigned char)(NewWord >>  0);
 171     Out[ByteNo++] = (unsigned char)(NewWord >>  8);
 172     Out[ByteNo++] = (unsigned char)(NewWord >> 16);
 173     Out[ByteNo  ] = (unsigned char)(NewWord >> 24);
 174   }
 175
 176   //===--------------------------------------------------------------------===//
 177   // Block Manipulation
 178   //===--------------------------------------------------------------------===//
 179
 180   /// getBlockInfo - If there is block info for the specified ID, return it,
 181   /// otherwise return null.
 182   BlockInfo *getBlockInfo(unsigned BlockID) {
 183     // Common case, the most recent entry matches BlockID.
 184     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
 185       return &BlockInfoRecords.back();
 186
 187     for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
 188          i != e; ++i)
 189       if (BlockInfoRecords[i].BlockID == BlockID)
 190         return &BlockInfoRecords[i];
 191     return 0;
 192   }
 193
 194   void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
 195     // Block header:
 196     //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
 197     EmitCode(bitc::ENTER_SUBBLOCK);
 198     EmitVBR(BlockID, bitc::BlockIDWidth);
 199     EmitVBR(CodeLen, bitc::CodeLenWidth);
 200     FlushToWord();
 201
 202     unsigned BlockSizeWordLoc = static_cast<unsigned>(Out.size());
 203     unsigned OldCodeSize = CurCodeSize;
 204
 205     // Emit a placeholder, which will be replaced when the block is popped.
 206     Emit(0, bitc::BlockSizeWidth);
 207
 208     CurCodeSize = CodeLen;
 209
 210     // Push the outer block's abbrev set onto the stack, start out with an
 211     // empty abbrev set.
 212     BlockScope.push_back(Block(OldCodeSize, BlockSizeWordLoc/4));
 213     BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
 214
 215     // If there is a blockinfo for this BlockID, add all the predefined abbrevs
 216     // to the abbrev list.
 217     if (BlockInfo *Info = getBlockInfo(BlockID)) {
 218       for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size());
 219            i != e; ++i) {
 220         CurAbbrevs.push_back(Info->Abbrevs[i]);
 221         Info->Abbrevs[i]->addRef();
 222       }
 223     }
 224   }
 225
 226   void ExitBlock() {
 227     assert(!BlockScope.empty() && "Block scope imbalance!");
 228
 229     // Delete all abbrevs.
 230     for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
 231          i != e; ++i)
 232       CurAbbrevs[i]->dropRef();
 233
 234     const Block &B = BlockScope.back();
 235
 236     // Block tail:
 237     //    [END_BLOCK, <align4bytes>]
 238     EmitCode(bitc::END_BLOCK);
 239     FlushToWord();
 240
 241     // Compute the size of the block, in words, not counting the size field.
 242     unsigned SizeInWords= static_cast<unsigned>(Out.size())/4-B.StartSizeWord-1;
 243     unsigned ByteNo = B.StartSizeWord*4;
 244
 245     // Update the block size field in the header of this sub-block.
 246     BackpatchWord(ByteNo, SizeInWords);
 247
 248     // Restore the inner block's code size and abbrev table.
 249     CurCodeSize = B.PrevCodeSize;
 250     BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
 251     BlockScope.pop_back();
 252   }
 253
 254   //===--------------------------------------------------------------------===//
 255   // Record Emission
 256   //===--------------------------------------------------------------------===//
 257
 258 private:
 259   /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev
 260   /// record.  This is a no-op, since the abbrev specifies the literal to use.
 261   template<typename uintty>
 262   void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) {
 263     assert(Op.isLiteral() && "Not a literal");
 264     // If the abbrev specifies the literal value to use, don't emit
 265     // anything.
 266     assert(V == Op.getLiteralValue() &&
 267            "Invalid abbrev for record!");
 268   }
 269
 270   /// EmitAbbreviatedField - Emit a single scalar field value with the specified
 271   /// encoding.
 272   template<typename uintty>
 273   void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) {
 274     assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!");
 275
 276     // Encode the value as we are commanded.
 277     switch (Op.getEncoding()) {
 278     default: assert(0 && "Unknown encoding!");
 279     case BitCodeAbbrevOp::Fixed:
 280       if (Op.getEncodingData())
 281         Emit((unsigned)V, (unsigned)Op.getEncodingData());
 282       break;
 283     case BitCodeAbbrevOp::VBR:
 284       if (Op.getEncodingData())
 285         EmitVBR64(V, (unsigned)Op.getEncodingData());
 286       break;
 287     case BitCodeAbbrevOp::Char6:
 288       Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6);
 289       break;
 290     }
 291   }
 292
 293   /// EmitRecordWithAbbrevImpl - This is the core implementation of the record
 294   /// emission code.  If BlobData is non-null, then it specifies an array of
 295   /// data that should be emitted as part of the Blob or Array operand that is
 296   /// known to exist at the end of the record.
 297   template<typename uintty>
 298   void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 299                                 StringRef Blob) {
 300     const char *BlobData = Blob.data();
 301     unsigned BlobLen = (unsigned) Blob.size();
 302     unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV;
 303     assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
 304     BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo];
 305
 306     EmitCode(Abbrev);
 307
 308     unsigned RecordIdx = 0;
 309     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
 310          i != e; ++i) {
 311       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
 312       if (Op.isLiteral()) {
 313         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
 314         EmitAbbreviatedLiteral(Op, Vals[RecordIdx]);
 315         ++RecordIdx;
 316       } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
 317         // Array case.
 318         assert(i+2 == e && "array op not second to last?");
 319         const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
 320
 321         // If this record has blob data, emit it, otherwise we must have record
 322         // entries to encode this way.
 323         if (BlobData) {
 324           assert(RecordIdx == Vals.size() &&
 325                  "Blob data and record entries specified for array!");
 326           // Emit a vbr6 to indicate the number of elements present.
 327           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
 328
 329           // Emit each field.
 330           for (unsigned i = 0; i != BlobLen; ++i)
 331             EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]);
 332
 333           // Know that blob data is consumed for assertion below.
 334           BlobData = 0;
 335         } else {
 336           // Emit a vbr6 to indicate the number of elements present.
 337           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
 338
 339           // Emit each field.
 340           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx)
 341             EmitAbbreviatedField(EltEnc, Vals[RecordIdx]);
 342         }
 343       } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
 344         // If this record has blob data, emit it, otherwise we must have record
 345         // entries to encode this way.
 346
 347         // Emit a vbr6 to indicate the number of elements present.
 348         if (BlobData) {
 349           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
 350           assert(RecordIdx == Vals.size() &&
 351                  "Blob data and record entries specified for blob operand!");
 352         } else {
 353           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
 354         }
 355
 356         // Flush to a 32-bit alignment boundary.
 357         FlushToWord();
 358         assert((Out.size() & 3) == 0 && "Not 32-bit aligned");
 359
 360         // Emit each field as a literal byte.
 361         if (BlobData) {
 362           for (unsigned i = 0; i != BlobLen; ++i)
 363             Out.push_back((unsigned char)BlobData[i]);
 364
 365           // Know that blob data is consumed for assertion below.
 366           BlobData = 0;
 367         } else {
 368           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) {
 369             assert(Vals[RecordIdx] < 256 && "Value too large to emit as blob");
 370             Out.push_back((unsigned char)Vals[RecordIdx]);
 371           }
 372         }
 373         // Align end to 32-bits.
 374         while (Out.size() & 3)
 375           Out.push_back(0);
 376
 377       } else {  // Single scalar field.
 378         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
 379         EmitAbbreviatedField(Op, Vals[RecordIdx]);
 380         ++RecordIdx;
 381       }
 382     }
 383     assert(RecordIdx == Vals.size() && "Not all record operands emitted!");
 384     assert(BlobData == 0 &&
 385            "Blob data specified for record that doesn't use it!");
 386   }
 387
 388 public:
 389
 390   /// EmitRecord - Emit the specified record to the stream, using an abbrev if
 391   /// we have one to compress the output.
 392   template<typename uintty>
 393   void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals,
 394                   unsigned Abbrev = 0) {
 395     if (!Abbrev) {
 396       // If we don't have an abbrev to use, emit this in its fully unabbreviated
 397       // form.
 398       EmitCode(bitc::UNABBREV_RECORD);
 399       EmitVBR(Code, 6);
 400       EmitVBR(static_cast<uint32_t>(Vals.size()), 6);
 401       for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i)
 402         EmitVBR64(Vals[i], 6);
 403       return;
 404     }
 405
 406     // Insert the code into Vals to treat it uniformly.
 407     Vals.insert(Vals.begin(), Code);
 408
 409     EmitRecordWithAbbrev(Abbrev, Vals);
 410   }
 411
 412   /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
 413   /// Unlike EmitRecord, the code for the record should be included in Vals as
 414   /// the first entry.
 415   template<typename uintty>
 416   void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) {
 417     EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef());
 418   }
 419
 420   /// EmitRecordWithBlob - Emit the specified record to the stream, using an
 421   /// abbrev that includes a blob at the end.  The blob data to emit is
 422   /// specified by the pointer and length specified at the end.  In contrast to
 423   /// EmitRecord, this routine expects that the first entry in Vals is the code
 424   /// of the record.
 425   template<typename uintty>
 426   void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 427                           StringRef Blob) {
 428     EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob);
 429   }
 430   template<typename uintty>
 431   void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 432                           const char *BlobData, unsigned BlobLen) {
 433     return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen));
 434   }
 435
 436   /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records
 437   /// that end with an array.
 438   template<typename uintty>
 439   void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 440                           StringRef Array) {
 441     EmitRecordWithAbbrevImpl(Abbrev, Vals, Array);
 442   }
 443   template<typename uintty>
 444   void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 445                           const char *ArrayData, unsigned ArrayLen) {
 446     return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData,
 447                                                             ArrayLen));
 448   }
 449
 450   //===--------------------------------------------------------------------===//
 451   // Abbrev Emission
 452   //===--------------------------------------------------------------------===//
 453
 454 private:
 455   // Emit the abbreviation as a DEFINE_ABBREV record.
 456   void EncodeAbbrev(BitCodeAbbrev *Abbv) {
 457     EmitCode(bitc::DEFINE_ABBREV);
 458     EmitVBR(Abbv->getNumOperandInfos(), 5);
 459     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
 460          i != e; ++i) {
 461       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
 462       Emit(Op.isLiteral(), 1);
 463       if (Op.isLiteral()) {
 464         EmitVBR64(Op.getLiteralValue(), 8);
 465       } else {
 466         Emit(Op.getEncoding(), 3);
 467         if (Op.hasEncodingData())
 468           EmitVBR64(Op.getEncodingData(), 5);
 469       }
 470     }
 471   }
 472 public:
 473
 474   /// EmitAbbrev - This emits an abbreviation to the stream.  Note that this
 475   /// method takes ownership of the specified abbrev.
 476   unsigned EmitAbbrev(BitCodeAbbrev *Abbv) {
 477     // Emit the abbreviation as a record.
 478     EncodeAbbrev(Abbv);
 479     CurAbbrevs.push_back(Abbv);
 480     return static_cast<unsigned>(CurAbbrevs.size())-1 +
 481       bitc::FIRST_APPLICATION_ABBREV;
 482   }
 483
 484   //===--------------------------------------------------------------------===//
 485   // BlockInfo Block Emission
 486   //===--------------------------------------------------------------------===//
 487
 488   /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
 489   void EnterBlockInfoBlock(unsigned CodeWidth) {
 490     EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth);
 491     BlockInfoCurBID = -1U;
 492   }
 493 private:
 494   /// SwitchToBlockID - If we aren't already talking about the specified block
 495   /// ID, emit a BLOCKINFO_CODE_SETBID record.
 496   void SwitchToBlockID(unsigned BlockID) {
 497     if (BlockInfoCurBID == BlockID) return;
 498     SmallVector<unsigned, 2> V;
 499     V.push_back(BlockID);
 500     EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V);
 501     BlockInfoCurBID = BlockID;
 502   }
 503
 504   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
 505     if (BlockInfo *BI = getBlockInfo(BlockID))
 506       return *BI;
 507
 508     // Otherwise, add a new record.
 509     BlockInfoRecords.push_back(BlockInfo());
 510     BlockInfoRecords.back().BlockID = BlockID;
 511     return BlockInfoRecords.back();
 512   }
 513
 514 public:
 515
 516   /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified
 517   /// BlockID.
 518   unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) {
 519     SwitchToBlockID(BlockID);
 520     EncodeAbbrev(Abbv);
 521
 522     // Add the abbrev to the specified block record.
 523     BlockInfo &Info = getOrCreateBlockInfo(BlockID);
 524     Info.Abbrevs.push_back(Abbv);
 525
 526     return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV;
 527   }
 528 };
 529
 530
 531 } // End llvm namespace
 532
 533 #endif