include/llvm/Bitcode/BitstreamWriter.h

   1 //===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This header defines the BitstreamWriter class.  This class can be used to
  11 // write an arbitrary bitstream, regardless of its contents.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef BITSTREAM_WRITER_H
  16 #define BITSTREAM_WRITER_H
  17
  18 #include "llvm/ADT/StringRef.h"
  19 #include "llvm/ADT/SmallVector.h"
  20 #include "llvm/Bitcode/BitCodes.h"
  21 #include <vector>
  22
  23 namespace llvm {
  24
  25 class BitstreamWriter {
  26   SmallVectorImpl<char> &Out;
  27
  28   /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
  29   unsigned CurBit;
  30
  31   /// CurValue - The current value.  Only bits < CurBit are valid.
  32   uint32_t CurValue;
  33
  34   /// CurCodeSize - This is the declared size of code values used for the
  35   /// current block, in bits.
  36   unsigned CurCodeSize;
  37
  38   /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
  39   /// selected BLOCK ID.
  40   unsigned BlockInfoCurBID;
  41
   42   /// CurAbbrevs - Abbrevs installed in this block.
  43   std::vector<BitCodeAbbrev*> CurAbbrevs;
  44
  45   struct Block {
  46     unsigned PrevCodeSize;
  47     unsigned StartSizeWord;
  48     std::vector<BitCodeAbbrev*> PrevAbbrevs;
  49     Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
  50   };
  51
  52   /// BlockScope - This tracks the current blocks that we have entered.
  53   std::vector<Block> BlockScope;
  54
  55   /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
  56   /// These describe abbreviations that all blocks of the specified ID inherit.
   57   struct BlockInfo {
          /// ID of the block kind this record describes.
   58     unsigned BlockID;
          /// Abbreviations registered for BlockID via EmitBlockInfoAbbrev, in
          /// registration order.  ~BitstreamWriter calls dropRef() on each entry.
   59     std::vector<BitCodeAbbrev*> Abbrevs;
   60   };
  61   std::vector<BlockInfo> BlockInfoRecords;
  62
  63   // BackpatchWord - Backpatch a 32-bit word in the output with the specified
  64   // value.
  65   void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
  66     Out[ByteNo++] = (unsigned char)(NewWord >>  0);
  67     Out[ByteNo++] = (unsigned char)(NewWord >>  8);
  68     Out[ByteNo++] = (unsigned char)(NewWord >> 16);
  69     Out[ByteNo  ] = (unsigned char)(NewWord >> 24);
  70   }
  71
  72   void WriteByte(unsigned char Value) {
  73     Out.push_back(Value);
  74   }
  75
  76   void WriteWord(unsigned Value) {
  77     Out.push_back((unsigned char)(Value >>  0));
  78     Out.push_back((unsigned char)(Value >>  8));
  79     Out.push_back((unsigned char)(Value >> 16));
  80     Out.push_back((unsigned char)(Value >> 24));
  81   }
  82
  83   unsigned GetBufferOffset() const {
  84     return Out.size();
  85   }
  86
  87   unsigned GetWordIndex() const {
  88     unsigned Offset = GetBufferOffset();
  89     assert((Offset & 3) == 0 && "Not 32-bit aligned");
  90     return Offset / 4;
  91   }
  92
  93 public:
  94   explicit BitstreamWriter(SmallVectorImpl<char> &O)
  95     : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
  96
  97   ~BitstreamWriter() {
  98     assert(CurBit == 0 && "Unflused data remaining");
  99     assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance");
 100
 101     // Free the BlockInfoRecords.
 102     while (!BlockInfoRecords.empty()) {
 103       BlockInfo &Info = BlockInfoRecords.back();
 104       // Free blockinfo abbrev info.
 105       for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
 106            i != e; ++i)
 107         Info.Abbrevs[i]->dropRef();
 108       BlockInfoRecords.pop_back();
 109     }
 110   }
 111
 112   /// \brief Retrieve the current position in the stream, in bits.
 113   uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; }
 114
 115   //===--------------------------------------------------------------------===//
 116   // Basic Primitives for emitting bits to the stream.
 117   //===--------------------------------------------------------------------===//
 118
 119   void Emit(uint32_t Val, unsigned NumBits) {
 120     assert(NumBits && NumBits <= 32 && "Invalid value size!");
 121     assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
 122     CurValue |= Val << CurBit;
 123     if (CurBit + NumBits < 32) {
 124       CurBit += NumBits;
 125       return;
 126     }
 127
 128     // Add the current word.
 129     WriteWord(CurValue);
 130
 131     if (CurBit)
 132       CurValue = Val >> (32-CurBit);
 133     else
 134       CurValue = 0;
 135     CurBit = (CurBit+NumBits) & 31;
 136   }
 137
 138   void Emit64(uint64_t Val, unsigned NumBits) {
 139     if (NumBits <= 32)
 140       Emit((uint32_t)Val, NumBits);
 141     else {
 142       Emit((uint32_t)Val, 32);
 143       Emit((uint32_t)(Val >> 32), NumBits-32);
 144     }
 145   }
 146
 147   void FlushToWord() {
 148     if (CurBit) {
 149       WriteWord(CurValue);
 150       CurBit = 0;
 151       CurValue = 0;
 152     }
 153   }
 154
 155   void EmitVBR(uint32_t Val, unsigned NumBits) {
 156     uint32_t Threshold = 1U << (NumBits-1);
 157
 158     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
 159     while (Val >= Threshold) {
 160       Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits);
 161       Val >>= NumBits-1;
 162     }
 163
 164     Emit(Val, NumBits);
 165   }
 166
 167   void EmitVBR64(uint64_t Val, unsigned NumBits) {
 168     if ((uint32_t)Val == Val)
 169       return EmitVBR((uint32_t)Val, NumBits);
 170
 171     uint64_t Threshold = 1U << (NumBits-1);
 172
 173     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
 174     while (Val >= Threshold) {
 175       Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) |
 176            (1 << (NumBits-1)), NumBits);
 177       Val >>= NumBits-1;
 178     }
 179
 180     Emit((uint32_t)Val, NumBits);
 181   }
 182
 183   /// EmitCode - Emit the specified code.
 184   void EmitCode(unsigned Val) {
 185     Emit(Val, CurCodeSize);
 186   }
 187
 188   //===--------------------------------------------------------------------===//
 189   // Block Manipulation
 190   //===--------------------------------------------------------------------===//
 191
 192   /// getBlockInfo - If there is block info for the specified ID, return it,
 193   /// otherwise return null.
 194   BlockInfo *getBlockInfo(unsigned BlockID) {
 195     // Common case, the most recent entry matches BlockID.
 196     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
 197       return &BlockInfoRecords.back();
 198
 199     for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
 200          i != e; ++i)
 201       if (BlockInfoRecords[i].BlockID == BlockID)
 202         return &BlockInfoRecords[i];
 203     return 0;
 204   }
 205
 206   void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
 207     // Block header:
 208     //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
 209     EmitCode(bitc::ENTER_SUBBLOCK);
 210     EmitVBR(BlockID, bitc::BlockIDWidth);
 211     EmitVBR(CodeLen, bitc::CodeLenWidth);
 212     FlushToWord();
 213
 214     unsigned BlockSizeWordIndex = GetWordIndex();
 215     unsigned OldCodeSize = CurCodeSize;
 216
 217     // Emit a placeholder, which will be replaced when the block is popped.
 218     Emit(0, bitc::BlockSizeWidth);
 219
 220     CurCodeSize = CodeLen;
 221
 222     // Push the outer block's abbrev set onto the stack, start out with an
 223     // empty abbrev set.
 224     BlockScope.push_back(Block(OldCodeSize, BlockSizeWordIndex));
 225     BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
 226
 227     // If there is a blockinfo for this BlockID, add all the predefined abbrevs
 228     // to the abbrev list.
 229     if (BlockInfo *Info = getBlockInfo(BlockID)) {
 230       for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size());
 231            i != e; ++i) {
 232         CurAbbrevs.push_back(Info->Abbrevs[i]);
 233         Info->Abbrevs[i]->addRef();
 234       }
 235     }
 236   }
 237
 238   void ExitBlock() {
 239     assert(!BlockScope.empty() && "Block scope imbalance!");
 240
 241     // Delete all abbrevs.
 242     for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
 243          i != e; ++i)
 244       CurAbbrevs[i]->dropRef();
 245
 246     const Block &B = BlockScope.back();
 247
 248     // Block tail:
 249     //    [END_BLOCK, <align4bytes>]
 250     EmitCode(bitc::END_BLOCK);
 251     FlushToWord();
 252
 253     // Compute the size of the block, in words, not counting the size field.
 254     unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1;
 255     unsigned ByteNo = B.StartSizeWord*4;
 256
 257     // Update the block size field in the header of this sub-block.
 258     BackpatchWord(ByteNo, SizeInWords);
 259
 260     // Restore the inner block's code size and abbrev table.
 261     CurCodeSize = B.PrevCodeSize;
 262     BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
 263     BlockScope.pop_back();
 264   }
 265
 266   //===--------------------------------------------------------------------===//
 267   // Record Emission
 268   //===--------------------------------------------------------------------===//
 269
 270 private:
 271   /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev
 272   /// record.  This is a no-op, since the abbrev specifies the literal to use.
 273   template<typename uintty>
 274   void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) {
 275     assert(Op.isLiteral() && "Not a literal");
 276     // If the abbrev specifies the literal value to use, don't emit
 277     // anything.
 278     assert(V == Op.getLiteralValue() &&
 279            "Invalid abbrev for record!");
 280   }
 281
 282   /// EmitAbbreviatedField - Emit a single scalar field value with the specified
 283   /// encoding.
 284   template<typename uintty>
 285   void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) {
 286     assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!");
 287
 288     // Encode the value as we are commanded.
 289     switch (Op.getEncoding()) {
 290     default: llvm_unreachable("Unknown encoding!");
 291     case BitCodeAbbrevOp::Fixed:
 292       if (Op.getEncodingData())
 293         Emit((unsigned)V, (unsigned)Op.getEncodingData());
 294       break;
 295     case BitCodeAbbrevOp::VBR:
 296       if (Op.getEncodingData())
 297         EmitVBR64(V, (unsigned)Op.getEncodingData());
 298       break;
 299     case BitCodeAbbrevOp::Char6:
 300       Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6);
 301       break;
 302     }
 303   }
 304
 305   /// EmitRecordWithAbbrevImpl - This is the core implementation of the record
 306   /// emission code.  If BlobData is non-null, then it specifies an array of
 307   /// data that should be emitted as part of the Blob or Array operand that is
 308   /// known to exist at the end of the record.
 309   template<typename uintty>
 310   void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 311                                 StringRef Blob) {
 312     const char *BlobData = Blob.data();
 313     unsigned BlobLen = (unsigned) Blob.size();
 314     unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV;
 315     assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
 316     BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo];
 317
 318     EmitCode(Abbrev);
 319
 320     unsigned RecordIdx = 0;
 321     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
 322          i != e; ++i) {
 323       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
 324       if (Op.isLiteral()) {
 325         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
 326         EmitAbbreviatedLiteral(Op, Vals[RecordIdx]);
 327         ++RecordIdx;
 328       } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
 329         // Array case.
 330         assert(i+2 == e && "array op not second to last?");
 331         const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
 332
 333         // If this record has blob data, emit it, otherwise we must have record
 334         // entries to encode this way.
 335         if (BlobData) {
 336           assert(RecordIdx == Vals.size() &&
 337                  "Blob data and record entries specified for array!");
 338           // Emit a vbr6 to indicate the number of elements present.
 339           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
 340
 341           // Emit each field.
 342           for (unsigned i = 0; i != BlobLen; ++i)
 343             EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]);
 344
 345           // Know that blob data is consumed for assertion below.
 346           BlobData = 0;
 347         } else {
 348           // Emit a vbr6 to indicate the number of elements present.
 349           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
 350
 351           // Emit each field.
 352           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx)
 353             EmitAbbreviatedField(EltEnc, Vals[RecordIdx]);
 354         }
 355       } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
 356         // If this record has blob data, emit it, otherwise we must have record
 357         // entries to encode this way.
 358
 359         // Emit a vbr6 to indicate the number of elements present.
 360         if (BlobData) {
 361           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
 362           assert(RecordIdx == Vals.size() &&
 363                  "Blob data and record entries specified for blob operand!");
 364         } else {
 365           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
 366         }
 367
 368         // Flush to a 32-bit alignment boundary.
 369         FlushToWord();
 370
 371         // Emit each field as a literal byte.
 372         if (BlobData) {
 373           for (unsigned i = 0; i != BlobLen; ++i)
 374             WriteByte((unsigned char)BlobData[i]);
 375
 376           // Know that blob data is consumed for assertion below.
 377           BlobData = 0;
 378         } else {
 379           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) {
 380             assert(Vals[RecordIdx] < 256 && "Value too large to emit as blob");
 381             WriteByte((unsigned char)Vals[RecordIdx]);
 382           }
 383         }
 384
 385         // Align end to 32-bits.
 386         while (GetBufferOffset() & 3)
 387           WriteByte(0);
 388       } else {  // Single scalar field.
 389         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
 390         EmitAbbreviatedField(Op, Vals[RecordIdx]);
 391         ++RecordIdx;
 392       }
 393     }
 394     assert(RecordIdx == Vals.size() && "Not all record operands emitted!");
 395     assert(BlobData == 0 &&
 396            "Blob data specified for record that doesn't use it!");
 397   }
 398
 399 public:
 400
 401   /// EmitRecord - Emit the specified record to the stream, using an abbrev if
 402   /// we have one to compress the output.
 403   template<typename uintty>
 404   void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals,
 405                   unsigned Abbrev = 0) {
 406     if (!Abbrev) {
 407       // If we don't have an abbrev to use, emit this in its fully unabbreviated
 408       // form.
 409       EmitCode(bitc::UNABBREV_RECORD);
 410       EmitVBR(Code, 6);
 411       EmitVBR(static_cast<uint32_t>(Vals.size()), 6);
 412       for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i)
 413         EmitVBR64(Vals[i], 6);
 414       return;
 415     }
 416
 417     // Insert the code into Vals to treat it uniformly.
 418     Vals.insert(Vals.begin(), Code);
 419
 420     EmitRecordWithAbbrev(Abbrev, Vals);
 421   }
 422
 423   /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
 424   /// Unlike EmitRecord, the code for the record should be included in Vals as
 425   /// the first entry.
 426   template<typename uintty>
 427   void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) {
 428     EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef());
 429   }
 430
 431   /// EmitRecordWithBlob - Emit the specified record to the stream, using an
 432   /// abbrev that includes a blob at the end.  The blob data to emit is
 433   /// specified by the pointer and length specified at the end.  In contrast to
 434   /// EmitRecord, this routine expects that the first entry in Vals is the code
 435   /// of the record.
 436   template<typename uintty>
 437   void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 438                           StringRef Blob) {
 439     EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob);
 440   }
 441   template<typename uintty>
 442   void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 443                           const char *BlobData, unsigned BlobLen) {
 444     return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen));
 445   }
 446
 447   /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records
 448   /// that end with an array.
 449   template<typename uintty>
 450   void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 451                           StringRef Array) {
 452     EmitRecordWithAbbrevImpl(Abbrev, Vals, Array);
 453   }
 454   template<typename uintty>
 455   void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
 456                           const char *ArrayData, unsigned ArrayLen) {
 457     return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData,
 458                                                             ArrayLen));
 459   }
 460
 461   //===--------------------------------------------------------------------===//
 462   // Abbrev Emission
 463   //===--------------------------------------------------------------------===//
 464
 465 private:
 466   // Emit the abbreviation as a DEFINE_ABBREV record.
 467   void EncodeAbbrev(BitCodeAbbrev *Abbv) {
 468     EmitCode(bitc::DEFINE_ABBREV);
 469     EmitVBR(Abbv->getNumOperandInfos(), 5);
 470     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
 471          i != e; ++i) {
 472       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
 473       Emit(Op.isLiteral(), 1);
 474       if (Op.isLiteral()) {
 475         EmitVBR64(Op.getLiteralValue(), 8);
 476       } else {
 477         Emit(Op.getEncoding(), 3);
 478         if (Op.hasEncodingData())
 479           EmitVBR64(Op.getEncodingData(), 5);
 480       }
 481     }
 482   }
 483 public:
 484
 485   /// EmitAbbrev - This emits an abbreviation to the stream.  Note that this
 486   /// method takes ownership of the specified abbrev.
 487   unsigned EmitAbbrev(BitCodeAbbrev *Abbv) {
 488     // Emit the abbreviation as a record.
 489     EncodeAbbrev(Abbv);
 490     CurAbbrevs.push_back(Abbv);
 491     return static_cast<unsigned>(CurAbbrevs.size())-1 +
 492       bitc::FIRST_APPLICATION_ABBREV;
 493   }
 494
 495   //===--------------------------------------------------------------------===//
 496   // BlockInfo Block Emission
 497   //===--------------------------------------------------------------------===//
 498
 499   /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
 500   void EnterBlockInfoBlock(unsigned CodeWidth) {
 501     EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth);
 502     BlockInfoCurBID = -1U;
 503   }
 504 private:
 505   /// SwitchToBlockID - If we aren't already talking about the specified block
 506   /// ID, emit a BLOCKINFO_CODE_SETBID record.
 507   void SwitchToBlockID(unsigned BlockID) {
 508     if (BlockInfoCurBID == BlockID) return;
 509     SmallVector<unsigned, 2> V;
 510     V.push_back(BlockID);
 511     EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V);
 512     BlockInfoCurBID = BlockID;
 513   }
 514
 515   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
 516     if (BlockInfo *BI = getBlockInfo(BlockID))
 517       return *BI;
 518
 519     // Otherwise, add a new record.
 520     BlockInfoRecords.push_back(BlockInfo());
 521     BlockInfoRecords.back().BlockID = BlockID;
 522     return BlockInfoRecords.back();
 523   }
 524
 525 public:
 526
 527   /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified
 528   /// BlockID.
 529   unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) {
 530     SwitchToBlockID(BlockID);
 531     EncodeAbbrev(Abbv);
 532
 533     // Add the abbrev to the specified block record.
 534     BlockInfo &Info = getOrCreateBlockInfo(BlockID);
 535     Info.Abbrevs.push_back(Abbv);
 536
 537     return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV;
 538   }
 539 };
 540
 541
 542 } // End llvm namespace
 543
 544 #endif