#ifndef BITSTREAM_READER_H
#define BITSTREAM_READER_H
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Bitcode/BitCodes.h"
#include <climits>
+#include <string>
#include <vector>
+#include "llvm/Support/StreamableMemoryObject.h"
namespace llvm {
class Deserializer;
class BitstreamReader {
- const unsigned char *NextChar;
- const unsigned char *LastChar;
- friend class Deserializer;
+public:
+ /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
+ /// These describe abbreviations that all blocks of the specified ID inherit.
+ struct BlockInfo {
+ unsigned BlockID; // ID that getBlockInfo() matches records against.
+ std::vector<BitCodeAbbrev*> Abbrevs; // dropRef() is called on each when the reader is destroyed.
+ std::string Name; // Set from a BLOCKINFO_CODE_BLOCKNAME record unless names are ignored.
+
+ std::vector<std::pair<unsigned, std::string> > RecordNames; // (Record[0], name) per BLOCKINFO_CODE_SETRECORDNAME record.
+ };
+private:
+ OwningPtr<StreamableMemoryObject> BitcodeBytes;
+
+ std::vector<BlockInfo> BlockInfoRecords;
+
+ /// IgnoreBlockInfoNames - This is set to true if we don't care about the
+ /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
+ /// uses this.
+ bool IgnoreBlockInfoNames;
+
+ BitstreamReader(const BitstreamReader&); // DO NOT IMPLEMENT
+ void operator=(const BitstreamReader&); // DO NOT IMPLEMENT
+public:
+ BitstreamReader() : IgnoreBlockInfoNames(true) {
+ }
+
+ /// Construct a reader over the in-memory bitcode range [Start, End). Block
+ /// info names start out ignored; see CollectBlockInfoNames().
+ BitstreamReader(const unsigned char *Start, const unsigned char *End)
+   : IgnoreBlockInfoNames(true) {
+   init(Start, End);
+ }
+
+ /// Construct a reader over a caller-supplied memory object. The pointer is
+ /// handed to the BitcodeBytes OwningPtr, so the reader owns \p bytes from
+ /// here on. IgnoreBlockInfoNames is initialized like in the other
+ /// constructors; previously it was left indeterminate on this path even
+ /// though isIgnoringBlockInfoNames() reads it.
+ BitstreamReader(StreamableMemoryObject *bytes) : IgnoreBlockInfoNames(true) {
+   BitcodeBytes.reset(bytes);
+ }
+
+ /// init - Point this reader at the in-memory bitcode range [Start, End),
+ /// replacing any memory object installed earlier. The range length must be
+ /// a whole number of 4-byte words.
+ void init(const unsigned char *Start, const unsigned char *End) {
+   assert((End - Start) % 4 == 0 &&
+          "Bitcode stream not a multiple of 4 bytes");
+   BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
+ }
+
+ StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
+
+ /// Drop the reference held on every abbreviation of every block info
+ /// record, newest record first, then discard the records themselves.
+ ~BitstreamReader() {
+   typedef std::vector<BlockInfo>::reverse_iterator RecordIter;
+   typedef std::vector<BitCodeAbbrev*>::iterator AbbrevIter;
+
+   for (RecordIter RI = BlockInfoRecords.rbegin(),
+        RE = BlockInfoRecords.rend(); RI != RE; ++RI) {
+     std::vector<BitCodeAbbrev*> &Abbrevs = RI->Abbrevs;
+     for (AbbrevIter AI = Abbrevs.begin(), AE = Abbrevs.end(); AI != AE; ++AI)
+       (*AI)->dropRef();
+   }
+   BlockInfoRecords.clear();
+ }
+
+ /// CollectBlockInfoNames - This is called by clients that want block/record
+ /// name information.
+ void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
+ /// isIgnoringBlockInfoNames - Return true if block/record names found in the
+ /// BlockInfo block are being skipped (the flag CollectBlockInfoNames()
+ /// clears). Const so it can be queried through a const reader.
+ bool isIgnoringBlockInfoNames() const { return IgnoreBlockInfoNames; }
+
+ //===--------------------------------------------------------------------===//
+ // Block Manipulation
+ //===--------------------------------------------------------------------===//
+
+ /// hasBlockInfoRecords - Return true if we've already read and processed the
+ /// block info block for this Bitstream. We only process it for the first
+ /// cursor that walks over it.
+ bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
+
+ /// getBlockInfo - Look up the block info record for BlockID. Returns null
+ /// when no record with that ID exists.
+ const BlockInfo *getBlockInfo(unsigned BlockID) const {
+   if (BlockInfoRecords.empty())
+     return 0;
+
+   // Fast path: the most recently added record is the usual match.
+   const BlockInfo &Newest = BlockInfoRecords.back();
+   if (Newest.BlockID == BlockID)
+     return &Newest;
+
+   // Slow path: scan front to back and return the first record with this ID.
+   typedef std::vector<BlockInfo>::const_iterator RecordIter;
+   for (RecordIter I = BlockInfoRecords.begin(), E = BlockInfoRecords.end();
+        I != E; ++I)
+     if (I->BlockID == BlockID)
+       return &*I;
+   return 0;
+ }
+
+ /// getOrCreateBlockInfo - Return the record for BlockID, appending a new
+ /// value-initialized record with that ID when none exists yet.
+ BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
+   const BlockInfo *Existing = getBlockInfo(BlockID);
+   if (Existing)
+     return const_cast<BlockInfo&>(*Existing);
+
+   // No record yet: grow the list by one and stamp the new entry with the ID.
+   BlockInfoRecords.resize(BlockInfoRecords.size() + 1);
+   BlockInfo &Fresh = BlockInfoRecords.back();
+   Fresh.BlockID = BlockID;
+   return Fresh;
+ }
+
+};
+class BitstreamCursor {
+ friend class Deserializer;
+ BitstreamReader *BitStream;
+ size_t NextChar;
+
/// CurWord - This is the current data we have pulled from the stream but have
/// not returned to the client.
uint32_t CurWord;
-
+
/// BitsInCurWord - This is the number of bits in CurWord that are valid. This
/// is always from [0...31] inclusive.
unsigned BitsInCurWord;
-
+
// CurCodeSize - This is the declared size of code values used for the current
// block, in bits.
unsigned CurCodeSize;
-
+
/// CurAbbrevs - Abbrevs installed at in this block.
std::vector<BitCodeAbbrev*> CurAbbrevs;
-
+
struct Block {
unsigned PrevCodeSize;
std::vector<BitCodeAbbrev*> PrevAbbrevs;
explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
};
-
+
/// BlockScope - This tracks the codesize of parent blocks.
SmallVector<Block, 8> BlockScope;
-
- /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
- /// These describe abbreviations that all blocks of the specified ID inherit.
- struct BlockInfo {
- unsigned BlockID;
- std::vector<BitCodeAbbrev*> Abbrevs;
- };
- std::vector<BlockInfo> BlockInfoRecords;
-
- /// FirstChar - This remembers the first byte of the stream.
- const unsigned char *FirstChar;
+
public:
- BitstreamReader() {
- NextChar = FirstChar = LastChar = 0;
+ /// Construct a cursor that is not attached to any stream. Every scalar
+ /// member is given a defined value (the same ones init() assigns) so that
+ /// copying or inspecting a default-constructed cursor never reads an
+ /// indeterminate value.
+ BitstreamCursor()
+   : BitStream(0), NextChar(0), CurWord(0), BitsInCurWord(0), CurCodeSize(2) {
+ }
+ /// Copy-construct by starting from the detached state above and delegating
+ /// to operator=, which copies the position and takes its own references on
+ /// the copied abbreviations.
+ BitstreamCursor(const BitstreamCursor &RHS)
+   : BitStream(0), NextChar(0), CurWord(0), BitsInCurWord(0), CurCodeSize(2) {
+   operator=(RHS);
+ }
+
+ explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
+ NextChar = 0;
CurWord = 0;
BitsInCurWord = 0;
- CurCodeSize = 0;
- }
-
- BitstreamReader(const unsigned char *Start, const unsigned char *End) {
- init(Start, End);
+ CurCodeSize = 2;
}
-
- void init(const unsigned char *Start, const unsigned char *End) {
- NextChar = FirstChar = Start;
- LastChar = End;
- assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
+
+ void init(BitstreamReader &R) {
+ freeState();
+
+ BitStream = &R;
+ NextChar = 0;
CurWord = 0;
BitsInCurWord = 0;
CurCodeSize = 2;
}
-
- ~BitstreamReader() {
- // Abbrevs could still exist if the stream was broken. If so, don't leak
- // them.
+
+ ~BitstreamCursor() {
+ freeState();
+ }
+
+ /// Make this cursor a copy of \p RHS: same stream, byte position, buffered
+ /// bits and code size, holding its own reference on every abbreviation in
+ /// the current block and in each enclosing block scope.
+ void operator=(const BitstreamCursor &RHS) {
+   // Self-assignment must be a no-op: freeState() below would drop our
+   // references and clear the very vectors we are about to copy from,
+   // silently discarding this cursor's abbreviation state.
+   if (this == &RHS)
+     return;
+
+   freeState();
+
+   BitStream = RHS.BitStream;
+   NextChar = RHS.NextChar;
+   CurWord = RHS.CurWord;
+   BitsInCurWord = RHS.BitsInCurWord;
+   CurCodeSize = RHS.CurCodeSize;
+
+   // Copy abbreviations, and bump ref counts.
+   CurAbbrevs = RHS.CurAbbrevs;
+   for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
+        i != e; ++i)
+     CurAbbrevs[i]->addRef();
+
+   // Copy block scope and bump ref counts.
+   BlockScope = RHS.BlockScope;
+   for (unsigned S = 0, NumScopes = static_cast<unsigned>(BlockScope.size());
+        S != NumScopes; ++S) {
+     std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
+     for (unsigned i = 0, NumAbbrevs = static_cast<unsigned>(Abbrevs.size());
+          i != NumAbbrevs; ++i)
+       Abbrevs[i]->addRef();
+   }
+ }
+
+ void freeState() {
+ // Free all the Abbrevs.
for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
i != e; ++i)
CurAbbrevs[i]->dropRef();
-
+ CurAbbrevs.clear();
+
+ // Free all the Abbrevs in the block scope.
for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size());
S != e; ++S) {
std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
i != e; ++i)
Abbrevs[i]->dropRef();
}
+ BlockScope.clear();
+ }
+
+ /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
+ unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
+
+ /// isEndPos - Return true if byte offset \p pos is reported as the object
+ /// end by the underlying memory object's isObjectEnd(). Const, like
+ /// canSkipToPos(), since it does not modify the cursor.
+ bool isEndPos(size_t pos) const {
+   return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
+ }
- // Free the BlockInfoRecords.
- while (!BlockInfoRecords.empty()) {
- BlockInfo &Info = BlockInfoRecords.back();
- // Free blockinfo abbrev info.
- for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
- i != e; ++i)
- Info.Abbrevs[i]->dropRef();
- BlockInfoRecords.pop_back();
- }
+ /// canSkipToPos - Return true if the cursor may be moved to byte offset
+ /// \p pos: offset 0 is always allowed, and any other offset is allowed when
+ /// the byte just before it is a valid address (so one past the last byte
+ /// is acceptable).
+ bool canSkipToPos(size_t pos) const {
+   if (pos == 0)
+     return true;
+   const uint64_t PrevByte = static_cast<uint64_t>(pos - 1);
+   return BitStream->getBitcodeBytes().isValidAddress(PrevByte);
+ }
+
+ unsigned char getByte(size_t pos) { // Fetch the single byte at offset pos from the stream's memory object.
+ uint8_t byte = -1; // 0xFF is returned if readByte() stores nothing.
+ BitStream->getBitcodeBytes().readByte(pos, &byte); // NOTE(review): result is ignored - confirm a failed read is benign.
+ return byte;
}
- bool AtEndOfStream() const {
- return NextChar == LastChar && BitsInCurWord == 0;
+ /// getWord - Read the 32-bit word stored at byte offset \p pos. The stream
+ /// stores words least-significant byte first, so the value is assembled
+ /// from individual bytes; copying the raw bytes straight into a uint32_t
+ /// would yield a byte-swapped word on big-endian hosts. Bytes that
+ /// readBytes() does not fill keep their 0xFF value, preserving the previous
+ /// all-ones default.
+ uint32_t getWord(size_t pos) {
+   uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
+   BitStream->getBitcodeBytes().readBytes(pos,
+                                          sizeof(buf),
+                                          buf,
+                                          NULL);
+   return uint32_t(buf[0]) | (uint32_t(buf[1]) << 8) |
+          (uint32_t(buf[2]) << 16) | (uint32_t(buf[3]) << 24);
}
+ bool AtEndOfStream() {
+ return isEndPos(NextChar) && BitsInCurWord == 0;
+ }
+
/// GetCurrentBitNo - Return the bit # of the bit we are reading.
uint64_t GetCurrentBitNo() const {
- return (NextChar-FirstChar)*CHAR_BIT - BitsInCurWord;
+ // Widen the byte offset before multiplying so that a 32-bit size_t cannot
+ // overflow ahead of the conversion to the 64-bit result.
+ return uint64_t(NextChar)*CHAR_BIT - BitsInCurWord;
}
-
+
+ BitstreamReader *getBitStreamReader() {
+ return BitStream;
+ }
+ const BitstreamReader *getBitStreamReader() const {
+ return BitStream;
+ }
+
+
/// JumpToBit - Reset the stream to the specified bit number.
void JumpToBit(uint64_t BitNo) {
uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3;
uintptr_t WordBitNo = uintptr_t(BitNo) & 31;
- assert(ByteNo < (uintptr_t)(LastChar-FirstChar) && "Invalid location");
-
+ assert(canSkipToPos(ByteNo) && "Invalid location");
+
// Move the cursor to the right word.
- NextChar = FirstChar+ByteNo;
+ NextChar = ByteNo;
BitsInCurWord = 0;
CurWord = 0;
-
+
// Skip over any bits that are already consumed.
- if (WordBitNo) {
+ if (WordBitNo)
Read(static_cast<unsigned>(WordBitNo));
- }
}
-
- /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
- unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
-
+
+
uint32_t Read(unsigned NumBits) {
+ assert(NumBits <= 32 && "Cannot return more than 32 bits!");
// If the field is fully contained by CurWord, return it quickly.
if (BitsInCurWord >= NumBits) {
uint32_t R = CurWord & ((1U << NumBits)-1);
}
// If we run out of data, stop at the end of the stream.
- if (NextChar == LastChar) {
+ if (isEndPos(NextChar)) {
CurWord = 0;
BitsInCurWord = 0;
return 0;
unsigned R = CurWord;
// Read the next word from the stream.
- CurWord = (NextChar[0] << 0) | (NextChar[1] << 8) |
- (NextChar[2] << 16) | (NextChar[3] << 24);
+ CurWord = getWord(NextChar);
NextChar += 4;
// Extract NumBits-BitsInCurWord from what we just read.
}
}
+ // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size. The
+ // chunk size of the VBR must still be <= 32 bits though.
uint64_t ReadVBR64(unsigned NumBits) {
- uint64_t Piece = Read(NumBits);
+ uint32_t Piece = Read(NumBits);
if ((Piece & (1U << (NumBits-1))) == 0)
- return Piece;
+ return uint64_t(Piece);
uint64_t Result = 0;
unsigned NextBit = 0;
while (1) {
- Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
+ Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
if ((Piece & (1U << (NumBits-1))) == 0)
return Result;
CurWord = 0;
}
-
unsigned ReadCode() {
return Read(CurCodeSize);
}
- //===--------------------------------------------------------------------===//
- // Block Manipulation
- //===--------------------------------------------------------------------===//
-
-private:
- /// getBlockInfo - If there is block info for the specified ID, return it,
- /// otherwise return null.
- BlockInfo *getBlockInfo(unsigned BlockID) {
- // Common case, the most recent entry matches BlockID.
- if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
- return &BlockInfoRecords.back();
-
- for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
- i != e; ++i)
- if (BlockInfoRecords[i].BlockID == BlockID)
- return &BlockInfoRecords[i];
- return 0;
- }
-public:
-
// Block header:
// [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
// Check that the block wasn't partially defined, and that the offset isn't
// bogus.
- if (AtEndOfStream() || NextChar+NumWords*4 > LastChar)
+ size_t SkipTo = NextChar + NumWords*4;
+ if (AtEndOfStream() || !canSkipToPos(SkipTo))
return true;
- NextChar += NumWords*4;
+ NextChar = SkipTo;
return false;
}
BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
// Add the abbrevs specific to this block to the CurAbbrevs list.
- if (BlockInfo *Info = getBlockInfo(BlockID)) {
+ if (const BitstreamReader::BlockInfo *Info =
+ BitStream->getBlockInfo(BlockID)) {
for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size());
i != e; ++i) {
CurAbbrevs.push_back(Info->Abbrevs[i]);
if (NumWordsP) *NumWordsP = NumWords;
// Validate that this block is sane.
- if (CurCodeSize == 0 || AtEndOfStream() || NextChar+NumWords*4 > LastChar)
+ if (CurCodeSize == 0 || AtEndOfStream())
return true;
return false;
BlockScope.pop_back();
}
- //===--------------------------------------------------------------------===//
+ //===--------------------------------------------------------------------===//
// Record Processing
//===--------------------------------------------------------------------===//
void ReadAbbreviatedField(const BitCodeAbbrevOp &Op,
SmallVectorImpl<uint64_t> &Vals) {
assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
-
+
// Decode the value as we are commanded.
switch (Op.getEncoding()) {
- default: assert(0 && "Unknown encoding!");
+ default: llvm_unreachable("Unknown encoding!");
case BitCodeAbbrevOp::Fixed:
Vals.push_back(Read((unsigned)Op.getEncodingData()));
break;
SkipToWord(); // 32-bit alignment
// Figure out where the end of this blob will be including tail padding.
- const unsigned char *NewEnd = NextChar+((NumElts+3)&~3);
+ size_t NewEnd = NextChar+((NumElts+3)&~3);
// If this would read off the end of the bitcode file, just set the
// record to empty and return.
- if (NewEnd > LastChar) {
+ if (!canSkipToPos(NewEnd)) {
Vals.append(NumElts, 0);
- NextChar = LastChar;
+ NextChar = BitStream->getBitcodeBytes().getExtent();
break;
}
// Otherwise, read the number of bytes. If we can return a reference to
// the data, do so to avoid copying it.
if (BlobStart) {
- *BlobStart = (const char*)NextChar;
+ *BlobStart = (const char*)BitStream->getBitcodeBytes().getPointer(
+ NextChar, NumElts);
*BlobLen = NumElts;
} else {
for (; NumElts; ++NextChar, --NumElts)
- Vals.push_back(*NextChar);
+ Vals.push_back(getByte(NextChar));
}
// Skip over tail padding.
NextChar = NewEnd;
}
CurAbbrevs.push_back(Abbv);
}
-
- //===--------------------------------------------------------------------===//
- // BlockInfo Block Reading
- //===--------------------------------------------------------------------===//
-
-private:
- BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
- if (BlockInfo *BI = getBlockInfo(BlockID))
- return *BI;
-
- // Otherwise, add a new record.
- BlockInfoRecords.push_back(BlockInfo());
- BlockInfoRecords.back().BlockID = BlockID;
- return BlockInfoRecords.back();
- }
-
+
public:
bool ReadBlockInfoBlock() {
+ // If this is the second stream to get to the block info block, skip it.
+ if (BitStream->hasBlockInfoRecords())
+ return SkipBlock();
+
if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
SmallVector<uint64_t, 64> Record;
- BlockInfo *CurBlockInfo = 0;
+ BitstreamReader::BlockInfo *CurBlockInfo = 0;
// Read all the records for this module.
while (1) {
default: break; // Default behavior, ignore unknown content.
case bitc::BLOCKINFO_CODE_SETBID:
if (Record.size() < 1) return true;
- CurBlockInfo = &getOrCreateBlockInfo((unsigned)Record[0]);
+ CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]);
+ break;
+ case bitc::BLOCKINFO_CODE_BLOCKNAME: {
+ if (!CurBlockInfo) return true;
+ if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name.
+ std::string Name;
+ for (unsigned i = 0, e = Record.size(); i != e; ++i)
+ Name += (char)Record[i];
+ CurBlockInfo->Name = Name;
break;
}
+ case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
+   if (!CurBlockInfo) return true;
+   if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name.
+   // Record[0] is stored with the name and the remaining elements are the
+   // name's characters. Reject an empty record: otherwise the loop below
+   // starts at 1 with an end of 0 and walks off the end of Record.
+   if (Record.size() < 1) return true;
+   std::string Name;
+   for (unsigned i = 1, e = Record.size(); i != e; ++i)
+     Name += (char)Record[i];
+   CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0],
+                                                       Name));
+   break;
+ }
+ }
}
}
};
-
+
} // End llvm namespace
#endif