Enable streaming of bitcode

[oota-llvm.git] / include / llvm / Bitcode / BitstreamReader.h
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h

index d2489d19ac4e71c346068d9817bed22fecccd53c..b7c52f4035e954fda2c7f591789e607e62c8fdc3 100644 (file)
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -15,9 +15,12 @@
  #ifndef BITSTREAM_READER_H
  #define BITSTREAM_READER_H
  
+#include "llvm/ADT/OwningPtr.h"
  #include "llvm/Bitcode/BitCodes.h"
  #include <climits>
+#include <string>
  #include <vector>
+#include "llvm/Support/StreamableMemoryObject.h"
  
  namespace llvm {
  
@@ -30,28 +33,42 @@ public:
    struct BlockInfo {
      unsigned BlockID;
      std::vector<BitCodeAbbrev*> Abbrevs;
+    std::string Name;  // Filled from a BLOCKINFO_CODE_BLOCKNAME record.
+    // (Record[0], name) pairs added by BLOCKINFO_CODE_SETRECORDNAME records.
+    std::vector<std::pair<unsigned, std::string> > RecordNames;
    };
  private:
-  /// FirstChar/LastChar - This remembers the first and last bytes of the
-  /// stream.
-  const unsigned char *FirstChar, *LastChar;
+  OwningPtr<StreamableMemoryObject> BitcodeBytes;
    
    std::vector<BlockInfo> BlockInfoRecords;
  
+  /// IgnoreBlockInfoNames - This is set to true if we don't care about the
+  /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
+  /// uses this.
+  bool IgnoreBlockInfoNames;
+  
+  BitstreamReader(const BitstreamReader&);  // DO NOT IMPLEMENT
+  void operator=(const BitstreamReader&);  // DO NOT IMPLEMENT
  public:
-  BitstreamReader() : FirstChar(0), LastChar(0) {
+  BitstreamReader() : IgnoreBlockInfoNames(true) {
    }
  
    BitstreamReader(const unsigned char *Start, const unsigned char *End) {
+    IgnoreBlockInfoNames = true;
      init(Start, End);
    }
  
+  /// BitstreamReader - Construct a reader over an existing memory object.
+  /// The pointer is stored in the BitcodeBytes OwningPtr. IgnoreBlockInfoNames
+  /// starts out true, as in the other constructors, so that
+  /// isIgnoringBlockInfoNames() never reads an uninitialized flag.
+  BitstreamReader(StreamableMemoryObject *bytes) : IgnoreBlockInfoNames(true) {
+    BitcodeBytes.reset(bytes);
+  }
+
    void init(const unsigned char *Start, const unsigned char *End) {
-    FirstChar = Start;
-    LastChar = End;
      assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
+    BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
    }
  
+  StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
+
    ~BitstreamReader() {
      // Free the BlockInfoRecords.
      while (!BlockInfoRecords.empty()) {
@@ -63,10 +80,12 @@ public:
        BlockInfoRecords.pop_back();
      }
    }
-  
-  const unsigned char *getFirstChar() const { return FirstChar; }
-  const unsigned char *getLastChar() const { return LastChar; }
  
+  /// CollectBlockInfoNames - This is called by clients that want block/record
+  /// name information.
+  void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
+
+  /// isIgnoringBlockInfoNames - Return true if block/record names are being
+  /// skipped.  Const so it can be queried through a const BitstreamReader.
+  bool isIgnoringBlockInfoNames() const { return IgnoreBlockInfoNames; }
+  
    //===--------------------------------------------------------------------===//
    // Block Manipulation
    //===--------------------------------------------------------------------===//
@@ -78,7 +97,7 @@ public:
    
    /// getBlockInfo - If there is block info for the specified ID, return it,
    /// otherwise return null.
-  BlockInfo *getBlockInfo(unsigned BlockID) {
+  const BlockInfo *getBlockInfo(unsigned BlockID) const {
      // Common case, the most recent entry matches BlockID.
      if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
        return &BlockInfoRecords.back();
@@ -91,8 +110,8 @@ public:
    }
  
    BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
-    if (BlockInfo *BI = getBlockInfo(BlockID))
-      return *BI;
+    if (const BlockInfo *BI = getBlockInfo(BlockID))
+      return *const_cast<BlockInfo*>(BI);
  
      // Otherwise, add a new record.
      BlockInfoRecords.push_back(BlockInfo());
@@ -105,7 +124,7 @@ public:
  class BitstreamCursor {
    friend class Deserializer;
    BitstreamReader *BitStream;
-  const unsigned char *NextChar;
+  size_t NextChar;
    
    /// CurWord - This is the current data we have pulled from the stream but have
    /// not returned to the client.
@@ -139,8 +158,7 @@ public:
    }
    
    explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
-    NextChar = R.getFirstChar();
-    assert(NextChar && "Bitstream not initialized yet");
+    NextChar = 0;
      CurWord = 0;
      BitsInCurWord = 0;
      CurCodeSize = 2;
@@ -150,8 +168,7 @@ public:
      freeState();
      
      BitStream = &R;
-    NextChar = R.getFirstChar();
-    assert(NextChar && "Bitstream not initialized yet");
+    NextChar = 0;
      CurWord = 0;
      BitsInCurWord = 0;
      CurCodeSize = 2;
@@ -177,6 +194,7 @@ public:
        CurAbbrevs[i]->addRef();
      
      // Copy block scope and bump ref counts.
+    BlockScope = RHS.BlockScope;
      for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size());
           S != e; ++S) {
        std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
@@ -207,13 +225,45 @@ public:
    /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
    unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
    
-  bool AtEndOfStream() const {
-    return NextChar == BitStream->getLastChar() && BitsInCurWord == 0;
+  /// isEndPos - Ask the underlying bitcode bytes whether byte offset pos is the
+  /// end of the object.  Const, like canSkipToPos: the cursor itself is not
+  /// modified, only the reader it points at is queried.
+  bool isEndPos(size_t pos) const {
+    return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
+  }
+
+  bool canSkipToPos(size_t pos) const {
+    // Offset 0 is always acceptable; any other offset is acceptable when the
+    // byte just before it is a valid address (so one past the end is allowed).
+    if (pos == 0)
+      return true;
+    const uint64_t PrevByte = static_cast<uint64_t>(pos - 1);
+    return BitStream->getBitcodeBytes().isValidAddress(PrevByte);
+  }
+
+  unsigned char getByte(size_t pos) {
+    // Start from all-ones so that is the value returned if readByte leaves
+    // the output untouched.
+    uint8_t Result = 0xFF;
+    BitStream->getBitcodeBytes().readByte(pos, &Result);
+    return Result;
+  }
+
+  /// getWord - Read the four bytes starting at byte offset pos and return them
+  /// as one 32-bit word.  The bytes are combined least-significant first, as
+  /// the pointer-based reader did, so the result does not depend on the byte
+  /// order of the host.
+  uint32_t getWord(size_t pos) {
+    // All-ones is what comes back if readBytes leaves the buffer untouched.
+    uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
+    BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf, NULL);
+    return (uint32_t(buf[0]) <<  0) | (uint32_t(buf[1]) <<  8) |
+           (uint32_t(buf[2]) << 16) | (uint32_t(buf[3]) << 24);
+  }
+
+  /// AtEndOfStream - Return true when NextChar is at the end of the underlying
+  /// bytes and BitsInCurWord is zero.  Kept const, as it was before streaming
+  /// support, so callers holding a const cursor keep compiling.
+  bool AtEndOfStream() const {
+    return BitStream->getBitcodeBytes().isObjectEnd(
+               static_cast<uint64_t>(NextChar)) &&
+           BitsInCurWord == 0;
    }
    
    /// GetCurrentBitNo - Return the bit # of the bit we are reading.
    uint64_t GetCurrentBitNo() const {
-    return (NextChar-BitStream->getFirstChar())*CHAR_BIT - BitsInCurWord;
+    return NextChar*CHAR_BIT - BitsInCurWord;
+  }
+  /// getBitStreamReader - Return the BitstreamReader this cursor reads from.
+  BitstreamReader *getBitStreamReader() {
+    return BitStream;
+  }
+  const BitstreamReader *getBitStreamReader() const {
+    return BitStream;
    }
    
    
@@ -221,12 +271,10 @@ public:
    void JumpToBit(uint64_t BitNo) {
      uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3;
      uintptr_t WordBitNo = uintptr_t(BitNo) & 31;
-    assert(ByteNo <= (uintptr_t)(BitStream->getLastChar()-
-                                 BitStream->getFirstChar()) &&
-           "Invalid location");
+    assert(canSkipToPos(ByteNo) && "Invalid location");
      
      // Move the cursor to the right word.
-    NextChar = BitStream->getFirstChar()+ByteNo;
+    NextChar = ByteNo;
      BitsInCurWord = 0;
      CurWord = 0;
      
@@ -237,6 +285,7 @@ public:
    
    
    uint32_t Read(unsigned NumBits) {
+    assert(NumBits <= 32 && "Cannot return more than 32 bits!");
      // If the field is fully contained by CurWord, return it quickly.
      if (BitsInCurWord >= NumBits) {
        uint32_t R = CurWord & ((1U << NumBits)-1);
@@ -246,7 +295,7 @@ public:
      }
  
      // If we run out of data, stop at the end of the stream.
-    if (NextChar == BitStream->getLastChar()) {
+    if (isEndPos(NextChar)) {
        CurWord = 0;
        BitsInCurWord = 0;
        return 0;
@@ -255,8 +304,7 @@ public:
      unsigned R = CurWord;
  
      // Read the next word from the stream.
-    CurWord = (NextChar[0] <<  0) | (NextChar[1] << 8) |
-              (NextChar[2] << 16) | (NextChar[3] << 24);
+    CurWord = getWord(NextChar);
      NextChar += 4;
  
      // Extract NumBits-BitsInCurWord from what we just read.
@@ -299,15 +347,17 @@ public:
      }
    }
  
+  // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size.  The
+  // chunk size of the VBR must still be <= 32 bits though.
    uint64_t ReadVBR64(unsigned NumBits) {
-    uint64_t Piece = Read(NumBits);
+    uint32_t Piece = Read(NumBits);
      if ((Piece & (1U << (NumBits-1))) == 0)
-      return Piece;
+      return uint64_t(Piece);
  
      uint64_t Result = 0;
      unsigned NextBit = 0;
      while (1) {
-      Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
+      Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
  
        if ((Piece & (1U << (NumBits-1))) == 0)
          return Result;
@@ -348,10 +398,11 @@ public:
  
      // Check that the block wasn't partially defined, and that the offset isn't
      // bogus.
-    if (AtEndOfStream() || NextChar+NumWords*4 > BitStream->getLastChar())
+    size_t SkipTo = NextChar + NumWords*4;
+    if (AtEndOfStream() || !canSkipToPos(SkipTo))
        return true;
  
-    NextChar += NumWords*4;
+    NextChar = SkipTo;
      return false;
    }
  
@@ -363,7 +414,8 @@ public:
      BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
  
      // Add the abbrevs specific to this block to the CurAbbrevs list.
-    if (BitstreamReader::BlockInfo *Info = BitStream->getBlockInfo(BlockID)) {
+    if (const BitstreamReader::BlockInfo *Info =
+          BitStream->getBlockInfo(BlockID)) {
        for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size());
             i != e; ++i) {
          CurAbbrevs.push_back(Info->Abbrevs[i]);
@@ -378,8 +430,7 @@ public:
      if (NumWordsP) *NumWordsP = NumWords;
  
      // Validate that this block is sane.
-    if (CurCodeSize == 0 || AtEndOfStream() ||
-        NextChar+NumWords*4 > BitStream->getLastChar())
+    if (CurCodeSize == 0 || AtEndOfStream())
        return true;
  
      return false;
@@ -424,10 +475,10 @@ private:
    void ReadAbbreviatedField(const BitCodeAbbrevOp &Op,
                              SmallVectorImpl<uint64_t> &Vals) {
      assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
-    
+
      // Decode the value as we are commanded.
      switch (Op.getEncoding()) {
-    default: assert(0 && "Unknown encoding!");
+    default: llvm_unreachable("Unknown encoding!");
      case BitCodeAbbrevOp::Fixed:
        Vals.push_back(Read((unsigned)Op.getEncodingData()));
        break;
@@ -481,24 +532,25 @@ public:
          SkipToWord();  // 32-bit alignment
  
          // Figure out where the end of this blob will be including tail padding.
-        const unsigned char *NewEnd = NextChar+((NumElts+3)&~3);
+        size_t NewEnd = NextChar+((NumElts+3)&~3);
          
          // If this would read off the end of the bitcode file, just set the
          // record to empty and return.
-        if (NewEnd > BitStream->getLastChar()) {
+        if (!canSkipToPos(NewEnd)) {
            Vals.append(NumElts, 0);
-          NextChar = BitStream->getLastChar();
+          NextChar = BitStream->getBitcodeBytes().getExtent();
            break;
          }
          
          // Otherwise, read the number of bytes.  If we can return a reference to
          // the data, do so to avoid copying it.
          if (BlobStart) {
-          *BlobStart = (const char*)NextChar;
+          *BlobStart = (const char*)BitStream->getBitcodeBytes().getPointer(
+              NextChar, NumElts);
            *BlobLen = NumElts;
          } else {
            for (; NumElts; ++NextChar, --NumElts)
-            Vals.push_back(*NextChar);
+            Vals.push_back(getByte(NextChar));
          }
          // Skip over tail padding.
          NextChar = NewEnd;
@@ -585,6 +637,25 @@ public:
          if (Record.size() < 1) return true;
          CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]);
          break;
+      case bitc::BLOCKINFO_CODE_BLOCKNAME: {
+        if (!CurBlockInfo) return true;
+        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
+        std::string Name;
+        for (unsigned i = 0, e = Record.size(); i != e; ++i)
+          Name += (char)Record[i];
+        CurBlockInfo->Name = Name;
+        break;
+      }
+      case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
+        if (!CurBlockInfo) return true;
+        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
+        // Record[0] is paired with the name built from the remaining elements.
+        // An empty record is malformed: Record[0] would be out of bounds, and
+        // the loop below (starts at 1, stops only when i == size) would run
+        // far past the end of Record.
+        if (Record.size() < 1) return true;
+        std::string Name;
+        for (unsigned i = 1, e = Record.size(); i != e; ++i)
+          Name += (char)Record[i];
+        CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0],
+                                                           Name));
+        break;
+      }
        }
      }
    }