Emit & read more compressed bytecode by not emitting a bytecodeblock for
author Chris Lattner <sabre@nondot.org>
Mon, 1 Dec 2003 07:05:31 +0000 (07:05 +0000)
committer Chris Lattner <sabre@nondot.org>
Mon, 1 Dec 2003 07:05:31 +0000 (07:05 +0000)
each basic block in a function.  Instead, just emit a stream of instructions,
chopping up basic blocks based on when we find terminator instructions.  This
saves a fairly substantial chunk of bytecode space.  In stripped, sample
cases, for example, we get this reduction in size:

197.parser: 163036 -> 137180:  18.8% reduction
254.gap   : 844936 -> 689392:  22.6%
255.vortex: 621724 -> 528444:  17.7%

...

Not bad for something this simple.  :)  Note that this doesn't require a new
bytecode version number at all, though version 1.1 should not need to support
the old format.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@10280 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Bytecode/Reader/Reader.cpp
lib/Bytecode/Reader/ReaderInternals.h
lib/Bytecode/Writer/Writer.cpp
lib/Bytecode/Writer/WriterInternals.h

index ed8f9e804c9aee9e115088058c1005f39f804952..097d548b98e1e819b1f56120b97167dae784a77e 100644 (file)
@@ -188,7 +188,8 @@ Constant *BytecodeParser::getConstantValue(unsigned TypeSlot, unsigned Slot) {
   }
 }
 
-
+/// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one
+/// basicblock at a time.  This method reads in one of the basicblock packets.
 BasicBlock *BytecodeParser::ParseBasicBlock(const unsigned char *&Buf,
                                             const unsigned char *EndBuf,
                                             unsigned BlockNo) {
@@ -207,6 +208,38 @@ BasicBlock *BytecodeParser::ParseBasicBlock(const unsigned char *&Buf,
   return BB;
 }
 
+
+/// ParseInstructionList - Parse all of the BasicBlocks & Instructions in the
+/// body of a function.  In post-1.0 bytecode files, we no longer emit basic
+/// blocks individually, in order to avoid per-basic-block overhead.
+unsigned BytecodeParser::ParseInstructionList(Function *F,
+                                              const unsigned char *&Buf,
+                                              const unsigned char *EndBuf) {
+  unsigned BlockNo = 0;
+  std::vector<unsigned> Args;
+
+  while (Buf < EndBuf) {
+    BasicBlock *BB;
+    if (ParsedBasicBlocks.size() == BlockNo)
+      ParsedBasicBlocks.push_back(BB = new BasicBlock());
+    else if (ParsedBasicBlocks[BlockNo] == 0)
+      BB = ParsedBasicBlocks[BlockNo] = new BasicBlock();
+    else
+      BB = ParsedBasicBlocks[BlockNo];
+    ++BlockNo;
+    F->getBasicBlockList().push_back(BB);
+
+    // Read instructions into this basic block until we get to a terminator
+    while (Buf < EndBuf && !BB->getTerminator())
+      ParseInstruction(Buf, EndBuf, Args, BB);
+
+    if (!BB->getTerminator())
+      throw std::string("Non-terminated basic block found!");
+  }
+
+  return BlockNo;
+}
+
 void BytecodeParser::ParseSymbolTable(const unsigned char *&Buf,
                                       const unsigned char *EndBuf,
                                       SymbolTable *ST,
@@ -345,6 +378,13 @@ void BytecodeParser::materializeFunction(Function* F) {
       break;
     }
 
+    case BytecodeFormat::InstructionList: {
+      BCR_TRACE(2, "BLOCK BytecodeFormat::InstructionList: {\n");
+      if (BlockNum) throw std::string("Already parsed basic blocks!");
+      BlockNum = ParseInstructionList(F, Buf, Buf+Size);
+      break;
+    }
+
     case BytecodeFormat::SymbolTable:
       BCR_TRACE(2, "BLOCK BytecodeFormat::SymbolTable: {\n");
       ParseSymbolTable(Buf, Buf+Size, &F->getSymbolTable(), F);
index aea45c2bed7d6c1c466146c8f04ff3c8c3609286..fd0a1ed0082753182bc5253ecdffc0ae3969e606 100644 (file)
@@ -162,7 +162,9 @@ private:
   BasicBlock *ParseBasicBlock(const unsigned char *&Buf,
                               const unsigned char *End,
                               unsigned BlockNo);
-
+  unsigned ParseInstructionList(Function *F, const unsigned char *&Buf,
+                                const unsigned char *EndBuf);
+  
   void ParseInstruction(const unsigned char *&Buf, const unsigned char *End,
                         std::vector<unsigned> &Args, BasicBlock *BB);
 
index 9c9e1abcdd82576bc6cba12e109d0d809d84056c..7fa22b81017430601786503ca64615abe1ef23be 100644 (file)
@@ -225,9 +225,13 @@ void BytecodeWriter::outputFunction(const Function *F) {
     // Output information about the constants in the function...
     outputConstants(true);
 
-    // Output basic block nodes...
-    for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
-      processBasicBlock(*I);
+    {  // Output all of the instructions in the body of the function
+      BytecodeBlock ILBlock(BytecodeFormat::InstructionList, Out);
+
+      for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E;++BB)
+        for(BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I)
+          processInstruction(*I);
+    }
     
     // If needed, output the symbol table for the function...
     outputSymbolTable(F->getSymbolTable());
@@ -236,14 +240,6 @@ void BytecodeWriter::outputFunction(const Function *F) {
   }
 }
 
-
-void BytecodeWriter::processBasicBlock(const BasicBlock &BB) {
-  BytecodeBlock FunctionBlock(BytecodeFormat::BasicBlock, Out);
-  // Process all the instructions in the bb...
-  for(BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I)
-    processInstruction(*I);
-}
-
 void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) {
   BytecodeBlock FunctionBlock(BytecodeFormat::SymbolTable, Out);
 
index 8cb4bfd8d6cea47e2033a4e607e41d49e499f475..29465157c7c7ffd0f49d0ac910d629d5dead0029 100644 (file)
@@ -36,7 +36,6 @@ public:
 protected:
   void outputConstants(bool isFunction);
   void outputFunction(const Function *F);
-  void processBasicBlock(const BasicBlock &BB);
   void processInstruction(const Instruction &I);
 
 private :