where possible, encode symtab names with 7 bits per char instead of 8. This
author Chris Lattner <sabre@nondot.org>
Fri, 4 May 2007 20:52:02 +0000 (20:52 +0000)
committer Chris Lattner <sabre@nondot.org>
Fri, 4 May 2007 20:52:02 +0000 (20:52 +0000)
shaves 110K off kc++ to 3514K.  Before:

  Block ID #14 (VALUE_SYMTAB):
      Num Instances: 2345
         Total Size: 1.50425e+07b/1.88031e+06B/470077W
       Average Size: 6414.69b/801.837B/200.459W
          % of file: 51.8057
  Tot/Avg SubBlocks: 0/0
    Tot/Avg Abbrevs: 2345/1
    Tot/Avg Records: 120924/51.5667
      % Abbrev Recs: 85.1791

after:

  Block ID #14 (VALUE_SYMTAB):
      Num Instances: 2345
         Total Size: 1.41229e+07b/1.76536e+06B/441341W
       Average Size: 6022.56b/752.82B/188.205W
          % of file: 50.2295
  Tot/Avg SubBlocks: 0/0
    Tot/Avg Abbrevs: 4690/2
    Tot/Avg Records: 120924/51.5667
      % Abbrev Recs: 85.1791

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@36758 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Bitcode/Writer/BitcodeWriter.cpp

index 63d9ae4ae713d7f78ee398e834e4c1f9ea644c4a..172db3ac785725a27446bfa1f36d6bb2b43ec8b1 100644 (file)
 #include "llvm/Support/MathExtras.h"
 using namespace llvm;
 
-static const unsigned CurVersion = 0;
+/// These are manifest constants used by the bitcode writer. They do not need to
+/// be kept in sync with the reader, but need to be consistent within this file.
+enum {
+  CurVersion = 0,
+  
+  // VALUE_SYMTAB_BLOCK abbrev id's.
+  VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+  VST_ENTRY_7_ABBREV
+  
+};
+
 
 static unsigned GetEncodedCastOpcode(unsigned Opcode) {
   switch (Opcode) {
@@ -703,13 +713,25 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST,
   if (VST.empty()) return;
   Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 3);
 
-  // 8-bit fixed width VST_ENTRY strings.
-  BitCodeAbbrev *Abbv = new BitCodeAbbrev();
-  Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
-  unsigned AbbrevID = Stream.EmitAbbrev(Abbv);
+  { // 8-bit fixed width VST_ENTRY strings.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+    if (Stream.EmitAbbrev(Abbv) != VST_ENTRY_8_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
+  
+  { // 7-bit fixed width VST_ENTRY strings.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+    if (Stream.EmitAbbrev(Abbv) != VST_ENTRY_7_ABBREV)
+      assert(0 && "Unexpected abbrev ordering!");
+  }
   
   
   // FIXME: Set up the abbrev, we know how many values there are!
@@ -718,6 +740,18 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST,
   
   for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end();
        SI != SE; ++SI) {
+    
+    const ValueName &Name = *SI;
+    
+    // Figure out the encoding to use for the name.
+    bool is7Bit = true;
+    for (unsigned i = 0, e = Name.getKeyLength(); i != e; ++i)
+      if ((unsigned char)Name.getKeyData()[i] & 128) {
+        is7Bit = false;
+        break;
+      }
+    
+    
     unsigned AbbrevToUse = 0;
     
     // VST_ENTRY:   [valueid, namelen, namechar x N]
@@ -727,12 +761,12 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST,
       Code = bitc::VST_CODE_BBENTRY;
     } else {
       Code = bitc::VST_CODE_ENTRY;
-      AbbrevToUse = AbbrevID;
+      AbbrevToUse = is7Bit ? VST_ENTRY_7_ABBREV : VST_ENTRY_8_ABBREV;
     }
     
     NameVals.push_back(VE.getValueID(SI->getValue()));
-    for (const char *P = SI->getKeyData(),
-         *E = SI->getKeyData()+SI->getKeyLength(); P != E; ++P)
+    for (const char *P = Name.getKeyData(),
+         *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P)
       NameVals.push_back((unsigned char)*P);
     
     // Emit the finished record.