Cleanups & efficiency improvements

[oota-llvm.git] / lib / VMCore / SlotCalculator.cpp
diff --git a/lib/VMCore/SlotCalculator.cpp b/lib/VMCore/SlotCalculator.cpp

index 0941b7607b3af149c2b49e203bb707024674480b..766b000271223647e7d2a78f0e575d13f0f964e2 100644 (file)
--- a/lib/VMCore/SlotCalculator.cpp
+++ b/lib/VMCore/SlotCalculator.cpp
@@ -1,4 +1,11 @@
  //===-- SlotCalculator.cpp - Calculate what slots values land in ----------===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
  //
  // This file implements a useful analysis step to figure out what numbered 
  // slots values in a program will land in (keeping track of per plane
@@ -11,14 +18,15 @@
  
  #include "llvm/SlotCalculator.h"
  #include "llvm/Analysis/ConstantsScanner.h"
-#include "llvm/Module.h"
-#include "llvm/iOther.h"
-#include "llvm/Constant.h"
+#include "llvm/Constants.h"
  #include "llvm/DerivedTypes.h"
+#include "llvm/iOther.h"
+#include "llvm/Module.h"
  #include "llvm/SymbolTable.h"
-#include "Support/DepthFirstIterator.h"
+#include "Support/PostOrderIterator.h"
  #include "Support/STLExtras.h"
  #include <algorithm>
+using namespace llvm;
  
  #if 0
  #define SC_DEBUG(X) std::cerr << X
@@ -26,32 +34,34 @@
  #define SC_DEBUG(X)
  #endif
  
-SlotCalculator::SlotCalculator(const Module *M, bool IgnoreNamed) {
-  IgnoreNamedNodes = IgnoreNamed;
+SlotCalculator::SlotCalculator(const Module *M, bool buildBytecodeInfo) {
+  BuildBytecodeInfo = buildBytecodeInfo;
    TheModule = M;
  
    // Preload table... Make sure that all of the primitive types are in the table
    // and that their Primitive ID is equal to their slot #
    //
+  SC_DEBUG("Inserting primitive types:\n");
    for (unsigned i = 0; i < Type::FirstDerivedTyID; ++i) {
      assert(Type::getPrimitiveType((Type::PrimitiveID)i));
-    insertVal(Type::getPrimitiveType((Type::PrimitiveID)i), true);
+    insertValue(Type::getPrimitiveType((Type::PrimitiveID)i), true);
    }
  
    if (M == 0) return;   // Empty table...
    processModule();
  }
  
-SlotCalculator::SlotCalculator(const Function *M, bool IgnoreNamed) {
-  IgnoreNamedNodes = IgnoreNamed;
+SlotCalculator::SlotCalculator(const Function *M, bool buildBytecodeInfo) {
+  BuildBytecodeInfo = buildBytecodeInfo;
    TheModule = M ? M->getParent() : 0;
  
    // Preload table... Make sure that all of the primitive types are in the table
    // and that their Primitive ID is equal to their slot #
    //
+  SC_DEBUG("Inserting primitive types:\n");
    for (unsigned i = 0; i < Type::FirstDerivedTyID; ++i) {
      assert(Type::getPrimitiveType((Type::PrimitiveID)i));
-    insertVal(Type::getPrimitiveType((Type::PrimitiveID)i), true);
+    insertValue(Type::getPrimitiveType((Type::PrimitiveID)i), true);
    }
  
    if (TheModule == 0) return;   // Empty table...
@@ -71,30 +81,118 @@ void SlotCalculator::processModule() {
    //
    for (Module::const_giterator I = TheModule->gbegin(), E = TheModule->gend();
         I != E; ++I)
-    insertValue(I);
+    getOrCreateSlot(I);
  
    // Scavenge the types out of the functions, then add the functions themselves
    // to the value table...
    //
    for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
         I != E; ++I)
-    insertValue(I);
+    getOrCreateSlot(I);
  
    // Add all of the module level constants used as initializers
    //
    for (Module::const_giterator I = TheModule->gbegin(), E = TheModule->gend();
         I != E; ++I)
      if (I->hasInitializer())
-      insertValue(I->getInitializer());
+      getOrCreateSlot(I->getInitializer());
+
+  // Now that all global constants have been added, rearrange constant planes
+  // that contain constant strings so that the strings occur at the start of the
+  // plane, not somewhere in the middle.
+  //
+  if (BuildBytecodeInfo) {
+    TypePlane &Types = Table[Type::TypeTyID];
+    for (unsigned plane = 0, e = Table.size(); plane != e; ++plane) {
+      if (const ArrayType *AT = dyn_cast<ArrayType>(Types[plane]))
+        if (AT->getElementType() == Type::SByteTy ||
+            AT->getElementType() == Type::UByteTy) {
+          TypePlane &Plane = Table[plane];
+          unsigned FirstNonStringID = 0;
+          for (unsigned i = 0, e = Plane.size(); i != e; ++i)
+            if (cast<ConstantArray>(Plane[i])->isString()) {
+              // Check to see if we have to shuffle this string around.  If not,
+              // don't do anything.
+              if (i != FirstNonStringID) {
+                // Swap the plane entries....
+                std::swap(Plane[i], Plane[FirstNonStringID]);
+                
+                // Keep the NodeMap up to date.
+                NodeMap[Plane[i]] = i;
+                NodeMap[Plane[FirstNonStringID]] = FirstNonStringID;
+              }
+              ++FirstNonStringID;
+            }
+        }
+    }
+  }
+  
+#if 0
+  // FIXME: Empirically, this causes the bytecode files to get BIGGER, because
+  // it explodes the operand size numbers to be bigger than can be handled
+  // compactly, which offsets the ~40% savings in constant sizes.  Whoops.
+
+  // If we are emitting a bytecode file, scan all of the functions for their
+  // constants, which allows us to emit more compact modules.  This is optional,
+  // and is just used to compactify the constants used by different functions
+  // together.
+  if (BuildBytecodeInfo) {
+    SC_DEBUG("Inserting function constants:\n");
+    for (Module::const_iterator F = TheModule->begin(), E = TheModule->end();
+         F != E; ++F)
+      for_each(constant_begin(F), constant_end(F),
+               bind_obj(this, &SlotCalculator::getOrCreateSlot));
+  }
+#endif
  
    // Insert constants that are named at module level into the slot pool so that
    // the module symbol table can refer to them...
    //
-  if (!IgnoreNamedNodes) {
+  if (BuildBytecodeInfo) {
      SC_DEBUG("Inserting SymbolTable values:\n");
      processSymbolTable(&TheModule->getSymbolTable());
    }
  
+  // Now that we have collected together all of the information relevant to the
+  // module, compactify the type table if it is particularly big and outputting
+  // a bytecode file.  The basic problem we run into is that some programs have
+  // a large number of types, which causes the type field to overflow its size,
+  // which causes instructions to explode in size (particularly call
+  // instructions).  To avoid this behavior, we "sort" the type table so that
+  // all non-value types are pushed to the end of the type table, giving nice
+  // low numbers to the types that can be used by instructions, thus reducing
+  // the amount of explodage we suffer.
+  if (BuildBytecodeInfo && Table[Type::TypeTyID].size() >= 64) {
+    // Scan through the type table moving value types to the start of the table.
+    TypePlane *Types = &Table[Type::TypeTyID];
+    unsigned FirstNonValueTypeID = 0;
+    for (unsigned i = 0, e = Types->size(); i != e; ++i)
+      if (cast<Type>((*Types)[i])->isFirstClassType() ||
+          cast<Type>((*Types)[i])->isPrimitiveType()) {
+        // Check to see if we have to shuffle this type around.  If not, don't
+        // do anything.
+        if (i != FirstNonValueTypeID) {
+          assert(i != Type::TypeTyID && FirstNonValueTypeID != Type::TypeTyID &&
+                 "Cannot move around the type plane!");
+
+          // Swap the type ID's.
+          std::swap((*Types)[i], (*Types)[FirstNonValueTypeID]);
+
+          // Keep the NodeMap up to date.
+          NodeMap[(*Types)[i]] = i;
+          NodeMap[(*Types)[FirstNonValueTypeID]] = FirstNonValueTypeID;
+
+          // When we move a type, make sure to move its value plane as needed.
+          if (Table.size() > FirstNonValueTypeID) {
+            if (Table.size() <= i) Table.resize(i+1);
+            std::swap(Table[i], Table[FirstNonValueTypeID]);
+            Types = &Table[Type::TypeTyID];
+          }
+        }
+        ++FirstNonValueTypeID;
+      }
+  }
+
    SC_DEBUG("end processModule!\n");
  }
  
@@ -105,19 +203,19 @@ void SlotCalculator::processSymbolTable(const SymbolTable *ST) {
    for (SymbolTable::const_iterator I = ST->begin(), E = ST->end(); I != E; ++I)
      for (SymbolTable::type_const_iterator TI = I->second.begin(), 
            TE = I->second.end(); TI != TE; ++TI)
-      insertValue(TI->second);
+      getOrCreateSlot(TI->second);
  }
  
  void SlotCalculator::processSymbolTableConstants(const SymbolTable *ST) {
    for (SymbolTable::const_iterator I = ST->begin(), E = ST->end(); I != E; ++I)
      for (SymbolTable::type_const_iterator TI = I->second.begin(), 
            TE = I->second.end(); TI != TE; ++TI)
-      if (isa<Constant>(TI->second))
-       insertValue(TI->second);
+      if (isa<Constant>(TI->second) || isa<Type>(TI->second))
+       getOrCreateSlot(TI->second);
  }
  
  
-void SlotCalculator::incorporateFunction(const Function *M) {
+void SlotCalculator::incorporateFunction(const Function *F) {
    assert(ModuleLevel.size() == 0 && "Module already incorporated!");
  
    SC_DEBUG("begin processFunction!\n");
@@ -129,50 +227,39 @@ void SlotCalculator::incorporateFunction(const Function *M) {
    SC_DEBUG("Inserting function arguments\n");
  
    // Iterate over function arguments, adding them to the value table...
-  for(Function::const_aiterator I = M->abegin(), E = M->aend(); I != E; ++I)
-    insertValue(I);
+  for(Function::const_aiterator I = F->abegin(), E = F->aend(); I != E; ++I)
+    getOrCreateSlot(I);
  
    // Iterate over all of the instructions in the function, looking for constant
    // values that are referenced.  Add these to the value pools before any
    // nonconstant values.  This will be turned into the constant pool for the
    // bytecode writer.
    //
-  if (!IgnoreNamedNodes) {                // Assembly writer does not need this!
-    SC_DEBUG("Inserting function constants:\n";
-            for (constant_iterator I = constant_begin(M), E = constant_end(M);
-                 I != E; ++I) {
-              std::cerr << "  " << *I->getType() << " " << *I << "\n";
-            });
-
+  if (BuildBytecodeInfo) {                // Assembly writer does not need this!
      // Emit all of the constants that are being used by the instructions in the
      // function...
-    for_each(constant_begin(M), constant_end(M),
-            bind_obj(this, &SlotCalculator::insertValue));
+    for_each(constant_begin(F), constant_end(F),
+            bind_obj(this, &SlotCalculator::getOrCreateSlot));
  
      // If there is a symbol table, it is possible that the user has names for
      // constants that are not being used.  In this case, we will have problems
      // if we don't emit the constants now, because otherwise we will get 
-    // symboltable references to constants not in the output.  Scan for these
+    // symbol table references to constants not in the output.  Scan for these
      // constants now.
      //
-    processSymbolTableConstants(&M->getSymbolTable());
+    processSymbolTableConstants(&F->getSymbolTable());
    }
  
-  SC_DEBUG("Inserting Labels:\n");
-
-  // Iterate over basic blocks, adding them to the value table...
-  for (Function::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
-    insertValue(I);
-
    SC_DEBUG("Inserting Instructions:\n");
  
    // Add all of the instructions to the type planes...
-  for_each(inst_begin(M), inst_end(M),
-          bind_obj(this, &SlotCalculator::insertValue));
-
-  if (!IgnoreNamedNodes) {
-    SC_DEBUG("Inserting SymbolTable values:\n");
-    processSymbolTable(&M->getSymbolTable());
+  for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+    getOrCreateSlot(BB);
+    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
+      getOrCreateSlot(I);
+      if (const VANextInst *VAN = dyn_cast<VANextInst>(I))
+        getOrCreateSlot(VAN->getArgType());
+    }
    }
  
    SC_DEBUG("end processFunction!\n");
@@ -219,35 +306,57 @@ void SlotCalculator::purgeFunction() {
    SC_DEBUG("end purgeFunction!\n");
  }
  
-int SlotCalculator::getValSlot(const Value *D) const {
-  std::map<const Value*, unsigned>::const_iterator I = NodeMap.find(D);
-  if (I == NodeMap.end()) return -1;
- 
-  return (int)I->second;
+int SlotCalculator::getSlot(const Value *V) const {
+  std::map<const Value*, unsigned>::const_iterator I = NodeMap.find(V);
+  if (I != NodeMap.end())
+    return (int)I->second;
+
+  // Do not number ConstantPointerRefs at all.  They are an abomination.
+  if (const ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(V))
+    return getSlot(CPR->getValue());
+
+  return -1;
  }
  
  
-int SlotCalculator::insertValue(const Value *V) {
-  int SlotNo = getValSlot(V);        // Check to see if it's already in!
+int SlotCalculator::getOrCreateSlot(const Value *V) {
+  int SlotNo = getSlot(V);        // Check to see if it's already in!
    if (SlotNo != -1) return SlotNo;
  
+  // Do not number ConstantPointerRefs at all.  They are an abomination.
+  if (const ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(V))
+    return getOrCreateSlot(CPR->getValue());
+
    if (!isa<GlobalValue>(V))
      if (const Constant *C = dyn_cast<Constant>(V)) {
-      // This makes sure that if a constant has uses (for example an array of
-      // const ints), that they are inserted also.
-      //
-      for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
-           I != E; ++I)
-        insertValue(*I);
+      // If we are emitting a bytecode file, do not index the characters that
+      // make up constant strings.  We emit constant strings as special
+      // entities that don't require their individual characters to be emitted.
+      if (!BuildBytecodeInfo || !isa<ConstantArray>(C) ||
+          !cast<ConstantArray>(C)->isString()) {
+        // This makes sure that if a constant has uses (for example an array of
+        // const ints), that they are inserted also.
+        //
+        for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
+             I != E; ++I)
+          getOrCreateSlot(*I);
+      } else {
+        assert(ModuleLevel.empty() &&
+               "How can a constant string be directly accessed in a function?");
+        // Otherwise, if we are emitting a bytecode file and this IS a string,
+        // remember it.
+        if (!C->isNullValue())
+          ConstantStrings.push_back(cast<ConstantArray>(C));
+      }
      }
  
-  return insertVal(V);
+  return insertValue(V);
  }
  
  
-int SlotCalculator::insertVal(const Value *D, bool dontIgnore) {
+int SlotCalculator::insertValue(const Value *D, bool dontIgnore) {
    assert(D && "Can't insert a null value!");
-  assert(getValSlot(D) == -1 && "Value is already in the table!");
+  assert(getSlot(D) == -1 && "Value is already in the table!");
  
    // If this node does not contribute to a plane, or if the node has a 
    // name and we don't want names, then ignore the silly node... Note that types
@@ -255,7 +364,7 @@ int SlotCalculator::insertVal(const Value *D, bool dontIgnore) {
    //
    if (!dontIgnore)                               // Don't ignore nonignorables!
      if (D->getType() == Type::VoidTy ||          // Ignore void type nodes
-       (IgnoreNamedNodes &&                     // Ignore named and constants
+       (!BuildBytecodeInfo &&                   // Ignore named and constants
          (D->hasName() || isa<Constant>(D)) && !isa<Type>(D))) {
        SC_DEBUG("ignored value " << *D << "\n");
        return -1;                  // We do need types unconditionally though
@@ -270,38 +379,38 @@ int SlotCalculator::insertVal(const Value *D, bool dontIgnore) {
      //
      //    global { \2 * } { { \2 }* null }
      //
-    int ResultSlot;
-    if ((ResultSlot = getValSlot(TheTy)) == -1) {
-      ResultSlot = doInsertVal(TheTy);
-      SC_DEBUG("  Inserted type: " << TheTy->getDescription() << " slot=" <<
-              ResultSlot << "\n");
-    }
+    int ResultSlot = doInsertValue(TheTy);
+    SC_DEBUG("  Inserted type: " << TheTy->getDescription() << " slot=" <<
+             ResultSlot << "\n");
  
-    // Loop over any contained types in the definition... in depth first order.
+    // Loop over any contained types in the definition... in post
+    // order.
      //
-    for (df_iterator<const Type*> I = df_begin(TheTy), E = df_end(TheTy);
-         I != E; ++I)
+    for (po_iterator<const Type*> I = po_begin(TheTy), E = po_end(TheTy);
+         I != E; ++I) {
        if (*I != TheTy) {
+        const Type *SubTy = *I;
         // If we haven't seen this sub type before, add it to our type table!
-       const Type *SubTy = *I;
-       if (getValSlot(SubTy) == -1) {
-         SC_DEBUG("  Inserting subtype: " << SubTy->getDescription() << "\n");
-         int Slot = doInsertVal(SubTy);
-         SC_DEBUG("  Inserted subtype: " << SubTy->getDescription() << 
-                  " slot=" << Slot << "\n");
-       }
+        if (getSlot(SubTy) == -1) {
+          SC_DEBUG("  Inserting subtype: " << SubTy->getDescription() << "\n");
+          int Slot = doInsertValue(SubTy);
+          SC_DEBUG("  Inserted subtype: " << SubTy->getDescription() << 
+                   " slot=" << Slot << "\n");
+        }
        }
+    }
      return ResultSlot;
    }
  
    // Okay, everything is happy, actually insert the silly value now...
-  return doInsertVal(D);
+  return doInsertValue(D);
  }
  
  
-// doInsertVal - This is a small helper function to be called only be insertVal.
+// doInsertValue - This is a small helper function to be called only
+// by insertValue.
  //
-int SlotCalculator::doInsertVal(const Value *D) {
+int SlotCalculator::doInsertValue(const Value *D) {
    const Type *Typ = D->getType();
    unsigned Ty;
  
@@ -310,10 +419,10 @@ int SlotCalculator::doInsertVal(const Value *D) {
    //  cerr << "Inserting type '" << cast<Type>(D)->getDescription() << "'!\n";
  
    if (Typ->isDerivedType()) {
-    int ValSlot = getValSlot(Typ);
+    int ValSlot = getSlot(Typ);
      if (ValSlot == -1) {                // Have we already entered this type?
        // Nope, this is the first we have seen the type, process it.
-      ValSlot = insertVal(Typ, true);
+      ValSlot = insertValue(Typ, true);
        assert(ValSlot != -1 && "ProcessType returned -1 for a type?");
      }
      Ty = (unsigned)ValSlot;
@@ -326,7 +435,7 @@ int SlotCalculator::doInsertVal(const Value *D) {
  
    // If this is the first value to get inserted into the type plane, make sure
    // to insert the implicit null value...
-  if (Table[Ty].empty() && Ty >= Type::FirstDerivedTyID && !IgnoreNamedNodes) {
+  if (Table[Ty].empty() && Ty >= Type::FirstDerivedTyID && BuildBytecodeInfo) {
      Value *ZeroInitializer = Constant::getNullValue(Typ);
  
      // If we are pushing zeroinit, it will be handled below.