Hexagon V60/HVX DFA scheduler support
[oota-llvm.git] / utils / TableGen / DFAPacketizerEmitter.cpp
index 5060b6e9ce7c0b059fac364b563fa3b1a849ef68..fc70e97d7e21d8715ad01741e9b6c52f4f0c926b 100644 (file)
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "dfa-emitter"
+
 #include "CodeGenTarget.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/DFAPacketizerDefs.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
+#include "llvm/Support/Debug.h"
 #include <list>
 #include <map>
 #include <string>
+#include <queue>
 using namespace llvm;
 
+// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
+//
+// dbgsInsnClass - When debugging, print instruction class stages.
+//
+void dbgsInsnClass(const std::vector<unsigned> &InsnClass);
+//
+// dbgsStateInfo - When debugging, print the set of state info.
+//
+void dbgsStateInfo(const std::set<unsigned> &stateInfo);
+//
+// dbgsIndent - When debugging, indent by the specified amount.
+//
+void dbgsIndent(unsigned indent);
+
 //
 // class DFAPacketizerEmitter: class that generates and prints out the DFA
 // for resource tracking.
@@ -37,20 +57,48 @@ private:
   // allInsnClasses is the set of all possible resources consumed by an
   // InstrStage.
   //
-  DenseSet<unsigned> allInsnClasses;
+  std::vector<std::vector<unsigned>> allInsnClasses;
   RecordKeeper &Records;
 
 public:
   DFAPacketizerEmitter(RecordKeeper &R);
 
   //
-  // collectAllInsnClasses: Populate allInsnClasses which is a set of units
+  // collectAllFuncUnits - Construct a map of function unit names to bits.
+  //
+  int collectAllFuncUnits(std::vector<Record*> &ProcItinList,
+                           std::map<std::string, unsigned> &FUNameToBitsMap,
+                           int &maxResources,
+                           raw_ostream &OS);
+
+  //
+  // collectAllComboFuncs - Construct a map from a combo function unit bit to
+  //                        the bits of all included functional units.
+  //
+  int collectAllComboFuncs(std::vector<Record*> &ComboFuncList,
+                           std::map<std::string, unsigned> &FUNameToBitsMap,
+                           std::map<unsigned, unsigned> &ComboBitToBitsMap,
+                           raw_ostream &OS);
+
+  //
+  // collectOneInsnClass - Populate allInsnClasses with one instruction class.
+  //
+  int collectOneInsnClass(const std::string &ProcName,
+                           std::vector<Record*> &ProcItinList,
+                           std::map<std::string, unsigned> &FUNameToBitsMap,
+                           Record *ItinData,
+                           raw_ostream &OS);
+
+  //
+  // collectAllInsnClasses - Populate allInsnClasses which is a set of units
   // used in each stage.
   //
-  void collectAllInsnClasses(const std::string &Name,
-                             Record *ItinData,
-                             unsigned &NStages,
-                             raw_ostream &OS);
+  int collectAllInsnClasses(const std::string &ProcName,
+                           std::vector<Record*> &ProcItinList,
+                           std::map<std::string, unsigned> &FUNameToBitsMap,
+                           std::vector<Record*> &ItinDataList,
+                           int &maxStages,
+                           raw_ostream &OS);
 
   void run(raw_ostream &OS);
 };
@@ -87,7 +135,7 @@ class State {
   const int stateNum;
   mutable bool isInitial;
   mutable std::set<unsigned> stateInfo;
-  typedef std::map<unsigned, const State *> TransitionMap;
+  typedef std::map<std::vector<unsigned>, const State *> TransitionMap;
   mutable TransitionMap Transitions;
 
   State();
@@ -97,28 +145,47 @@ class State {
   }
 
   //
-  // canAddInsnClass - Returns true if an instruction of type InsnClass is a
-  // valid transition from this state, i.e., can an instruction of type InsnClass
-  // be added to the packet represented by this state.
+  // canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass
+  // may be a valid transition from this state i.e., can an instruction of type
+  // InsnClass be added to the packet represented by this state.
+  //
+  // Note that for multiple stages, this quick check does not take into account
+  // any possible resource competition between the stages themselves.  That is
+  // enforced in AddInsnClassStages which checks the cross product of all
+  // stages for resource availability (which is a more involved check).
+  //
+  bool canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
+                        std::map<unsigned, unsigned> &ComboBitToBitsMap) const;
+  //
+  // AddInsnClass - Return all combinations of resource reservation
+  // which are possible from this state (PossibleStates).
   //
   // PossibleStates is the set of valid resource states that ensue from valid
   // transitions.
   //
-  bool canAddInsnClass(unsigned InsnClass) const;
+  void AddInsnClass(std::vector<unsigned> &InsnClass,
+                        std::map<unsigned, unsigned> &ComboBitToBitsMap,
+                        std::set<unsigned> &PossibleStates) const;
   //
-  // AddInsnClass - Return all combinations of resource reservation
+  // AddInsnClassStages - Return all combinations of resource reservation
+  // resulting from the cross product of all stages for this InsnClass
   // which are possible from this state (PossibleStates).
   //
-  void AddInsnClass(unsigned InsnClass, std::set<unsigned> &PossibleStates) const;
-  // 
+  void AddInsnClassStages(std::vector<unsigned> &InsnClass,
+                        std::map<unsigned, unsigned> &ComboBitToBitsMap,
+                        unsigned chkstage, unsigned numstages,
+                        unsigned prevState, unsigned origState,
+                        DenseSet<unsigned> &VisitedResourceStates,
+                        std::set<unsigned> &PossibleStates) const;
+  //
   // addTransition - Add a transition from this state given the input InsnClass
   //
-  void addTransition(unsigned InsnClass, const State *To) const;
+  void addTransition(std::vector<unsigned> InsnClass, const State *To) const;
   //
   // hasTransition - Returns true if there is a transition from this state
   // given the input InsnClass
   //
-  bool hasTransition(unsigned InsnClass) const;
+  bool hasTransition(std::vector<unsigned> InsnClass) const;
 };
 } // End anonymous namespace.
 
@@ -144,10 +211,52 @@ public:
   //
   // writeTable: Print out a table representing the DFA.
   //
-  void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName);
+  void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName,
+                 int numInsnClasses = 0,
+                 int maxResources = 0, int numCombos = 0, int maxStages = 0);
 };
 } // End anonymous namespace.
 
+// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
+//
+// dbgsInsnClass - When debugging, print instruction class stages.
+//
+void dbgsInsnClass(const std::vector<unsigned> &InsnClass) {
+  DEBUG(dbgs() << "InsnClass: ");
+  for (unsigned i = 0; i < InsnClass.size(); ++i) {
+    if (i > 0) {
+      DEBUG(dbgs() << ", ");
+    }
+    DEBUG(dbgs() << "0x" << utohexstr(InsnClass[i]));
+  }
+  DFAInput InsnInput = getDFAInsnInput(InsnClass);
+  DEBUG(dbgs() << " (input: 0x" << utohexstr(InsnInput) << ")");
+}
+
+//
+// dbgsStateInfo - When debugging, print the set of state info.
+//
+void dbgsStateInfo(const std::set<unsigned> &stateInfo) {
+  DEBUG(dbgs() << "StateInfo: ");
+  unsigned i = 0;
+  for (std::set<unsigned>::iterator SI = stateInfo.begin();
+       SI != stateInfo.end(); ++SI, ++i) {
+    unsigned thisState = *SI;
+    if (i > 0) {
+      DEBUG(dbgs() << ", ");
+    }
+    DEBUG(dbgs() << "0x" << utohexstr(thisState));
+  }
+}
+
+//
+// dbgsIndent - When debugging, indent by the specified amount.
+//
+void dbgsIndent(unsigned indent) {
+  for (unsigned i = 0; i < indent; ++i) {
+    DEBUG(dbgs() << " ");
+  }
+}
 
 //
 // Constructors and destructors for State and DFA
@@ -157,10 +266,11 @@ State::State() :
 
 DFA::DFA(): currentState(nullptr) {}
 
-// 
+//
 // addTransition - Add a transition from this state given the input InsnClass
 //
-void State::addTransition(unsigned InsnClass, const State *To) const {
+void State::addTransition(std::vector<unsigned> InsnClass, const State *To)
+      const {
   assert(!Transitions.count(InsnClass) &&
       "Cannot have multiple transitions for the same input");
   Transitions[InsnClass] = To;
@@ -170,7 +280,7 @@ void State::addTransition(unsigned InsnClass, const State *To) const {
 // hasTransition - Returns true if there is a transition from this state
 // given the input InsnClass
 //
-bool State::hasTransition(unsigned InsnClass) const {
+bool State::hasTransition(std::vector<unsigned> InsnClass) const {
   return Transitions.count(InsnClass) > 0;
 }
 
@@ -178,61 +288,167 @@ bool State::hasTransition(unsigned InsnClass) const {
 // AddInsnClass - Return all combinations of resource reservation
 // which are possible from this state (PossibleStates).
 //
-void State::AddInsnClass(unsigned InsnClass,
-                            std::set<unsigned> &PossibleStates) const {
+// PossibleStates is the set of valid resource states that ensue from valid
+// transitions.
+//
+void State::AddInsnClass(std::vector<unsigned> &InsnClass,
+                        std::map<unsigned, unsigned> &ComboBitToBitsMap,
+                        std::set<unsigned> &PossibleStates) const {
   //
   // Iterate over all resource states in currentState.
   //
+  unsigned numstages = InsnClass.size();
+  assert((numstages > 0) && "InsnClass has no stages");
 
   for (std::set<unsigned>::iterator SI = stateInfo.begin();
        SI != stateInfo.end(); ++SI) {
     unsigned thisState = *SI;
 
-    //
-    // Iterate over all possible resources used in InsnClass.
-    // For ex: for InsnClass = 0x11, all resources = {0x01, 0x10}.
-    //
-
     DenseSet<unsigned> VisitedResourceStates;
-    for (unsigned int j = 0; j < sizeof(InsnClass) * 8; ++j) {
-      if ((0x1 << j) & InsnClass) {
-        //
-        // For each possible resource used in InsnClass, generate the
-        // resource state if that resource was used.
-        //
-        unsigned ResultingResourceState = thisState | (0x1 << j);
+
+    DEBUG(dbgs() << "  thisState: 0x" << utohexstr(thisState) << "\n");
+    AddInsnClassStages(InsnClass, ComboBitToBitsMap,
+                                numstages - 1, numstages,
+                                thisState, thisState,
+                                VisitedResourceStates, PossibleStates);
+  }
+}
+
+void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
+                        std::map<unsigned, unsigned> &ComboBitToBitsMap,
+                        unsigned chkstage, unsigned numstages,
+                        unsigned prevState, unsigned origState,
+                        DenseSet<unsigned> &VisitedResourceStates,
+                        std::set<unsigned> &PossibleStates) const {
+
+  assert((chkstage < numstages) && "AddInsnClassStages: stage out of range");
+  unsigned thisStage = InsnClass[chkstage];
+
+  dbgsIndent((1 + numstages - chkstage) << 1);
+  DEBUG(dbgs() << "AddInsnClassStages " << chkstage
+               << " (0x" << utohexstr(thisStage) << ") from ");
+  dbgsInsnClass(InsnClass);
+  DEBUG(dbgs() << "\n");
+
+  //
+  // Iterate over all possible resources used in thisStage.
+  // For ex: for thisStage = 0x11, all resources = {0x01, 0x10}.
+  //
+  for (unsigned int j = 0; j < DFA_MAX_RESOURCES; ++j) {
+    unsigned resourceMask = (0x1 << j);
+    if (resourceMask & thisStage) {
+      unsigned combo = ComboBitToBitsMap[resourceMask];
+      if (combo && ((~prevState & combo) != combo)) {
+        DEBUG(dbgs() << "\tSkipped Add 0x" << utohexstr(prevState)
+                     << " - combo op 0x" << utohexstr(resourceMask)
+                     << " (0x" << utohexstr(combo) <<") cannot be scheduled\n");
+        continue;
+      }
+      //
+      // For each possible resource used in thisStage, generate the
+      // resource state if that resource was used.
+      //
+      unsigned ResultingResourceState = prevState | resourceMask | combo;
+      dbgsIndent((2 + numstages - chkstage) << 1);
+      DEBUG(dbgs() << "0x" << utohexstr(prevState)
+                   << " | 0x" << utohexstr(resourceMask));
+      if (combo) {
+        DEBUG(dbgs() << " | 0x" << utohexstr(combo));
+      }
+      DEBUG(dbgs() << " = 0x" << utohexstr(ResultingResourceState) << " ");
+
+      //
+      // If this is the final stage for this class
+      //
+      if (chkstage == 0) {
         //
         // Check if the resulting resource state can be accommodated in this
         // packet.
-        // We compute ResultingResourceState OR thisState.
-        // If the result of the OR is different than thisState, it implies
+        // We compute resource OR prevState (originally started as origState).
+        // If the result of the OR is different than origState, it implies
         // that there is at least one resource that can be used to schedule
-        // InsnClass in the current packet.
+        // thisStage in the current packet.
         // Insert ResultingResourceState into PossibleStates only if we haven't
         // processed ResultingResourceState before.
         //
-        if ((ResultingResourceState != thisState) &&
-            (VisitedResourceStates.count(ResultingResourceState) == 0)) {
-          VisitedResourceStates.insert(ResultingResourceState);
-          PossibleStates.insert(ResultingResourceState);
+        if (ResultingResourceState != prevState) {
+          if (VisitedResourceStates.count(ResultingResourceState) == 0) {
+            VisitedResourceStates.insert(ResultingResourceState);
+            PossibleStates.insert(ResultingResourceState);
+            DEBUG(dbgs() << "\tResultingResourceState: 0x"
+                         << utohexstr(ResultingResourceState) << "\n");
+          } else {
+            DEBUG(dbgs() << "\tSkipped Add - state already seen\n");
+          }
+        } else {
+          DEBUG(dbgs() << "\tSkipped Add - no final resources available\n");
+        }
+      } else {
+        //
+        // If the current resource can be accommodated, check the next
+        // stage in InsnClass for available resources.
+        //
+        if (ResultingResourceState != prevState) {
+          DEBUG(dbgs() << "\n");
+          AddInsnClassStages(InsnClass, ComboBitToBitsMap,
+                                chkstage - 1, numstages,
+                                ResultingResourceState, origState,
+                                VisitedResourceStates, PossibleStates);
+        } else {
+          DEBUG(dbgs() << "\tSkipped Add - no resources available\n");
         }
       }
     }
   }
-
 }
 
 
 //
-// canAddInsnClass - Quickly verifies if an instruction of type InsnClass is a
-// valid transition from this state i.e., can an instruction of type InsnClass
-// be added to the packet represented by this state.
+// canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass
+// may be a valid transition from this state i.e., can an instruction of type
+// InsnClass be added to the packet represented by this state.
 //
-bool State::canAddInsnClass(unsigned InsnClass) const {
+// Note that this routine is performing conservative checks that can be
+// quickly executed acting as a filter before calling AddInsnClassStages.
+// Any cases allowed through here will be caught later in AddInsnClassStages
+// which performs the more expensive exact check.
+//
+bool State::canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
+                    std::map<unsigned, unsigned> &ComboBitToBitsMap) const {
   for (std::set<unsigned>::const_iterator SI = stateInfo.begin();
        SI != stateInfo.end(); ++SI) {
-    if (~*SI & InsnClass)
+
+    // Check to see if all required resources are available.
+    bool available = true;
+
+    // Inspect each stage independently.
+    // note: This is a conservative check as we aren't checking for
+    //       possible resource competition between the stages themselves
+    //       The full cross product is examined later in AddInsnClass.
+    for (unsigned i = 0; i < InsnClass.size(); ++i) {
+      unsigned resources = *SI;
+      if ((~resources & InsnClass[i]) == 0) {
+        available = false;
+        break;
+      }
+      // Make sure _all_ resources for a combo function are available.
+      // note: This is a quick conservative check as it won't catch an
+      //       unscheduleable combo if this stage is an OR expression
+      //       containing a combo.
+      //       These cases are caught later in AddInsnClass.
+      unsigned combo = ComboBitToBitsMap[InsnClass[i]];
+      if (combo && ((~resources & combo) != combo)) {
+        DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x" << utohexstr(resources)
+                     << " - combo op 0x" << utohexstr(InsnClass[i])
+                     << " (0x" << utohexstr(combo) <<") cannot be scheduled\n");
+        available = false;
+        break;
+      }
+    }
+
+    if (available) {
       return true;
+    }
   }
   return false;
 }
@@ -244,7 +460,6 @@ const State &DFA::newState() {
   return *IterPair.first;
 }
 
-
 int State::currentStateNum = 0;
 
 DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
@@ -263,57 +478,100 @@ DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
 //                         the ith state.
 //
 //
-void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
-  static const std::string SentinelEntry = "{-1, -1}";
-  DFA::StateSet::iterator SI = states.begin();
+void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName,
+                           int numInsnClasses,
+                           int maxResources, int numCombos, int maxStages) {
+
+  unsigned numStates = states.size();
+
+  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
+  DEBUG(dbgs() << "writeTableAndAPI\n");
+  DEBUG(dbgs() << "Total states: " << numStates << "\n");
+
+  OS << "namespace llvm {\n";
+
+  OS << "\n// Input format:\n";
+  OS << "#define DFA_MAX_RESTERMS        " << DFA_MAX_RESTERMS
+     << "\t// maximum AND'ed resource terms\n";
+  OS << "#define DFA_MAX_RESOURCES       " << DFA_MAX_RESOURCES
+     << "\t// maximum resource bits in one term\n";
+
+  OS << "\n// " << TargetName << "DFAStateInputTable[][2] = "
+     << "pairs of <Input, NextState> for all valid\n";
+  OS << "//                           transitions.\n";
+  OS << "// " << numStates << "\tstates\n";
+  OS << "// " << numInsnClasses << "\tinstruction classes\n";
+  OS << "// " << maxResources << "\tresources max\n";
+  OS << "// " << numCombos << "\tcombo resources\n";
+  OS << "// " << maxStages << "\tstages max\n";
+  OS << "const " << DFA_TBLTYPE << " "
+     << TargetName << "DFAStateInputTable[][2] = {\n";
+
   // This table provides a map to the beginning of the transitions for State s
   // in DFAStateInputTable.
-  std::vector<int> StateEntry(states.size());
-
-  OS << "namespace llvm {\n\n";
-  OS << "const int " << TargetName << "DFAStateInputTable[][2] = {\n";
+  std::vector<int> StateEntry(numStates+1);
+  static const std::string SentinelEntry = "{-1, -1}";
 
   // Tracks the total valid transitions encountered so far. It is used
   // to construct the StateEntry table.
   int ValidTransitions = 0;
-  for (unsigned i = 0; i < states.size(); ++i, ++SI) {
+  DFA::StateSet::iterator SI = states.begin();
+  for (unsigned i = 0; i < numStates; ++i, ++SI) {
     assert ((SI->stateNum == (int) i) && "Mismatch in state numbers");
     StateEntry[i] = ValidTransitions;
     for (State::TransitionMap::iterator
         II = SI->Transitions.begin(), IE = SI->Transitions.end();
         II != IE; ++II) {
-      OS << "{" << II->first << ", "
+      OS << "{0x" << utohexstr(getDFAInsnInput(II->first)) << ", "
          << II->second->stateNum
-         << "},    ";
+         << "},\t";
     }
     ValidTransitions += SI->Transitions.size();
 
     // If there are no valid transitions from this stage, we need a sentinel
     // transition.
     if (ValidTransitions == StateEntry[i]) {
-      OS << SentinelEntry << ",";
+      OS << SentinelEntry << ",\t";
       ++ValidTransitions;
     }
 
+    OS << " // state " << i << ": " << StateEntry[i];
+    if (StateEntry[i] != (ValidTransitions-1)) {   // More than one transition.
+       OS << "-" << (ValidTransitions-1);
+    }
     OS << "\n";
   }
 
   // Print out a sentinel entry at the end of the StateInputTable. This is
   // needed to iterate over StateInputTable in DFAPacketizer::ReadTable()
-  OS << SentinelEntry << "\n";
-  
+  OS << SentinelEntry << "\t";
+  OS << " // state " << numStates << ": " << ValidTransitions;
+  OS << "\n";
+
   OS << "};\n\n";
+  OS << "// " << TargetName << "DFAStateEntryTable[i] = "
+     << "Index of the first entry in DFAStateInputTable for\n";
+  OS << "//                         "
+     << "the ith state.\n";
+  OS << "// " << numStates << " states\n";
   OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n";
 
   // Multiply i by 2 since each entry in DFAStateInputTable is a set of
   // two numbers.
-  for (unsigned i = 0; i < states.size(); ++i)
+  unsigned lastState = 0;
+  for (unsigned i = 0; i < numStates; ++i) {
+    if (i && ((i % 10) == 0)) {
+        lastState = i-1;
+        OS << "   // states " << (i-10) << ":" << lastState << "\n";
+    }
     OS << StateEntry[i] << ", ";
+  }
 
   // Print out the index to the sentinel entry in StateInputTable
   OS << ValidTransitions << ", ";
+  OS << "   // states " << (lastState+1) << ":" << numStates << "\n";
 
-  OS << "\n};\n";
+  OS << "};\n";
   OS << "} // namespace\n";
 
 
@@ -332,40 +590,123 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
 
 
 //
-// collectAllInsnClasses - Populate allInsnClasses which is a set of units
-// used in each stage.
+// collectAllFuncUnits - Construct a map of function unit names to bits.
 //
-void DFAPacketizerEmitter::collectAllInsnClasses(const std::string &Name,
-                                  Record *ItinData,
-                                  unsigned &NStages,
-                                  raw_ostream &OS) {
-  // Collect processor itineraries.
-  std::vector<Record*> ProcItinList =
-    Records.getAllDerivedDefinitions("ProcessorItineraries");
-
-  // If just no itinerary then don't bother.
-  if (ProcItinList.size() < 2)
-    return;
-  std::map<std::string, unsigned> NameToBitsMap;
+int DFAPacketizerEmitter::collectAllFuncUnits(
+                            std::vector<Record*> &ProcItinList,
+                            std::map<std::string, unsigned> &FUNameToBitsMap,
+                            int &maxFUs,
+                            raw_ostream &OS) {
+  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
+  DEBUG(dbgs() << "collectAllFuncUnits");
+  DEBUG(dbgs() << " (" << ProcItinList.size() << " itineraries)\n");
 
+  int totalFUs = 0;
   // Parse functional units for all the itineraries.
   for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) {
     Record *Proc = ProcItinList[i];
+    const std::string &ProcName = Proc->getName();
     std::vector<Record*> FUs = Proc->getValueAsListOfDefs("FU");
 
+    DEBUG(dbgs() << "    FU:" << i
+                 << " (" << FUs.size() << " FUs) "
+                 << ProcName);
+
+
+    // Convert macros to bits for each stage.
+    unsigned numFUs = FUs.size();
+    for (unsigned j = 0; j < numFUs; ++j) {
+      assert ((j < DFA_MAX_RESOURCES) &&
+                      "Exceeded maximum number of representable resources");
+      unsigned FuncResources = (unsigned) (1U << j);
+      FUNameToBitsMap[FUs[j]->getName()] = FuncResources;
+      DEBUG(dbgs() << " " << FUs[j]->getName()
+                   << ":0x" << utohexstr(FuncResources));
+    }
+    if (((int) numFUs) > maxFUs) {
+      maxFUs = numFUs;
+    }
+    totalFUs += numFUs;
+    DEBUG(dbgs() << "\n");
+  }
+  return totalFUs;
+}
+
+//
+// collectAllComboFuncs - Construct a map from a combo function unit bit to
+//                        the bits of all included functional units.
+//
+int DFAPacketizerEmitter::collectAllComboFuncs(
+                            std::vector<Record*> &ComboFuncList,
+                            std::map<std::string, unsigned> &FUNameToBitsMap,
+                            std::map<unsigned, unsigned> &ComboBitToBitsMap,
+                            raw_ostream &OS) {
+  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
+  DEBUG(dbgs() << "collectAllComboFuncs");
+  DEBUG(dbgs() << " (" << ComboFuncList.size() << " sets)\n");
+
+  int numCombos = 0;
+  for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) {
+    Record *Func = ComboFuncList[i];
+    const std::string &ProcName = Func->getName();
+    std::vector<Record*> FUs = Func->getValueAsListOfDefs("CFD");
+
+    DEBUG(dbgs() << "    CFD:" << i
+                 << " (" << FUs.size() << " combo FUs) "
+                 << ProcName << "\n");
+
     // Convert macros to bits for each stage.
-    for (unsigned i = 0, N = FUs.size(); i < N; ++i)
-      NameToBitsMap[FUs[i]->getName()] = (unsigned) (1U << i);
+    for (unsigned j = 0, N = FUs.size(); j < N; ++j) {
+      assert ((j < DFA_MAX_RESOURCES) &&
+                      "Exceeded maximum number of DFA resources");
+      Record *FuncData = FUs[j];
+      Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc");
+      const std::vector<Record*> &FuncList =
+                                   FuncData->getValueAsListOfDefs("FuncList");
+      std::string ComboFuncName = ComboFunc->getName();
+      unsigned ComboBit = FUNameToBitsMap[ComboFuncName];
+      unsigned ComboResources = ComboBit;
+      DEBUG(dbgs() << "      combo: " << ComboFuncName
+                   << ":0x" << utohexstr(ComboResources) << "\n");
+      for (unsigned k = 0, M = FuncList.size(); k < M; ++k) {
+        std::string FuncName = FuncList[k]->getName();
+        unsigned FuncResources = FUNameToBitsMap[FuncName];
+        DEBUG(dbgs() << "        " << FuncName
+                     << ":0x" << utohexstr(FuncResources) << "\n");
+        ComboResources |= FuncResources;
+      }
+      ComboBitToBitsMap[ComboBit] = ComboResources;
+      numCombos++;
+      DEBUG(dbgs() << "          => combo bits: " << ComboFuncName << ":0x"
+                   << utohexstr(ComboBit) << " = 0x"
+                   << utohexstr(ComboResources) << "\n");
+    }
   }
+  return numCombos;
+}
+
+
+//
+// collectOneInsnClass - Populate allInsnClasses with one instruction class
+//
+int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
+                        std::vector<Record*> &ProcItinList,
+                        std::map<std::string, unsigned> &FUNameToBitsMap,
+                        Record *ItinData,
+                        raw_ostream &OS) {
+  // Collect instruction classes.
+  Record *ItinDef = ItinData->getValueAsDef("TheClass");
 
   const std::vector<Record*> &StageList =
     ItinData->getValueAsListOfDefs("Stages");
 
   // The number of stages.
-  NStages = StageList.size();
+  unsigned NStages = StageList.size();
 
-  // For each unit.
-  unsigned UnitBitValue = 0;
+  DEBUG(dbgs() << "    " << ItinDef->getName()
+               << "\n");
+
+  std::vector<unsigned> UnitBits;
 
   // Compute the bitwise or of each unit used in this stage.
   for (unsigned i = 0; i < NStages; ++i) {
@@ -375,18 +716,72 @@ void DFAPacketizerEmitter::collectAllInsnClasses(const std::string &Name,
     const std::vector<Record*> &UnitList =
       Stage->getValueAsListOfDefs("Units");
 
+    DEBUG(dbgs() << "        stage:" << i
+                 << " [" << UnitList.size() << " units]:");
+    unsigned dbglen = 26;  // cursor after stage dbgs
+
+    // Compute the bitwise or of each unit used in this stage.
+    unsigned UnitBitValue = 0;
     for (unsigned j = 0, M = UnitList.size(); j < M; ++j) {
       // Conduct bitwise or.
       std::string UnitName = UnitList[j]->getName();
-      assert(NameToBitsMap.count(UnitName));
-      UnitBitValue |= NameToBitsMap[UnitName];
+      DEBUG(dbgs() << " " << j << ":" << UnitName);
+      dbglen += 3 + UnitName.length();
+      assert(FUNameToBitsMap.count(UnitName));
+      UnitBitValue |= FUNameToBitsMap[UnitName];
     }
 
     if (UnitBitValue != 0)
-      allInsnClasses.insert(UnitBitValue);
+      UnitBits.push_back(UnitBitValue);
+
+    while (dbglen <= 64) {   // line up bits dbgs
+        dbglen += 8;
+        DEBUG(dbgs() << "\t");
+    }
+    DEBUG(dbgs() << " (bits: 0x" << utohexstr(UnitBitValue) << ")\n");
   }
+
+  if (UnitBits.size() > 0)
+    allInsnClasses.push_back(UnitBits);
+
+  DEBUG(dbgs() << "        ");
+  dbgsInsnClass(UnitBits);
+  DEBUG(dbgs() << "\n");
+
+  return NStages;
 }
 
+//
+// collectAllInsnClasses - Populate allInsnClasses which is a set of units
+// used in each stage.
+//
+int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName,
+                            std::vector<Record*> &ProcItinList,
+                            std::map<std::string, unsigned> &FUNameToBitsMap,
+                            std::vector<Record*> &ItinDataList,
+                            int &maxStages,
+                            raw_ostream &OS) {
+  // Collect all instruction classes.
+  unsigned M = ItinDataList.size();
+
+  int numInsnClasses = 0;
+  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n"
+               << "collectAllInsnClasses "
+               << ProcName
+               << " (" << M << " classes)\n");
+
+  // Collect stages for each instruction class for all itinerary data
+  for (unsigned j = 0; j < M; j++) {
+    Record *ItinData = ItinDataList[j];
+    int NStages = collectOneInsnClass(ProcName, ProcItinList,
+                                      FUNameToBitsMap, ItinData, OS);
+    if (NStages > maxStages) {
+      maxStages = NStages;
+    }
+    numInsnClasses++;
+  }
+  return numInsnClasses;
+}
 
 //
 // Run the worklist algorithm to generate the DFA.
@@ -398,16 +793,35 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
     Records.getAllDerivedDefinitions("ProcessorItineraries");
 
   //
-  // Collect the instruction classes.
+  // Collect the Functional units.
   //
+  std::map<std::string, unsigned> FUNameToBitsMap;
+  int maxResources = 0;
+  collectAllFuncUnits(ProcItinList,
+                              FUNameToBitsMap, maxResources, OS);
+
+  //
+  // Collect the Combo Functional units.
+  //
+  std::map<unsigned, unsigned> ComboBitToBitsMap;
+  std::vector<Record*> ComboFuncList =
+    Records.getAllDerivedDefinitions("ComboFuncUnits");
+  int numCombos = collectAllComboFuncs(ComboFuncList,
+                              FUNameToBitsMap, ComboBitToBitsMap, OS);
+
+  //
+  // Collect the itineraries.
+  //
+  int maxStages = 0;
+  int numInsnClasses = 0;
   for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
     Record *Proc = ProcItinList[i];
 
     // Get processor itinerary name.
-    const std::string &Name = Proc->getName();
+    const std::string &ProcName = Proc->getName();
 
     // Skip default.
-    if (Name == "NoItineraries")
+    if (ProcName == "NoItineraries")
       continue;
 
     // Sanity check for at least one instruction itinerary class.
@@ -419,15 +833,11 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
     // Get itinerary data list.
     std::vector<Record*> ItinDataList = Proc->getValueAsListOfDefs("IID");
 
-    // Collect instruction classes for all itinerary data.
-    for (unsigned j = 0, M = ItinDataList.size(); j < M; j++) {
-      Record *ItinData = ItinDataList[j];
-      unsigned NStages;
-      collectAllInsnClasses(Name, ItinData, NStages, OS);
-    }
+    // Collect all instruction classes
+    numInsnClasses += collectAllInsnClasses(ProcName, ProcItinList,
+                          FUNameToBitsMap, ItinDataList, maxStages, OS);
   }
 
-
   //
   // Run a worklist algorithm to generate the DFA.
   //
@@ -436,6 +846,7 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
   Initial->isInitial = true;
   Initial->stateInfo.insert(0x0);
   SmallVector<const State*, 32> WorkList;
+//  std::queue<State*> WorkList;
   std::map<std::set<unsigned>, const State*> Visited;
 
   WorkList.push_back(Initial);
@@ -459,9 +870,15 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
   //
   while (!WorkList.empty()) {
     const State *current = WorkList.pop_back_val();
-    for (DenseSet<unsigned>::iterator CI = allInsnClasses.begin(),
-           CE = allInsnClasses.end(); CI != CE; ++CI) {
-      unsigned InsnClass = *CI;
+    DEBUG(dbgs() << "---------------------\n");
+    DEBUG(dbgs() << "Processing state: " << current->stateNum << " - ");
+    dbgsStateInfo(current->stateInfo);
+    DEBUG(dbgs() << "\n");
+    for (unsigned i = 0; i < allInsnClasses.size(); i++) {
+      std::vector<unsigned> InsnClass = allInsnClasses[i];
+      DEBUG(dbgs() << i << " ");
+      dbgsInsnClass(InsnClass);
+      DEBUG(dbgs() << "\n");
 
       std::set<unsigned> NewStateResources;
       //
@@ -469,32 +886,47 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
       // and the state can accommodate this InsnClass, create a transition.
       //
       if (!current->hasTransition(InsnClass) &&
-          current->canAddInsnClass(InsnClass)) {
-        const State *NewState;
-        current->AddInsnClass(InsnClass, NewStateResources);
-        assert(!NewStateResources.empty() && "New states must be generated");
+          current->canMaybeAddInsnClass(InsnClass, ComboBitToBitsMap)) {
+        const State *NewState = NULL;
+        current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources);
+        if (NewStateResources.size() == 0) {
+          DEBUG(dbgs() << "  Skipped - no new states generated\n");
+          continue;
+        }
+
+        DEBUG(dbgs() << "\t");
+        dbgsStateInfo(NewStateResources);
+        DEBUG(dbgs() << "\n");
 
         //
         // If we have seen this state before, then do not create a new state.
         //
-        //
         auto VI = Visited.find(NewStateResources);
-        if (VI != Visited.end())
+        if (VI != Visited.end()) {
           NewState = VI->second;
-        else {
+          DEBUG(dbgs() << "\tFound existing state: "
+                       << NewState->stateNum << " - ");
+          dbgsStateInfo(NewState->stateInfo);
+          DEBUG(dbgs() << "\n");
+        } else {
           NewState = &D.newState();
           NewState->stateInfo = NewStateResources;
           Visited[NewStateResources] = NewState;
           WorkList.push_back(NewState);
+          DEBUG(dbgs() << "\tAccepted new state: "
+                       << NewState->stateNum << " - ");
+          dbgsStateInfo(NewState->stateInfo);
+          DEBUG(dbgs() << "\n");
         }
-        
+
         current->addTransition(InsnClass, NewState);
       }
     }
   }
 
   // Print out the table.
-  D.writeTableAndAPI(OS, TargetName);
+  D.writeTableAndAPI(OS, TargetName,
+               numInsnClasses, maxResources, numCombos, maxStages);
 }
 
 namespace llvm {