Hexagon V60/HVX DFA scheduler support
author Krzysztof Parzyszek <kparzysz@codeaurora.org>
Sat, 21 Nov 2015 20:00:45 +0000 (20:00 +0000)
committer Krzysztof Parzyszek <kparzysz@codeaurora.org>
Sat, 21 Nov 2015 20:00:45 +0000 (20:00 +0000)
Extended DFA tablegen as follows:
  - added "-debug-only dfa-emitter" support to llvm-tblgen

  - defined CVI_PIPE* resources for the V60 vector coprocessor

  - allowed specification of multiple required resources
    - supports ANDs of ORs
    - e.g. [SLOT2, SLOT3], [CVI_MPY0, CVI_MPY1] means:
           (SLOT2 OR SLOT3) AND (CVI_MPY0 OR CVI_MPY1)

  - added support for combo resources
    - allows specifying ORs of ANDs
    - e.g. [CVI_XLSHF, CVI_MPY01] means:
           (CVI_XLANE AND CVI_SHIFT) OR (CVI_MPY0 AND CVI_MPY1)

  - increased DFA input size from 32-bit to 64-bit
    - allows for a maximum of 4 AND'ed terms of 16 resources

  - supported expressions now include:

    expression     => term [AND term] [AND term] [AND term]
    term           => resource [OR resource]*
    resource       => one_resource | combo_resource
    combo_resource => (one_resource [AND one_resource]*)

Author: Dan Palermo <dpalermo@codeaurora.org>

kparzysz: Verified AMDGPU codegen to be unchanged on all llc
tests, except those dealing with instruction encodings.

Reapply the previous patch, this time without circular dependencies.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253793 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/CodeGen/DFAPacketizer.h
include/llvm/CodeGen/DFAPacketizerDefs.h [new file with mode: 0644]
include/llvm/Target/TargetItinerary.td
lib/CodeGen/DFAPacketizer.cpp
lib/Target/Hexagon/HexagonScheduleV60.td
utils/TableGen/DFAPacketizerEmitter.cpp

index 791b66e..69ea74d 100644 (file)
@@ -27,6 +27,7 @@
 #define LLVM_CODEGEN_DFAPACKETIZER_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/DFAPacketizerDefs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include <map>
 
@@ -42,20 +43,21 @@ class SUnit;
 
 class DFAPacketizer {
 private:
-  typedef std::pair<unsigned, unsigned> UnsignPair;
+  typedef std::pair<unsigned, DFAInput> UnsignPair;
+
   const InstrItineraryData *InstrItins;
   int CurrentState;
-  const int (*DFAStateInputTable)[2];
+  const DFAStateInput (*DFAStateInputTable)[2];
   const unsigned *DFAStateEntryTable;
 
   // CachedTable is a map from <FromState, Input> to ToState.
   DenseMap<UnsignPair, unsigned> CachedTable;
 
   // ReadTable - Read the DFA transition table and update CachedTable.
-  void ReadTable(unsigned int state);
+  void ReadTable(unsigned state);
 
 public:
-  DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+  DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2],
                 const unsigned *SET);
 
   // Reset the current state to make all resources available.
@@ -63,6 +65,14 @@ public:
     CurrentState = 0;
   }
 
+  // getInsnInput - Return the DFAInput for an instruction class.
+  DFAInput getInsnInput(unsigned InsnClass);
+
+  // getInsnInput - Return the DFAInput for an instruction class input vector.
+  static DFAInput getInsnInput(const std::vector<unsigned> &InsnClass) {
+    return getDFAInsnInput(InsnClass);
+  }
+
   // canReserveResources - Check if the resources occupied by a MCInstrDesc
   // are available in the current state.
   bool canReserveResources(const llvm::MCInstrDesc *MID);
diff --git a/include/llvm/CodeGen/DFAPacketizerDefs.h b/include/llvm/CodeGen/DFAPacketizerDefs.h
new file mode 100644 (file)
index 0000000..483a5be
--- /dev/null
@@ -0,0 +1,63 @@
+//=- llvm/CodeGen/DFAPacketizerDefs.h - DFA Packetizer for VLIW ---*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Common definitions used by TableGen and the DFAPacketizer in CodeGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_DFAPACKETIZERDEFS_H
+#define LLVM_CODEGEN_DFAPACKETIZERDEFS_H
+
+#include <vector>
+
+namespace llvm {
+
+// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within the bits of DFAInput.
+// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer.
+//
+// e.g. terms x resource bit combinations that fit in uint32_t:
+//      4 terms x 8  bits = 32 bits
+//      3 terms x 10 bits = 30 bits
+//      2 terms x 16 bits = 32 bits
+//
+// e.g. terms x resource bit combinations that fit in uint64_t:
+//      8 terms x 8  bits = 64 bits
+//      7 terms x 9  bits = 63 bits
+//      6 terms x 10 bits = 60 bits
+//      5 terms x 12 bits = 60 bits
+//      4 terms x 16 bits = 64 bits <--- current
+//      3 terms x 21 bits = 63 bits
+//      2 terms x 32 bits = 64 bits
+//
+#define DFA_MAX_RESTERMS        4   // The max # of AND'ed resource terms.
+#define DFA_MAX_RESOURCES       16  // The max # of resource bits in one term.
+
+typedef uint64_t                DFAInput;
+typedef int64_t                 DFAStateInput;
+#define DFA_TBLTYPE             "int64_t" // For generating DFAStateInputTable.
+
+namespace {
+  DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
+    return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
+  }
+
+  /// Return the DFAInput for an instruction class input vector.
+  /// This function is used in both DFAPacketizer.cpp and in
+  /// DFAPacketizerEmitter.cpp.
+  DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
+    DFAInput InsnInput = 0;
+    assert ((InsnClass.size() <= DFA_MAX_RESTERMS) &&
+            "Exceeded maximum number of DFA terms");
+    for (auto U : InsnClass)
+      InsnInput = addDFAFuncUnits(InsnInput, U);
+    return InsnInput;
+  }
+}
+
+}
+
+#endif
index cc74006..a37bbf2 100644 (file)
@@ -134,3 +134,19 @@ class ProcessorItineraries<list<FuncUnit> fu, list<Bypass> bp,
 // info. Subtargets using NoItineraries can bypass the scheduler's
 // expensive HazardRecognizer because no reservation table is needed.
 def NoItineraries : ProcessorItineraries<[], [], []>;
+
+//===----------------------------------------------------------------------===//
+// Combo Function Unit data - This is a map of combo function unit names to
+// the list of functional units that are included in the combination.
+//
+class ComboFuncData<FuncUnit ComboFunc, list<FuncUnit> funclist> {
+  FuncUnit TheComboFunc = ComboFunc;
+  list<FuncUnit> FuncList = funclist;
+}
+
+//===----------------------------------------------------------------------===//
+// Combo Function Units - This is a list of all combo function unit data.
+class ComboFuncUnits<list<ComboFuncData> cfd> {
+  list<ComboFuncData> CFD = cfd;
+}
+
index ee50f97..64b7f48 100644 (file)
 #include "llvm/Target/TargetInstrInfo.h"
 using namespace llvm;
 
-DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
+                             const DFAStateInput (*SIT)[2],
                              const unsigned *SET):
   InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
-  DFAStateEntryTable(SET) {}
+  DFAStateEntryTable(SET) {
+  // Make sure DFA types are large enough for the number of terms & resources.
+  assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput))
+        && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
+  assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput))
+        && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
+}
 
 
 //
@@ -60,26 +67,37 @@ void DFAPacketizer::ReadTable(unsigned int state) {
       DFAStateInputTable[i][1];
 }
 
+//
+// getInsnInput - Return the DFAInput for an instruction class.
+//
+DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
+  // Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
+  DFAInput InsnInput = 0;
+  unsigned i = 0;
+  for (const InstrStage *IS = InstrItins->beginStage(InsnClass),
+        *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS, ++i) {
+    InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits());
+    assert ((i < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
+  }
+  return InsnInput;
+}
 
 // canReserveResources - Check if the resources occupied by a MCInstrDesc
 // are available in the current state.
 bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
   unsigned InsnClass = MID->getSchedClass();
-  const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
-  unsigned FuncUnits = IS->getUnits();
-  UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+  DFAInput InsnInput = getInsnInput(InsnClass);
+  UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
   ReadTable(CurrentState);
   return (CachedTable.count(StateTrans) != 0);
 }
 
-
 // reserveResources - Reserve the resources occupied by a MCInstrDesc and
 // change the current state to reflect that change.
 void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
   unsigned InsnClass = MID->getSchedClass();
-  const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
-  unsigned FuncUnits = IS->getUnits();
-  UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+  DFAInput InsnInput = getInsnInput(InsnClass);
+  UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
   ReadTable(CurrentState);
   assert(CachedTable.count(StateTrans) != 0);
   CurrentState = CachedTable[StateTrans];
index 7cda4a7..2ccff82 100644 (file)
@@ -20,6 +20,15 @@ def CVI_XLSHF  : FuncUnit;
 def CVI_MPY01  : FuncUnit;
 def CVI_ALL    : FuncUnit;
 
+// Combined functional unit data.
+def HexagonComboFuncsV60 :
+    ComboFuncUnits<[
+      ComboFuncData<CVI_XLSHF    , [CVI_XLANE, CVI_SHIFT]>,
+      ComboFuncData<CVI_MPY01    , [CVI_MPY0, CVI_MPY1]>,
+      ComboFuncData<CVI_ALL      , [CVI_ST, CVI_XLANE, CVI_SHIFT,
+                                    CVI_MPY0, CVI_MPY1, CVI_LD]>
+    ]>;
+
 // Note: When adding additional vector scheduling classes, add the
 // corresponding methods to the class HexagonInstrInfo.
 def CVI_VA           : InstrItinClass;
index 5060b6e..fc70e97 100644 (file)
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "dfa-emitter"
+
 #include "CodeGenTarget.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/DFAPacketizerDefs.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
+#include "llvm/Support/Debug.h"
 #include <list>
 #include <map>
 #include <string>
+#include <queue>
 using namespace llvm;
 
+// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
+//
+// dbgsInsnClass - When debugging, print instruction class stages.
+//
+void dbgsInsnClass(const std::vector<unsigned> &InsnClass);
+//
+// dbgsStateInfo - When debugging, print the set of state info.
+//
+void dbgsStateInfo(const std::set<unsigned> &stateInfo);
+//
+// dbgsIndent - When debugging, indent by the specified amount.
+//
+void dbgsIndent(unsigned indent);
+
 //
 // class DFAPacketizerEmitter: class that generates and prints out the DFA
 // for resource tracking.
@@ -37,20 +57,48 @@ private:
   // allInsnClasses is the set of all possible resources consumed by an
   // InstrStage.
   //
-  DenseSet<unsigned> allInsnClasses;
+  std::vector<std::vector<unsigned>> allInsnClasses;
   RecordKeeper &Records;
 
 public:
   DFAPacketizerEmitter(RecordKeeper &R);
 
   //
-  // collectAllInsnClasses: Populate allInsnClasses which is a set of units
+  // collectAllFuncUnits - Construct a map of function unit names to bits.
+  //
+  int collectAllFuncUnits(std::vector<Record*> &ProcItinList,
+                           std::map<std::string, unsigned> &FUNameToBitsMap,
+                           int &maxResources,
+                           raw_ostream &OS);
+
+  //
+  // collectAllComboFuncs - Construct a map from a combo function unit bit to
+  //                        the bits of all included functional units.
+  //
+  int collectAllComboFuncs(std::vector<Record*> &ComboFuncList,
+                           std::map<std::string, unsigned> &FUNameToBitsMap,
+                           std::map<unsigned, unsigned> &ComboBitToBitsMap,
+                           raw_ostream &OS);
+
+  //
+  // collectOneInsnClass - Populate allInsnClasses with one instruction class.
+  //
+  int collectOneInsnClass(const std::string &ProcName,
+                           std::vector<Record*> &ProcItinList,
+                           std::map<std::string, unsigned> &FUNameToBitsMap,
+                           Record *ItinData,
+                           raw_ostream &OS);
+
+  //
+  // collectAllInsnClasses - Populate allInsnClasses which is a set of units
   // used in each stage.
   //
-  void collectAllInsnClasses(const std::string &Name,
-                             Record *ItinData,
-                             unsigned &NStages,
-                             raw_ostream &OS);
+  int collectAllInsnClasses(const std::string &ProcName,
+                           std::vector<Record*> &ProcItinList,
+                           std::map<std::string, unsigned> &FUNameToBitsMap,
+                           std::vector<Record*> &ItinDataList,
+                           int &maxStages,
+                           raw_ostream &OS);
 
   void run(raw_ostream &OS);
 };
@@ -87,7 +135,7 @@ class State {
   const int stateNum;
   mutable bool isInitial;
   mutable std::set<unsigned> stateInfo;
-  typedef std::map<unsigned, const State *> TransitionMap;
+  typedef std::map<std::vector<unsigned>, const State *> TransitionMap;
   mutable TransitionMap Transitions;
 
   State();
@@ -97,28 +145,47 @@ class State {
   }
 
   //
-  // canAddInsnClass - Returns true if an instruction of type InsnClass is a
-  // valid transition from this state, i.e., can an instruction of type InsnClass
-  // be added to the packet represented by this state.
+  // canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass
+  // may be a valid transition from this state i.e., can an instruction of type
+  // InsnClass be added to the packet represented by this state.
+  //
+  // Note that for multiple stages, this quick check does not take into account
+  // any possible resource competition between the stages themselves.  That is
+  // enforced in AddInsnClassStages which checks the cross product of all
+  // stages for resource availability (which is a more involved check).
+  //
+  bool canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
+                        std::map<unsigned, unsigned> &ComboBitToBitsMap) const;
+  //
+  // AddInsnClass - Return all combinations of resource reservation
+  // which are possible from this state (PossibleStates).
   //
   // PossibleStates is the set of valid resource states that ensue from valid
   // transitions.
   //
-  bool canAddInsnClass(unsigned InsnClass) const;
+  void AddInsnClass(std::vector<unsigned> &InsnClass,
+                        std::map<unsigned, unsigned> &ComboBitToBitsMap,
+                        std::set<unsigned> &PossibleStates) const;
   //
-  // AddInsnClass - Return all combinations of resource reservation
+  // AddInsnClassStages - Return all combinations of resource reservation
+  // resulting from the cross product of all stages for this InsnClass
   // which are possible from this state (PossibleStates).
   //
-  void AddInsnClass(unsigned InsnClass, std::set<unsigned> &PossibleStates) const;
-  // 
+  void AddInsnClassStages(std::vector<unsigned> &InsnClass,
+                        std::map<unsigned, unsigned> &ComboBitToBitsMap,
+                        unsigned chkstage, unsigned numstages,
+                        unsigned prevState, unsigned origState,
+                        DenseSet<unsigned> &VisitedResourceStates,
+                        std::set<unsigned> &PossibleStates) const;
+  //
   // addTransition - Add a transition from this state given the input InsnClass
   //
-  void addTransition(unsigned InsnClass, const State *To) const;
+  void addTransition(std::vector<unsigned> InsnClass, const State *To) const;
   //
   // hasTransition - Returns true if there is a transition from this state
   // given the input InsnClass
   //
-  bool hasTransition(unsigned InsnClass) const;
+  bool hasTransition(std::vector<unsigned> InsnClass) const;
 };
 } // End anonymous namespace.
 
@@ -144,10 +211,52 @@ public:
   //
   // writeTable: Print out a table representing the DFA.
   //
-  void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName);
+  void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName,
+                 int numInsnClasses = 0,
+                 int maxResources = 0, int numCombos = 0, int maxStages = 0);
 };
 } // End anonymous namespace.
 
+// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
+//
+// dbgsInsnClass - When debugging, print instruction class stages.
+//
+void dbgsInsnClass(const std::vector<unsigned> &InsnClass) {
+  DEBUG(dbgs() << "InsnClass: ");
+  for (unsigned i = 0; i < InsnClass.size(); ++i) {
+    if (i > 0) {
+      DEBUG(dbgs() << ", ");
+    }
+    DEBUG(dbgs() << "0x" << utohexstr(InsnClass[i]));
+  }
+  DFAInput InsnInput = getDFAInsnInput(InsnClass);
+  DEBUG(dbgs() << " (input: 0x" << utohexstr(InsnInput) << ")");
+}
+
+//
+// dbgsStateInfo - When debugging, print the set of state info.
+//
+void dbgsStateInfo(const std::set<unsigned> &stateInfo) {
+  DEBUG(dbgs() << "StateInfo: ");
+  unsigned i = 0;
+  for (std::set<unsigned>::iterator SI = stateInfo.begin();
+       SI != stateInfo.end(); ++SI, ++i) {
+    unsigned thisState = *SI;
+    if (i > 0) {
+      DEBUG(dbgs() << ", ");
+    }
+    DEBUG(dbgs() << "0x" << utohexstr(thisState));
+  }
+}
+
+//
+// dbgsIndent - When debugging, indent by the specified amount.
+//
+void dbgsIndent(unsigned indent) {
+  for (unsigned i = 0; i < indent; ++i) {
+    DEBUG(dbgs() << " ");
+  }
+}
 
 //
 // Constructors and destructors for State and DFA
@@ -157,10 +266,11 @@ State::State() :
 
 DFA::DFA(): currentState(nullptr) {}
 
-// 
+//
 // addTransition - Add a transition from this state given the input InsnClass
 //
-void State::addTransition(unsigned InsnClass, const State *To) const {
+void State::addTransition(std::vector<unsigned> InsnClass, const State *To)
+      const {
   assert(!Transitions.count(InsnClass) &&
       "Cannot have multiple transitions for the same input");
   Transitions[InsnClass] = To;
@@ -170,7 +280,7 @@ void State::addTransition(unsigned InsnClass, const State *To) const {
 // hasTransition - Returns true if there is a transition from this state
 // given the input InsnClass
 //
-bool State::hasTransition(unsigned InsnClass) const {
+bool State::hasTransition(std::vector<unsigned> InsnClass) const {
   return Transitions.count(InsnClass) > 0;
 }
 
@@ -178,61 +288,167 @@ bool State::hasTransition(unsigned InsnClass) const {
 // AddInsnClass - Return all combinations of resource reservation
 // which are possible from this state (PossibleStates).
 //
-void State::AddInsnClass(unsigned InsnClass,
-                            std::set<unsigned> &PossibleStates) const {
+// PossibleStates is the set of valid resource states that ensue from valid
+// transitions.
+//
+void State::AddInsnClass(std::vector<unsigned> &InsnClass,
+                        std::map<unsigned, unsigned> &ComboBitToBitsMap,
+                        std::set<unsigned> &PossibleStates) const {
   //
   // Iterate over all resource states in currentState.
   //
+  unsigned numstages = InsnClass.size();
+  assert((numstages > 0) && "InsnClass has no stages");
 
   for (std::set<unsigned>::iterator SI = stateInfo.begin();
        SI != stateInfo.end(); ++SI) {
     unsigned thisState = *SI;
 
-    //
-    // Iterate over all possible resources used in InsnClass.
-    // For ex: for InsnClass = 0x11, all resources = {0x01, 0x10}.
-    //
-
     DenseSet<unsigned> VisitedResourceStates;
-    for (unsigned int j = 0; j < sizeof(InsnClass) * 8; ++j) {
-      if ((0x1 << j) & InsnClass) {
-        //
-        // For each possible resource used in InsnClass, generate the
-        // resource state if that resource was used.
-        //
-        unsigned ResultingResourceState = thisState | (0x1 << j);
+
+    DEBUG(dbgs() << "  thisState: 0x" << utohexstr(thisState) << "\n");
+    AddInsnClassStages(InsnClass, ComboBitToBitsMap,
+                                numstages - 1, numstages,
+                                thisState, thisState,
+                                VisitedResourceStates, PossibleStates);
+  }
+}
+
+void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
+                        std::map<unsigned, unsigned> &ComboBitToBitsMap,
+                        unsigned chkstage, unsigned numstages,
+                        unsigned prevState, unsigned origState,
+                        DenseSet<unsigned> &VisitedResourceStates,
+                        std::set<unsigned> &PossibleStates) const {
+
+  assert((chkstage < numstages) && "AddInsnClassStages: stage out of range");
+  unsigned thisStage = InsnClass[chkstage];
+
+  dbgsIndent((1 + numstages - chkstage) << 1);
+  DEBUG(dbgs() << "AddInsnClassStages " << chkstage
+               << " (0x" << utohexstr(thisStage) << ") from ");
+  dbgsInsnClass(InsnClass);
+  DEBUG(dbgs() << "\n");
+
+  //
+  // Iterate over all possible resources used in thisStage.
+  // For ex: for thisStage = 0x11, all resources = {0x01, 0x10}.
+  //
+  for (unsigned int j = 0; j < DFA_MAX_RESOURCES; ++j) {
+    unsigned resourceMask = (0x1 << j);
+    if (resourceMask & thisStage) {
+      unsigned combo = ComboBitToBitsMap[resourceMask];
+      if (combo && ((~prevState & combo) != combo)) {
+        DEBUG(dbgs() << "\tSkipped Add 0x" << utohexstr(prevState)
+                     << " - combo op 0x" << utohexstr(resourceMask)
+                     << " (0x" << utohexstr(combo) <<") cannot be scheduled\n");
+        continue;
+      }
+      //
+      // For each possible resource used in thisStage, generate the
+      // resource state if that resource was used.
+      //
+      unsigned ResultingResourceState = prevState | resourceMask | combo;
+      dbgsIndent((2 + numstages - chkstage) << 1);
+      DEBUG(dbgs() << "0x" << utohexstr(prevState)
+                   << " | 0x" << utohexstr(resourceMask));
+      if (combo) {
+        DEBUG(dbgs() << " | 0x" << utohexstr(combo));
+      }
+      DEBUG(dbgs() << " = 0x" << utohexstr(ResultingResourceState) << " ");
+
+      //
+      // If this is the final stage for this class
+      //
+      if (chkstage == 0) {
         //
         // Check if the resulting resource state can be accommodated in this
         // packet.
-        // We compute ResultingResourceState OR thisState.
-        // If the result of the OR is different than thisState, it implies
+        // We compute resource OR prevState (originally started as origState).
+        // If the result of the OR is different than prevState, it implies
         // that there is at least one resource that can be used to schedule
-        // InsnClass in the current packet.
+        // thisStage in the current packet.
         // Insert ResultingResourceState into PossibleStates only if we haven't
         // processed ResultingResourceState before.
         //
-        if ((ResultingResourceState != thisState) &&
-            (VisitedResourceStates.count(ResultingResourceState) == 0)) {
-          VisitedResourceStates.insert(ResultingResourceState);
-          PossibleStates.insert(ResultingResourceState);
+        if (ResultingResourceState != prevState) {
+          if (VisitedResourceStates.count(ResultingResourceState) == 0) {
+            VisitedResourceStates.insert(ResultingResourceState);
+            PossibleStates.insert(ResultingResourceState);
+            DEBUG(dbgs() << "\tResultingResourceState: 0x"
+                         << utohexstr(ResultingResourceState) << "\n");
+          } else {
+            DEBUG(dbgs() << "\tSkipped Add - state already seen\n");
+          }
+        } else {
+          DEBUG(dbgs() << "\tSkipped Add - no final resources available\n");
+        }
+      } else {
+        //
+        // If the current resource can be accommodated, check the next
+        // stage in InsnClass for available resources.
+        //
+        if (ResultingResourceState != prevState) {
+          DEBUG(dbgs() << "\n");
+          AddInsnClassStages(InsnClass, ComboBitToBitsMap,
+                                chkstage - 1, numstages,
+                                ResultingResourceState, origState,
+                                VisitedResourceStates, PossibleStates);
+        } else {
+          DEBUG(dbgs() << "\tSkipped Add - no resources available\n");
         }
       }
     }
   }
-
 }
 
 
 //
-// canAddInsnClass - Quickly verifies if an instruction of type InsnClass is a
-// valid transition from this state i.e., can an instruction of type InsnClass
-// be added to the packet represented by this state.
+// canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass
+// may be a valid transition from this state i.e., can an instruction of type
+// InsnClass be added to the packet represented by this state.
 //
-bool State::canAddInsnClass(unsigned InsnClass) const {
+// Note that this routine is performing conservative checks that can be
+// quickly executed acting as a filter before calling AddInsnClassStages.
+// Any cases allowed through here will be caught later in AddInsnClassStages
+// which performs the more expensive exact check.
+//
+bool State::canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
+                    std::map<unsigned, unsigned> &ComboBitToBitsMap) const {
   for (std::set<unsigned>::const_iterator SI = stateInfo.begin();
        SI != stateInfo.end(); ++SI) {
-    if (~*SI & InsnClass)
+
+    // Check to see if all required resources are available.
+    bool available = true;
+
+    // Inspect each stage independently.
+    // note: This is a conservative check as we aren't checking for
+    //       possible resource competition between the stages themselves
+    //       The full cross product is examined later in AddInsnClass.
+    for (unsigned i = 0; i < InsnClass.size(); ++i) {
+      unsigned resources = *SI;
+      if ((~resources & InsnClass[i]) == 0) {
+        available = false;
+        break;
+      }
+      // Make sure _all_ resources for a combo function are available.
+      // note: This is a quick conservative check as it won't catch an
+      //       unschedulable combo if this stage is an OR expression
+      //       containing a combo.
+      //       These cases are caught later in AddInsnClass.
+      unsigned combo = ComboBitToBitsMap[InsnClass[i]];
+      if (combo && ((~resources & combo) != combo)) {
+        DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x" << utohexstr(resources)
+                     << " - combo op 0x" << utohexstr(InsnClass[i])
+                     << " (0x" << utohexstr(combo) <<") cannot be scheduled\n");
+        available = false;
+        break;
+      }
+    }
+
+    if (available) {
       return true;
+    }
   }
   return false;
 }
@@ -244,7 +460,6 @@ const State &DFA::newState() {
   return *IterPair.first;
 }
 
-
 int State::currentStateNum = 0;
 
 DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
@@ -263,57 +478,100 @@ DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
 //                         the ith state.
 //
 //
-void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
-  static const std::string SentinelEntry = "{-1, -1}";
-  DFA::StateSet::iterator SI = states.begin();
+void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName,
+                           int numInsnClasses,
+                           int maxResources, int numCombos, int maxStages) {
+
+  unsigned numStates = states.size();
+
+  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
+  DEBUG(dbgs() << "writeTableAndAPI\n");
+  DEBUG(dbgs() << "Total states: " << numStates << "\n");
+
+  OS << "namespace llvm {\n";
+
+  OS << "\n// Input format:\n";
+  OS << "#define DFA_MAX_RESTERMS        " << DFA_MAX_RESTERMS
+     << "\t// maximum AND'ed resource terms\n";
+  OS << "#define DFA_MAX_RESOURCES       " << DFA_MAX_RESOURCES
+     << "\t// maximum resource bits in one term\n";
+
+  OS << "\n// " << TargetName << "DFAStateInputTable[][2] = "
+     << "pairs of <Input, NextState> for all valid\n";
+  OS << "//                           transitions.\n";
+  OS << "// " << numStates << "\tstates\n";
+  OS << "// " << numInsnClasses << "\tinstruction classes\n";
+  OS << "// " << maxResources << "\tresources max\n";
+  OS << "// " << numCombos << "\tcombo resources\n";
+  OS << "// " << maxStages << "\tstages max\n";
+  OS << "const " << DFA_TBLTYPE << " "
+     << TargetName << "DFAStateInputTable[][2] = {\n";
+
   // This table provides a map to the beginning of the transitions for State s
   // in DFAStateInputTable.
-  std::vector<int> StateEntry(states.size());
-
-  OS << "namespace llvm {\n\n";
-  OS << "const int " << TargetName << "DFAStateInputTable[][2] = {\n";
+  std::vector<int> StateEntry(numStates+1);
+  static const std::string SentinelEntry = "{-1, -1}";
 
   // Tracks the total valid transitions encountered so far. It is used
   // to construct the StateEntry table.
   int ValidTransitions = 0;
-  for (unsigned i = 0; i < states.size(); ++i, ++SI) {
+  DFA::StateSet::iterator SI = states.begin();
+  for (unsigned i = 0; i < numStates; ++i, ++SI) {
     assert ((SI->stateNum == (int) i) && "Mismatch in state numbers");
     StateEntry[i] = ValidTransitions;
     for (State::TransitionMap::iterator
         II = SI->Transitions.begin(), IE = SI->Transitions.end();
         II != IE; ++II) {
-      OS << "{" << II->first << ", "
+      OS << "{0x" << utohexstr(getDFAInsnInput(II->first)) << ", "
          << II->second->stateNum
-         << "},    ";
+         << "},\t";
     }
     ValidTransitions += SI->Transitions.size();
 
     // If there are no valid transitions from this stage, we need a sentinel
     // transition.
     if (ValidTransitions == StateEntry[i]) {
-      OS << SentinelEntry << ",";
+      OS << SentinelEntry << ",\t";
       ++ValidTransitions;
     }
 
+    OS << " // state " << i << ": " << StateEntry[i];
+    if (StateEntry[i] != (ValidTransitions-1)) {   // More than one transition.
+       OS << "-" << (ValidTransitions-1);
+    }
     OS << "\n";
   }
 
   // Print out a sentinel entry at the end of the StateInputTable. This is
   // needed to iterate over StateInputTable in DFAPacketizer::ReadTable()
-  OS << SentinelEntry << "\n";
-  
+  OS << SentinelEntry << "\t";
+  OS << " // state " << numStates << ": " << ValidTransitions;
+  OS << "\n";
+
   OS << "};\n\n";
+  OS << "// " << TargetName << "DFAStateEntryTable[i] = "
+     << "Index of the first entry in DFAStateInputTable for\n";
+  OS << "//                         "
+     << "the ith state.\n";
+  OS << "// " << numStates << " states\n";
   OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n";
 
   // Multiply i by 2 since each entry in DFAStateInputTable is a set of
   // two numbers.
-  for (unsigned i = 0; i < states.size(); ++i)
+  unsigned lastState = 0;
+  for (unsigned i = 0; i < numStates; ++i) {
+    if (i && ((i % 10) == 0)) {
+        lastState = i-1;
+        OS << "   // states " << (i-10) << ":" << lastState << "\n";
+    }
     OS << StateEntry[i] << ", ";
+  }
 
   // Print out the index to the sentinel entry in StateInputTable
   OS << ValidTransitions << ", ";
+  OS << "   // states " << (lastState+1) << ":" << numStates << "\n";
 
-  OS << "\n};\n";
+  OS << "};\n";
   OS << "} // namespace\n";
 
 
@@ -332,40 +590,123 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
 
 
 //
-// collectAllInsnClasses - Populate allInsnClasses which is a set of units
-// used in each stage.
+// collectAllFuncUnits - Construct a map of function unit names to bits.
 //
-void DFAPacketizerEmitter::collectAllInsnClasses(const std::string &Name,
-                                  Record *ItinData,
-                                  unsigned &NStages,
-                                  raw_ostream &OS) {
-  // Collect processor itineraries.
-  std::vector<Record*> ProcItinList =
-    Records.getAllDerivedDefinitions("ProcessorItineraries");
-
-  // If just no itinerary then don't bother.
-  if (ProcItinList.size() < 2)
-    return;
-  std::map<std::string, unsigned> NameToBitsMap;
+int DFAPacketizerEmitter::collectAllFuncUnits(
+                            std::vector<Record*> &ProcItinList,
+                            std::map<std::string, unsigned> &FUNameToBitsMap,
+                            int &maxFUs,  // in/out: only ever raised here, never reset
+                            raw_ostream &OS) {  // OS is not written to in this function
+  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
+  DEBUG(dbgs() << "collectAllFuncUnits");
+  DEBUG(dbgs() << " (" << ProcItinList.size() << " itineraries)\n");
 
+  int totalFUs = 0;  // sum of the "FU" list sizes over all itineraries; this is the return value
   // Parse functional units for all the itineraries.
   for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) {
     Record *Proc = ProcItinList[i];
+    const std::string &ProcName = Proc->getName();  // only referenced by the DEBUG trace below
     std::vector<Record*> FUs = Proc->getValueAsListOfDefs("FU");
 
+    DEBUG(dbgs() << "    FU:" << i
+                 << " (" << FUs.size() << " FUs) "
+                 << ProcName);
+
+
+    // Give each functional unit the single bit (1U << j) matching its position j in this itinerary's "FU" list.
+    unsigned numFUs = FUs.size();
+    for (unsigned j = 0; j < numFUs; ++j) {
+      assert ((j < DFA_MAX_RESOURCES) &&
+                      "Exceeded maximum number of representable resources");
+      unsigned FuncResources = (unsigned) (1U << j);
+      FUNameToBitsMap[FUs[j]->getName()] = FuncResources;  // j restarts per itinerary, so a repeated name is overwritten
+      DEBUG(dbgs() << " " << FUs[j]->getName()
+                   << ":0x" << utohexstr(FuncResources));
+    }
+    if (((int) numFUs) > maxFUs) {  // track the largest per-itinerary FU count
+      maxFUs = numFUs;
+    }
+    totalFUs += numFUs;
+    DEBUG(dbgs() << "\n");
+  }
+  return totalFUs;
+}
+
+//
+// collectAllComboFuncs - Construct a map from a combo function unit bit to
+//                        the bits of all included functional units.
+//
+int DFAPacketizerEmitter::collectAllComboFuncs(
+                            std::vector<Record*> &ComboFuncList,
+                            std::map<std::string, unsigned> &FUNameToBitsMap,
+                            std::map<unsigned, unsigned> &ComboBitToBitsMap,  // output: combo bit -> combo bit OR'ed with its member units' bits
+                            raw_ostream &OS) {  // OS is not written to in this function
+  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
+  DEBUG(dbgs() << "collectAllComboFuncs");
+  DEBUG(dbgs() << " (" << ComboFuncList.size() << " sets)\n");
+
+  int numCombos = 0;  // incremented once per "CFD" entry processed; this is the return value
+  for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) {
+    Record *Func = ComboFuncList[i];
+    const std::string &ProcName = Func->getName();  // despite the name, this is the ComboFuncList record's name; used only in DEBUG output
+    std::vector<Record*> FUs = Func->getValueAsListOfDefs("CFD");
+
+    DEBUG(dbgs() << "    CFD:" << i
+                 << " (" << FUs.size() << " combo FUs) "
+                 << ProcName << "\n");
+
     // Convert macros to bits for each stage.
-    for (unsigned i = 0, N = FUs.size(); i < N; ++i)
-      NameToBitsMap[FUs[i]->getName()] = (unsigned) (1U << i);
+    for (unsigned j = 0, N = FUs.size(); j < N; ++j) {  // one iteration per "CFD" entry of this record
+      assert ((j < DFA_MAX_RESOURCES) &&
+                      "Exceeded maximum number of DFA resources");
+      Record *FuncData = FUs[j];
+      Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc");
+      const std::vector<Record*> &FuncList =
+                                   FuncData->getValueAsListOfDefs("FuncList");
+      std::string ComboFuncName = ComboFunc->getName();
+      unsigned ComboBit = FUNameToBitsMap[ComboFuncName];  // std::map operator[]: a name not yet in the map is inserted with value 0
+      unsigned ComboResources = ComboBit;  // start from the combo's own bit, then OR in each member unit below
+      DEBUG(dbgs() << "      combo: " << ComboFuncName
+                   << ":0x" << utohexstr(ComboResources) << "\n");
+      for (unsigned k = 0, M = FuncList.size(); k < M; ++k) {
+        std::string FuncName = FuncList[k]->getName();
+        unsigned FuncResources = FUNameToBitsMap[FuncName];  // same operator[] lookup: unknown names contribute 0
+        DEBUG(dbgs() << "        " << FuncName
+                     << ":0x" << utohexstr(FuncResources) << "\n");
+        ComboResources |= FuncResources;
+      }
+      ComboBitToBitsMap[ComboBit] = ComboResources;  // a later entry with the same ComboBit replaces this one
+      numCombos++;
+      DEBUG(dbgs() << "          => combo bits: " << ComboFuncName << ":0x"
+                   << utohexstr(ComboBit) << " = 0x"
+                   << utohexstr(ComboResources) << "\n");
+    }
   }
+  return numCombos;
+}
+
+
+//
+// collectOneInsnClass - Populate allInsnClasses with one instruction class
+//
+int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
+                        std::vector<Record*> &ProcItinList,
+                        std::map<std::string, unsigned> &FUNameToBitsMap,
+                        Record *ItinData,
+                        raw_ostream &OS) {
+  // Collect instruction classes.
+  Record *ItinDef = ItinData->getValueAsDef("TheClass");
 
   const std::vector<Record*> &StageList =
     ItinData->getValueAsListOfDefs("Stages");
 
   // The number of stages.
-  NStages = StageList.size();
+  unsigned NStages = StageList.size();
 
-  // For each unit.
-  unsigned UnitBitValue = 0;
+  DEBUG(dbgs() << "    " << ItinDef->getName()
+               << "\n");
+
+  std::vector<unsigned> UnitBits;
 
   // Compute the bitwise or of each unit used in this stage.
   for (unsigned i = 0; i < NStages; ++i) {
@@ -375,18 +716,72 @@ void DFAPacketizerEmitter::collectAllInsnClasses(const std::string &Name,
     const std::vector<Record*> &UnitList =
       Stage->getValueAsListOfDefs("Units");
 
+    DEBUG(dbgs() << "        stage:" << i
+                 << " [" << UnitList.size() << " units]:");
+    unsigned dbglen = 26;  // cursor after stage dbgs
+
+    // Compute the bitwise or of each unit used in this stage.
+    unsigned UnitBitValue = 0;
     for (unsigned j = 0, M = UnitList.size(); j < M; ++j) {
       // Conduct bitwise or.
       std::string UnitName = UnitList[j]->getName();
-      assert(NameToBitsMap.count(UnitName));
-      UnitBitValue |= NameToBitsMap[UnitName];
+      DEBUG(dbgs() << " " << j << ":" << UnitName);
+      dbglen += 3 + UnitName.length();
+      assert(FUNameToBitsMap.count(UnitName));
+      UnitBitValue |= FUNameToBitsMap[UnitName];
     }
 
     if (UnitBitValue != 0)
-      allInsnClasses.insert(UnitBitValue);
+      UnitBits.push_back(UnitBitValue);
+
+    while (dbglen <= 64) {   // line up bits dbgs
+        dbglen += 8;
+        DEBUG(dbgs() << "\t");
+    }
+    DEBUG(dbgs() << " (bits: 0x" << utohexstr(UnitBitValue) << ")\n");
   }
+
+  if (UnitBits.size() > 0)
+    allInsnClasses.push_back(UnitBits);
+
+  DEBUG(dbgs() << "        ");
+  dbgsInsnClass(UnitBits);
+  DEBUG(dbgs() << "\n");
+
+  return NStages;
 }
 
+//
+// collectAllInsnClasses - Call collectOneInsnClass for every entry of ItinDataList, raising
+// maxStages to the largest stage count it returns; returns the number of entries processed.
+//
+int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName,
+                            std::vector<Record*> &ProcItinList,
+                            std::map<std::string, unsigned> &FUNameToBitsMap,
+                            std::vector<Record*> &ItinDataList,
+                            int &maxStages,  // in/out: only ever raised here, never reset
+                            raw_ostream &OS) {
+  // Collect all instruction classes.
+  unsigned M = ItinDataList.size();
+
+  int numInsnClasses = 0;  // incremented once per ItinDataList entry; this is the return value
+  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n"
+               << "collectAllInsnClasses "
+               << ProcName
+               << " (" << M << " classes)\n");
+
+  // Collect stages for each instruction class for all itinerary data
+  for (unsigned j = 0; j < M; j++) {
+    Record *ItinData = ItinDataList[j];
+    int NStages = collectOneInsnClass(ProcName, ProcItinList,
+                                      FUNameToBitsMap, ItinData, OS);  // ProcName, ProcItinList, FUNameToBitsMap and OS are forwarded unchanged
+    if (NStages > maxStages) {
+      maxStages = NStages;
+    }
+    numInsnClasses++;  // counted for every entry, even when collectOneInsnClass recorded no class for it
+  }
+  return numInsnClasses;
+}
 
 //
 // Run the worklist algorithm to generate the DFA.
@@ -398,16 +793,35 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
     Records.getAllDerivedDefinitions("ProcessorItineraries");
 
   //
-  // Collect the instruction classes.
+  // Collect the Functional units.
   //
+  std::map<std::string, unsigned> FUNameToBitsMap;
+  int maxResources = 0;
+  collectAllFuncUnits(ProcItinList,
+                              FUNameToBitsMap, maxResources, OS);
+
+  //
+  // Collect the Combo Functional units.
+  //
+  std::map<unsigned, unsigned> ComboBitToBitsMap;
+  std::vector<Record*> ComboFuncList =
+    Records.getAllDerivedDefinitions("ComboFuncUnits");
+  int numCombos = collectAllComboFuncs(ComboFuncList,
+                              FUNameToBitsMap, ComboBitToBitsMap, OS);
+
+  //
+  // Collect the itineraries.
+  //
+  int maxStages = 0;
+  int numInsnClasses = 0;
   for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
     Record *Proc = ProcItinList[i];
 
     // Get processor itinerary name.
-    const std::string &Name = Proc->getName();
+    const std::string &ProcName = Proc->getName();
 
     // Skip default.
-    if (Name == "NoItineraries")
+    if (ProcName == "NoItineraries")
       continue;
 
     // Sanity check for at least one instruction itinerary class.
@@ -419,15 +833,11 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
     // Get itinerary data list.
     std::vector<Record*> ItinDataList = Proc->getValueAsListOfDefs("IID");
 
-    // Collect instruction classes for all itinerary data.
-    for (unsigned j = 0, M = ItinDataList.size(); j < M; j++) {
-      Record *ItinData = ItinDataList[j];
-      unsigned NStages;
-      collectAllInsnClasses(Name, ItinData, NStages, OS);
-    }
+    // Collect all instruction classes
+    numInsnClasses += collectAllInsnClasses(ProcName, ProcItinList,
+                          FUNameToBitsMap, ItinDataList, maxStages, OS);
   }
 
-
   //
   // Run a worklist algorithm to generate the DFA.
   //
@@ -436,6 +846,7 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
   Initial->isInitial = true;
   Initial->stateInfo.insert(0x0);
   SmallVector<const State*, 32> WorkList;
+//  std::queue<State*> WorkList;
   std::map<std::set<unsigned>, const State*> Visited;
 
   WorkList.push_back(Initial);
@@ -459,9 +870,15 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
   //
   while (!WorkList.empty()) {
     const State *current = WorkList.pop_back_val();
-    for (DenseSet<unsigned>::iterator CI = allInsnClasses.begin(),
-           CE = allInsnClasses.end(); CI != CE; ++CI) {
-      unsigned InsnClass = *CI;
+    DEBUG(dbgs() << "---------------------\n");
+    DEBUG(dbgs() << "Processing state: " << current->stateNum << " - ");
+    dbgsStateInfo(current->stateInfo);
+    DEBUG(dbgs() << "\n");
+    for (unsigned i = 0; i < allInsnClasses.size(); i++) {
+      std::vector<unsigned> InsnClass = allInsnClasses[i];
+      DEBUG(dbgs() << i << " ");
+      dbgsInsnClass(InsnClass);
+      DEBUG(dbgs() << "\n");
 
       std::set<unsigned> NewStateResources;
       //
@@ -469,32 +886,47 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
       // and the state can accommodate this InsnClass, create a transition.
       //
       if (!current->hasTransition(InsnClass) &&
-          current->canAddInsnClass(InsnClass)) {
-        const State *NewState;
-        current->AddInsnClass(InsnClass, NewStateResources);
-        assert(!NewStateResources.empty() && "New states must be generated");
+          current->canMaybeAddInsnClass(InsnClass, ComboBitToBitsMap)) {
+        const State *NewState = NULL;
+        current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources);
+        if (NewStateResources.size() == 0) {
+          DEBUG(dbgs() << "  Skipped - no new states generated\n");
+          continue;
+        }
+
+        DEBUG(dbgs() << "\t");
+        dbgsStateInfo(NewStateResources);
+        DEBUG(dbgs() << "\n");
 
         //
         // If we have seen this state before, then do not create a new state.
         //
-        //
         auto VI = Visited.find(NewStateResources);
-        if (VI != Visited.end())
+        if (VI != Visited.end()) {
           NewState = VI->second;
-        else {
+          DEBUG(dbgs() << "\tFound existing state: "
+                       << NewState->stateNum << " - ");
+          dbgsStateInfo(NewState->stateInfo);
+          DEBUG(dbgs() << "\n");
+        } else {
           NewState = &D.newState();
           NewState->stateInfo = NewStateResources;
           Visited[NewStateResources] = NewState;
           WorkList.push_back(NewState);
+          DEBUG(dbgs() << "\tAccepted new state: "
+                       << NewState->stateNum << " - ");
+          dbgsStateInfo(NewState->stateInfo);
+          DEBUG(dbgs() << "\n");
         }
-        
+
         current->addTransition(InsnClass, NewState);
       }
     }
   }
 
   // Print out the table.
-  D.writeTableAndAPI(OS, TargetName);
+  D.writeTableAndAPI(OS, TargetName,
+               numInsnClasses, maxResources, numCombos, maxStages);
 }
 
 namespace llvm {