*** empty log message ***
[oota-llvm.git] / lib / Target / SparcV9 / InstrSched / SchedGraph.cpp
index fd09e9e77785ac2da81d9431d4fcbcf207c2ed85..781604eaae7c1f33b25ec7a758c848b6ee674748 100644 (file)
 //**************************************************************************/
 
 #include "SchedGraph.h"
-#include "llvm/InstrTypes.h"
-#include "llvm/Instruction.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Method.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Target/MachineInstrInfo.h"
+#include "llvm/CodeGen/InstrSelection.h"
+#include "llvm/CodeGen/MachineCodeForInstruction.h"
+#include "llvm/CodeGen/MachineCodeForBasicBlock.h"
 #include "llvm/Target/MachineRegInfo.h"
-#include "llvm/Support/StringExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
 #include "llvm/iOther.h"
-#include <algorithm>
+#include "Support/StringExtras.h"
+#include "Support/STLExtras.h"
+#include <iostream>
 
+using std::vector;
+using std::pair;
+using std::cerr;
 
 //*********************** Internal Data Structures *************************/
 
-typedef vector< pair<SchedGraphNode*, unsigned int> > RefVec;
+// The following types need to be classes, not typedefs, so we can use
+// opaque declarations in SchedGraph.h
+// 
+struct RefVec: public vector< pair<SchedGraphNode*, int> > {
+  typedef vector< pair<SchedGraphNode*, int> >::      iterator       iterator;
+  typedef vector< pair<SchedGraphNode*, int> >::const_iterator const_iterator;
+};
 
-// The following needs to be a class, not a typedef, so we can use
-// an opaque declaration in SchedGraph.h
-class RegToRefVecMap: public hash_map<int, RefVec> {
-  typedef hash_map<int, RefVec>::      iterator iterator;
+struct RegToRefVecMap: public hash_map<int, RefVec> {
+  typedef hash_map<int, RefVec>::      iterator       iterator;
   typedef hash_map<int, RefVec>::const_iterator const_iterator;
 };
 
+struct ValueToDefVecMap: public hash_map<const Instruction*, RefVec> {
+  typedef hash_map<const Instruction*, RefVec>::      iterator       iterator;
+  typedef hash_map<const Instruction*, RefVec>::const_iterator const_iterator;
+};
+
 // 
 // class SchedGraphEdge
 // 
@@ -44,15 +57,16 @@ class RegToRefVecMap: public hash_map<int, RefVec> {
 SchedGraphEdge::SchedGraphEdge(SchedGraphNode* _src,
                               SchedGraphNode* _sink,
                               SchedGraphEdgeDepType _depType,
-                              DataDepOrderType _depOrderType,
+                              unsigned int     _depOrderType,
                               int _minDelay)
   : src(_src),
     sink(_sink),
     depType(_depType),
     depOrderType(_depOrderType),
-    val(NULL),
-    minDelay((_minDelay >= 0)? _minDelay : _src->getLatency())
+    minDelay((_minDelay >= 0)? _minDelay : _src->getLatency()),
+    val(NULL)
 {
+  assert(src != sink && "Self-loop in scheduling graph!");
   src->addOutEdge(this);
   sink->addInEdge(this);
 }
@@ -62,15 +76,16 @@ SchedGraphEdge::SchedGraphEdge(SchedGraphNode* _src,
 SchedGraphEdge::SchedGraphEdge(SchedGraphNode*  _src,
                               SchedGraphNode*  _sink,
                               const Value*     _val,
-                              DataDepOrderType _depOrderType,
+                              unsigned int     _depOrderType,
                               int              _minDelay)
   : src(_src),
     sink(_sink),
-    depType(DefUseDep),
+    depType(ValueDep),
     depOrderType(_depOrderType),
-    val(_val),
-    minDelay((_minDelay >= 0)? _minDelay : _src->getLatency())
+    minDelay((_minDelay >= 0)? _minDelay : _src->getLatency()),
+    val(_val)
 {
+  assert(src != sink && "Self-loop in scheduling graph!");
   src->addOutEdge(this);
   sink->addInEdge(this);
 }
@@ -80,7 +95,7 @@ SchedGraphEdge::SchedGraphEdge(SchedGraphNode*  _src,
 SchedGraphEdge::SchedGraphEdge(SchedGraphNode*  _src,
                               SchedGraphNode*  _sink,
                               unsigned int     _regNum,
-                              DataDepOrderType _depOrderType,
+                              unsigned int     _depOrderType,
                               int             _minDelay)
   : src(_src),
     sink(_sink),
@@ -89,6 +104,7 @@ SchedGraphEdge::SchedGraphEdge(SchedGraphNode*  _src,
     minDelay((_minDelay >= 0)? _minDelay : _src->getLatency()),
     machineRegNum(_regNum)
 {
+  assert(src != sink && "Self-loop in scheduling graph!");
   src->addOutEdge(this);
   sink->addInEdge(this);
 }
@@ -106,6 +122,7 @@ SchedGraphEdge::SchedGraphEdge(SchedGraphNode* _src,
     minDelay((_minDelay >= 0)? _minDelay : _src->getLatency()),
     resourceId(_resourceId)
 {
+  assert(src != sink && "Self-loop in scheduling graph!");
   src->addOutEdge(this);
   sink->addInEdge(this);
 }
@@ -115,8 +132,8 @@ SchedGraphEdge::~SchedGraphEdge()
 {
 }
 
-void SchedGraphEdge::dump(int indent=0) const {
-  printIndent(indent); cout << *this; 
+void SchedGraphEdge::dump(int indent) const {
+  cerr << std::string(indent*2, ' ') << *this; 
 }
 
 
@@ -126,12 +143,14 @@ void SchedGraphEdge::dump(int indent=0) const {
 
 /*ctor*/
 SchedGraphNode::SchedGraphNode(unsigned int _nodeId,
-                              const Instruction* _instr,
+                               const BasicBlock*   _bb,
                               const MachineInstr* _minstr,
+                               int   indexInBB,
                               const TargetMachine& target)
   : nodeId(_nodeId),
-    instr(_instr),
+    bb(_bb),
     minstr(_minstr),
+    origIndexInBB(indexInBB),
     latency(0)
 {
   if (minstr)
@@ -147,10 +166,13 @@ SchedGraphNode::SchedGraphNode(unsigned int _nodeId,
 /*dtor*/
 SchedGraphNode::~SchedGraphNode()
 {
+  // for each node, delete its out-edges
+  std::for_each(beginOutEdges(), endOutEdges(),
+                deleter<SchedGraphEdge>);
 }
 
-void SchedGraphNode::dump(int indent=0) const {
-  printIndent(indent); cout << *this; 
+void SchedGraphNode::dump(int indent) const {
+  cerr << std::string(indent*2, ' ') << *this; 
 }
 
 
@@ -204,49 +226,41 @@ SchedGraph::SchedGraph(const BasicBlock* bb,
                       const TargetMachine& target)
 {
   bbVec.push_back(bb);
-  this->buildGraph(target);
+  buildGraph(target);
 }
 
 
 /*dtor*/
 SchedGraph::~SchedGraph()
 {
-  for (iterator I=begin(); I != end(); ++I)
-    {
-      SchedGraphNode* node = (*I).second;
-      
-      // for each node, delete its out-edges
-      for (SchedGraphNode::iterator I = node->beginOutEdges();
-          I != node->endOutEdges(); ++I)
-       delete *I;
-      
-      // then delete the node itself.
-      delete node;
-    }
+  for (const_iterator I = begin(); I != end(); ++I)
+    delete I->second;
+  delete graphRoot;
+  delete graphLeaf;
 }
 
 
 void
 SchedGraph::dump() const
 {
-  cout << "  Sched Graph for Basic Blocks: ";
+  cerr << "  Sched Graph for Basic Blocks: ";
   for (unsigned i=0, N=bbVec.size(); i < N; i++)
     {
-      cout << (bbVec[i]->hasName()? bbVec[i]->getName() : "block")
+      cerr << (bbVec[i]->hasName()? bbVec[i]->getName() : "block")
           << " (" << bbVec[i] << ")"
           << ((i == N-1)? "" : ", ");
     }
   
-  cout << endl << endl << "    Actual Root nodes : ";
+  cerr << "\n\n    Actual Root nodes : ";
   for (unsigned i=0, N=graphRoot->outEdges.size(); i < N; i++)
-    cout << graphRoot->outEdges[i]->getSink()->getNodeId()
+    cerr << graphRoot->outEdges[i]->getSink()->getNodeId()
         << ((i == N-1)? "" : ", ");
   
-  cout << endl << "    Graph Nodes:" << endl;
+  cerr << "\n    Graph Nodes:\n";
   for (const_iterator I=begin(); I != end(); ++I)
-    cout << endl << * (*I).second;
+    cerr << "\n" << *I->second;
   
-  cout << endl;
+  cerr << "\n";
 }
 
 
@@ -330,12 +344,12 @@ SchedGraph::addCDEdges(const TerminatorInst* term,
                       const TargetMachine& target)
 {
   const MachineInstrInfo& mii = target.getInstrInfo();
-  MachineCodeForVMInstr& termMvec = term->getMachineInstrVec();
+  MachineCodeForInstruction &termMvec = MachineCodeForInstruction::get(term);
   
   // Find the first branch instr in the sequence of machine instrs for term
   // 
   unsigned first = 0;
-  while (! mii.isBranch(termMvec[first]->getOpCode()))
+  while (!mii.isBranch(termMvec[first]->getOpCode()))
     ++first;
   assert(first < termMvec.size() &&
         "No branch instructions for BR?  Ok, but weird!  Delete assertion.");
@@ -346,6 +360,7 @@ SchedGraph::addCDEdges(const TerminatorInst* term,
   
   // Add CD edges from each instruction in the sequence to the
   // *last preceding* branch instr. in the sequence 
+  // Use a latency of 0 because we only need to prevent out-of-order issue.
   // 
   for (int i = (int) termMvec.size()-1; i > (int) first; i--) 
     {
@@ -364,7 +379,7 @@ SchedGraph::addCDEdges(const TerminatorInst* term,
     }
   
   // Add CD edges from each instruction preceding the first branch
-  // to the first branch
+  // to the first branch.  Use a latency of 0 as above.
   // 
   for (int i = first-1; i >= 0; i--) 
     {
@@ -374,109 +389,140 @@ SchedGraph::addCDEdges(const TerminatorInst* term,
                                SchedGraphEdge::NonDataDep, 0);
     }
   
-  // Now add CD edges to the first branch instruction in the sequence
-  // from all preceding instructions in the basic block.
+  // Now add CD edges to the first branch instruction in the sequence from
+  // all preceding instructions in the basic block.  Use 0 latency again.
   // 
-  const BasicBlock* bb = term->getParent();
-  for (BasicBlock::const_iterator II = bb->begin(); II != bb->end(); ++II)
+  const BasicBlock* bb = firstBrNode->getBB();
+  const MachineCodeForBasicBlock& mvec = MachineCodeForBasicBlock::get(bb);
+  for (unsigned i=0, N=mvec.size(); i < N; i++) 
     {
-      if ((*II) == (const Instruction*) term)  // special case, handled above
-       continue;
+      if (mvec[i] == termMvec[first]) // reached the first branch
+        break;
       
-      assert(! (*II)->isTerminator() && "Two terminators in basic block?");
+      SchedGraphNode* fromNode = this->getGraphNodeForInstr(mvec[i]);
+      if (fromNode == NULL)
+        continue;                      // dummy instruction, e.g., PHI
       
-      const MachineCodeForVMInstr& mvec = (*II)->getMachineInstrVec();
-      for (unsigned i=0, N=mvec.size(); i < N; i++) 
-       {
-         SchedGraphNode* fromNode = this->getGraphNodeForInstr(mvec[i]);
-         if (fromNode == NULL)
-           continue;                   // dummy instruction, e.g., PHI
-         
-         (void) new SchedGraphEdge(fromNode, firstBrNode,
-                                   SchedGraphEdge::CtrlDep,
-                                   SchedGraphEdge::NonDataDep, 0);
-         
-         // If we find any other machine instructions (other than due to
-         // the terminator) that also have delay slots, add an outgoing edge
-         // from the instruction to the instructions in the delay slots.
-         // 
-         unsigned d = mii.getNumDelaySlots(mvec[i]->getOpCode());
-         assert(i+d < N && "Insufficient delay slots for instruction?");
-         
-         for (unsigned j=1; j <= d; j++)
-           {
-             SchedGraphNode* toNode = this->getGraphNodeForInstr(mvec[i+j]);
-             assert(toNode && "No node for machine instr in delay slot?");
-             (void) new SchedGraphEdge(fromNode, toNode,
-                                       SchedGraphEdge::CtrlDep,
-                                     SchedGraphEdge::NonDataDep, 0);
-           }
-       }
+      (void) new SchedGraphEdge(fromNode, firstBrNode,
+                                SchedGraphEdge::CtrlDep,
+                                SchedGraphEdge::NonDataDep, 0);
+      
+      // If we find any other machine instructions (other than due to
+      // the terminator) that also have delay slots, add an outgoing edge
+      // from the instruction to the instructions in the delay slots.
+      // 
+      unsigned d = mii.getNumDelaySlots(mvec[i]->getOpCode());
+      assert(i+d < N && "Insufficient delay slots for instruction?");
+      
+      for (unsigned j=1; j <= d; j++)
+        {
+          SchedGraphNode* toNode = this->getGraphNodeForInstr(mvec[i+j]);
+          assert(toNode && "No node for machine instr in delay slot?");
+          (void) new SchedGraphEdge(fromNode, toNode,
+                                    SchedGraphEdge::CtrlDep,
+                                    SchedGraphEdge::NonDataDep, 0);
+        }
     }
 }
 
+static const int SG_LOAD_REF  = 0;
+static const int SG_STORE_REF = 1;
+static const int SG_CALL_REF  = 2;
+
+static const unsigned int SG_DepOrderArray[][3] = {
+  { SchedGraphEdge::NonDataDep,
+            SchedGraphEdge::AntiDep,
+                        SchedGraphEdge::AntiDep },
+  { SchedGraphEdge::TrueDep,
+            SchedGraphEdge::OutputDep,
+                        SchedGraphEdge::TrueDep | SchedGraphEdge::OutputDep },
+  { SchedGraphEdge::TrueDep,
+            SchedGraphEdge::AntiDep | SchedGraphEdge::OutputDep,
+                        SchedGraphEdge::TrueDep | SchedGraphEdge::AntiDep
+                                                | SchedGraphEdge::OutputDep }
+};
+
 
+// Add a dependence edge between every pair of machine load/store/call
+// instructions, where at least one is a store or a call.
+// Use latency 1 just to ensure that memory operations are ordered;
+// latency does not otherwise matter (true dependences enforce that).
+// 
 void
-SchedGraph::addMemEdges(const vector<const Instruction*>& memVec,
+SchedGraph::addMemEdges(const vector<SchedGraphNode*>& memNodeVec,
                        const TargetMachine& target)
 {
   const MachineInstrInfo& mii = target.getInstrInfo();
   
-  for (unsigned im=0, NM=memVec.size(); im < NM; im++)
+  // Instructions in memNodeVec are in execution order within the basic block,
+  // so simply look at all pairs <memNodeVec[i], memNodeVec[j: j > i]>.
+  // 
+  for (unsigned im=0, NM=memNodeVec.size(); im < NM; im++)
     {
-      const Instruction* fromInstr = memVec[im];
-      bool fromIsLoad = fromInstr->getOpcode() == Instruction::Load;
-      
+      MachineOpCode fromOpCode = memNodeVec[im]->getOpCode();
+      int fromType = mii.isCall(fromOpCode)? SG_CALL_REF
+                       : mii.isLoad(fromOpCode)? SG_LOAD_REF
+                                               : SG_STORE_REF;
       for (unsigned jm=im+1; jm < NM; jm++)
        {
-         const Instruction* toInstr = memVec[jm];
-         bool toIsLoad = toInstr->getOpcode() == Instruction::Load;
-         SchedGraphEdge::DataDepOrderType depOrderType;
-         
-         if (fromIsLoad)
-           {
-             if (toIsLoad) continue;   // both instructions are loads
-             depOrderType = SchedGraphEdge::AntiDep;
-           }
-         else
-           {
-             depOrderType = (toIsLoad)? SchedGraphEdge::TrueDep
-               : SchedGraphEdge::OutputDep;
-           }
-         
-         MachineCodeForVMInstr& fromInstrMvec=fromInstr->getMachineInstrVec();
-         MachineCodeForVMInstr& toInstrMvec = toInstr->getMachineInstrVec();
-         
-         // We have two VM memory instructions, and at least one is a store.
-         // Add edges between all machine load/store instructions.
-         // 
-         for (unsigned i=0, N=fromInstrMvec.size(); i < N; i++) 
-           {
-             MachineOpCode fromOpCode = fromInstrMvec[i]->getOpCode();
-             if (mii.isLoad(fromOpCode) || mii.isStore(fromOpCode))
-               {
-                 SchedGraphNode* fromNode =
-                   this->getGraphNodeForInstr(fromInstrMvec[i]);
-                 assert(fromNode && "No node for memory instr?");
-                 
-                 for (unsigned j=0, M=toInstrMvec.size(); j < M; j++) 
-                   {
-                     MachineOpCode toOpCode = toInstrMvec[j]->getOpCode();
-                     if (mii.isLoad(toOpCode) || mii.isStore(toOpCode))
-                       {
-                         SchedGraphNode* toNode =
-                           this->getGraphNodeForInstr(toInstrMvec[j]);
-                         assert(toNode && "No node for memory instr?");
-                         
-                         (void) new SchedGraphEdge(fromNode, toNode,
-                                                   SchedGraphEdge::MemoryDep,
-                                                   depOrderType, 1);
-                       }
-                   }
-               }
-           }
-       }
+          MachineOpCode toOpCode = memNodeVec[jm]->getOpCode();
+          int toType = mii.isCall(toOpCode)? SG_CALL_REF
+                         : mii.isLoad(toOpCode)? SG_LOAD_REF
+                                               : SG_STORE_REF;
+          
+          if (fromType != SG_LOAD_REF || toType != SG_LOAD_REF)
+            (void) new SchedGraphEdge(memNodeVec[im], memNodeVec[jm],
+                                      SchedGraphEdge::MemoryDep,
+                                      SG_DepOrderArray[fromType][toType], 1);
+        }
     }
+} 
+
+// Add edges from/to CC reg instrs to/from call instrs.
+// Essentially this prevents anything that sets or uses a CC reg from being
+// reordered w.r.t. a call.
+// Use a latency of 0 because we only need to prevent out-of-order issue,
+// like with control dependences.
+// 
+void
+SchedGraph::addCallCCEdges(const vector<SchedGraphNode*>& memNodeVec,
+                           MachineCodeForBasicBlock& bbMvec,
+                           const TargetMachine& target)
+{
+  const MachineInstrInfo& mii = target.getInstrInfo();
+  vector<SchedGraphNode*> callNodeVec;
+  
+  // Find the call instruction nodes and put them in a vector.
+  for (unsigned im=0, NM=memNodeVec.size(); im < NM; im++)
+    if (mii.isCall(memNodeVec[im]->getOpCode()))
+      callNodeVec.push_back(memNodeVec[im]);
+  
+  // Now walk the entire basic block, looking for CC instructions *and*
+  // call instructions, and keep track of the order of the instructions.
+  // Use the call node vec to quickly find earlier and later call nodes
+  // relative to the current CC instruction.
+  // 
+  int lastCallNodeIdx = -1;
+  for (unsigned i=0, N=bbMvec.size(); i < N; i++)
+    if (mii.isCall(bbMvec[i]->getOpCode()))
+      {
+        ++lastCallNodeIdx;
+        for ( ; lastCallNodeIdx < (int)callNodeVec.size(); ++lastCallNodeIdx)
+          if (callNodeVec[lastCallNodeIdx]->getMachineInstr() == bbMvec[i])
+            break;
+        assert(lastCallNodeIdx < (int)callNodeVec.size() && "Missed Call?");
+      }
+    else if (mii.isCCInstr(bbMvec[i]->getOpCode()))
+      { // Add incoming/outgoing edges from/to preceding/later calls
+        SchedGraphNode* ccNode = this->getGraphNodeForInstr(bbMvec[i]);
+        int j=0;
+        for ( ; j <= lastCallNodeIdx; j++)
+          (void) new SchedGraphEdge(callNodeVec[j], ccNode,
+                                    MachineCCRegsRID, 0);
+        for ( ; j < (int) callNodeVec.size(); j++)
+          (void) new SchedGraphEdge(ccNode, callNodeVec[j],
+                                    MachineCCRegsRID, 0);
+      }
 }
 
 
@@ -504,7 +550,9 @@ SchedGraph::addMachineRegEdges(RegToRefVecMap& regToRefVecMap,
          SchedGraphNode* node = regRefVec[i].first;
          unsigned int opNum   = regRefVec[i].second;
          bool isDef = node->getMachineInstr()->operandIsDefined(opNum);
-               
+         bool isDefAndUse =
+            node->getMachineInstr()->operandIsDefinedAndUsed(opNum);
+          
           for (unsigned p=0; p < i; ++p)
             {
               SchedGraphNode* prevNode = regRefVec[p].first;
@@ -513,14 +561,22 @@ SchedGraph::addMachineRegEdges(RegToRefVecMap& regToRefVecMap,
                   unsigned int prevOpNum = regRefVec[p].second;
                   bool prevIsDef =
                     prevNode->getMachineInstr()->operandIsDefined(prevOpNum);
-                  
+                  bool prevIsDefAndUse =
+                    prevNode->getMachineInstr()->operandIsDefinedAndUsed(prevOpNum);
                   if (isDef)
-                    new SchedGraphEdge(prevNode, node, regNum,
-                                       (prevIsDef)? SchedGraphEdge::OutputDep
-                                                  : SchedGraphEdge::AntiDep);
-                  else if (prevIsDef)
-                    new SchedGraphEdge(prevNode, node, regNum,
-                                       SchedGraphEdge::TrueDep);
+                    {
+                      if (prevIsDef)
+                        new SchedGraphEdge(prevNode, node, regNum,
+                                           SchedGraphEdge::OutputDep);
+                      if (!prevIsDef || prevIsDefAndUse)
+                        new SchedGraphEdge(prevNode, node, regNum,
+                                           SchedGraphEdge::AntiDep);
+                    }
+                  
+                  if (prevIsDef)
+                    if (!isDef || isDefAndUse)
+                      new SchedGraphEdge(prevNode, node, regNum,
+                                         SchedGraphEdge::TrueDep);
                 }
             }
         }
@@ -528,87 +584,76 @@ SchedGraph::addMachineRegEdges(RegToRefVecMap& regToRefVecMap,
 }
 
 
+// Adds dependences to/from refNode from/to all other defs
+// in the basic block.  refNode may be a use, a def, or both.
+// We do not consider other uses because we are not building use-use deps.
+// 
 void
-SchedGraph::addSSAEdge(SchedGraphNode* node,
-                      const Value* val,
-                      const TargetMachine& target)
-{
-  if (!isa<Instruction>(val)) return;
-  
-  const Instruction* thisVMInstr = node->getInstr();
-  const Instruction* defVMInstr  = cast<const Instruction>(val);
-  
-  // Phi instructions are the only ones that produce a value but don't get
-  // any non-dummy machine instructions.  Return here as an optimization.
-  // 
-  if (isa<PHINode>(defVMInstr))
-    return;
-  
-  // Now add the graph edge for the appropriate machine instruction(s).
-  // Note that multiple machine instructions generated for the
-  // def VM instruction may modify the register for the def value.
-  // 
-  MachineCodeForVMInstr& defMvec = defVMInstr->getMachineInstrVec();
-  const MachineInstrInfo& mii = target.getInstrInfo();
-  
-  for (unsigned i=0, N=defMvec.size(); i < N; i++)
-    for (int o=0, N = mii.getNumOperands(defMvec[i]->getOpCode()); o < N; o++)
-      {
-       const MachineOperand& defOp = defMvec[i]->getOperand(o); 
-       
-       if (defOp.opIsDef()
-           && (defOp.getOperandType() == MachineOperand::MO_VirtualRegister
-               || defOp.getOperandType() == MachineOperand::MO_CCRegister)
-           && (defOp.getVRegValue() == val))
-         {
-           // this instruction does define value `val'.
-           // if there is a node for it in the same graph, add an edge.
-           SchedGraphNode* defNode = this->getGraphNodeForInstr(defMvec[i]);
-           if (defNode != NULL && defNode != node)
-              (void) new SchedGraphEdge(defNode, node, val);
-         }
-      }
+SchedGraph::addEdgesForValue(SchedGraphNode* refNode,
+                             const RefVec& defVec,
+                             const Value* defValue,
+                             bool  refNodeIsDef,
+                             bool  refNodeIsDefAndUse,
+                             const TargetMachine& target)
+{
+  bool refNodeIsUse = !refNodeIsDef || refNodeIsDefAndUse;
+  
+  // Add true or output dep edges from all def nodes before refNode in BB.
+  // Add anti or output dep edges to all def nodes after refNode.
+  for (RefVec::const_iterator I=defVec.begin(), E=defVec.end(); I != E; ++I)
+    {
+      if ((*I).first == refNode)
+        continue;                       // Don't add any self-loops
+      
+      if ((*I).first->getOrigIndexInBB() < refNode->getOrigIndexInBB())
+        { // (*I).first is before refNode
+          if (refNodeIsDef)
+            (void) new SchedGraphEdge((*I).first, refNode, defValue,
+                                      SchedGraphEdge::OutputDep);
+          if (refNodeIsUse)
+            (void) new SchedGraphEdge((*I).first, refNode, defValue,
+                                      SchedGraphEdge::TrueDep);
+        }
+      else
+        { // (*I).first is after refNode
+          if (refNodeIsDef)
+            (void) new SchedGraphEdge(refNode, (*I).first, defValue,
+                                      SchedGraphEdge::OutputDep);
+          if (refNodeIsUse)
+            (void) new SchedGraphEdge(refNode, (*I).first, defValue,
+                                      SchedGraphEdge::AntiDep);
+        }
+    }
 }
 
 
 void
 SchedGraph::addEdgesForInstruction(const MachineInstr& minstr,
-                                  RegToRefVecMap& regToRefVecMap,
+                                   const ValueToDefVecMap& valueToDefVecMap,
                                   const TargetMachine& target)
 {
   SchedGraphNode* node = this->getGraphNodeForInstr(&minstr);
   if (node == NULL)
     return;
   
-  assert(node->getInstr() && "Should be no dummy nodes here!");
-  const Instruction& instr = * node->getInstr();
-  
   // Add edges for all operands of the machine instruction.
-  // Also, record all machine register references to add reg. deps. later.
   // 
   for (unsigned i=0, numOps=minstr.getNumOperands(); i < numOps; i++)
     {
       const MachineOperand& mop = minstr.getOperand(i);
-      
-      // if this writes to a machine register other than the hardwired
-      // "zero" register, record the reference.
-      if (mop.getOperandType() == MachineOperand::MO_MachineRegister
-         && (mop.getMachineRegNum()
-             != (unsigned) target.getRegInfo().getZeroRegNum()))
-       {
-         regToRefVecMap[mop.getMachineRegNum()].push_back(make_pair(node, i));
-       }
-      
-      // ignore all other def operands
-      if (minstr.operandIsDefined(i))
-       continue;
-      
       switch(mop.getOperandType())
        {
        case MachineOperand::MO_VirtualRegister:
        case MachineOperand::MO_CCRegister:
-         if (mop.getVRegValue())
-           addSSAEdge(node, mop.getVRegValue(), target);
+         if (const Instruction* srcI =
+              dyn_cast_or_null<Instruction>(mop.getVRegValue()))
+            {
+              ValueToDefVecMap::const_iterator I = valueToDefVecMap.find(srcI);
+              if (I != valueToDefVecMap.end())
+                addEdgesForValue(node, (*I).second, mop.getVRegValue(),
+                                 minstr.operandIsDefined(i),
+                                 minstr.operandIsDefinedAndUsed(i), target);
+            }
          break;
          
        case MachineOperand::MO_MachineRegister:
@@ -624,103 +669,187 @@ SchedGraph::addEdgesForInstruction(const MachineInstr& minstr,
          break;
        }
     }
-
-  // Add edges for values implicitly used by the machine instruction sequence
-  // for the VM instruction but not made explicit operands.  Examples include
-  // function arguments to a Call instructions or the return value of a Ret
-  // instruction.  We'll conservatively add the dependences to every machine
-  // machine instruction in the instruction sequence for this VM instr
-  // (at least for now, there is never more than one machine instr).
+  
+  // Add edges for values implicitly used by the machine instruction.
+  // Examples include function arguments to a Call instruction or the return
+  // value of a Ret instruction.
   // 
-  const vector<const Value*>& implicitUses =
-    instr.getMachineInstrVec().getImplicitUses();
-  for (unsigned i=0; i < implicitUses.size(); ++i)
-    addSSAEdge(node, implicitUses[i], target);
+  for (unsigned i=0, N=minstr.getNumImplicitRefs(); i < N; ++i)
+    if (! minstr.implicitRefIsDefined(i) ||
+        minstr.implicitRefIsDefinedAndUsed(i))
+      if (const Instruction* srcI =
+          dyn_cast_or_null<Instruction>(minstr.getImplicitRef(i)))
+        {
+          ValueToDefVecMap::const_iterator I = valueToDefVecMap.find(srcI);
+          if (I != valueToDefVecMap.end())
+            addEdgesForValue(node, (*I).second, minstr.getImplicitRef(i),
+                             minstr.implicitRefIsDefined(i),
+                             minstr.implicitRefIsDefinedAndUsed(i), target);
+        }
 }
 
 
 void
-SchedGraph::addNonSSAEdgesForValue(const Instruction* instr,
-                                   const TargetMachine& target)
+SchedGraph::findDefUseInfoAtInstr(const TargetMachine& target,  // Gathers the def/use bookkeeping for a single graph node.
+                                  SchedGraphNode* node,
+                                  vector<SchedGraphNode*>& memNodeVec,  // receives node if its opcode is a load, store or call
+                                  RegToRefVecMap& regToRefVecMap,  // receives (node, operand index) per non-zero machine register operand
+                                  ValueToDefVecMap& valueToDefVecMap)  // receives (node, index) per Instruction value defined by node
 {
-  if (isa<PHINode>(instr))
-    return;
-
-  MachineCodeForVMInstr& mvec = instr->getMachineInstrVec();
   const MachineInstrInfo& mii = target.getInstrInfo();
-  RefVec refVec;
   
-  for (unsigned i=0, N=mvec.size(); i < N; i++)
-    for (int o=0, N = mii.getNumOperands(mvec[i]->getOpCode()); o < N; o++)
-      {
-       const MachineOperand& op = mvec[i]->getOperand(o); 
-       
-       if ((op.getOperandType() == MachineOperand::MO_VirtualRegister ||
-             op.getOperandType() == MachineOperand::MO_CCRegister)
-           && op.getVRegValue() == (Value*) instr)
-          {
-           // this operand is a definition or use of value `instr'
-           SchedGraphNode* node = this->getGraphNodeForInstr(mvec[i]);
-            assert(node && "No node for machine instruction in this BB?");
-            refVec.push_back(make_pair(node, o));
-          }
-      }
   
-  // refVec is ordered by control flow order of the machine instructions
-  for (unsigned i=0; i < refVec.size(); ++i)
+  MachineOpCode opCode = node->getOpCode();
+  if (mii.isLoad(opCode) || mii.isStore(opCode) || mii.isCall(opCode))
+    memNodeVec.push_back(node);
+  
+  // Collect the register references and value defs for the explicit operands.
+  // 
+  const MachineInstr& minstr = * node->getMachineInstr();
+  for (int i=0, numOps = (int) minstr.getNumOperands(); i < numOps; i++)
     {
-      SchedGraphNode* node = refVec[i].first;
-      unsigned int   opNum = refVec[i].second;
-      bool isDef = node->getMachineInstr()->operandIsDefined(opNum);
+      const MachineOperand& mop = minstr.getOperand(i);
       
-      if (isDef)
-        // add output and/or anti deps to this definition
-        for (unsigned p=0; p < i; ++p)
-          {
-            SchedGraphNode* prevNode = refVec[p].first;
-            if (prevNode != node)
-              {
-                bool prevIsDef = prevNode->getMachineInstr()->
-                  operandIsDefined(refVec[p].second);
-                new SchedGraphEdge(prevNode, node, SchedGraphEdge::DefUseDep,
-                                   (prevIsDef)? SchedGraphEdge::OutputDep
-                                              : SchedGraphEdge::AntiDep);
-              }
-          }
+      // If this operand is a machine register other than the hardwired
+      // "zero" register, record the reference (uses and defs alike).
+      if (mop.getOperandType() == MachineOperand::MO_MachineRegister)
+        {
+          int regNum = mop.getMachineRegNum();
+         if (regNum != target.getRegInfo().getZeroRegNum())
+            regToRefVecMap[mop.getMachineRegNum()].push_back(
+                                                  std::make_pair(node, i));
+          continue;                     // machine registers never reach the value-def code below
+       }
+      
+      // Skip every remaining operand that is not a definition.
+      if (! minstr.operandIsDefined(i))
+       continue;
+      
+      // We must be defining a value.
+      assert((mop.getOperandType() == MachineOperand::MO_VirtualRegister ||
+              mop.getOperandType() == MachineOperand::MO_CCRegister)
+             && "Do not expect any other kind of operand to be defined!");
+      
+      const Instruction* defInstr = cast<Instruction>(mop.getVRegValue());
+      valueToDefVecMap[defInstr].push_back(std::make_pair(node, i));  // explicit operands are recorded with index i >= 0
     }
+  
+  // 
+  // Collect value defs. for implicit operands.  The interface to extract
+  // them assumes they must be virtual registers!
+  // 
+  for (int i=0, N = (int) minstr.getNumImplicitRefs(); i < N; ++i)
+    if (minstr.implicitRefIsDefined(i))
+      if (const Instruction* defInstr =
+          dyn_cast_or_null<Instruction>(minstr.getImplicitRef(i)))
+        {
+          valueToDefVecMap[defInstr].push_back(std::make_pair(node, -i));  // NOTE(review): -i is 0 for implicit ref 0, the same index stored for explicit operand 0 above; confirm consumers can tell them apart
+        }
 }
 
 
 void
-SchedGraph::buildNodesforVMInstr(const TargetMachine& target,
-                                 const Instruction* instr)
+SchedGraph::buildNodesforBB(const TargetMachine& target,  // Creates the graph nodes for one basic block and collects their def/use info.
+                            const BasicBlock* bb,
+                            vector<SchedGraphNode*>& memNodeVec,
+                            RegToRefVecMap& regToRefVecMap,
+                            ValueToDefVecMap& valueToDefVecMap)  // the three containers are only forwarded to findDefUseInfoAtInstr
 {
   const MachineInstrInfo& mii = target.getInstrInfo();
-  const MachineCodeForVMInstr& mvec = instr->getMachineInstrVec();
+  
+  // Build a graph node for each machine instruction (dummy Phis are skipped)
+  // and gather def/use info in the same single pass over the block.
+  const MachineCodeForBasicBlock& mvec = MachineCodeForBasicBlock::get(bb);
   for (unsigned i=0; i < mvec.size(); i++)
     if (! mii.isDummyPhiInstr(mvec[i]->getOpCode()))
       {
-        SchedGraphNode* node = new SchedGraphNode(getNumNodes(),
-                                                  instr, mvec[i], target);
+        SchedGraphNode* node = new SchedGraphNode(getNumNodes(), bb,
+                                                  mvec[i], i, target);  // i is the position within the machine code vector of bb
         this->noteGraphNodeForInstr(mvec[i], node);
+        
+        // Remember all register references and value defs
+        findDefUseInfoAtInstr(target, node,
+                              memNodeVec, regToRefVecMap,valueToDefVecMap);
       }
+  
+#undef REALLY_NEED_TO_SEARCH_SUCCESSOR_PHIS  // the undef right before the ifdef keeps the region below out of the build
+#ifdef REALLY_NEED_TO_SEARCH_SUCCESSOR_PHIS
+  // This is a BIG UGLY HACK.  IT NEEDS TO BE ELIMINATED.
+  // Look for copy instructions inserted in this BB due to Phi instructions
+  // in the successor BBs.
+  // There MUST be exactly one copy per Phi in successor nodes.
+  // 
+  for (BasicBlock::succ_const_iterator SI=bb->succ_begin(), SE=bb->succ_end();
+       SI != SE; ++SI)
+    for (BasicBlock::const_iterator PI=(*SI)->begin(), PE=(*SI)->end();
+         PI != PE; ++PI)
+      {
+        if ((*PI)->getOpcode() != Instruction::PHINode)
+          break;                        // No more Phis in this successor
+        
+        // Find the incoming value from block bb to block (*SI)
+        int bbIndex = cast<PHINode>(*PI)->getBasicBlockIndex(bb);
+        assert(bbIndex >= 0 && "But I know bb is a predecessor of (*SI)?");
+        Value* inVal = cast<PHINode>(*PI)->getIncomingValue(bbIndex);
+        assert(inVal != NULL && "There must be an in-value on every edge");
+        
+        // Find the machine instruction that makes a copy of inVal to (*PI).
+        // This must be in the current basic block (bb).
+        const MachineCodeForVMInstr& mvec = MachineCodeForBasicBlock::get(*PI);
+        const MachineInstr* theCopy = NULL;
+        for (unsigned i=0; i < mvec.size() && theCopy == NULL; i++)
+          if (! mii.isDummyPhiInstr(mvec[i]->getOpCode()))
+            // not a Phi: assume this is a copy and examine its operands
+            for (int o=0, N=(int) mvec[i]->getNumOperands(); o < N; o++)
+              {
+                const MachineOperand& mop = mvec[i]->getOperand(o);
+                
+                if (mvec[i]->operandIsDefined(o))
+                  assert(mop.getVRegValue() == (*PI) && "dest shd be my Phi");
+                
+                if (! mvec[i]->operandIsDefined(o) ||  // NOTE(review): the NOT NEEDED? text on the next line is not valid C++; it must be resolved before this region is ever enabled
+                    NOT NEEDED? mvec[i]->operandIsDefinedAndUsed(o))
+                  if (mop.getVRegValue() == inVal)
+                    { // found the copy!
+                      theCopy = mvec[i];
+                      break;
+                    }
+              }
+        
+        // Found the dang instruction.  Now create a node and do the rest...
+        if (theCopy != NULL)
+          {
+            SchedGraphNode* node = new SchedGraphNode(getNumNodes(), bb,
+                                            theCopy, origIndexInBB++, target);  // NOTE(review): origIndexInBB is not declared anywhere in this function as shown
+            this->noteGraphNodeForInstr(theCopy, node);
+            findDefUseInfoAtInstr(target, node,
+                                  memNodeVec, regToRefVecMap,valueToDefVecMap);
+          }
+      }
+#endif  //REALLY_NEED_TO_SEARCH_SUCCESSOR_PHIS
 }
 
 
 void
 SchedGraph::buildGraph(const TargetMachine& target)  // Builds the graph for the single basic block in bbVec: nodes first, then dependence edges.
 {
-  const MachineInstrInfo& mii = target.getInstrInfo();
   const BasicBlock* bb = bbVec[0];  // NOTE(review): bbVec[0] is read before the size assert below
   
   assert(bbVec.size() == 1 && "Only handling a single basic block here");
   
-  // Use this data structures to note all LLVM memory instructions.
+  // Use this data structure to note all machine operands that compute
+  // ordinary LLVM values.  These must be computed defs (i.e., instructions). 
+  // Note that there may be multiple machine instructions that define
+  // each Value.
+  ValueToDefVecMap valueToDefVecMap;
+  
+  // Use this data structure to note all memory instructions.
   // We use this to add memory dependence edges without a second full walk.
   // 
-  vector<const Instruction*> memVec;
+  // (memNodeVec replaces the old vector<const Instruction*> memVec.)
+  vector<SchedGraphNode*> memNodeVec;
   
-  // Use this data structures to note any uses or definitions of
+  // Use this data structure to note any uses or definitions of
   // machine registers so we can add edges for those later without
   // extra passes over the nodes.
   // The vector holds an ordered list of references to the machine reg,
@@ -731,8 +860,8 @@ SchedGraph::buildGraph(const TargetMachine& target)
   RegToRefVecMap regToRefVecMap;
   
   // Make a dummy root node.  We'll add edges to the real roots later.
-  graphRoot = new SchedGraphNode(0, NULL, NULL, target);
-  graphLeaf = new SchedGraphNode(1, NULL, NULL, target);
+  graphRoot = new SchedGraphNode(0, NULL, NULL, -1, target);  // dummy nodes get -1 in the slot where real nodes pass their index in the block
+  graphLeaf = new SchedGraphNode(1, NULL, NULL, -1, target);
 
   //----------------------------------------------------------------
   // First add nodes for all the machine instructions in the basic block
@@ -741,18 +870,7 @@ SchedGraph::buildGraph(const TargetMachine& target)
   // Also, remember the load/store instructions to add memory deps later.
   //----------------------------------------------------------------
   
-  for (BasicBlock::const_iterator II = bb->begin(); II != bb->end(); ++II)
-    {
-      const Instruction *instr = *II;
-
-      // Build graph nodes for this VM instruction
-      buildNodesforVMInstr(target, instr);
-      
-      // Remember the load/store instructions to add memory deps later.
-      if (instr->getOpcode() == Instruction::Load ||
-         instr->getOpcode() == Instruction::Store) 
-       memVec.push_back(instr);
-    } 
+  buildNodesforBB(target, bb, memNodeVec, regToRefVecMap, valueToDefVecMap);  // fills all three containers in one pass
   
   //----------------------------------------------------------------
   // Now add edges for the following (all are incoming edges except (4)):
@@ -770,29 +888,30 @@ SchedGraph::buildGraph(const TargetMachine& target)
   // 
   //----------------------------------------------------------------
       
+  MachineCodeForBasicBlock& bbMvec = MachineCodeForBasicBlock::get(bb);  // machine code of bb, walked in order below
+  
   // First, add edges to the terminator instruction of the basic block.
   this->addCDEdges(bb->getTerminator(), target);
       
-  // Then add memory dep edges: store->load, load->store, and store->store
-  this->addMemEdges(memVec, target);
-      
-  // Then add other edges for all instructions in the block.
-  // Do this in machine code order and find all references to machine regs.
-  MachineCodeForBasicBlock& mvec = bb->getMachineInstrVec();
-  for (unsigned i=0, N=mvec.size(); i < N; i++)
-    addEdgesForInstruction(*mvec[i], regToRefVecMap, target);
-  
-  // Since the code is no longer in SSA form, add output dep. edges
-  // between machine instructions that define the same Value, and anti-dep.
-  // edges from those to other machine instructions for the same VM instr.
+  // Then add memory dep edges: store->load, load->store, and store->store.
+  // Call instructions are treated as both load and store.
+  this->addMemEdges(memNodeVec, target);
+
+  // Then add edges between call instructions and CC set/use instructions
+  this->addCallCCEdges(memNodeVec, bbMvec, target);
+  
+  // Then add incoming def-use (SSA) edges for each machine instruction.
+  for (unsigned i=0, N=bbMvec.size(); i < N; i++)
+    addEdgesForInstruction(*bbMvec[i], valueToDefVecMap, target);
+  
+#ifdef NEED_SEPARATE_NONSSA_EDGES_CODE  // NOTE(review): this change removes the definition of addNonSSAEdgesForValue, so the region only builds while the macro stays undefined
+  // Then add non-SSA edges for all VM instructions in the block.
   // We assume that all machine instructions that define a value are
   // generated from the VM instruction corresponding to that value.
-  // 
+  // TODO: This could probably be done much more efficiently.
   for (BasicBlock::const_iterator II = bb->begin(); II != bb->end(); ++II)
-    {
-      const Instruction *instr = *II;
-      this->addNonSSAEdgesForValue(instr, target);
-    }
+    this->addNonSSAEdgesForValue(*II, target);
+#endif //NEED_SEPARATE_NONSSA_EDGES_CODE
   
   // Then add edges for dependences on machine registers
   this->addMachineRegEdges(regToRefVecMap, target);
@@ -807,9 +926,9 @@ SchedGraph::buildGraph(const TargetMachine& target)
 // 
 
 /*ctor*/
-SchedGraphSet::SchedGraphSet(const Method* _method,
+SchedGraphSet::SchedGraphSet(const Function* _function,  // Stores the function and immediately builds its graphs via buildGraphsForMethod.
                             const TargetMachine& target) :
-  method(_method)
+  method(_function)  // the member keeps its earlier name, method
 {
   buildGraphsForMethod(method, target);
 }
@@ -819,90 +938,72 @@ SchedGraphSet::SchedGraphSet(const Method* _method,
 SchedGraphSet::~SchedGraphSet()
 {
   // delete all the graphs
-  for (iterator I=begin(); I != end(); ++I)
-    delete (*I).second;
+  for(iterator I = begin(), E = end(); I != E; ++I)  // end() is evaluated once, before the loop
+    delete *I;  // elements are deleted directly; presumably SchedGraph grants this class friend access to its destructor (confirm in SchedGraph.h)
 }
 
 
 void
 SchedGraphSet::dump() const  // Dumps every graph in the set between a header banner and a footer banner.
 {
-  cout << "======== Sched graphs for method `"
-       << (method->hasName()? method->getName() : "???")
-       << "' ========" << endl << endl;
+  cerr << "======== Sched graphs for function `" << method->getName()  // banner is written to cerr; the old hasName() fallback is no longer applied
+       << "' ========\n\n";
   
   for (const_iterator I=begin(); I != end(); ++I)
-    (*I).second->dump();
+    (*I)->dump();  // elements are dereferenced directly; the old code went through .second
   
-  cout << endl << "====== End graphs for method `"
-       << (method->hasName()? method->getName() : "")
-       << "' ========" << endl << endl;
+  cerr << "\n====== End graphs for function `" << method->getName()
+       << "' ========\n\n";
 }
 
 
 void
-SchedGraphSet::buildGraphsForMethod(const Method *method,
+SchedGraphSet::buildGraphsForMethod(const Function *F,  // Creates one SchedGraph per basic block of F and adds it to this set.
                                     const TargetMachine& target)
 {
-  for (Method::const_iterator BI = method->begin(); BI != method->end(); ++BI)
-    {
-      SchedGraph* graph = new SchedGraph(*BI, target);
-      this->noteGraphForBlock(*BI, graph);
-    }   
+  for (Function::const_iterator BI = F->begin(); BI != F->end(); ++BI)
+    addGraph(new SchedGraph(BI, target));  // the iterator itself is passed; presumably it converts to the block pointer (confirm)
 }
 
 
-
-ostream&
-operator<<(ostream& os, const SchedGraphEdge& edge)
+std::ostream &operator<<(std::ostream &os, const SchedGraphEdge& edge)  // Writes a one-line description of edge to os and returns os.
 {
   os << "edge [" << edge.src->getNodeId() << "] -> ["
      << edge.sink->getNodeId() << "] : ";
   
   switch(edge.depType) {
   case SchedGraphEdge::CtrlDep:                os<< "Control Dep"; break;
-  case SchedGraphEdge::DefUseDep:      os<< "Reg Value " << edge.val; break;
-  case SchedGraphEdge::MemoryDep:      os<< "Mem Value " << edge.val; break;
+  case SchedGraphEdge::ValueDep:        os<< "Reg Value " << edge.val; break;
+  case SchedGraphEdge::MemoryDep:      os<< "Memory Dep"; break;  // edge.val is no longer printed for memory edges
   case SchedGraphEdge::MachineRegister: os<< "Reg " <<edge.machineRegNum;break;
   case SchedGraphEdge::MachineResource: os<<"Resource "<<edge.resourceId;break;
   default: assert(0); break;
   }
   
-  os << " : delay = " << edge.minDelay << endl;
+  os << " : delay = " << edge.minDelay << "\n";  // plain newline, so the stream is not flushed per edge
   
   return os;
 }
 
-ostream&
-operator<<(ostream& os, const SchedGraphNode& node)
+std::ostream &operator<<(std::ostream &os, const SchedGraphNode& node)
 {
-  printIndent(4, os);
-  os << "Node " << node.nodeId << " : "
-     << "latency = " << node.latency << endl;
-  
-  printIndent(6, os);
+  os << std::string(8, ' ')
+     << "Node " << node.nodeId << " : "
+     << "latency = " << node.latency << "\n" << std::string(12, ' ');
   
   if (node.getMachineInstr() == NULL)
-    os << "(Dummy node)" << endl;
+    os << "(Dummy node)\n";
   else
     {
-      os << *node.getMachineInstr() << endl;
-  
-      printIndent(6, os);
-      os << node.inEdges.size() << " Incoming Edges:" << endl;
+      os << *node.getMachineInstr() << "\n" << std::string(12, ' ');
+      os << node.inEdges.size() << " Incoming Edges:\n";
       for (unsigned i=0, N=node.inEdges.size(); i < N; i++)
-       {
-         printIndent(8, os);
-         os << * node.inEdges[i];
-       }
+         os << std::string(16, ' ') << *node.inEdges[i];
   
-      printIndent(6, os);
-      os << node.outEdges.size() << " Outgoing Edges:" << endl;
+      os << std::string(12, ' ') << node.outEdges.size()
+         << " Outgoing Edges:\n";
       for (unsigned i=0, N=node.outEdges.size(); i < N; i++)
-       {
-         printIndent(8, os);
-         os << * node.outEdges[i];
-       }
+        os << std::string(16, ' ') << *node.outEdges[i];
     }
   
   return os;