*** empty log message ***
authorGuochun Shi <gshi1@uiuc.edu>
Thu, 27 Mar 2003 17:57:44 +0000 (17:57 +0000)
committerGuochun Shi <gshi1@uiuc.edu>
Thu, 27 Mar 2003 17:57:44 +0000 (17:57 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@5755 91177308-0d34-0410-b5e6-96231b3b80d8

lib/CodeGen/ModuloScheduling/Makefile [new file with mode: 0644]
lib/CodeGen/ModuloScheduling/ModuloSchedGraph.cpp [new file with mode: 0644]
lib/CodeGen/ModuloScheduling/ModuloSchedGraph.h [new file with mode: 0644]
lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp [new file with mode: 0644]
lib/CodeGen/ModuloScheduling/ModuloScheduling.h [new file with mode: 0644]
lib/Target/SparcV9/ModuloScheduling/Makefile [new file with mode: 0644]
lib/Target/SparcV9/ModuloScheduling/ModuloSchedGraph.cpp [new file with mode: 0644]
lib/Target/SparcV9/ModuloScheduling/ModuloSchedGraph.h [new file with mode: 0644]
lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp [new file with mode: 0644]
lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h [new file with mode: 0644]

diff --git a/lib/CodeGen/ModuloScheduling/Makefile b/lib/CodeGen/ModuloScheduling/Makefile
new file mode 100644 (file)
index 0000000..adbc021
--- /dev/null
@@ -0,0 +1,7 @@
+LEVEL = ../../..
+
+DIRS  = 
+
+LIBRARYNAME = modulosched
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.cpp b/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.cpp
new file mode 100644 (file)
index 0000000..4e36b07
--- /dev/null
@@ -0,0 +1,1313 @@
+#include <math.h>
+#include "ModuloSchedGraph.h"
+#include "llvm/CodeGen/InstrSelection.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/iOther.h"
+#include "Support/StringExtras.h"
+#include "Support/STLExtras.h"
+#include <iostream>
+#include <swig.h>
+#include "llvm/iOperators.h"
+#include "llvm/iOther.h"
+#include "llvm/iPHINode.h"
+#include "llvm/iTerminators.h"
+#include "llvm/iMemory.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineCodeForInstruction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/MachineSchedInfo.h"
+
+#define UNIDELAY 1
+#define min(a, b)       ((a) < (b) ? (a) : (b))
+#define max(a, b)       ((a) < (b) ? (b) : (a))
+
+using std::vector;
+using std::pair;
+//using std::hash_map;
+using std::cerr;
+extern std::ostream modSched_os;
+extern ModuloSchedDebugLevel_t ModuloSchedDebugLevel;
+//*********************** Internal Data Structures *************************/
+
+// The following two types need to be classes, not typedefs, so we can use
+// opaque declarations in SchedGraph.h
+// 
+struct RefVec: public vector< pair<ModuloSchedGraphNode*, int> > {
+  typedef vector< pair<ModuloSchedGraphNode*, int> >::      iterator       iterator;
+  typedef vector< pair<ModuloSchedGraphNode*, int> >::const_iterator const_iterator;
+};
+
+struct RegToRefVecMap: public hash_map<int, RefVec> {
+  typedef hash_map<int, RefVec>::      iterator       iterator;
+  typedef hash_map<int, RefVec>::const_iterator const_iterator;
+};
+
+struct ValueToDefVecMap: public hash_map<const Instruction*, RefVec> {
+  typedef hash_map<const Instruction*, RefVec>::      iterator       iterator;
+  typedef hash_map<const Instruction*, RefVec>::const_iterator const_iterator;
+};
+
+
+
+// class Modulo SchedGraphNode
+
+/*ctor*/
+ModuloSchedGraphNode::ModuloSchedGraphNode(unsigned int _nodeId,
+                                    const BasicBlock* _bb,
+                                    const Instruction* _inst,
+                                    int indexInBB,
+                                    const TargetMachine& target)
+  :
+  SchedGraphNodeCommon(_nodeId, _bb, indexInBB),
+  inst(_inst)
+{
+  if(inst)
+    {
+      //FIXME: find the latency 
+      //currently setthe latency to zero
+      latency =0;
+    }
+
+}
+                                    
+//class ModuloScheGraph
+
+
+void
+ModuloSchedGraph::addDummyEdges()
+{
+  assert(graphRoot->outEdges.size() == 0);
+  
+  for (const_iterator I=begin(); I != end(); ++I)
+    {
+      ModuloSchedGraphNode* node = (ModuloSchedGraphNode*)( (*I).second);
+      assert(node != graphRoot && node != graphLeaf);
+      if (node->beginInEdges() == node->endInEdges())
+       (void) new SchedGraphEdge(graphRoot, node, SchedGraphEdge::CtrlDep,
+                                 SchedGraphEdge::NonDataDep, 0);
+      if (node->beginOutEdges() == node->endOutEdges())
+       (void) new SchedGraphEdge(node, graphLeaf, SchedGraphEdge::CtrlDep,
+                                 SchedGraphEdge::NonDataDep, 0);
+    }
+}
+
+bool isDefinition(const Instruction* I)
+{
+  //if(TerminatorInst::classof(I)||FreeInst::classof(I) || StoreInst::classof(I) || CallInst::classof(I))
+  if(!I->hasName())
+    return false;
+  else
+    return true;
+}
+
+void ModuloSchedGraph::addDefUseEdges(const BasicBlock* bb)
+{
+  //collect def instructions, store them in vector
+  const MachineInstrInfo& mii = target.getInstrInfo();
+  
+  typedef std::vector<ModuloSchedGraphNode*> DefVec;
+  DefVec defVec;
+  
+  //find those def instructions
+  for(BasicBlock::const_iterator I=bb->begin(), E=bb->end(); I !=E; I++)
+    if(I->getType() != Type::VoidTy)
+      {
+       defVec.push_back(this->getGraphNodeForInst(I)) ;
+      }
+  
+  for(unsigned int i=0; i < defVec.size();i++)
+    for(Value::use_const_iterator I=defVec[i]->getInst()->use_begin();I !=defVec[i]->getInst()->use_end()  ;I++)
+      {
+       //for each use of a def, add a flow edge from the def instruction to the ref instruction
+
+       
+       const Instruction* value=defVec[i]->getInst();
+       Instruction *inst=(Instruction*)(*I);
+       ModuloSchedGraphNode* node=NULL;
+
+       for(BasicBlock::const_iterator I=bb->begin(), E=bb->end(); I !=E; I++)
+         if((const Instruction*)I == inst)
+           {
+             node=(*this)[inst];
+             break;
+           }
+       
+       assert(inst != NULL &&" Use not an Instruction ?" );
+       
+       if(node == NULL) //inst is not an instruction in this block
+         {}
+       else
+         {
+           //add a flow edge from the def instruction to the ref instruction
+
+           //self loop will not happen in SSA form
+           assert(defVec[i] != node && "same node?");
+           
+
+           //this is a true dependence, so the delay is equal to the delay of the pred node.
+           int delay=0;
+           MachineCodeForInstruction& tempMvec=  MachineCodeForInstruction::get(value);
+           for(unsigned j=0;j< tempMvec.size();j++)
+             {
+               MachineInstr* temp=tempMvec[j];
+               //delay +=mii.minLatency(temp->getOpCode());
+               delay = max(delay, mii.minLatency(temp->getOpCode()));
+             }
+
+           SchedGraphEdge* trueEdge=new SchedGraphEdge(defVec[i],node,value, SchedGraphEdge::TrueDep,delay);
+
+           //if the ref instruction is before the def instrution
+           //then the def instruction must be a phi instruction 
+           //add an anti-dependence edge to from the ref instruction to the def instruction
+           if( node->getOrigIndexInBB() < defVec[i]->getOrigIndexInBB())
+             {
+               assert(PHINode::classof(inst) && "the ref instruction befre def is not PHINode?");
+               trueEdge->setIteDiff(1);
+             }
+           
+         }
+       
+      }
+}
+
+void ModuloSchedGraph::addCDEdges(const BasicBlock* bb)
+{
+  //find the last instruction in the basic block
+  //see if it is an branch instruction. 
+  //If yes, then add an edge from each node expcept the last node to the last node
+
+  const Instruction* inst=&(bb->back());
+  ModuloSchedGraphNode* lastNode=(*this)[inst];
+  if( TerminatorInst::classof(inst))
+    for(BasicBlock::const_iterator I=bb->begin(),E=bb->end(); I != E; I++)
+      {  
+       if(inst != I)
+         {
+           ModuloSchedGraphNode* node = (*this)[I];
+           //use latency of 0
+           (void) new SchedGraphEdge(node,lastNode,SchedGraphEdge::CtrlDep,
+                                     SchedGraphEdge::NonDataDep,0);
+         }
+       
+      }
+  
+}
+
+
+
+
+static const int SG_LOAD_REF  = 0;
+static const int SG_STORE_REF = 1;
+static const int SG_CALL_REF  = 2;
+
+static const unsigned int SG_DepOrderArray[][3] = {
+  { SchedGraphEdge::NonDataDep,
+            SchedGraphEdge::AntiDep,
+                        SchedGraphEdge::AntiDep },
+  { SchedGraphEdge::TrueDep,
+            SchedGraphEdge::OutputDep,
+                        SchedGraphEdge::TrueDep | SchedGraphEdge::OutputDep },
+  { SchedGraphEdge::TrueDep,
+            SchedGraphEdge::AntiDep | SchedGraphEdge::OutputDep,
+                        SchedGraphEdge::TrueDep | SchedGraphEdge::AntiDep
+                                                | SchedGraphEdge::OutputDep }
+};
+
+
+// Add a dependence edge between every pair of machine load/store/call
+// instructions, where at least one is a store or a call.
+// Use latency 1 just to ensure that memory operations are ordered;
+// latency does not otherwise matter (true dependences enforce that).
+// 
+void
+ModuloSchedGraph::addMemEdges(const BasicBlock* bb)
+{
+  
+  vector<ModuloSchedGraphNode*> memNodeVec;
+
+  //construct the memNodeVec
+  
+  for( BasicBlock::const_iterator I=bb->begin(), E=bb->end();I != E; I++)
+    if(LoadInst::classof(I) ||StoreInst::classof(I) || CallInst::classof(I))
+      {
+       ModuloSchedGraphNode* node =(*this)[(const Instruction*)I];
+       memNodeVec.push_back(node);
+      }
+  // Instructions in memNodeVec are in execution order within the basic block,
+  // so simply look at all pairs <memNodeVec[i], memNodeVec[j: j > i]>.
+  // 
+  for (unsigned im=0, NM=memNodeVec.size(); im < NM; im++)
+    {
+      const Instruction* fromInst = memNodeVec[im]->getInst();
+      int fromType = CallInst::classof(fromInst)? SG_CALL_REF
+       : LoadInst::classof(fromInst)? SG_LOAD_REF
+       : SG_STORE_REF;
+      for (unsigned jm=im+1; jm < NM; jm++)
+       {
+         const Instruction* toInst=memNodeVec[jm]->getInst();
+         int toType = CallInst::classof(toInst)? SG_CALL_REF
+           : LoadInst::classof(toInst)? SG_LOAD_REF
+           : SG_STORE_REF;
+          if (fromType != SG_LOAD_REF || toType != SG_LOAD_REF)
+           {
+             (void) new SchedGraphEdge(memNodeVec[im], memNodeVec[jm],
+                                       SchedGraphEdge::MemoryDep,
+                                       SG_DepOrderArray[fromType][toType], 1);
+             
+             SchedGraphEdge* edge=new SchedGraphEdge(memNodeVec[jm],memNodeVec[im],
+                                                     SchedGraphEdge::MemoryDep,
+                                                     SG_DepOrderArray[toType][fromType],1);
+             edge->setIteDiff(1);
+             
+           }
+       }
+    }
+} 
+
+
+
+void ModuloSchedGraph::buildNodesforBB  (const TargetMachine& target,
+                                     const BasicBlock* bb,
+                                     std::vector<ModuloSchedGraphNode*>& memNode,
+                                     RegToRefVecMap& regToRefVecMap,
+                                     ValueToDefVecMap& valueToDefVecMap)
+{
+  //const MachineInstrInfo& mii=target.getInstrInfo();
+  
+  //Build graph nodes for each LLVM instruction and gather def/use info.
+  //Do both together in a single pass over all machine instructions.
+  
+  int i=0;
+  for(BasicBlock::const_iterator I=bb->begin(), E=bb->end(); I!=E; I++)
+    {
+      ModuloSchedGraphNode* node=new ModuloSchedGraphNode(getNumNodes(), bb,I,i, target);
+      i++;
+      this->noteModuloSchedGraphNodeForInst(I,node);
+    }
+
+  //this function finds some info about instruction in basic block for later use
+  //findDefUseInfoAtInstr(target, node, memNode,regToRefVecMap,valueToDefVecMap);
+
+
+}
+
+
+bool ModuloSchedGraph::isLoop(const BasicBlock* bb)
+{
+  //only if the last instruction in the basicblock is branch instruction and 
+  //there is at least an option to branch itself
+
+  
+  const Instruction* inst=&(bb->back());
+  if( BranchInst::classof(inst))
+    for(unsigned i=0;i < ((BranchInst*)inst)->getNumSuccessors();i++)
+      {
+       BasicBlock* sb=((BranchInst*)inst)->getSuccessor(i);
+       if(sb ==bb) return true;
+      }
+  
+  return false;
+  
+}
+bool ModuloSchedGraph::isLoop()
+{
+  //only if the last instruction in the basicblock is branch instruction and 
+  //there is at least an option to branch itself
+  
+  assert(bbVec.size() ==1 &&"only 1 basicblock in a graph");
+  const BasicBlock* bb=bbVec[0];
+  const Instruction* inst=&(bb->back());
+  if( BranchInst::classof(inst))
+    for(unsigned i=0;i < ((BranchInst*)inst)->getNumSuccessors();i++)
+      {
+       BasicBlock* sb=((BranchInst*)inst)->getSuccessor(i);
+       if(sb ==bb) return true;
+      }
+  
+  return false;
+  
+}
+
+void ModuloSchedGraph::computeNodeASAP(const BasicBlock* bb)
+{
+
+ //FIXME: now assume the only backward edges come from the edges from other nodes to the phi Node
+  //so i will ignore all edges to the phi node; after this, there shall be no recurrence. 
+  
+  unsigned numNodes=bb->size();
+  for(unsigned i=2;i < numNodes+2;i++)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->setPropertyComputed(false);
+    }
+
+  for(unsigned i=2;i < numNodes+2; i++)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->ASAP=0;
+      if(i==2 ||node->getNumInEdges() ==0) 
+       { node->setPropertyComputed(true);continue;}
+      for(ModuloSchedGraphNode::const_iterator I=node->beginInEdges(), E=node->endInEdges();I !=E; I++)
+       {
+         SchedGraphEdge* edge=*I;
+         ModuloSchedGraphNode* pred=(ModuloSchedGraphNode*)(edge->getSrc());
+         assert(pred->getPropertyComputed()&&"pred node property is not computed!");
+         int temp=pred->ASAP + edge->getMinDelay() - edge->getIteDiff()*this->MII;
+         node->ASAP= max(node->ASAP,temp);
+       }
+      node->setPropertyComputed(true);
+    }
+}
+
+void ModuloSchedGraph::computeNodeALAP(const BasicBlock* bb)
+{
+  
+  unsigned numNodes=bb->size();
+  int maxASAP=0;
+  for(unsigned i=numNodes+1;i >= 2;i--)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->setPropertyComputed(false);
+      //cerr<< " maxASAP= " <<maxASAP<<" node->ASAP= "<< node->ASAP<<"\n";
+      maxASAP=max(maxASAP,node->ASAP);
+    }
+
+  //cerr<<"maxASAP is "<<maxASAP<<"\n";
+  
+  for(unsigned i=numNodes+1;i >= 2; i--)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->ALAP=maxASAP;
+      for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges(); I!=E; I++)
+       {
+         SchedGraphEdge* edge= *I;
+         ModuloSchedGraphNode* succ=(ModuloSchedGraphNode*) (edge->getSink());
+         if(PHINode::classof(succ->getInst()))continue;
+         assert(succ->getPropertyComputed()&&"succ node property is not computed!");
+         int temp=succ->ALAP - edge->getMinDelay()+edge->getIteDiff()*this->MII;
+         node->ALAP =min(node->ALAP,temp);
+       }
+      node->setPropertyComputed(true);
+      
+    }
+}
+
+void ModuloSchedGraph::computeNodeMov(const BasicBlock* bb)
+{
+  unsigned numNodes=bb->size();
+  for(unsigned i =2;i < numNodes +2 ;i++)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->mov=node->ALAP-node->ASAP;
+      assert(node->mov >=0 &&"move freedom for this node is less than zero? ");
+    }
+}
+
+
+void ModuloSchedGraph::computeNodeDepth(const BasicBlock* bb)
+{
+
+  unsigned numNodes=bb->size();
+  for(unsigned i=2;i < numNodes+2;i++)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->setPropertyComputed(false);
+    }
+
+  for(unsigned i=2;i < numNodes+2; i++)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->depth=0;
+      if(i==2 ||node->getNumInEdges() ==0) 
+       { node->setPropertyComputed(true);continue;}
+      for(ModuloSchedGraphNode::const_iterator I=node->beginInEdges(), E=node->endInEdges();I !=E; I++)
+       {
+         SchedGraphEdge* edge=*I;
+         ModuloSchedGraphNode* pred=(ModuloSchedGraphNode*)(edge->getSrc());
+         assert(pred->getPropertyComputed()&&"pred node property is not computed!");
+         int temp=pred->depth + edge->getMinDelay();
+         node->depth= max(node->depth,temp);
+       }
+      node->setPropertyComputed(true);
+    }
+
+}
+
+
+void ModuloSchedGraph::computeNodeHeight(const BasicBlock* bb)
+{
+  unsigned numNodes=bb->size();
+  for(unsigned i=numNodes+1;i >= 2;i--)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->setPropertyComputed(false);
+    }
+  
+  for(unsigned i=numNodes+1;i >= 2; i--)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->height=0;
+      for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges(); I!=E; I++)
+       {
+         SchedGraphEdge* edge= *I;
+         ModuloSchedGraphNode* succ=(ModuloSchedGraphNode*)(edge->getSink());
+         if(PHINode::classof(succ->getInst())) continue;
+         assert(succ->getPropertyComputed()&&"succ node property is not computed!");
+         node->height=max(node->height, succ->height+edge->getMinDelay());
+         
+       }
+      node->setPropertyComputed(true);
+      
+    }
+
+}
+
+void ModuloSchedGraph::computeNodeProperty(const BasicBlock* bb)
+{
+  //FIXME: now assume the only backward edges come from the edges from other nodes to the phi Node
+  //so i will ignore all edges to the phi node; after this, there shall be no recurrence. 
+  
+  this->computeNodeASAP(bb);
+  this->computeNodeALAP(bb);
+  this->computeNodeMov(bb);
+  this->computeNodeDepth(bb);
+  this->computeNodeHeight(bb); 
+}
+
+
+//do not consider the backedge in these two functions:
+// i.e. don't consider the edge with destination in phi node
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::predSet(std::vector<ModuloSchedGraphNode*> set, unsigned backEdgeSrc, unsigned backEdgeSink)
+{
+  std::vector<ModuloSchedGraphNode*> predS;
+  for(unsigned i=0; i < set.size(); i++)
+    {
+      ModuloSchedGraphNode* node = set[i];
+      for(ModuloSchedGraphNode::const_iterator I=node->beginInEdges(), E=node->endInEdges(); I !=E; I++)
+       {
+         SchedGraphEdge* edge= *I;
+         if(edge->getSrc()->getNodeId() ==backEdgeSrc && edge->getSink()->getNodeId() == backEdgeSink) continue;
+         ModuloSchedGraphNode* pred= (ModuloSchedGraphNode*)(edge->getSrc());
+         //if pred is not in the predSet, push it in vector
+         bool alreadyInset=false;
+         for(unsigned j=0; j < predS.size(); j++)
+           if(predS[j]->getNodeId() == pred->getNodeId() ) 
+             {alreadyInset=true;break;}
+         
+         for(unsigned j=0; j < set.size(); j++)
+           if(set[j]->getNodeId()  == pred->getNodeId() ) 
+             {alreadyInset=true; break;}
+         
+         if(! alreadyInset)
+           predS.push_back(pred);
+       }
+    }
+  return predS;
+}
+
+ModuloSchedGraph::NodeVec ModuloSchedGraph::predSet(NodeVec set)
+{
+  //node number increases from 2
+  return predSet(set,0, 0);
+}
+
+
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::predSet(ModuloSchedGraphNode* _node, unsigned backEdgeSrc, unsigned backEdgeSink)
+{
+  
+  std::vector<ModuloSchedGraphNode*> set;
+  set.push_back(_node);
+  return predSet(set, backEdgeSrc, backEdgeSink);
+
+}
+
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::predSet(ModuloSchedGraphNode* _node)
+{
+  return predSet(_node,0,0);
+}
+
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::succSet(std::vector<ModuloSchedGraphNode*> set,unsigned src, unsigned sink)
+{
+  vector<ModuloSchedGraphNode*> succS;
+  for(unsigned i=0; i < set.size(); i++)
+    {
+      ModuloSchedGraphNode* node = set[i];
+      for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges(); I !=E; I++)
+       {
+         SchedGraphEdge* edge= *I;
+         if(edge->getSrc()->getNodeId() == src && edge->getSink()->getNodeId() == sink) continue;
+         ModuloSchedGraphNode* succ= (ModuloSchedGraphNode*)(edge->getSink());
+         //if pred is not in the predSet, push it in vector
+         bool alreadyInset=false;
+         for(unsigned j=0; j < succS.size(); j++)
+           if(succS[j]->getNodeId() == succ->getNodeId() ) 
+             {alreadyInset=true;break;}
+
+         for(unsigned j=0; j < set.size(); j++)
+           if(set[j]->getNodeId() == succ->getNodeId() ) 
+             {alreadyInset=true;break;}
+         if(! alreadyInset)
+           succS.push_back(succ);
+       }
+    }
+  return succS; 
+}
+
+ModuloSchedGraph::NodeVec ModuloSchedGraph::succSet(NodeVec set)
+{
+  return succSet(set,0,0);
+}
+
+
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::succSet(ModuloSchedGraphNode* _node,unsigned src, unsigned sink)
+{
+  std::vector<ModuloSchedGraphNode*> set;
+  set.push_back(_node);
+  return succSet(set, src, sink);
+  
+  
+}
+
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::succSet(ModuloSchedGraphNode* _node)
+{
+  return succSet(_node, 0, 0);
+}
+
+SchedGraphEdge* ModuloSchedGraph::getMaxDelayEdge(unsigned srcId, unsigned sinkId)
+{
+  
+  ModuloSchedGraphNode* node =getNode(srcId);
+  SchedGraphEdge* maxDelayEdge=NULL;
+  int maxDelay=-1;
+  for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges();I !=E; I++)
+    {
+      SchedGraphEdge* edge= *I;
+      if(edge->getSink()->getNodeId() == sinkId)
+       if(edge->getMinDelay() > maxDelay){
+         maxDelayEdge=edge;
+         maxDelay=edge->getMinDelay(); 
+       }
+    }
+  assert(maxDelayEdge != NULL&&"no edge between the srcId and sinkId?");
+  return maxDelayEdge;
+}
+
+void ModuloSchedGraph::dumpCircuits()
+{
+  modSched_os<<"dumping circuits for graph: "<<"\n";
+  int j=-1;
+  while(circuits[++j][0] !=0){
+    int k=-1;
+    while(circuits[j][++k] !=0)
+      modSched_os<< circuits[j][k]<<"\t";
+    modSched_os<<"\n";
+  }
+}
+
+void ModuloSchedGraph::dumpSet(std::vector<ModuloSchedGraphNode*> set)
+{
+  for(unsigned i=0;i < set.size() ; i++)
+    modSched_os<< set[i]->getNodeId()<<"\t";
+  modSched_os<<"\n";
+}
+
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::vectorUnion(std::vector<ModuloSchedGraphNode*> set1,std::vector<ModuloSchedGraphNode*> set2 )
+{
+  std::vector<ModuloSchedGraphNode*> unionVec;
+  for(unsigned i=0;i<set1.size();i++)
+    unionVec.push_back(set1[i]);
+  for(unsigned j=0;j < set2.size();j++){
+    bool inset=false;
+    for(unsigned i=0;i < unionVec.size();i++)
+      if(set2[j]==unionVec[i]) inset=true;
+    if(!inset)unionVec.push_back(set2[j]);
+  }
+  return unionVec;
+}
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::vectorConj(std::vector<ModuloSchedGraphNode*> set1,std::vector<ModuloSchedGraphNode*> set2 )
+{
+   std::vector<ModuloSchedGraphNode*> conjVec;
+  for(unsigned i=0;i<set1.size();i++)
+    for(unsigned j=0;j < set2.size();j++)
+      if(set1[i]==set2[j]) conjVec.push_back(set1[i]);
+  return conjVec; 
+}
+
+ModuloSchedGraph::NodeVec ModuloSchedGraph::vectorSub(NodeVec set1, NodeVec set2)
+{
+  NodeVec newVec;
+  for(NodeVec::iterator I=set1.begin(); I != set1.end(); I++){
+    bool inset=false;
+    for(NodeVec::iterator II=set2.begin(); II!=set2.end(); II++)
+      if( (*I)->getNodeId() ==(*II)->getNodeId()) 
+       {inset=true;break;}
+    if(!inset) newVec.push_back(*I);
+  }
+  return newVec;
+}
+  
+void ModuloSchedGraph::orderNodes(){
+  oNodes.clear();
+  
+  std::vector<ModuloSchedGraphNode*> set;
+  const BasicBlock* bb=bbVec[0];
+  unsigned numNodes = bb->size();
+  
+  //first order all the sets
+  int j=-1;
+  int totalDelay=-1;
+  int preDelay=-1;
+  while(circuits[++j][0] !=0){
+    int k=-1;
+    preDelay=totalDelay;
+
+    while(circuits[j][++k] !=0){
+      ModuloSchedGraphNode* node =getNode(circuits[j][k]);
+      unsigned nextNodeId;
+      nextNodeId =circuits[j][k+1] !=0? circuits[j][k+1]:circuits[j][0];
+      SchedGraphEdge* edge = getMaxDelayEdge(circuits[j][k], nextNodeId);
+      totalDelay += edge->getMinDelay();
+    }
+    if(preDelay != -1 && totalDelay > preDelay){
+      //swap circuits[j][] and cuicuits[j-1][]
+      unsigned temp[MAXNODE];
+      for(int k=0;k<MAXNODE;k++){
+       temp[k]=circuits[j-1][k];       
+       circuits[j-1][k]=circuits[j][k];
+       circuits[j][k]=temp[k];
+      }
+      //restart
+      j=-1;
+    }
+  }
+
+  //build the first set
+  int backEdgeSrc;
+  int backEdgeSink;
+  if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+    modSched_os<<"building the first set"<<"\n";
+  int setSeq=-1;
+  int k=-1;
+  setSeq++;
+  while(circuits[setSeq][++k]!=0)
+    set.push_back(getNode(circuits[setSeq][k]));
+  if(circuits[setSeq][0]!=0){
+    backEdgeSrc=circuits[setSeq][k-1];
+    backEdgeSink=circuits[setSeq][0];
+  }
+  if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){
+    modSched_os<<"the first set is:";
+    dumpSet(set);
+  }
+  
+  //implement the ordering algorithm
+  enum OrderSeq{ bottom_up, top_down};
+  OrderSeq order;
+  std::vector<ModuloSchedGraphNode*> R;
+  while(!set.empty())
+    {
+      std::vector<ModuloSchedGraphNode*> pset=predSet(oNodes);
+      std::vector<ModuloSchedGraphNode*> sset=succSet(oNodes);
+      
+      if(!pset.empty() && !vectorConj(pset,set).empty())
+       {R=vectorConj(pset,set);order=bottom_up;}
+      else if( !sset.empty() && !vectorConj(sset,set).empty())
+       {R=vectorConj(sset,set);order=top_down;}
+      else
+       {
+         int maxASAP=-1;
+         int position=-1;
+         for(unsigned i=0;i<set.size();i++){
+           int temp= set[i]->getASAP();
+           if(temp>maxASAP ) {
+             maxASAP=temp;
+             position=i;
+           }
+         }
+         R.push_back(set[position]);
+         order=bottom_up;      
+       }
+      
+      while(!R.empty()){
+       if(order== top_down){
+         if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+           modSched_os<<"in top_down round"<<"\n";
+         while(!R.empty()){
+           int maxHeight=-1;
+           NodeVec::iterator chosenI;
+           for(NodeVec::iterator I=R.begin();I != R.end();I++){
+             int temp=(*I)->height;
+             if( (temp > maxHeight) || ( temp == maxHeight && (*I)->mov <= (*chosenI)->mov ) ){
+               
+               if((temp > maxHeight) || ( temp == maxHeight && (*I)->mov < (*chosenI)->mov )){
+                 maxHeight=temp;
+                 chosenI=I;
+                 continue;
+               }
+               //possible case: instruction A and B has the same height and mov, but A has dependence to B
+               //e.g B is the branch instruction in the end, or A is the phi instruction at the beginning
+               if((*I)->mov == (*chosenI)->mov)
+                 for(ModuloSchedGraphNode::const_iterator oe=(*I)->beginOutEdges(), end=(*I)->endOutEdges();oe !=end; oe++){
+                   if((*oe)->getSink() == (*chosenI)){
+                     maxHeight=temp;
+                     chosenI=I;
+                     continue;
+                   }
+                 }
+             }
+           }
+           ModuloSchedGraphNode* mu= *chosenI;
+           oNodes.push_back(mu);
+           R.erase(chosenI);
+           std::vector<ModuloSchedGraphNode*> succ_mu= succSet(mu,backEdgeSrc,backEdgeSink);
+             std::vector<ModuloSchedGraphNode*> comm=vectorConj(succ_mu,set);
+             comm=vectorSub(comm,oNodes);
+           R = vectorUnion(comm, R);
+         }
+         order=bottom_up;
+         R= vectorConj(predSet(oNodes), set);
+       } else{
+         if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+           modSched_os<<"in bottom up round"<<"\n";
+         while(!R.empty()){
+           int maxDepth=-1;
+           NodeVec::iterator chosenI;
+           for(NodeVec::iterator I=R.begin();I != R.end();I++){
+             int temp=(*I)->depth;
+             if( (temp > maxDepth) || ( temp == maxDepth && (*I)->mov < (*chosenI)->mov )){
+               maxDepth=temp;
+               chosenI=I;
+             }
+           }
+           ModuloSchedGraphNode* mu=*chosenI;
+           oNodes.push_back(mu);
+           R.erase(chosenI);
+           std::vector<ModuloSchedGraphNode*> pred_mu= predSet(mu,backEdgeSrc,backEdgeSink);
+           std::vector<ModuloSchedGraphNode*> comm=vectorConj(pred_mu,set);
+           comm=vectorSub(comm,oNodes);
+           R = vectorUnion(comm, R);
+         }
+         order=top_down;
+         R= vectorConj(succSet(oNodes), set);
+       }
+      }
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){
+       modSched_os<<"order finished"<<"\n";
+       modSched_os<<"dumping the ordered nodes: "<<"\n";
+       dumpSet(oNodes);
+       dumpCircuits();
+      }
+      //create a new set
+      //FIXME: the nodes between onodes and this circuit should also be include in this set
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       modSched_os<<"building the next set"<<"\n";
+      set.clear();
+      int k=-1;
+      setSeq++;
+      while(circuits[setSeq][++k]!=0)
+       set.push_back(getNode(circuits[setSeq][k]));
+      if(circuits[setSeq][0]!=0){
+       backEdgeSrc=circuits[setSeq][k-1];
+       backEdgeSink=circuits[setSeq][0];
+      }
+      if(set.empty()){
+       //no circuits any more
+       //collect all other nodes
+       if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+         modSched_os<<"no circuits any more, collect the rest nodes"<<"\n";
+       for(unsigned i=2;i<numNodes+2;i++){
+         bool inset=false;
+         for(unsigned j=0;j< oNodes.size();j++)
+           if(oNodes[j]->getNodeId() == i) 
+             {inset=true;break;}
+         if(!inset)set.push_back(getNode(i));
+       }
+      }
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){
+       modSched_os<<"next set is: "<<"\n";
+       dumpSet(set);
+      }
+    }//while(!set.empty())
+  
+}
+
+
+
+
+
+
+void ModuloSchedGraph::buildGraph (const TargetMachine& target)
+{
+  const BasicBlock* bb=bbVec[0]; 
+
+  assert(bbVec.size() ==1 &&"only handling a single basic block here");
+
+  // Use this data structure to note all machine operands that compute
+  // ordinary LLVM values.  These must be computed defs (i.e., instructions). 
+  // Note that there may be multiple machine instructions that define
+  // each Value.
+  ValueToDefVecMap valueToDefVecMap;
+
+  // Use this data structure to note all memory instructions.
+  // We use this to add memory dependence edges without a second full walk.
+  // 
+  // vector<const Instruction*> memVec;
+  vector<ModuloSchedGraphNode*> memNodeVec;
+
+   // Use this data structure to note any uses or definitions of
+  // machine registers so we can add edges for those later without
+  // extra passes over the nodes.
+  // The vector holds an ordered list of references to the machine reg,
+  // ordered according to control-flow order.  This only works for a
+  // single basic block, hence the assertion.  Each reference is identified
+  // by the pair: <node, operand-number>.
+  // 
+  RegToRefVecMap regToRefVecMap;
+  
+
+
+ // Make a dummy root node.  We'll add edges to the real roots later.
+  graphRoot = new ModuloSchedGraphNode(0, NULL, NULL, -1,target);
+  graphLeaf = new ModuloSchedGraphNode(1, NULL, NULL, -1,target);
+ //----------------------------------------------------------------
+  // First add nodes for all the LLVM instructions in the basic block
+  // because this greatly simplifies identifying which edges to add.
+  // Do this one VM instruction at a time since the ModuloSchedGraphNode needs that.
+  // Also, remember the load/store instructions to add memory deps later.
+  //----------------------------------------------------------------
+
+  //FIXME:if there is call instruction, then we shall quit somewhere
+  //      currently we leave call instruction and continue construct graph
+
+  //dump only the blocks which are from loops
+
+
+  if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+    this->dump(bb);
+
+  if(!isLoop(bb)){
+    modSched_os <<" dumping non-loop BB:"<<endl;
+    dump(bb);
+  }
+  if( isLoop(bb))
+    {
+      buildNodesforBB(target, bb, memNodeVec, regToRefVecMap, valueToDefVecMap);
+      
+      this->addDefUseEdges(bb);
+      
+      this->addCDEdges(bb);
+      
+      this->addMemEdges(bb);
+      
+      //this->dump();
+      
+      int ResII=this->computeResII(bb);
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       modSched_os << "ResII is " << ResII<<"\n";;
+      int RecII=this->computeRecII(bb);      
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       modSched_os << "RecII is " <<RecII<<"\n";
+      
+      this->MII=max(ResII, RecII);
+      
+      this->computeNodeProperty(bb);
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       this->dumpNodeProperty();
+      
+      this->orderNodes();
+      
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       this->dump();
+      //this->instrScheduling();
+      
+      //this->dumpScheduling();
+      
+      
+    }
+}
+
+ModuloSchedGraphNode* ModuloSchedGraph::getNode  (const unsigned nodeId) const
+{
+  for(const_iterator I=begin(), E=end();I !=E;I++)
+    if((*I).second->getNodeId() ==nodeId)
+      return (ModuloSchedGraphNode*)(*I).second;
+  return NULL;
+}
+
+int ModuloSchedGraph::computeRecII(const BasicBlock* bb)
+{
+
+  int  RecII=0;
+
+  //os<<"begining computerRecII()"<<"\n";
+
+
+  //FIXME: only deal with circuits starting at the first node: the phi node nodeId=2;
+
+  //search all elementary circuits in the dependance graph
+  //assume maximum number of nodes is MAXNODE
+  
+  unsigned path[MAXNODE];
+  unsigned stack[MAXNODE][MAXNODE];
+
+
+  for(int j=0;j<MAXNODE;j++)
+    {path[j]=0;
+    for(int k=0;k<MAXNODE;k++)
+      stack[j][k]=0;
+    }
+  //in our graph, the node number starts at 2
+
+  //number of nodes, because each instruction will result in one node
+  const unsigned numNodes= bb->size();
+  
+  int i=0;
+  path[i]=2;
+
+  ModuloSchedGraphNode* initNode=getNode(path[0]);
+  unsigned initNodeId=initNode->getNodeId();
+  ModuloSchedGraphNode* currentNode=initNode;
+  
+  while(currentNode != NULL)
+    {
+      unsigned currentNodeId=currentNode->getNodeId();
+      //      modSched_os<<"current node is "<<currentNodeId<<"\n";
+      
+      ModuloSchedGraphNode* nextNode=NULL;
+      for(ModuloSchedGraphNode::const_iterator I=currentNode->beginOutEdges(), E=currentNode->endOutEdges(); I !=E; I++)
+       {
+         //modSched_os <<" searching in outgoint edges of node "<<currentNodeId<<"\n";
+         unsigned nodeId=((SchedGraphEdge*) *I)->getSink()->getNodeId();
+         bool inpath=false,instack=false;
+         int k;
+
+         //modSched_os<<"nodeId is "<<nodeId<<"\n";
+
+         k=-1;
+         while(path[++k] !=0)
+           if(nodeId == path[k])
+             {inpath=true;break;}
+         
+
+
+         k=-1;
+         while(stack[i][++k] !=0 )
+           if(nodeId == stack[i][k])
+             {instack =true; break;}
+
+
+         if( nodeId > currentNodeId && !inpath && !instack)
+           {nextNode=(ModuloSchedGraphNode*) ((SchedGraphEdge*)*I)->getSink();break;}
+         
+       }
+      if(nextNode != NULL)
+       {
+         //modSched_os<<"find the next Node "<<nextNode->getNodeId()<<"\n";
+         
+         
+         int j=0;
+         while( stack[i][j] !=0) j++;
+         stack[i][j]=nextNode->getNodeId();
+
+
+         i++;
+         path[i]=nextNode->getNodeId();
+         currentNode=nextNode;
+       }
+      else
+       {
+         //modSched_os<<"no expansion any more"<<"\n";
+         //confirmCircuit();
+         for(ModuloSchedGraphNode::const_iterator I=currentNode->beginOutEdges(), E=currentNode->endOutEdges() ; I != E; I++)
+           {
+             unsigned nodeId=((SchedGraphEdge*)*I)->getSink()->getNodeId();
+             if(nodeId == initNodeId)
+               {
+
+                 int j=-1;
+                 while(circuits[++j][0] !=0);
+                 for(int k=0;k<MAXNODE;k++)circuits[j][k]=path[k];
+
+               }
+           }
+         //remove this node in the path and clear the corresponding entries in the stack
+         path[i]=0;
+         int j=0;
+         for(j=0;j<MAXNODE;j++)stack[i][j]=0;
+         i--;
+         currentNode=getNode(path[i]);
+       }
+      if(i==0) 
+       {
+
+         if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+           modSched_os<<"circuits found are: "<<"\n";
+         int j=-1;
+         while(circuits[++j][0] !=0){
+           int k=-1;
+           while(circuits[j][++k] !=0)
+             if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+               modSched_os<< circuits[j][k]<<"\t";
+           if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+             modSched_os<<"\n";
+         
+           
+           //for this circuit, compute the sum of all edge delay
+           int sumDelay=0;
+           k=-1;
+           while(circuits[j][++k] !=0)
+             {
+               //ModuloSchedGraphNode* node =getNode(circuits[j][k]);
+               unsigned nextNodeId;
+               nextNodeId =circuits[j][k+1] !=0? circuits[j][k+1]:circuits[j][0];
+
+               /*
+               for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges();I !=E; I++)
+               {
+                 SchedGraphEdge* edge= *I;
+                   if(edge->getSink()->getNodeId() == nextNodeId)
+                   {sumDelay  += edge->getMinDelay();break;}
+                     }
+               */
+
+               sumDelay += getMaxDelayEdge(circuits[j][k],nextNodeId)->getMinDelay();
+
+             }
+           //       assume we have distance 1, in this case the sumDelay is RecII
+           //       this is correct for SSA form only
+           //      
+           if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+             modSched_os<<"The total Delay in the circuit is " <<sumDelay<<"\n";
+           
+           RecII= RecII > sumDelay? RecII: sumDelay;
+           
+         }
+         return RecII;
+       }
+      
+    }
+  
+  return -1;
+}
+
+void  ModuloSchedGraph::addResourceUsage(std::vector<pair<int,int> >& ruVec, int rid){
+  //modSched_os<<"\nadding a resouce , current resouceUsage vector size is "<<ruVec.size()<<"\n";
+  bool alreadyExists=false;
+  for(unsigned i=0;i <ruVec.size() ; i++){
+    if(rid == ruVec[i].first) {
+      ruVec[i].second++;
+      alreadyExists=true;
+      break;
+    }
+  }
+  if( !alreadyExists) ruVec.push_back(std::make_pair(rid, 1));
+  //modSched_os<<"current resouceUsage vector size is "<<ruVec.size()<<"\n";
+
+}
+void ModuloSchedGraph::dumpResourceUsage(std::vector< pair<int,int> > &ru)
+{
+  MachineSchedInfo& msi = (MachineSchedInfo&)target.getSchedInfo();
+  
+  std::vector<pair<int,int> > resourceNumVector = msi.resourceNumVector;
+  modSched_os <<"resourceID\t"<<"resourceNum"<<"\n";
+  for(unsigned i=0;i<resourceNumVector.size();i++)
+    modSched_os <<resourceNumVector[i].first<<"\t"<<resourceNumVector[i].second<<"\n";
+  
+  modSched_os <<" maxNumIssueTotal(issue slot in one cycle) = "<<msi.maxNumIssueTotal<<"\n";
+  modSched_os<<"resourceID\t resourceUsage\t ResourceNum"<<"\n";
+  for(unsigned i=0;i<ru.size();i++){
+    modSched_os<<ru[i].first<<"\t"<<ru[i].second;
+    const unsigned  resNum=msi.getCPUResourceNum(ru[i].first);
+    modSched_os<<"\t"<<resNum<<"\n";
+  }
+}
+
+int ModuloSchedGraph::computeResII(const BasicBlock* bb)
+{
+  
+  const MachineInstrInfo& mii = target.getInstrInfo();
+  const MachineSchedInfo& msi = target.getSchedInfo();
+  
+  int ResII;
+  std::vector<pair<int,int> > resourceUsage; //pair<int resourceid, int resourceUsageTimes_in_the_whole_block>
+  
+  //FIXME: number of functional units the target machine can provide should be get from Target
+  //       here I temporary hardcode it
+  
+  for(BasicBlock::const_iterator I=bb->begin(),E=bb->end(); I !=E; I++)
+    {
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){
+       modSched_os<<"machine instruction for llvm instruction( node "<<getGraphNodeForInst(I)->getNodeId()<<")" <<"\n";
+       modSched_os<<"\t"<<*I;
+      }
+      MachineCodeForInstruction& tempMvec=  MachineCodeForInstruction::get(I);
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       modSched_os <<"size =" <<tempMvec.size()<<"\n";
+      for(unsigned i=0;i< tempMvec.size();i++)
+       {
+         MachineInstr* minstr=tempMvec[i];
+         
+         unsigned minDelay=mii.minLatency(minstr->getOpCode());
+         InstrRUsage rUsage=msi.getInstrRUsage(minstr->getOpCode());
+         InstrClassRUsage classRUsage=msi.getClassRUsage(mii.getSchedClass(minstr->getOpCode()));
+         unsigned totCycles= classRUsage.totCycles;
+         
+         std::vector<std::vector<resourceId_t> > resources
+           =rUsage.resourcesByCycle;
+         assert(totCycles == resources.size());
+         if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+           modSched_os <<"resources Usage for this Instr(totCycles=" << totCycles << ",mindLatency="<<mii.minLatency(minstr->getOpCode())<<"): "<< *minstr <<"\n";
+         for(unsigned j=0;j< resources.size();j++){
+           if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+             modSched_os<<"cycle "<<j<<": ";
+           for(unsigned k=0;k< resources[j].size(); k++){
+             if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+               modSched_os<<"\t"<<resources[j][k];
+             addResourceUsage(resourceUsage,resources[j][k]);
+           }
+           if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+             modSched_os<<"\n";
+         }
+       }  
+    }
+  if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)  
+    this->dumpResourceUsage(resourceUsage);
+
+  //compute ResII
+  ResII=0;
+  int issueSlots=msi.maxNumIssueTotal;
+  for(unsigned i=0;i<resourceUsage.size();i++){
+    int resourceNum=msi.getCPUResourceNum(resourceUsage[i].first);
+    int useNum=resourceUsage[i].second;
+    double tempII;
+    if(resourceNum <= issueSlots)
+      tempII=ceil(1.0*useNum/resourceNum);
+    else
+      tempII=ceil(1.0*useNum/issueSlots);
+    ResII=max((int)tempII,ResII);
+  }
+  return ResII;
+}
+
+ModuloSchedGraphSet::ModuloSchedGraphSet(const Function* function, const TargetMachine& target)
+  :method(function)
+{
+  buildGraphsForMethod(method,target);
+  
+}
+
+
+ModuloSchedGraphSet::~ModuloSchedGraphSet()
+{
+  //delete all the graphs
+  for(iterator I=begin(), E=end();I !=E; ++I)
+    delete *I;
+}
+
+void ModuloSchedGraphSet::dump() const 
+{
+  modSched_os << " ====== ModuloSched graphs for function `" <<method->getName()
+       << "' =========\n\n";
+  for(const_iterator I=begin(); I != end(); ++I)
+    (*I)->dump();
+    
+  modSched_os << "\n=========End graphs for funtion` "<<method->getName()
+       << "' ==========\n\n";
+}
+
+void ModuloSchedGraph::dump(const BasicBlock* bb)
+{
+  modSched_os<<"dumping basic block:";
+  modSched_os<<(bb->hasName()?bb->getName(): "block")
+      <<" (" << bb << ")"<<"\n";
+
+}
+
+void ModuloSchedGraph::dump(const BasicBlock* bb, std::ostream& os)
+{
+  os<<"dumping basic block:";
+  os<<(bb->hasName()?bb->getName(): "block")
+    <<" (" << bb << ")"<<"\n";
+}
+
+void 
+ModuloSchedGraph::dump() const
+{
+  modSched_os << " ModuloSchedGraph for basic Blocks:";
+  for(unsigned i=0, N =bbVec.size(); i< N; i++)
+    {
+      modSched_os<<(bbVec[i]->hasName()?bbVec[i]->getName(): "block")
+         <<" (" << bbVec[i] << ")"
+         << ((i==N-1)?"":", ");
+    }
+  
+  modSched_os <<"\n\n    Actual Root nodes : ";
+  for (unsigned i=0, N=graphRoot->outEdges.size(); i < N; i++)
+    modSched_os << graphRoot->outEdges[i]->getSink()->getNodeId()
+        << ((i == N-1)? "" : ", ");
+
+  modSched_os << "\n    Graph Nodes:\n";
+  //for (const_iterator I=begin(); I != end(); ++I)
+  //modSched_os << "\n" << *I->second;
+  unsigned numNodes=bbVec[0]->size();
+  for(unsigned i=2;i< numNodes+2;i++)
+    {
+      ModuloSchedGraphNode* node= getNode(i);
+      modSched_os << "\n" << *node;
+    }
+
+  modSched_os << "\n";
+}
+void 
+ModuloSchedGraph::dumpNodeProperty() const
+{
+  const BasicBlock* bb=bbVec[0];
+  unsigned numNodes=bb->size();
+  for(unsigned i=2;i < numNodes+2;i++)
+    { 
+      ModuloSchedGraphNode* node=getNode(i);
+      modSched_os<<"NodeId "<<node->getNodeId()<<"\t";
+      modSched_os<<"ASAP "<<node->getASAP()<<"\t";
+      modSched_os<<"ALAP "<<node->getALAP()<<"\t";
+      modSched_os<<"mov " <<node->getMov() <<"\t";
+      modSched_os<<"depth "<<node->getDepth()<<"\t";
+      modSched_os<<"height "<<node->getHeight()<<"\t";
+      modSched_os<<"\n";
+    }
+}
+
+void ModuloSchedGraphSet::buildGraphsForMethod (const Function *F, const TargetMachine& target)
+{
+  for(Function::const_iterator BI = F->begin(); BI != F->end(); ++BI)
+    addGraph(new ModuloSchedGraph(BI,target));
+}
+
+std::ostream &operator<<(std::ostream &os, const ModuloSchedGraphNode& node)
+{
+  os << std::string(8, ' ')
+     << "Node " << node.nodeId << " : "
+     << "latency = " << node.latency << "\n" << std::string(12, ' ');
+  
+
+
+  if (node.getInst() == NULL)
+    os << "(Dummy node)\n";
+  else
+    {
+      os << *node.getInst() << "\n" << std::string(12, ' ');
+      os << node.inEdges.size() << " Incoming Edges:\n";
+      for (unsigned i=0, N=node.inEdges.size(); i < N; i++)
+       os << std::string(16, ' ') << *node.inEdges[i];
+  
+      os << std::string(12, ' ') << node.outEdges.size()
+         << " Outgoing Edges:\n";
+      for (unsigned i=0, N=node.outEdges.size(); i < N; i++)
+        os << std::string(16, ' ') << *node.outEdges[i];
+    }
+  
+
+  return os;
+}
diff --git a/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.h b/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.h
new file mode 100644 (file)
index 0000000..07fde3c
--- /dev/null
@@ -0,0 +1,347 @@
+//===- ModuloSchedGraph.h - Represent a collection of data structures ----*- C++ -*-===//
+//
+// This header defines the primative classes that make up a data structure
+// graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MODULO_SCHED_GRAPH_H
+#define LLVM_CODEGEN_MODULO_SCHED_GRAPH_H
+#include "Support/HashExtras.h"
+#include "Support/GraphTraits.h"
+#include "../InstrSched/SchedGraphCommon.h"
+#include "llvm/Instruction.h"
+#include "llvm/Target/TargetMachine.h"
+#include <iostream>
+using std::pair;
+
+//for debug information selecton
+enum ModuloSchedDebugLevel_t{
+  ModuloSched_NoDebugInfo,
+  ModuloSched_Disable,
+  ModuloSched_PrintSchedule,
+  ModuloSched_PrintScheduleProcess,
+};
+
+//===============------------------------------------------------------------------------
+///ModuloSchedGraphNode - Implement a data structure based on the SchedGraphNodeCommon
+///this class stores informtion needed later to order the nodes in modulo scheduling
+///
+class ModuloSchedGraphNode: public SchedGraphNodeCommon {
+private:
+  //the corresponding instruction 
+  const Instruction* inst;
+
+  //whether this node's property(ASAP,ALAP, ...) has been computed
+  bool propertyComputed;
+
+  //ASAP: the earliest time the node could be scheduled
+  //ALAP: the latest time the node couldbe scheduled
+  //depth: the depth of the node
+  //height: the height of the node
+  //mov: the mobility function, computed as ALAP - ASAP
+  //scheTime: the scheduled time if this node has been scheduled 
+  //earlyStart: the earliest time to be tried to schedule the node
+  //lateStart: the latest time to be tried to schedule the node
+  int ASAP, ALAP, depth, height, mov;
+  int schTime;
+  int earlyStart,lateStart;
+
+public:
+  
+  //get the instruction
+  const Instruction* getInst() const { return inst;}
+
+  //get the instruction op-code name
+  const char* getInstOpcodeName() const{ return inst->getOpcodeName();}
+  
+  //get the instruction op-code 
+  const unsigned getInstOpcode() const { return inst->getOpcode();}
+
+  //return whether the node is NULL
+  bool isNullNode() const{ return(inst== NULL);}
+
+  //return whether the property of the node has been computed
+  bool getPropertyComputed() {return propertyComputed;}
+
+  //set the propertyComputed
+  void setPropertyComputed(bool _propertyComputed) {propertyComputed = _propertyComputed;}
+
+  //get the corresponding property
+  int getASAP(){ return ASAP;}
+  int getALAP(){ return ALAP;}
+  int getMov() { return mov;}
+  int getDepth(){return depth;}
+  int getHeight(){return height;}
+  int getSchTime(){return schTime;}
+  int getEarlyStart(){return earlyStart;}
+  int getLateStart() { return lateStart;}
+  void setEarlyStart(int _earlyStart) {earlyStart= _earlyStart;}
+  void setLateStart(int _lateStart) {lateStart= _lateStart;}
+  void setSchTime(int _time){schTime=_time;}
+
+ private:
+  friend class ModuloSchedGraph;
+  friend class SchedGraphNode;
+
+  //constructor:
+  //nodeId: the node id, unique within the each BasicBlock
+  //_bb: which BasicBlock the corresponding instruction belongs to 
+  //_inst: the corresponding instruction
+  //indexInBB: the corresponding instruction's index in the BasicBlock
+  //target: the targetMachine
+  ModuloSchedGraphNode(unsigned int _nodeId,
+                   const BasicBlock* _bb,
+                   const  Instruction* _inst,
+                   int indexInBB,
+                   const TargetMachine& target);
+  
+  
+  friend std::ostream& operator<<(std::ostream& os,const ModuloSchedGraphNode& edge);
+  
+};
+
+
+
+
+
+//FIXME: these two value should not be used
+#define MAXNODE 100
+#define MAXCC   100
+
+
+
+//===----------------------------------------------------------------------===//
+/// ModuloSchedGraph- the data structure to store dependence between nodes
+/// it catches data dependence and constrol dependence
+/// 
+/// 
+
+class ModuloSchedGraph:
+  public SchedGraphCommon,
+  protected hash_map<const Instruction*, ModuloSchedGraphNode*>
+{
+  
+private: 
+  //iteration Interval
+  int MII;
+  
+  //target machine
+  const TargetMachine& target;
+
+  //the circuits in the dependence graph
+  unsigned circuits[MAXCC][MAXNODE];
+
+  //the order nodes
+  std::vector<ModuloSchedGraphNode*> oNodes;
+
+  typedef std::vector<ModuloSchedGraphNode*> NodeVec;
+
+  //the function to compute properties
+  void computeNodeASAP(const BasicBlock* bb);
+  void computeNodeALAP(const BasicBlock* bb); 
+  void computeNodeMov(const BasicBlock* bb);
+  void computeNodeDepth(const BasicBlock* bb);
+  void computeNodeHeight(const BasicBlock* bb);
+
+  //the function to compute node property
+  void computeNodeProperty(const BasicBlock* bb);
+
+  //the function to sort nodes
+  void orderNodes();
+  //add the resource usage 
+  void addResourceUsage(std::vector<pair<int,int> >&, int);
+
+  //debug functions:
+  //dump circuits
+  void dumpCircuits();
+  //dump the input set of nodes
+  void dumpSet(std::vector<ModuloSchedGraphNode*> set);
+  //dump the input resource usage table  
+  void dumpResourceUsage(std::vector<pair<int,int> >&);
+  
+ public:
+  //help functions
+  
+  //get the maxium the delay between two nodes
+  SchedGraphEdge* getMaxDelayEdge(unsigned srcId, unsigned sinkId);  
+
+  //FIXME:
+  //get the predessor Set of the set
+  NodeVec predSet(NodeVec set, unsigned, unsigned);
+  NodeVec predSet(NodeVec set);
+
+  //get the predessor set of the node
+  NodeVec predSet(ModuloSchedGraphNode* node, unsigned,unsigned);
+  NodeVec predSet(ModuloSchedGraphNode* node);
+
+  //get the successor set of the set
+  NodeVec succSet(NodeVec set, unsigned, unsigned);
+  NodeVec succSet(NodeVec set);
+  
+  //get the succssor set of the node
+  NodeVec succSet(ModuloSchedGraphNode* node,unsigned, unsigned);
+  NodeVec succSet(ModuloSchedGraphNode* node);
+
+  //return the uniton of the two vectors
+  NodeVec vectorUnion(NodeVec set1,NodeVec set2 );
+  
+  //return the consjuction of the two vectors
+  NodeVec vectorConj(NodeVec set1,NodeVec set2 );
+  
+  //return all nodes in  set1 but not  set2
+  NodeVec vectorSub(NodeVec set1, NodeVec set2);
+
+  typedef hash_map<const Instruction*, ModuloSchedGraphNode*> map_base;
+public:
+  using map_base::iterator;
+  using map_base::const_iterator;
+  
+public:
+
+  //get target machine
+  const TargetMachine& getTarget(){return target;}
+
+  //get the iteration interval
+  const int getMII(){return MII;} 
+
+  //get the ordered nodes
+  const NodeVec& getONodes(){return oNodes;}
+
+  //get the number of nodes (including the root and leaf)
+  //note: actually root and leaf is not used
+  const unsigned int getNumNodes() const {return size()+2;}
+
+  //return wether the BasicBlock 'bb' contains a loop
+  bool isLoop                         (const BasicBlock* bb);
+
+  //return this basibBlock contains a loop
+  bool isLoop                         ();
+  
+
+  //return the node for the input instruction
+  ModuloSchedGraphNode* getGraphNodeForInst(const Instruction* inst) const{
+    const_iterator onePair = this->find(inst);
+    return (onePair != this->end())?(*onePair).second: NULL;
+  }
+  
+  //Debugging support
+  //dump the graph
+  void dump() const;
+  //dump the basicBlock
+  void dump(const BasicBlock* bb);
+  //dump the basicBlock into 'os' stream
+  void dump(const BasicBlock* bb, std::ostream& os);
+  //dump the node property
+  void dumpNodeProperty() const ;
+
+ private:
+  friend class ModuloSchedGraphSet; //give access to ctor
+  
+ public:
+  /*ctr*/
+  ModuloSchedGraph(const BasicBlock* bb, const TargetMachine& _target)
+    :SchedGraphCommon(bb), target(_target){
+    buildGraph(target);
+  }
+
+  /*dtr*/
+  ~ModuloSchedGraph(){
+    for(const_iterator I=begin(); I!=end(); ++I)
+      delete I->second;
+  }
+  
+  //unorder iterators
+  //return values are pair<const Instruction*, ModuloSchedGraphNode*>
+  using map_base::begin;
+  using map_base::end;
+
+
+  
+  inline void noteModuloSchedGraphNodeForInst(const Instruction* inst,
+                                  ModuloSchedGraphNode* node)
+    {
+      assert((*this)[inst] ==NULL);
+      (*this)[inst]=node;
+    }
+  
+  //Graph builder
+  
+  ModuloSchedGraphNode* getNode          (const unsigned nodeId) const;      
+
+  //build the graph from the basicBlock
+  void buildGraph                     (const TargetMachine& target);
+
+  //Build nodes for BasicBlock
+  void buildNodesforBB                (const TargetMachine& target,
+                                      const BasicBlock* bb,
+                                      NodeVec& memNode,
+                                      RegToRefVecMap& regToRefVecMap,
+                                      ValueToDefVecMap& valueToDefVecMap);
+
+  //find definitiona and use information for all nodes
+  void findDefUseInfoAtInstr           (const TargetMachine& target,
+                                       ModuloSchedGraphNode* node,
+                                      NodeVec& memNode,
+                                      RegToRefVecMap& regToRefVecMap,
+                                      ValueToDefVecMap& valueToDefVecMap);
+
+  //add def-use edge
+  void addDefUseEdges                 (const BasicBlock* bb);
+
+  //add control dependence edges
+  void addCDEdges                     (const BasicBlock* bb);
+
+  //add memory dependence dges
+  void addMemEdges                    (const BasicBlock* bb);
+
+  //add dummy edges
+  void addDummyEdges();
+
+  //computer source restrictoin II
+  int  computeResII                   (const BasicBlock* bb);
+
+  //computer recurrence II
+  int  computeRecII                   (const BasicBlock* bb);
+};
+
+///==================================-
+//gragh set
+
+class ModuloSchedGraphSet:
+  public std::vector<ModuloSchedGraph*>
+{
+private:
+  const Function* method;
+  
+public:
+  typedef std::vector<ModuloSchedGraph*> baseVector;
+  using baseVector::iterator;
+  using baseVector::const_iterator;
+  
+public:
+  /*ctor*/ ModuloSchedGraphSet          (const Function* function, const TargetMachine& target);
+  
+  /*dtor*/ ~ModuloSchedGraphSet          ();
+  
+  //iterators
+  using baseVector::begin;
+  using baseVector::end;
+
+
+  //Debugging support
+  void  dump() const;
+  
+private:
+  inline void addGraph(ModuloSchedGraph* graph){
+    assert(graph !=NULL);
+    this->push_back(graph);
+  }
+  
+  //Graph builder
+  void buildGraphsForMethod (const Function *F, const TargetMachine& target);
+};
+
+#endif 
diff --git a/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp b/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp
new file mode 100644 (file)
index 0000000..3f4002a
--- /dev/null
@@ -0,0 +1,899 @@
+
+//===- SPLInstrScheduling.cpp - Modulo Software Pipelining Instruction Scheduling support -------===//
+//
+// this file implements the llvm/CodeGen/ModuloScheduling.h interface
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineCodeForInstruction.h"
+#include "llvm/CodeGen/MachineCodeForBasicBlock.h"
+#include "llvm/CodeGen/MachineCodeForMethod.h"
+#include "llvm/Analysis/LiveVar/FunctionLiveVarInfo.h" // FIXME: Remove when modularized better
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instruction.h"
+#include "Support/CommandLine.h"
+#include <algorithm>
+#include "ModuloSchedGraph.h"
+#include "ModuloScheduling.h"
+#include "llvm/Target/MachineSchedInfo.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/iTerminators.h"
+#include "llvm/iPHINode.h"
+#include "llvm/Constants.h"
+#include <iostream>
+#include <swig.h>
+#include <fstream>
+#include "llvm/CodeGen/InstrSelection.h"
+
+#define max(x,y) (x>y?x:y)
+#define min(x,y) (x<y?x:y)
+using std::cerr;
+using std::cout;
+using std::ostream;
+using std::ios;
+using std::filebuf;
+
+//************************************************************
+//printing Debug information
+//ModuloSchedDebugLevel stores the value of debug level
+// modsched_os is the ostream to dump debug information, which is written into the file 'moduloSchedDebugInfo.output'
+//see ModuloSchedulingPass::runOnFunction()
+//************************************************************
+
+ModuloSchedDebugLevel_t ModuloSchedDebugLevel;
+static cl::opt<ModuloSchedDebugLevel_t, true>
+SDL_opt("modsched", cl::Hidden, cl::location(ModuloSchedDebugLevel),
+        cl::desc("enable modulo scheduling debugging information"),
+        cl::values(
+ clEnumValN(ModuloSched_NoDebugInfo,      "n", "disable debug output"),
+ clEnumValN(ModuloSched_Disable,        "off", "disable modulo scheduling"),
+ clEnumValN(ModuloSched_PrintSchedule,  "psched", "print original and new schedule"),
+ clEnumValN(ModuloSched_PrintScheduleProcess,"pschedproc", "print how the new schdule is produced"),
+                   0));
+
+filebuf modSched_fb;
+ostream modSched_os(&modSched_fb);
+
+//************************************************************
+
+
+///the method to compute schedule and instert epilogue and prologue
+void ModuloScheduling::instrScheduling(){
+  
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+    modSched_os<<"*************************computing modulo schedule ************************\n";
+  
+  
+  const MachineSchedInfo& msi=target.getSchedInfo();
+
+  //number of issue slots in the in each cycle
+  int numIssueSlots=msi.maxNumIssueTotal;
+
+
+
+  //compute the schedule
+  bool success=false;
+  while(!success)
+    {
+      //clear memory from the last round and initialize if necessary
+      clearInitMem(msi);
+
+      //compute schedule and coreSchedule with the current II
+      success=computeSchedule();
+      
+      if(!success){
+       II++;
+       if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+         modSched_os<<"increase II  to "<<II<<"\n";
+      }
+    }
+  
+  //print the final schedule if necessary
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintSchedule)
+    dumpScheduling();
+  
+
+  //the schedule has been computed
+  //create epilogue, prologue and kernel BasicBlock
+  //find the successor for this BasicBlock
+  BasicBlock* succ_bb= getSuccBB(bb); 
+  
+  //print the original BasicBlock if necessary
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintSchedule){
+    modSched_os<<"dumping the orginal block\n";  
+    graph.dump(bb);
+  }
+
+  //construction of prologue, kernel and epilogue
+  BasicBlock* kernel=bb->splitBasicBlock(bb->begin());
+  BasicBlock* prologue= bb;
+  BasicBlock* epilogue=kernel->splitBasicBlock(kernel->begin());
+  
+  
+  //construct prologue
+  constructPrologue(prologue);
+
+  //construct kernel
+  constructKernel(prologue,kernel,epilogue);
+
+  //construct epilogue
+  constructEpilogue(epilogue,succ_bb);
+
+  //print the BasicBlocks if necessary
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintSchedule){
+    modSched_os<<"dumping the prologue block:\n";
+    graph.dump(prologue);
+    modSched_os<<"dumping the kernel block\n";
+    graph.dump(kernel);
+    modSched_os<<"dumping the epilogue block\n";
+    graph.dump(epilogue);
+  }
+  
+}    
+
+//clear memory from the last round and initialize if necessary
+void ModuloScheduling::clearInitMem(const MachineSchedInfo& msi){
+  
+
+  unsigned numIssueSlots = msi.maxNumIssueTotal;
+  //clear nodeScheduled from the last round
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){
+    modSched_os<< "***** new round  with II= "<<II<<" *******************"<<endl;
+    modSched_os<< " **************clear the vector nodeScheduled**************** \n";
+  }
+  nodeScheduled.clear();
+  
+  
+  //clear resourceTable from the last round and reset it 
+  resourceTable.clear();
+  for(unsigned i=0;i< II;i++)
+    resourceTable.push_back(msi.resourceNumVector);
+  
+  
+  //clear the schdule and coreSchedule from the last round 
+  schedule.clear();
+  coreSchedule.clear();
+  
+  //create a coreSchedule of size II*numIssueSlots
+  //each entry is NULL
+  while( coreSchedule.size() <  II){
+    std::vector<ModuloSchedGraphNode*>* newCycle=new  std::vector<ModuloSchedGraphNode*>();
+    for(unsigned k=0;k<numIssueSlots;k++)
+      newCycle->push_back(NULL);
+    coreSchedule.push_back(*newCycle);
+  }  
+}
+
+
+//compute schedule and coreSchedule with the current II
+bool ModuloScheduling::computeSchedule(){
+  
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+    modSched_os <<"start to compute schedule \n";
+  
+  //loop over the ordered nodes
+  for(NodeVec::const_iterator I=oNodes.begin();I!=oNodes.end();I++)
+    {
+      //try to schedule for node I
+      if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       dumpScheduling();
+      ModuloSchedGraphNode* node=*I;
+
+      //compute whether this node has successor(s)
+      bool succ=true;
+
+      //compute whether this node has predessor(s)
+      bool pred=true;
+
+      NodeVec schSucc=graph.vectorConj(nodeScheduled,graph.succSet(node));
+      if(schSucc.empty())
+       succ=false;
+      NodeVec schPred=graph.vectorConj(nodeScheduled,graph.predSet(node));   
+      if(schPred.empty())
+       pred=false;
+      
+      //startTime: the earliest time we will try to schedule this node
+      //endTime: the latest time we will try to schedule this node
+      int startTime, endTime;
+
+      //node's earlyStart: possible earliest time to schedule this node
+      //node's lateStart: possible latest time to schedule this node
+      node->setEarlyStart(-1);
+      node->setLateStart(9999);
+      
+
+      //this node has predessor but no successor
+      if(!succ && pred){
+
+       //this node's earlyStart is it's predessor's schedule time + the edge delay 
+       // - the iteration difference* II    
+       for(unsigned i=0;i<schPred.size();i++){
+         ModuloSchedGraphNode* predNode=schPred[i];
+         SchedGraphEdge* edge=graph.getMaxDelayEdge(predNode->getNodeId(),node->getNodeId());
+         int temp=predNode->getSchTime()+edge->getMinDelay() - edge->getIteDiff()*II;
+         node->setEarlyStart( max(node->getEarlyStart(),temp));
+       }
+       startTime=node->getEarlyStart();
+       endTime=node->getEarlyStart()+II-1;
+      }
+      
+
+      //this node has successor but no predessor
+      if(succ && !pred){
+       for(unsigned i=0;i<schSucc.size();i++){
+         ModuloSchedGraphNode* succNode=schSucc[i];
+         SchedGraphEdge* edge=graph.getMaxDelayEdge(succNode->getNodeId(),node->getNodeId());
+         int temp=succNode->getSchTime() - edge->getMinDelay() + edge->getIteDiff()*II;
+         node->setLateStart(min(node->getEarlyStart(),temp));
+       }
+       startTime=node->getLateStart()- II+1;
+       endTime=node->getLateStart();
+      }
+
+      //this node has both successors and predessors
+      if(succ && pred)
+       {
+         for(unsigned i=0;i<schPred.size();i++){
+           ModuloSchedGraphNode* predNode=schPred[i];
+           SchedGraphEdge* edge=graph.getMaxDelayEdge(predNode->getNodeId(),node->getNodeId());
+           int temp=predNode->getSchTime()+edge->getMinDelay() - edge->getIteDiff()*II;
+           node->setEarlyStart(max(node->getEarlyStart(),temp));
+         }
+         for(unsigned i=0;i<schSucc.size();i++){
+           ModuloSchedGraphNode* succNode=schSucc[i];
+           SchedGraphEdge* edge=graph.getMaxDelayEdge(succNode->getNodeId(),node->getNodeId());
+           int temp=succNode->getSchTime() - edge->getMinDelay() + edge->getIteDiff()*II;
+           node->setLateStart(min(node->getEarlyStart(),temp));
+         }
+         startTime=node->getEarlyStart();
+         endTime=min(node->getLateStart(),node->getEarlyStart()+((int)II)-1);
+       }
+      
+      //this node has no successor or predessor
+      if(!succ && !pred){
+       node->setEarlyStart(node->getASAP());
+       startTime=node->getEarlyStart();
+       endTime=node->getEarlyStart()+II -1;
+      }
+
+      //try to schedule this node based on the startTime and endTime
+      if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       modSched_os<<"scheduling the node "<<(*I)->getNodeId()<<"\n";      
+
+      bool success= this->ScheduleNode(node,startTime, endTime,nodeScheduled);
+      if(!success)return false;
+    }
+  return true;
+}
+
+
+//get the successor of the BasicBlock
+BasicBlock* ModuloScheduling::getSuccBB(BasicBlock* bb){
+
+  BasicBlock* succ_bb;
+  for(unsigned i=0;i < II; i++)
+    for(unsigned j=0;j< coreSchedule[i].size();j++)
+      if(coreSchedule[i][j]){
+       const Instruction* ist=coreSchedule[i][j]->getInst();
+       
+       //we can get successor from the BranchInst instruction
+       //assume we only have one successor (besides itself) here
+       if(BranchInst::classof(ist)){
+         BranchInst* bi=(BranchInst*)ist;
+         assert(bi->isConditional()&&"the branchInst is not a conditional one");
+         assert(bi->getNumSuccessors() ==2&&" more than two successors?");
+         BasicBlock* bb1=bi->getSuccessor(0);
+         BasicBlock* bb2=bi->getSuccessor(1);
+         assert( (bb1 ==  bb|| bb2 == bb) && " None of its successor is itself?");
+         if(bb1 == bb) succ_bb=bb2;
+         else succ_bb=bb1;
+         return succ_bb;
+       }
+      }
+  assert( 0 && "NO Successor?");
+  return NULL;
+}
+
+
+//get the predecessor of the BasicBlock
+BasicBlock* ModuloScheduling::getPredBB(BasicBlock* bb){
+
+  BasicBlock* pred_bb;
+
+  for(unsigned i=0;i < II; i++)
+    for(unsigned j=0;j< coreSchedule[i].size();j++)
+      if(coreSchedule[i][j]){
+       const Instruction* ist=coreSchedule[i][j]->getInst();
+       
+       //we can get predecessor from the PHINode instruction
+       //assume we only have one predecessor (besides itself) here
+       if(PHINode::classof(ist)){
+         PHINode* phi=(PHINode*) ist;
+         assert(phi->getNumIncomingValues() == 2 &&" the number of incoming value is not equal to two? ");
+         BasicBlock* bb1= phi->getIncomingBlock(0);
+         BasicBlock* bb2= phi->getIncomingBlock(1);
+         assert( (bb1 ==  bb || bb2 == bb) && " None of its predecessor is itself?");
+         if(bb1 == bb) pred_bb=bb2;
+         else pred_bb=bb1;       
+         return pred_bb;
+       }
+      }
+  assert(0 && " no predecessor?");
+  return NULL;
+}
+
+
+//construct the prologue
+void ModuloScheduling::constructPrologue(BasicBlock* prologue){
+  
+  InstListType& prologue_ist = prologue->getInstList();
+  vvNodeType& tempSchedule_prologue= *(new vector< std::vector<ModuloSchedGraphNode*> >(schedule));
+  
+  //compute the schedule for prologue
+  unsigned round=0;
+  unsigned scheduleSize=schedule.size();
+  while(round < scheduleSize/II){
+    round++;
+    for(unsigned i=0;i < scheduleSize ;i++){
+      if(round*II + i >= scheduleSize) break;
+      for(unsigned j=0;j < schedule[i].size(); j++) 
+       if(schedule[i][j]){
+         assert( tempSchedule_prologue[round*II +i ][j] == NULL && "table not consitant with core table");
+         
+         //move  the schedule one iteration ahead and overlap with the original one
+         tempSchedule_prologue[round*II + i][j]=schedule[i][j];
+       }
+    }
+  }
+
+  //clear the clone memory in the core schedule instructions
+  clearCloneMemory();
+  
+  //fill in the prologue
+  for(unsigned i=0;i < ceil(1.0*scheduleSize/II -1)*II ;i++)
+    for(unsigned j=0;j < tempSchedule_prologue[i].size();j++)
+      if(tempSchedule_prologue[i][j]){
+
+       //get the instruction
+       Instruction* orn=(Instruction*)tempSchedule_prologue[i][j]->getInst();
+
+       //made a clone of it
+       Instruction* cln=cloneInstSetMemory(orn);
+
+       //insert the instruction
+       prologue_ist.insert(prologue_ist.back(),cln );
+
+       //if there is PHINode in the prologue, the incoming value from itself should be removed
+       //because it is not a loop any longer
+       if( PHINode::classof(cln)){
+         PHINode* phi=(PHINode*)cln;
+         phi->removeIncomingValue(phi->getParent());
+       }
+      }
+}
+
+
+//construct the kernel BasicBlock
+void ModuloScheduling::constructKernel(BasicBlock* prologue,BasicBlock* kernel,BasicBlock* epilogue){
+
+  //*************fill instructions in the kernel****************
+  InstListType& kernel_ist   = kernel->getInstList();
+  BranchInst* brchInst;
+  PHINode* phiInst, *phiCln;
+
+  for(unsigned i=0;i<coreSchedule.size();i++)
+    for(unsigned j=0;j<coreSchedule[i].size();j++)
+      if(coreSchedule[i][j]){
+       
+       //we should take care of branch instruction differently with normal instructions
+       if(BranchInst::classof(coreSchedule[i][j]->getInst())){
+         brchInst=(BranchInst*)coreSchedule[i][j]->getInst();
+         continue;
+       }
+       
+       //we should take care of PHINode instruction differently with normal instructions
+       if( PHINode::classof(coreSchedule[i][j]->getInst())){
+         phiInst= (PHINode*)coreSchedule[i][j]->getInst();
+         Instruction* cln=cloneInstSetMemory(phiInst);
+         kernel_ist.insert(kernel_ist.back(),cln);
+         phiCln=(PHINode*)cln;
+         continue;
+       }
+       
+       //for normal instructions: made a clone and insert it in the kernel_ist
+       Instruction* cln=cloneInstSetMemory( (Instruction*)coreSchedule[i][j]->getInst());
+       kernel_ist.insert(kernel_ist.back(),cln);
+      }
+
+  //the two incoming BasicBlock for PHINode is the prologue and the kernel (itself)
+  phiCln->setIncomingBlock(0,prologue);
+  phiCln->setIncomingBlock(1,kernel);
+
+  //the incoming value for the kernel (itself) is the new value which is computed in the kernel
+  Instruction* originalVal=(Instruction*)phiInst->getIncomingValue(1);
+  phiCln->setIncomingValue(1, originalVal->getClone());
+  
+
+  //make a clone of the branch instruction and insert it in the end
+  BranchInst* cln=(BranchInst*)cloneInstSetMemory( brchInst);
+  kernel_ist.insert(kernel_ist.back(),cln);
+
+  //delete the unconditional branch instruction, which is generated when splitting the basicBlock
+  kernel_ist.erase( --kernel_ist.end());
+
+  //set the first successor to itself
+  ((BranchInst*)cln)->setSuccessor(0, kernel);
+  //set the second successor to eiplogue
+  ((BranchInst*)cln)->setSuccessor(1,epilogue);
+
+  //*****change the condition*******
+
+  //get the condition instruction
+  Instruction* cond=(Instruction*)cln->getCondition();
+
+  //get the condition's second operand, it should be a constant
+  Value* operand=cond->getOperand(1);
+  assert(ConstantSInt::classof(operand));
+
+  //change the constant in the condtion instruction
+  ConstantSInt* iteTimes=ConstantSInt::get(operand->getType(),((ConstantSInt*)operand)->getValue()-II+1);
+  cond->setOperand(1,iteTimes);
+
+}
+
+
+
+
+
+//construct the epilogue 
+void ModuloScheduling::constructEpilogue(BasicBlock* epilogue, BasicBlock* succ_bb){
+  
+  //compute the schedule for epilogue
+  vvNodeType& tempSchedule_epilogue= *(new vector< std::vector<ModuloSchedGraphNode*> >(schedule));
+  unsigned scheduleSize=schedule.size();
+  int round =0;
+  while(round < ceil(1.0*scheduleSize/II )-1 ){
+    round++;
+    for( unsigned i=0;i < scheduleSize ; i++){
+      if(i + round *II >= scheduleSize) break;
+      for(unsigned j=0;j < schedule[i].size();j++)
+       if(schedule[i + round*II ][j]){
+         assert( tempSchedule_epilogue[i][j] == NULL && "table not consitant with core table");
+
+         //move the schdule one iteration behind and overlap
+         tempSchedule_epilogue[i][j]=schedule[i + round*II][j];
+       }
+    }
+  }
+  
+  //fill in the epilogue
+  InstListType& epilogue_ist = epilogue->getInstList();
+  for(unsigned i=II;i <scheduleSize ;i++)
+    for(unsigned j=0;j < tempSchedule_epilogue[i].size();j++)
+      if(tempSchedule_epilogue[i][j]){
+       Instruction* inst=(Instruction*)tempSchedule_epilogue[i][j]->getInst();
+
+       //BranchInst and PHINode should be treated differently
+       //BranchInst:unecessary, simly omitted
+       //PHINode: omitted
+       if( !BranchInst::classof(inst) && ! PHINode::classof(inst) ){
+         //make a clone instruction and insert it into the epilogue
+         Instruction* cln=cloneInstSetMemory(inst);    
+         epilogue_ist.push_front(cln);
+       }
+      }
+
+
+  //*************delete the original instructions****************//
+  //to delete the original instructions, we have to make sure their use is zero
+  
+  //update original core instruction's uses, using its clone instread
+  for(unsigned i=0;i < II; i++)
+    for(unsigned j=0;j < coreSchedule[i].size() ;j++){
+      if(coreSchedule[i][j])
+       updateUseWithClone((Instruction*)coreSchedule[i][j]->getInst() );
+    }
+  
+  //erase these instructions
+  for(unsigned i=0;i < II; i++)
+    for(unsigned j=0;j < coreSchedule[i].size();j++)
+      if(coreSchedule[i][j]){
+       Instruction* ist=(Instruction*)coreSchedule[i][j]->getInst();
+       ist->getParent()->getInstList().erase(ist);
+      }
+  //**************************************************************//
+
+
+  //finally, insert an unconditional branch instruction at the end
+  epilogue_ist.push_back(new BranchInst(succ_bb));
+  
+}
+
+
+//----------------------------------------------------------------------------------------------
+//this function replace the value(instruction) ist in other instructions with its latest clone
+//i.e. after this function is called, the ist is not used anywhere and it can be erased.
+//----------------------------------------------------------------------------------------------
+void ModuloScheduling::updateUseWithClone(Instruction* ist){
+  
+  while(ist->use_size() >0){
+    bool destroyed=false;
+    
+    //other instruction is using this value ist
+    assert(Instruction::classof(*ist->use_begin()));
+    Instruction *inst=(Instruction*)(* ist->use_begin());
+
+    for(unsigned i=0;i<inst->getNumOperands();i++)
+      if(inst->getOperand(i) == ist && ist->getClone()){
+
+       //if the instruction is TmpInstruction, simly delete it because it has no parent
+       // and it does not belongs to any BasicBlock
+       if(TmpInstruction::classof(inst)) {
+         delete inst;
+         destroyed=true;
+         break;
+       }
+
+
+       //otherwise, set the instruction's operand to the value's clone
+       inst->setOperand(i, ist->getClone());
+
+       //the use from the original value ist is destroyed
+       destroyed=true;
+       break;
+      }
+    if( !destroyed)
+      {
+       //if the use can not be destroyed , something is wrong
+       inst->dump();
+       assert( 0  &&"this use can not be destroyed"); 
+      }
+  }
+  
+}
+
+
+//********************************************************
+//this function clear all clone mememoy
+//i.e. set all instruction's clone memory to NULL
+//*****************************************************
+void ModuloScheduling::clearCloneMemory(){
+for(unsigned i=0; i < coreSchedule.size();i++)
+  for(unsigned j=0;j<coreSchedule[i].size();j++)
+    if(coreSchedule[i][j]) ((Instruction*)coreSchedule[i][j]->getInst())->clearClone();
+}
+
+
+//********************************************************************************
+//this function make a clone of the instruction orn
+//the cloned instruction will use the orn's operands' latest clone as its operands
+//it is done this way because LLVM is in SSA form and we should use the correct value
+//
+//this fuction also update the instruction orn's latest clone memory
+//**********************************************************************************
+Instruction*  ModuloScheduling::cloneInstSetMemory(Instruction* orn) {
+
+//make a clone instruction
+  Instruction* cln=orn->clone();
+  
+
+  //update the operands
+  for(unsigned k=0;k<orn->getNumOperands();k++){
+    const Value* op=orn->getOperand(k);
+    if(Instruction::classof(op) && ((Instruction*)op)->getClone()){      
+      Instruction* op_inst=(Instruction*)op;
+      cln->setOperand(k, op_inst->getClone());
+    }
+  }
+
+  //update clone memory
+  orn->setClone(cln);
+  return cln;
+}
+
+
+
+bool ModuloScheduling::ScheduleNode(ModuloSchedGraphNode* node,unsigned start, unsigned end, NodeVec& nodeScheduled)
+{
+  
+  const MachineSchedInfo& msi=target.getSchedInfo();
+  unsigned int numIssueSlots=msi.maxNumIssueTotal;
+
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+    modSched_os<<"startTime= "<<start<<" endTime= "<<end<<"\n";
+  bool isScheduled=false;
+  for(unsigned i=start;i<= end;i++){
+    if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+      modSched_os<< " now try cycle " <<i<<":"<<"\n"; 
+    for(unsigned j=0;j<numIssueSlots;j++){
+      unsigned int core_i = i%II;
+      unsigned int core_j=j;
+      if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       modSched_os <<"\t Trying slot "<<j<<"...........";
+      //check the resouce table, make sure there is no resource conflicts
+      const Instruction* instr=node->getInst();
+      MachineCodeForInstruction& tempMvec=  MachineCodeForInstruction::get(instr);
+      bool resourceConflict=false;
+      const MachineInstrInfo &mii=msi.getInstrInfo();
+      
+      if(coreSchedule.size() < core_i+1 || !coreSchedule[core_i][core_j]){
+       //this->dumpResourceUsageTable();
+       int latency=0;
+       for(unsigned k=0;k< tempMvec.size();k++)
+         {
+           MachineInstr* minstr=tempMvec[k];
+           InstrRUsage rUsage=msi.getInstrRUsage(minstr->getOpCode());
+           std::vector<std::vector<resourceId_t> > resources
+             =rUsage.resourcesByCycle;
+           updateResourceTable(resources,i + latency);
+           latency +=max(mii.minLatency(minstr->getOpCode()),1) ;
+         }
+       
+       //this->dumpResourceUsageTable();
+       
+       latency=0;
+       if( resourceTableNegative()){
+         
+         //undo-update the resource table
+         for(unsigned k=0;k< tempMvec.size();k++){
+           MachineInstr* minstr=tempMvec[k];
+           InstrRUsage rUsage=msi.getInstrRUsage(minstr->getOpCode());
+           std::vector<std::vector<resourceId_t> > resources
+             =rUsage.resourcesByCycle;
+           undoUpdateResourceTable(resources,i + latency);
+           latency +=max(mii.minLatency(minstr->getOpCode()),1) ;
+         }
+         resourceConflict=true;
+       }
+      }
+      if( !resourceConflict &&  !coreSchedule[core_i][core_j]){
+       if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){
+         modSched_os <<" OK!"<<"\n";
+         modSched_os<<"Node "<<node->getNodeId()<< " is scheduleed."<<"\n";
+       }
+       //schedule[i][j]=node;
+       while(schedule.size() <= i){
+         std::vector<ModuloSchedGraphNode*>* newCycle=new  std::vector<ModuloSchedGraphNode*>();
+         for(unsigned  k=0;k<numIssueSlots;k++)
+           newCycle->push_back(NULL);
+         schedule.push_back(*newCycle);
+       }
+       vector<ModuloSchedGraphNode*>::iterator startIterator;
+       startIterator = schedule[i].begin();
+       schedule[i].insert(startIterator+j,node);
+       startIterator = schedule[i].begin();
+       schedule[i].erase(startIterator+j+1);
+
+       //update coreSchedule
+       //coreSchedule[core_i][core_j]=node;
+       while(coreSchedule.size() <= core_i){
+         std::vector<ModuloSchedGraphNode*>* newCycle=new  std::vector<ModuloSchedGraphNode*>();
+         for(unsigned k=0;k<numIssueSlots;k++)
+           newCycle->push_back(NULL);
+         coreSchedule.push_back(*newCycle);
+       }
+
+       startIterator = coreSchedule[core_i].begin();
+       coreSchedule[core_i].insert(startIterator+core_j,node);
+       startIterator = coreSchedule[core_i].begin();
+       coreSchedule[core_i].erase(startIterator+core_j+1);
+
+       node->setSchTime(i);
+       isScheduled=true;
+       nodeScheduled.push_back(node);
+       
+       break;
+      }
+      else if( coreSchedule[core_i][core_j]) {
+       if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+         modSched_os <<" Slot not available "<<"\n";
+      }
+      else{
+       if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+         modSched_os <<" Resource conflicts"<<"\n";
+      }
+    }
+    if(isScheduled) break;
+  }
+  //assert(nodeScheduled &&"this node can not be scheduled?");
+  return isScheduled;
+}
+
+void ModuloScheduling::updateResourceTable(std::vector<std::vector<unsigned int> > useResources, int startCycle){
+  for(unsigned i=0;i< useResources.size();i++){
+    int absCycle=startCycle+i;
+    int coreCycle=absCycle % II;
+    std::vector<pair<int,int> >& resourceRemained=resourceTable[coreCycle];
+    std::vector<unsigned int>& resourceUsed= useResources[i];
+    for(unsigned j=0;j< resourceUsed.size();j++){
+      for(unsigned k=0;k< resourceRemained.size();k++)
+       if((int)resourceUsed[j] == resourceRemained[k].first){
+         resourceRemained[k].second--;
+       }
+    }
+  }
+}
+
+void ModuloScheduling::undoUpdateResourceTable(std::vector<std::vector<unsigned int> > useResources, int startCycle){
+  for(unsigned i=0;i< useResources.size();i++){
+    int absCycle=startCycle+i;
+    int coreCycle=absCycle % II;
+    std::vector<pair<int,int> >& resourceRemained=resourceTable[coreCycle];
+    std::vector<unsigned int>& resourceUsed= useResources[i];
+    for(unsigned j=0;j< resourceUsed.size();j++){
+      for(unsigned k=0;k< resourceRemained.size();k++)
+       if((int)resourceUsed[j] == resourceRemained[k].first){
+         resourceRemained[k].second++;
+       }
+    }
+  }
+}
+
+
+//-----------------------------------------------------------------------
+//Function: resouceTableNegative
+//return value:
+//          return false if any element in the resouceTable is negative
+//          otherwise return true
+//Purpose:
+//          this function is used to determine if an instruction is eligible for schedule at certain cycle
+//---------------------------------------------------------------------------------------
+
+bool ModuloScheduling::resourceTableNegative(){
+  assert(resourceTable.size() == (unsigned)II&& "resouceTable size must be equal to II");
+  bool isNegative=false;
+  for(unsigned i=0; i < resourceTable.size();i++)
+    for(unsigned j=0;j < resourceTable[i].size();j++){
+      if(resourceTable[i][j].second <0) {
+       isNegative=true;
+       break;
+      } 
+    }
+  return isNegative;
+}
+
+
+//----------------------------------------------------------------------
+//Function: dumpResouceUsageTable
+//Purpose:
+//          print out ResouceTable for debug
+//
+//------------------------------------------------------------------------
+
+void ModuloScheduling::dumpResourceUsageTable(){
+  modSched_os<<"dumping resource usage table"<<"\n";
+  for(unsigned i=0;i< resourceTable.size();i++){
+    for(unsigned j=0;j < resourceTable[i].size();j++)
+      modSched_os <<resourceTable[i][j].first<<":"<< resourceTable[i][j].second<<" ";
+    modSched_os <<"\n";
+  }
+  
+}
+
+//----------------------------------------------------------------------
+//Function: dumpSchedule
+//Purpose:
+//       print out thisSchedule for debug
+//
+//-----------------------------------------------------------------------
+void ModuloScheduling::dumpSchedule(std::vector< std::vector<ModuloSchedGraphNode*> > thisSchedule){
+  
+  const MachineSchedInfo& msi=target.getSchedInfo();
+  unsigned numIssueSlots=msi.maxNumIssueTotal;
+  for(unsigned i=0;i< numIssueSlots;i++)
+      modSched_os <<"\t#";
+  modSched_os<<"\n";
+  for(unsigned i=0;i < thisSchedule.size();i++)
+    {
+      modSched_os<<"cycle"<<i<<": ";
+      for(unsigned j=0;j<thisSchedule[i].size();j++)
+       if(thisSchedule[i][j]!= NULL)
+         modSched_os<<thisSchedule[i][j]->getNodeId()<<"\t";
+       else
+         modSched_os<<"\t";
+      modSched_os<<"\n";
+    }
+
+}
+
+
+//----------------------------------------------------
+//Function: dumpScheduling
+//Purpose:
+//   print out the schedule and coreSchedule for debug      
+//
+//-------------------------------------------------------
+
+void ModuloScheduling::dumpScheduling(){
+  modSched_os<<"dump schedule:"<<"\n";
+  const MachineSchedInfo& msi=target.getSchedInfo();
+  unsigned numIssueSlots=msi.maxNumIssueTotal;
+  for(unsigned i=0;i< numIssueSlots;i++)
+    modSched_os <<"\t#";
+  modSched_os<<"\n";
+  for(unsigned i=0;i < schedule.size();i++)
+    {
+      modSched_os<<"cycle"<<i<<": ";
+      for(unsigned j=0;j<schedule[i].size();j++)
+       if(schedule[i][j]!= NULL)
+         modSched_os<<schedule[i][j]->getNodeId()<<"\t";
+       else
+         modSched_os<<"\t";
+      modSched_os<<"\n";
+    }
+  
+  modSched_os<<"dump coreSchedule:"<<"\n";
+  for(unsigned i=0;i< numIssueSlots;i++)
+    modSched_os <<"\t#";
+  modSched_os<<"\n";
+  for(unsigned i=0;i < coreSchedule.size();i++){
+    modSched_os<<"cycle"<<i<<": ";
+    for(unsigned j=0;j< coreSchedule[i].size();j++)
+      if(coreSchedule[i][j] !=NULL)
+       modSched_os<<coreSchedule[i][j]->getNodeId()<<"\t";
+      else
+       modSched_os<<"\t";
+    modSched_os<<"\n";
+  }
+}
+
+
+
+//---------------------------------------------------------------------------
+// Function: ModuloSchedulingPass
+// 
+// Purpose:
+//   Entry point for Modulo Scheduling
+//   Schedules LLVM instruction
+//   
+//---------------------------------------------------------------------------
+
+namespace {
+  class ModuloSchedulingPass : public FunctionPass {
+    const TargetMachine &target;
+  public:
+    ModuloSchedulingPass(const TargetMachine &T) : target(T) {}
+    const char *getPassName() const { return "Modulo Scheduling"; }
+    
+    // getAnalysisUsage - We use LiveVarInfo...
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      //AU.addRequired(FunctionLiveVarInfo::ID);
+    }
+    bool runOnFunction(Function &F);
+  };
+} // end anonymous namespace
+
+
+
+bool ModuloSchedulingPass::runOnFunction(Function &F)
+{
+  //if necessary , open the output for debug purpose
+  if(ModuloSchedDebugLevel== ModuloSched_Disable)
+    return false;
+  
+  if(ModuloSchedDebugLevel>= ModuloSched_PrintSchedule){
+    modSched_fb.open("moduloSchedDebugInfo.output", ios::out);
+    modSched_os<<"******************Modula Scheduling debug information*************************"<<endl;
+  }
+  
+  ModuloSchedGraphSet* graphSet = new ModuloSchedGraphSet(&F,target);
+  ModuloSchedulingSet ModuloSchedulingSet(*graphSet);
+  
+  if(ModuloSchedDebugLevel>= ModuloSched_PrintSchedule)
+    modSched_fb.close();
+  
+  return false;
+}
+
+
+Pass *createModuloSchedulingPass(const TargetMachine &tgt) {
+  return new ModuloSchedulingPass(tgt);
+}
+
diff --git a/lib/CodeGen/ModuloScheduling/ModuloScheduling.h b/lib/CodeGen/ModuloScheduling/ModuloScheduling.h
new file mode 100644 (file)
index 0000000..9f9fcaa
--- /dev/null
@@ -0,0 +1,160 @@
+//// - head file for the classes ModuloScheduling and ModuloScheduling ----*- C++ -*-===//
+//
+// This header defines the the classes ModuloScheduling  and ModuloSchedulingSet 's structure
+// 
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_CODEGEN_MODULOSCHEDULING_H
+#define LLVM_CODEGEN_MODULOSCHEDULING_H
+
+#include "ModuloSchedGraph.h"
+#include <iostream>
+
+class ModuloScheduling:NonCopyable {
+ private:
+  typedef std::vector<ModuloSchedGraphNode*> NodeVec;
+  
+  /// the graph to feed in
+  ModuloSchedGraph& graph;
+  const TargetMachine& target;
+  
+  //the BasicBlock to be scheduled
+  BasicBlock* bb;
+
+  ///Iteration Intervel
+  ///FIXME: II may be a better name for its meaning
+  unsigned II;
+
+  //the vector containing the nodes which have been scheduled
+  NodeVec nodeScheduled;
+  
+  ///the remaining unscheduled nodes 
+  const NodeVec& oNodes;
+  
+  ///the machine resource table
+  std::vector< std::vector<pair<int,int> > >  resourceTable ;
+  
+  ///the schedule( with many schedule stage)
+  std::vector<std::vector<ModuloSchedGraphNode*> > schedule;
+  
+  ///the kernel(core) schedule(length = II)
+  std::vector<std::vector<ModuloSchedGraphNode*> > coreSchedule;
+
+ typedef   BasicBlock::InstListType InstListType;
+ typedef   std::vector <std::vector<ModuloSchedGraphNode*> > vvNodeType;
+
+
+public:
+  
+  ///constructor
+  ModuloScheduling(ModuloSchedGraph& _graph): 
+    graph(_graph), 
+    target(graph.getTarget()),
+    oNodes(graph.getONodes())
+    {
+      II = graph.getMII();
+      bb=(BasicBlock*)graph.getBasicBlocks()[0];
+
+      instrScheduling();
+    };
+
+  ///destructor
+  ~ModuloScheduling(){};
+
+  ///the method to compute schedule and instert epilogue and prologue
+  void instrScheduling();
+
+  ///debug functions:
+  ///dump the schedule and core schedule
+  void dumpScheduling();
+  
+  ///dump the input vector of nodes
+  //sch: the input vector of nodes
+  void dumpSchedule( std::vector<std::vector<ModuloSchedGraphNode*> > sch);
+
+  ///dump the resource usage table
+  void dumpResourceUsageTable();
+
+
+  //*******************internel functions*******************************
+private:
+  //clear memory from the last round and initialize if necessary
+  void clearInitMem(const MachineSchedInfo& );
+
+  //compute schedule and coreSchedule with the current II
+  bool computeSchedule();
+
+  BasicBlock* getSuccBB(BasicBlock*);
+  BasicBlock* getPredBB(BasicBlock*);
+  void constructPrologue(BasicBlock* prologue);
+  void constructKernel(BasicBlock* prologue,BasicBlock* kernel,BasicBlock* epilogue);
+  void constructEpilogue(BasicBlock* epilogue,BasicBlock* succ_bb);
+
+  ///update the resource table at the startCycle
+  //vec: the resouce usage
+  //startCycle: the start cycle the resouce usage is
+  void updateResourceTable(std::vector<vector<unsigned int> > vec,int startCycle);
+
+  ///un-do the update in the resource table in the startCycle
+  //vec: the resouce usage
+  //startCycle: the start cycle the resouce usage is
+  void undoUpdateResourceTable(std::vector<vector<unsigned int> > vec,int startCycle);
+
+  ///return whether the resourcetable has negative element
+  ///this function is called after updateResouceTable() to determine whether a node can
+  /// be scheduled at certain cycle
+  bool resourceTableNegative();
+
+
+  ///try to Schedule the node starting from start to end cycle(inclusive)
+  //if it can be scheduled, put it in the schedule and update nodeScheduled
+  //node: the node to be scheduled
+  //start: start cycle
+  //end : end cycle
+  //nodeScheduled: a vector storing nodes which has been scheduled
+  bool ScheduleNode(ModuloSchedGraphNode* node,unsigned start, unsigned end, NodeVec& nodeScheduled);
+
+  //each instruction has a memory of the latest clone instruction
+  //the clone instruction can be get using getClone() 
+  //this function clears the memory, i.e. getClone() after calling this function returns null
+  void clearCloneMemory();
+
+  //this fuction make a clone of this input Instruction and update the clone memory
+  //inst: the instrution to be cloned
+  Instruction* cloneInstSetMemory(Instruction* inst);
+
+  //this function update each instrutions which uses ist as its operand
+  //after update, each instruction will use ist's clone as its operand
+  void updateUseWithClone(Instruction* ist);
+
+};
+
+
+class ModuloSchedulingSet:NonCopyable{
+ private:
+  
+  //the graphSet to feed in
+  ModuloSchedGraphSet& graphSet;
+ public:
+
+  //constructor
+  //Scheduling graph one by one
+  ModuloSchedulingSet(ModuloSchedGraphSet _graphSet):graphSet(_graphSet){
+    for(unsigned i=0;i<graphSet.size();i++){
+      ModuloSchedGraph& graph=*(graphSet[i]);
+      if(graph.isLoop())ModuloScheduling ModuloScheduling(graph);
+    }
+  };
+  
+  //destructor
+  ~ModuloSchedulingSet(){};
+};
+
+
+
+#endif
+
+
diff --git a/lib/Target/SparcV9/ModuloScheduling/Makefile b/lib/Target/SparcV9/ModuloScheduling/Makefile
new file mode 100644 (file)
index 0000000..adbc021
--- /dev/null
@@ -0,0 +1,7 @@
+LEVEL = ../../..
+
+DIRS  = 
+
+LIBRARYNAME = modulosched
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SparcV9/ModuloScheduling/ModuloSchedGraph.cpp b/lib/Target/SparcV9/ModuloScheduling/ModuloSchedGraph.cpp
new file mode 100644 (file)
index 0000000..4e36b07
--- /dev/null
@@ -0,0 +1,1313 @@
+#include <math.h>
+#include "ModuloSchedGraph.h"
+#include "llvm/CodeGen/InstrSelection.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/iOther.h"
+#include "Support/StringExtras.h"
+#include "Support/STLExtras.h"
+#include <iostream>
+#include <swig.h>
+#include "llvm/iOperators.h"
+#include "llvm/iOther.h"
+#include "llvm/iPHINode.h"
+#include "llvm/iTerminators.h"
+#include "llvm/iMemory.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineCodeForInstruction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/MachineSchedInfo.h"
+
+#define UNIDELAY 1
+#define min(a, b)       ((a) < (b) ? (a) : (b))
+#define max(a, b)       ((a) < (b) ? (b) : (a))
+
+using std::vector;
+using std::pair;
+//using std::hash_map;
+using std::cerr;
+extern std::ostream modSched_os;
+extern ModuloSchedDebugLevel_t ModuloSchedDebugLevel;
+//*********************** Internal Data Structures *************************/
+
+// The following two types need to be classes, not typedefs, so we can use
+// opaque declarations in SchedGraph.h
+// 
+struct RefVec: public vector< pair<ModuloSchedGraphNode*, int> > {
+  typedef vector< pair<ModuloSchedGraphNode*, int> >::      iterator       iterator;
+  typedef vector< pair<ModuloSchedGraphNode*, int> >::const_iterator const_iterator;
+};
+
+struct RegToRefVecMap: public hash_map<int, RefVec> {
+  typedef hash_map<int, RefVec>::      iterator       iterator;
+  typedef hash_map<int, RefVec>::const_iterator const_iterator;
+};
+
+struct ValueToDefVecMap: public hash_map<const Instruction*, RefVec> {
+  typedef hash_map<const Instruction*, RefVec>::      iterator       iterator;
+  typedef hash_map<const Instruction*, RefVec>::const_iterator const_iterator;
+};
+
+
+
+// class Modulo SchedGraphNode
+
+/*ctor*/
+ModuloSchedGraphNode::ModuloSchedGraphNode(unsigned int _nodeId,
+                                    const BasicBlock* _bb,
+                                    const Instruction* _inst,
+                                    int indexInBB,
+                                    const TargetMachine& target)
+  :
+  SchedGraphNodeCommon(_nodeId, _bb, indexInBB),
+  inst(_inst)
+{
+  if(inst)
+    {
+      //FIXME: find the latency 
+      //currently setthe latency to zero
+      latency =0;
+    }
+
+}
+                                    
+//class ModuloScheGraph
+
+
+void
+ModuloSchedGraph::addDummyEdges()
+{
+  assert(graphRoot->outEdges.size() == 0);
+  
+  for (const_iterator I=begin(); I != end(); ++I)
+    {
+      ModuloSchedGraphNode* node = (ModuloSchedGraphNode*)( (*I).second);
+      assert(node != graphRoot && node != graphLeaf);
+      if (node->beginInEdges() == node->endInEdges())
+       (void) new SchedGraphEdge(graphRoot, node, SchedGraphEdge::CtrlDep,
+                                 SchedGraphEdge::NonDataDep, 0);
+      if (node->beginOutEdges() == node->endOutEdges())
+       (void) new SchedGraphEdge(node, graphLeaf, SchedGraphEdge::CtrlDep,
+                                 SchedGraphEdge::NonDataDep, 0);
+    }
+}
+
+bool isDefinition(const Instruction* I)
+{
+  //if(TerminatorInst::classof(I)||FreeInst::classof(I) || StoreInst::classof(I) || CallInst::classof(I))
+  if(!I->hasName())
+    return false;
+  else
+    return true;
+}
+
+void ModuloSchedGraph::addDefUseEdges(const BasicBlock* bb)
+{
+  //collect def instructions, store them in vector
+  const MachineInstrInfo& mii = target.getInstrInfo();
+  
+  typedef std::vector<ModuloSchedGraphNode*> DefVec;
+  DefVec defVec;
+  
+  //find those def instructions
+  for(BasicBlock::const_iterator I=bb->begin(), E=bb->end(); I !=E; I++)
+    if(I->getType() != Type::VoidTy)
+      {
+       defVec.push_back(this->getGraphNodeForInst(I)) ;
+      }
+  
+  for(unsigned int i=0; i < defVec.size();i++)
+    for(Value::use_const_iterator I=defVec[i]->getInst()->use_begin();I !=defVec[i]->getInst()->use_end()  ;I++)
+      {
+       //for each use of a def, add a flow edge from the def instruction to the ref instruction
+
+       
+       const Instruction* value=defVec[i]->getInst();
+       Instruction *inst=(Instruction*)(*I);
+       ModuloSchedGraphNode* node=NULL;
+
+       for(BasicBlock::const_iterator I=bb->begin(), E=bb->end(); I !=E; I++)
+         if((const Instruction*)I == inst)
+           {
+             node=(*this)[inst];
+             break;
+           }
+       
+       assert(inst != NULL &&" Use not an Instruction ?" );
+       
+       if(node == NULL) //inst is not an instruction in this block
+         {}
+       else
+         {
+           //add a flow edge from the def instruction to the ref instruction
+
+           //self loop will not happen in SSA form
+           assert(defVec[i] != node && "same node?");
+           
+
+           //this is a true dependence, so the delay is equal to the delay of the pred node.
+           int delay=0;
+           MachineCodeForInstruction& tempMvec=  MachineCodeForInstruction::get(value);
+           for(unsigned j=0;j< tempMvec.size();j++)
+             {
+               MachineInstr* temp=tempMvec[j];
+               //delay +=mii.minLatency(temp->getOpCode());
+               delay = max(delay, mii.minLatency(temp->getOpCode()));
+             }
+
+           SchedGraphEdge* trueEdge=new SchedGraphEdge(defVec[i],node,value, SchedGraphEdge::TrueDep,delay);
+
+           //if the ref instruction is before the def instrution
+           //then the def instruction must be a phi instruction 
+           //add an anti-dependence edge to from the ref instruction to the def instruction
+           if( node->getOrigIndexInBB() < defVec[i]->getOrigIndexInBB())
+             {
+               assert(PHINode::classof(inst) && "the ref instruction befre def is not PHINode?");
+               trueEdge->setIteDiff(1);
+             }
+           
+         }
+       
+      }
+}
+
+void ModuloSchedGraph::addCDEdges(const BasicBlock* bb)
+{
+  //find the last instruction in the basic block
+  //see if it is an branch instruction. 
+  //If yes, then add an edge from each node expcept the last node to the last node
+
+  const Instruction* inst=&(bb->back());
+  ModuloSchedGraphNode* lastNode=(*this)[inst];
+  if( TerminatorInst::classof(inst))
+    for(BasicBlock::const_iterator I=bb->begin(),E=bb->end(); I != E; I++)
+      {  
+       if(inst != I)
+         {
+           ModuloSchedGraphNode* node = (*this)[I];
+           //use latency of 0
+           (void) new SchedGraphEdge(node,lastNode,SchedGraphEdge::CtrlDep,
+                                     SchedGraphEdge::NonDataDep,0);
+         }
+       
+      }
+  
+}
+
+
+
+
+static const int SG_LOAD_REF  = 0;
+static const int SG_STORE_REF = 1;
+static const int SG_CALL_REF  = 2;
+
+static const unsigned int SG_DepOrderArray[][3] = {
+  { SchedGraphEdge::NonDataDep,
+            SchedGraphEdge::AntiDep,
+                        SchedGraphEdge::AntiDep },
+  { SchedGraphEdge::TrueDep,
+            SchedGraphEdge::OutputDep,
+                        SchedGraphEdge::TrueDep | SchedGraphEdge::OutputDep },
+  { SchedGraphEdge::TrueDep,
+            SchedGraphEdge::AntiDep | SchedGraphEdge::OutputDep,
+                        SchedGraphEdge::TrueDep | SchedGraphEdge::AntiDep
+                                                | SchedGraphEdge::OutputDep }
+};
+
+
+// Add a dependence edge between every pair of machine load/store/call
+// instructions, where at least one is a store or a call.
+// Use latency 1 just to ensure that memory operations are ordered;
+// latency does not otherwise matter (true dependences enforce that).
+// 
+void
+ModuloSchedGraph::addMemEdges(const BasicBlock* bb)
+{
+  
+  vector<ModuloSchedGraphNode*> memNodeVec;
+
+  //construct the memNodeVec
+  
+  for( BasicBlock::const_iterator I=bb->begin(), E=bb->end();I != E; I++)
+    if(LoadInst::classof(I) ||StoreInst::classof(I) || CallInst::classof(I))
+      {
+       ModuloSchedGraphNode* node =(*this)[(const Instruction*)I];
+       memNodeVec.push_back(node);
+      }
+  // Instructions in memNodeVec are in execution order within the basic block,
+  // so simply look at all pairs <memNodeVec[i], memNodeVec[j: j > i]>.
+  // 
+  for (unsigned im=0, NM=memNodeVec.size(); im < NM; im++)
+    {
+      const Instruction* fromInst = memNodeVec[im]->getInst();
+      int fromType = CallInst::classof(fromInst)? SG_CALL_REF
+       : LoadInst::classof(fromInst)? SG_LOAD_REF
+       : SG_STORE_REF;
+      for (unsigned jm=im+1; jm < NM; jm++)
+       {
+         const Instruction* toInst=memNodeVec[jm]->getInst();
+         int toType = CallInst::classof(toInst)? SG_CALL_REF
+           : LoadInst::classof(toInst)? SG_LOAD_REF
+           : SG_STORE_REF;
+          if (fromType != SG_LOAD_REF || toType != SG_LOAD_REF)
+           {
+             (void) new SchedGraphEdge(memNodeVec[im], memNodeVec[jm],
+                                       SchedGraphEdge::MemoryDep,
+                                       SG_DepOrderArray[fromType][toType], 1);
+             
+             SchedGraphEdge* edge=new SchedGraphEdge(memNodeVec[jm],memNodeVec[im],
+                                                     SchedGraphEdge::MemoryDep,
+                                                     SG_DepOrderArray[toType][fromType],1);
+             edge->setIteDiff(1);
+             
+           }
+       }
+    }
+} 
+
+
+
+void ModuloSchedGraph::buildNodesforBB  (const TargetMachine& target,
+                                     const BasicBlock* bb,
+                                     std::vector<ModuloSchedGraphNode*>& memNode,
+                                     RegToRefVecMap& regToRefVecMap,
+                                     ValueToDefVecMap& valueToDefVecMap)
+{
+  //const MachineInstrInfo& mii=target.getInstrInfo();
+  
+  //Build graph nodes for each LLVM instruction and gather def/use info.
+  //Do both together in a single pass over all machine instructions.
+  
+  int i=0;
+  for(BasicBlock::const_iterator I=bb->begin(), E=bb->end(); I!=E; I++)
+    {
+      ModuloSchedGraphNode* node=new ModuloSchedGraphNode(getNumNodes(), bb,I,i, target);
+      i++;
+      this->noteModuloSchedGraphNodeForInst(I,node);
+    }
+
+  //this function finds some info about instruction in basic block for later use
+  //findDefUseInfoAtInstr(target, node, memNode,regToRefVecMap,valueToDefVecMap);
+
+
+}
+
+
+bool ModuloSchedGraph::isLoop(const BasicBlock* bb)
+{
+  //only if the last instruction in the basicblock is branch instruction and 
+  //there is at least an option to branch itself
+
+  
+  const Instruction* inst=&(bb->back());
+  if( BranchInst::classof(inst))
+    for(unsigned i=0;i < ((BranchInst*)inst)->getNumSuccessors();i++)
+      {
+       BasicBlock* sb=((BranchInst*)inst)->getSuccessor(i);
+       if(sb ==bb) return true;
+      }
+  
+  return false;
+  
+}
+bool ModuloSchedGraph::isLoop()
+{
+  //only if the last instruction in the basicblock is branch instruction and 
+  //there is at least an option to branch itself
+  
+  assert(bbVec.size() ==1 &&"only 1 basicblock in a graph");
+  const BasicBlock* bb=bbVec[0];
+  const Instruction* inst=&(bb->back());
+  if( BranchInst::classof(inst))
+    for(unsigned i=0;i < ((BranchInst*)inst)->getNumSuccessors();i++)
+      {
+       BasicBlock* sb=((BranchInst*)inst)->getSuccessor(i);
+       if(sb ==bb) return true;
+      }
+  
+  return false;
+  
+}
+
+void ModuloSchedGraph::computeNodeASAP(const BasicBlock* bb)
+{
+
+ //FIXME: now assume the only backward edges come from the edges from other nodes to the phi Node
+  //so i will ignore all edges to the phi node; after this, there shall be no recurrence. 
+  
+  unsigned numNodes=bb->size();
+  for(unsigned i=2;i < numNodes+2;i++)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->setPropertyComputed(false);
+    }
+
+  for(unsigned i=2;i < numNodes+2; i++)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->ASAP=0;
+      if(i==2 ||node->getNumInEdges() ==0) 
+       { node->setPropertyComputed(true);continue;}
+      for(ModuloSchedGraphNode::const_iterator I=node->beginInEdges(), E=node->endInEdges();I !=E; I++)
+       {
+         SchedGraphEdge* edge=*I;
+         ModuloSchedGraphNode* pred=(ModuloSchedGraphNode*)(edge->getSrc());
+         assert(pred->getPropertyComputed()&&"pred node property is not computed!");
+         int temp=pred->ASAP + edge->getMinDelay() - edge->getIteDiff()*this->MII;
+         node->ASAP= max(node->ASAP,temp);
+       }
+      node->setPropertyComputed(true);
+    }
+}
+
+void ModuloSchedGraph::computeNodeALAP(const BasicBlock* bb)
+{
+  
+  unsigned numNodes=bb->size();
+  int maxASAP=0;
+  for(unsigned i=numNodes+1;i >= 2;i--)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->setPropertyComputed(false);
+      //cerr<< " maxASAP= " <<maxASAP<<" node->ASAP= "<< node->ASAP<<"\n";
+      maxASAP=max(maxASAP,node->ASAP);
+    }
+
+  //cerr<<"maxASAP is "<<maxASAP<<"\n";
+  
+  for(unsigned i=numNodes+1;i >= 2; i--)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->ALAP=maxASAP;
+      for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges(); I!=E; I++)
+       {
+         SchedGraphEdge* edge= *I;
+         ModuloSchedGraphNode* succ=(ModuloSchedGraphNode*) (edge->getSink());
+         if(PHINode::classof(succ->getInst()))continue;
+         assert(succ->getPropertyComputed()&&"succ node property is not computed!");
+         int temp=succ->ALAP - edge->getMinDelay()+edge->getIteDiff()*this->MII;
+         node->ALAP =min(node->ALAP,temp);
+       }
+      node->setPropertyComputed(true);
+      
+    }
+}
+
+void ModuloSchedGraph::computeNodeMov(const BasicBlock* bb)
+{
+  unsigned numNodes=bb->size();
+  for(unsigned i =2;i < numNodes +2 ;i++)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->mov=node->ALAP-node->ASAP;
+      assert(node->mov >=0 &&"move freedom for this node is less than zero? ");
+    }
+}
+
+
+void ModuloSchedGraph::computeNodeDepth(const BasicBlock* bb)
+{
+
+  unsigned numNodes=bb->size();
+  for(unsigned i=2;i < numNodes+2;i++)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->setPropertyComputed(false);
+    }
+
+  for(unsigned i=2;i < numNodes+2; i++)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->depth=0;
+      if(i==2 ||node->getNumInEdges() ==0) 
+       { node->setPropertyComputed(true);continue;}
+      for(ModuloSchedGraphNode::const_iterator I=node->beginInEdges(), E=node->endInEdges();I !=E; I++)
+       {
+         SchedGraphEdge* edge=*I;
+         ModuloSchedGraphNode* pred=(ModuloSchedGraphNode*)(edge->getSrc());
+         assert(pred->getPropertyComputed()&&"pred node property is not computed!");
+         int temp=pred->depth + edge->getMinDelay();
+         node->depth= max(node->depth,temp);
+       }
+      node->setPropertyComputed(true);
+    }
+
+}
+
+
+void ModuloSchedGraph::computeNodeHeight(const BasicBlock* bb)
+{
+  unsigned numNodes=bb->size();
+  for(unsigned i=numNodes+1;i >= 2;i--)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->setPropertyComputed(false);
+    }
+  
+  for(unsigned i=numNodes+1;i >= 2; i--)
+    {
+      ModuloSchedGraphNode* node=getNode(i);
+      node->height=0;
+      for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges(); I!=E; I++)
+       {
+         SchedGraphEdge* edge= *I;
+         ModuloSchedGraphNode* succ=(ModuloSchedGraphNode*)(edge->getSink());
+         if(PHINode::classof(succ->getInst())) continue;
+         assert(succ->getPropertyComputed()&&"succ node property is not computed!");
+         node->height=max(node->height, succ->height+edge->getMinDelay());
+         
+       }
+      node->setPropertyComputed(true);
+      
+    }
+
+}
+
+void ModuloSchedGraph::computeNodeProperty(const BasicBlock* bb)
+{
+  //FIXME: now assume the only backward edges come from the edges from other nodes to the phi Node
+  //so i will ignore all edges to the phi node; after this, there shall be no recurrence. 
+  
+  this->computeNodeASAP(bb);
+  this->computeNodeALAP(bb);
+  this->computeNodeMov(bb);
+  this->computeNodeDepth(bb);
+  this->computeNodeHeight(bb); 
+}
+
+
+//do not consider the backedge in these two functions:
+// i.e. don't consider the edge with destination in phi node
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::predSet(std::vector<ModuloSchedGraphNode*> set, unsigned backEdgeSrc, unsigned backEdgeSink)
+{
+  std::vector<ModuloSchedGraphNode*> predS;
+  for(unsigned i=0; i < set.size(); i++)
+    {
+      ModuloSchedGraphNode* node = set[i];
+      for(ModuloSchedGraphNode::const_iterator I=node->beginInEdges(), E=node->endInEdges(); I !=E; I++)
+       {
+         SchedGraphEdge* edge= *I;
+         if(edge->getSrc()->getNodeId() ==backEdgeSrc && edge->getSink()->getNodeId() == backEdgeSink) continue;
+         ModuloSchedGraphNode* pred= (ModuloSchedGraphNode*)(edge->getSrc());
+         //if pred is not in the predSet, push it in vector
+         bool alreadyInset=false;
+         for(unsigned j=0; j < predS.size(); j++)
+           if(predS[j]->getNodeId() == pred->getNodeId() ) 
+             {alreadyInset=true;break;}
+         
+         for(unsigned j=0; j < set.size(); j++)
+           if(set[j]->getNodeId()  == pred->getNodeId() ) 
+             {alreadyInset=true; break;}
+         
+         if(! alreadyInset)
+           predS.push_back(pred);
+       }
+    }
+  return predS;
+}
+
+ModuloSchedGraph::NodeVec ModuloSchedGraph::predSet(NodeVec set)
+{
+  //node number increases from 2
+  return predSet(set,0, 0);
+}
+
+
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::predSet(ModuloSchedGraphNode* _node, unsigned backEdgeSrc, unsigned backEdgeSink)
+{
+  
+  std::vector<ModuloSchedGraphNode*> set;
+  set.push_back(_node);
+  return predSet(set, backEdgeSrc, backEdgeSink);
+
+}
+
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::predSet(ModuloSchedGraphNode* _node)
+{
+  return predSet(_node,0,0);
+}
+
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::succSet(std::vector<ModuloSchedGraphNode*> set,unsigned src, unsigned sink)
+{
+  vector<ModuloSchedGraphNode*> succS;
+  for(unsigned i=0; i < set.size(); i++)
+    {
+      ModuloSchedGraphNode* node = set[i];
+      for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges(); I !=E; I++)
+       {
+         SchedGraphEdge* edge= *I;
+         if(edge->getSrc()->getNodeId() == src && edge->getSink()->getNodeId() == sink) continue;
+         ModuloSchedGraphNode* succ= (ModuloSchedGraphNode*)(edge->getSink());
+         //if pred is not in the predSet, push it in vector
+         bool alreadyInset=false;
+         for(unsigned j=0; j < succS.size(); j++)
+           if(succS[j]->getNodeId() == succ->getNodeId() ) 
+             {alreadyInset=true;break;}
+
+         for(unsigned j=0; j < set.size(); j++)
+           if(set[j]->getNodeId() == succ->getNodeId() ) 
+             {alreadyInset=true;break;}
+         if(! alreadyInset)
+           succS.push_back(succ);
+       }
+    }
+  return succS; 
+}
+
+ModuloSchedGraph::NodeVec ModuloSchedGraph::succSet(NodeVec set)
+{
+  return succSet(set,0,0);
+}
+
+
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::succSet(ModuloSchedGraphNode* _node,unsigned src, unsigned sink)
+{
+  std::vector<ModuloSchedGraphNode*> set;
+  set.push_back(_node);
+  return succSet(set, src, sink);
+  
+  
+}
+
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::succSet(ModuloSchedGraphNode* _node)
+{
+  return succSet(_node, 0, 0);
+}
+
+SchedGraphEdge* ModuloSchedGraph::getMaxDelayEdge(unsigned srcId, unsigned sinkId)
+{
+  
+  ModuloSchedGraphNode* node =getNode(srcId);
+  SchedGraphEdge* maxDelayEdge=NULL;
+  int maxDelay=-1;
+  for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges();I !=E; I++)
+    {
+      SchedGraphEdge* edge= *I;
+      if(edge->getSink()->getNodeId() == sinkId)
+       if(edge->getMinDelay() > maxDelay){
+         maxDelayEdge=edge;
+         maxDelay=edge->getMinDelay(); 
+       }
+    }
+  assert(maxDelayEdge != NULL&&"no edge between the srcId and sinkId?");
+  return maxDelayEdge;
+}
+
+void ModuloSchedGraph::dumpCircuits()
+{
+  modSched_os<<"dumping circuits for graph: "<<"\n";
+  int j=-1;
+  while(circuits[++j][0] !=0){
+    int k=-1;
+    while(circuits[j][++k] !=0)
+      modSched_os<< circuits[j][k]<<"\t";
+    modSched_os<<"\n";
+  }
+}
+
+void ModuloSchedGraph::dumpSet(std::vector<ModuloSchedGraphNode*> set)
+{
+  for(unsigned i=0;i < set.size() ; i++)
+    modSched_os<< set[i]->getNodeId()<<"\t";
+  modSched_os<<"\n";
+}
+
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::vectorUnion(std::vector<ModuloSchedGraphNode*> set1,std::vector<ModuloSchedGraphNode*> set2 )
+{
+  std::vector<ModuloSchedGraphNode*> unionVec;
+  for(unsigned i=0;i<set1.size();i++)
+    unionVec.push_back(set1[i]);
+  for(unsigned j=0;j < set2.size();j++){
+    bool inset=false;
+    for(unsigned i=0;i < unionVec.size();i++)
+      if(set2[j]==unionVec[i]) inset=true;
+    if(!inset)unionVec.push_back(set2[j]);
+  }
+  return unionVec;
+}
+std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::vectorConj(std::vector<ModuloSchedGraphNode*> set1,std::vector<ModuloSchedGraphNode*> set2 )
+{
+   std::vector<ModuloSchedGraphNode*> conjVec;
+  for(unsigned i=0;i<set1.size();i++)
+    for(unsigned j=0;j < set2.size();j++)
+      if(set1[i]==set2[j]) conjVec.push_back(set1[i]);
+  return conjVec; 
+}
+
+ModuloSchedGraph::NodeVec ModuloSchedGraph::vectorSub(NodeVec set1, NodeVec set2)
+{
+  NodeVec newVec;
+  for(NodeVec::iterator I=set1.begin(); I != set1.end(); I++){
+    bool inset=false;
+    for(NodeVec::iterator II=set2.begin(); II!=set2.end(); II++)
+      if( (*I)->getNodeId() ==(*II)->getNodeId()) 
+       {inset=true;break;}
+    if(!inset) newVec.push_back(*I);
+  }
+  return newVec;
+}
+  
+void ModuloSchedGraph::orderNodes(){
+  oNodes.clear();
+  
+  std::vector<ModuloSchedGraphNode*> set;
+  const BasicBlock* bb=bbVec[0];
+  unsigned numNodes = bb->size();
+  
+  //first order all the sets
+  int j=-1;
+  int totalDelay=-1;
+  int preDelay=-1;
+  while(circuits[++j][0] !=0){
+    int k=-1;
+    preDelay=totalDelay;
+
+    while(circuits[j][++k] !=0){
+      ModuloSchedGraphNode* node =getNode(circuits[j][k]);
+      unsigned nextNodeId;
+      nextNodeId =circuits[j][k+1] !=0? circuits[j][k+1]:circuits[j][0];
+      SchedGraphEdge* edge = getMaxDelayEdge(circuits[j][k], nextNodeId);
+      totalDelay += edge->getMinDelay();
+    }
+    if(preDelay != -1 && totalDelay > preDelay){
+      //swap circuits[j][] and cuicuits[j-1][]
+      unsigned temp[MAXNODE];
+      for(int k=0;k<MAXNODE;k++){
+       temp[k]=circuits[j-1][k];       
+       circuits[j-1][k]=circuits[j][k];
+       circuits[j][k]=temp[k];
+      }
+      //restart
+      j=-1;
+    }
+  }
+
+  //build the first set
+  int backEdgeSrc;
+  int backEdgeSink;
+  if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+    modSched_os<<"building the first set"<<"\n";
+  int setSeq=-1;
+  int k=-1;
+  setSeq++;
+  while(circuits[setSeq][++k]!=0)
+    set.push_back(getNode(circuits[setSeq][k]));
+  if(circuits[setSeq][0]!=0){
+    backEdgeSrc=circuits[setSeq][k-1];
+    backEdgeSink=circuits[setSeq][0];
+  }
+  if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){
+    modSched_os<<"the first set is:";
+    dumpSet(set);
+  }
+  
+  //implement the ordering algorithm
+  enum OrderSeq{ bottom_up, top_down};
+  OrderSeq order;
+  std::vector<ModuloSchedGraphNode*> R;
+  while(!set.empty())
+    {
+      std::vector<ModuloSchedGraphNode*> pset=predSet(oNodes);
+      std::vector<ModuloSchedGraphNode*> sset=succSet(oNodes);
+      
+      if(!pset.empty() && !vectorConj(pset,set).empty())
+       {R=vectorConj(pset,set);order=bottom_up;}
+      else if( !sset.empty() && !vectorConj(sset,set).empty())
+       {R=vectorConj(sset,set);order=top_down;}
+      else
+       {
+         int maxASAP=-1;
+         int position=-1;
+         for(unsigned i=0;i<set.size();i++){
+           int temp= set[i]->getASAP();
+           if(temp>maxASAP ) {
+             maxASAP=temp;
+             position=i;
+           }
+         }
+         R.push_back(set[position]);
+         order=bottom_up;      
+       }
+      
+      while(!R.empty()){
+       if(order== top_down){
+         if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+           modSched_os<<"in top_down round"<<"\n";
+         while(!R.empty()){
+           int maxHeight=-1;
+           NodeVec::iterator chosenI;
+           for(NodeVec::iterator I=R.begin();I != R.end();I++){
+             int temp=(*I)->height;
+             if( (temp > maxHeight) || ( temp == maxHeight && (*I)->mov <= (*chosenI)->mov ) ){
+               
+               if((temp > maxHeight) || ( temp == maxHeight && (*I)->mov < (*chosenI)->mov )){
+                 maxHeight=temp;
+                 chosenI=I;
+                 continue;
+               }
+               //possible case: instruction A and B has the same height and mov, but A has dependence to B
+               //e.g B is the branch instruction in the end, or A is the phi instruction at the beginning
+               if((*I)->mov == (*chosenI)->mov)
+                 for(ModuloSchedGraphNode::const_iterator oe=(*I)->beginOutEdges(), end=(*I)->endOutEdges();oe !=end; oe++){
+                   if((*oe)->getSink() == (*chosenI)){
+                     maxHeight=temp;
+                     chosenI=I;
+                     continue;
+                   }
+                 }
+             }
+           }
+           ModuloSchedGraphNode* mu= *chosenI;
+           oNodes.push_back(mu);
+           R.erase(chosenI);
+           std::vector<ModuloSchedGraphNode*> succ_mu= succSet(mu,backEdgeSrc,backEdgeSink);
+             std::vector<ModuloSchedGraphNode*> comm=vectorConj(succ_mu,set);
+             comm=vectorSub(comm,oNodes);
+           R = vectorUnion(comm, R);
+         }
+         order=bottom_up;
+         R= vectorConj(predSet(oNodes), set);
+       } else{
+         if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+           modSched_os<<"in bottom up round"<<"\n";
+         while(!R.empty()){
+           int maxDepth=-1;
+           NodeVec::iterator chosenI;
+           for(NodeVec::iterator I=R.begin();I != R.end();I++){
+             int temp=(*I)->depth;
+             if( (temp > maxDepth) || ( temp == maxDepth && (*I)->mov < (*chosenI)->mov )){
+               maxDepth=temp;
+               chosenI=I;
+             }
+           }
+           ModuloSchedGraphNode* mu=*chosenI;
+           oNodes.push_back(mu);
+           R.erase(chosenI);
+           std::vector<ModuloSchedGraphNode*> pred_mu= predSet(mu,backEdgeSrc,backEdgeSink);
+           std::vector<ModuloSchedGraphNode*> comm=vectorConj(pred_mu,set);
+           comm=vectorSub(comm,oNodes);
+           R = vectorUnion(comm, R);
+         }
+         order=top_down;
+         R= vectorConj(succSet(oNodes), set);
+       }
+      }
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){
+       modSched_os<<"order finished"<<"\n";
+       modSched_os<<"dumping the ordered nodes: "<<"\n";
+       dumpSet(oNodes);
+       dumpCircuits();
+      }
+      //create a new set
+      //FIXME: the nodes between onodes and this circuit should also be include in this set
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       modSched_os<<"building the next set"<<"\n";
+      set.clear();
+      int k=-1;
+      setSeq++;
+      while(circuits[setSeq][++k]!=0)
+       set.push_back(getNode(circuits[setSeq][k]));
+      if(circuits[setSeq][0]!=0){
+       backEdgeSrc=circuits[setSeq][k-1];
+       backEdgeSink=circuits[setSeq][0];
+      }
+      if(set.empty()){
+       //no circuits any more
+       //collect all other nodes
+       if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+         modSched_os<<"no circuits any more, collect the rest nodes"<<"\n";
+       for(unsigned i=2;i<numNodes+2;i++){
+         bool inset=false;
+         for(unsigned j=0;j< oNodes.size();j++)
+           if(oNodes[j]->getNodeId() == i) 
+             {inset=true;break;}
+         if(!inset)set.push_back(getNode(i));
+       }
+      }
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){
+       modSched_os<<"next set is: "<<"\n";
+       dumpSet(set);
+      }
+    }//while(!set.empty())
+  
+}
+
+
+
+
+
+
+void ModuloSchedGraph::buildGraph (const TargetMachine& target)
+{
+  const BasicBlock* bb=bbVec[0]; 
+
+  assert(bbVec.size() ==1 &&"only handling a single basic block here");
+
+  // Use this data structure to note all machine operands that compute
+  // ordinary LLVM values.  These must be computed defs (i.e., instructions). 
+  // Note that there may be multiple machine instructions that define
+  // each Value.
+  ValueToDefVecMap valueToDefVecMap;
+
+  // Use this data structure to note all memory instructions.
+  // We use this to add memory dependence edges without a second full walk.
+  // 
+  // vector<const Instruction*> memVec;
+  vector<ModuloSchedGraphNode*> memNodeVec;
+
+   // Use this data structure to note any uses or definitions of
+  // machine registers so we can add edges for those later without
+  // extra passes over the nodes.
+  // The vector holds an ordered list of references to the machine reg,
+  // ordered according to control-flow order.  This only works for a
+  // single basic block, hence the assertion.  Each reference is identified
+  // by the pair: <node, operand-number>.
+  // 
+  RegToRefVecMap regToRefVecMap;
+  
+
+
+ // Make a dummy root node.  We'll add edges to the real roots later.
+  graphRoot = new ModuloSchedGraphNode(0, NULL, NULL, -1,target);
+  graphLeaf = new ModuloSchedGraphNode(1, NULL, NULL, -1,target);
+ //----------------------------------------------------------------
+  // First add nodes for all the LLVM instructions in the basic block
+  // because this greatly simplifies identifying which edges to add.
+  // Do this one VM instruction at a time since the ModuloSchedGraphNode needs that.
+  // Also, remember the load/store instructions to add memory deps later.
+  //----------------------------------------------------------------
+
+  //FIXME:if there is call instruction, then we shall quit somewhere
+  //      currently we leave call instruction and continue construct graph
+
+  //dump only the blocks which are from loops
+
+
+  if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+    this->dump(bb);
+
+  if(!isLoop(bb)){
+    modSched_os <<" dumping non-loop BB:"<<endl;
+    dump(bb);
+  }
+  if( isLoop(bb))
+    {
+      buildNodesforBB(target, bb, memNodeVec, regToRefVecMap, valueToDefVecMap);
+      
+      this->addDefUseEdges(bb);
+      
+      this->addCDEdges(bb);
+      
+      this->addMemEdges(bb);
+      
+      //this->dump();
+      
+      int ResII=this->computeResII(bb);
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       modSched_os << "ResII is " << ResII<<"\n";;
+      int RecII=this->computeRecII(bb);      
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       modSched_os << "RecII is " <<RecII<<"\n";
+      
+      this->MII=max(ResII, RecII);
+      
+      this->computeNodeProperty(bb);
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       this->dumpNodeProperty();
+      
+      this->orderNodes();
+      
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       this->dump();
+      //this->instrScheduling();
+      
+      //this->dumpScheduling();
+      
+      
+    }
+}
+
+ModuloSchedGraphNode* ModuloSchedGraph::getNode  (const unsigned nodeId) const
+{
+  for(const_iterator I=begin(), E=end();I !=E;I++)
+    if((*I).second->getNodeId() ==nodeId)
+      return (ModuloSchedGraphNode*)(*I).second;
+  return NULL;
+}
+
+int ModuloSchedGraph::computeRecII(const BasicBlock* bb)
+{
+
+  int  RecII=0;
+
+  //os<<"begining computerRecII()"<<"\n";
+
+
+  //FIXME: only deal with circuits starting at the first node: the phi node nodeId=2;
+
+  //search all elementary circuits in the dependance graph
+  //assume maximum number of nodes is MAXNODE
+  
+  unsigned path[MAXNODE];
+  unsigned stack[MAXNODE][MAXNODE];
+
+
+  for(int j=0;j<MAXNODE;j++)
+    {path[j]=0;
+    for(int k=0;k<MAXNODE;k++)
+      stack[j][k]=0;
+    }
+  //in our graph, the node number starts at 2
+
+  //number of nodes, because each instruction will result in one node
+  const unsigned numNodes= bb->size();
+  
+  int i=0;
+  path[i]=2;
+
+  ModuloSchedGraphNode* initNode=getNode(path[0]);
+  unsigned initNodeId=initNode->getNodeId();
+  ModuloSchedGraphNode* currentNode=initNode;
+  
+  while(currentNode != NULL)
+    {
+      unsigned currentNodeId=currentNode->getNodeId();
+      //      modSched_os<<"current node is "<<currentNodeId<<"\n";
+      
+      ModuloSchedGraphNode* nextNode=NULL;
+      for(ModuloSchedGraphNode::const_iterator I=currentNode->beginOutEdges(), E=currentNode->endOutEdges(); I !=E; I++)
+       {
+         //modSched_os <<" searching in outgoint edges of node "<<currentNodeId<<"\n";
+         unsigned nodeId=((SchedGraphEdge*) *I)->getSink()->getNodeId();
+         bool inpath=false,instack=false;
+         int k;
+
+         //modSched_os<<"nodeId is "<<nodeId<<"\n";
+
+         k=-1;
+         while(path[++k] !=0)
+           if(nodeId == path[k])
+             {inpath=true;break;}
+         
+
+
+         k=-1;
+         while(stack[i][++k] !=0 )
+           if(nodeId == stack[i][k])
+             {instack =true; break;}
+
+
+         if( nodeId > currentNodeId && !inpath && !instack)
+           {nextNode=(ModuloSchedGraphNode*) ((SchedGraphEdge*)*I)->getSink();break;}
+         
+       }
+      if(nextNode != NULL)
+       {
+         //modSched_os<<"find the next Node "<<nextNode->getNodeId()<<"\n";
+         
+         
+         int j=0;
+         while( stack[i][j] !=0) j++;
+         stack[i][j]=nextNode->getNodeId();
+
+
+         i++;
+         path[i]=nextNode->getNodeId();
+         currentNode=nextNode;
+       }
+      else
+       {
+         //modSched_os<<"no expansion any more"<<"\n";
+         //confirmCircuit();
+         for(ModuloSchedGraphNode::const_iterator I=currentNode->beginOutEdges(), E=currentNode->endOutEdges() ; I != E; I++)
+           {
+             unsigned nodeId=((SchedGraphEdge*)*I)->getSink()->getNodeId();
+             if(nodeId == initNodeId)
+               {
+
+                 int j=-1;
+                 while(circuits[++j][0] !=0);
+                 for(int k=0;k<MAXNODE;k++)circuits[j][k]=path[k];
+
+               }
+           }
+         //remove this node in the path and clear the corresponding entries in the stack
+         path[i]=0;
+         int j=0;
+         for(j=0;j<MAXNODE;j++)stack[i][j]=0;
+         i--;
+         currentNode=getNode(path[i]);
+       }
+      if(i==0) 
+       {
+
+         if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+           modSched_os<<"circuits found are: "<<"\n";
+         int j=-1;
+         while(circuits[++j][0] !=0){
+           int k=-1;
+           while(circuits[j][++k] !=0)
+             if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+               modSched_os<< circuits[j][k]<<"\t";
+           if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+             modSched_os<<"\n";
+         
+           
+           //for this circuit, compute the sum of all edge delay
+           int sumDelay=0;
+           k=-1;
+           while(circuits[j][++k] !=0)
+             {
+               //ModuloSchedGraphNode* node =getNode(circuits[j][k]);
+               unsigned nextNodeId;
+               nextNodeId =circuits[j][k+1] !=0? circuits[j][k+1]:circuits[j][0];
+
+               /*
+               for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges();I !=E; I++)
+               {
+                 SchedGraphEdge* edge= *I;
+                   if(edge->getSink()->getNodeId() == nextNodeId)
+                   {sumDelay  += edge->getMinDelay();break;}
+                     }
+               */
+
+               sumDelay += getMaxDelayEdge(circuits[j][k],nextNodeId)->getMinDelay();
+
+             }
+           //       assume we have distance 1, in this case the sumDelay is RecII
+           //       this is correct for SSA form only
+           //      
+           if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+             modSched_os<<"The total Delay in the circuit is " <<sumDelay<<"\n";
+           
+           RecII= RecII > sumDelay? RecII: sumDelay;
+           
+         }
+         return RecII;
+       }
+      
+    }
+  
+  return -1;
+}
+
+void  ModuloSchedGraph::addResourceUsage(std::vector<pair<int,int> >& ruVec, int rid){
+  //modSched_os<<"\nadding a resouce , current resouceUsage vector size is "<<ruVec.size()<<"\n";
+  bool alreadyExists=false;
+  for(unsigned i=0;i <ruVec.size() ; i++){
+    if(rid == ruVec[i].first) {
+      ruVec[i].second++;
+      alreadyExists=true;
+      break;
+    }
+  }
+  if( !alreadyExists) ruVec.push_back(std::make_pair(rid, 1));
+  //modSched_os<<"current resouceUsage vector size is "<<ruVec.size()<<"\n";
+
+}
+void ModuloSchedGraph::dumpResourceUsage(std::vector< pair<int,int> > &ru)
+{
+  MachineSchedInfo& msi = (MachineSchedInfo&)target.getSchedInfo();
+  
+  std::vector<pair<int,int> > resourceNumVector = msi.resourceNumVector;
+  modSched_os <<"resourceID\t"<<"resourceNum"<<"\n";
+  for(unsigned i=0;i<resourceNumVector.size();i++)
+    modSched_os <<resourceNumVector[i].first<<"\t"<<resourceNumVector[i].second<<"\n";
+  
+  modSched_os <<" maxNumIssueTotal(issue slot in one cycle) = "<<msi.maxNumIssueTotal<<"\n";
+  modSched_os<<"resourceID\t resourceUsage\t ResourceNum"<<"\n";
+  for(unsigned i=0;i<ru.size();i++){
+    modSched_os<<ru[i].first<<"\t"<<ru[i].second;
+    const unsigned  resNum=msi.getCPUResourceNum(ru[i].first);
+    modSched_os<<"\t"<<resNum<<"\n";
+  }
+}
+
+int ModuloSchedGraph::computeResII(const BasicBlock* bb)
+{
+  
+  const MachineInstrInfo& mii = target.getInstrInfo();
+  const MachineSchedInfo& msi = target.getSchedInfo();
+  
+  int ResII;
+  std::vector<pair<int,int> > resourceUsage; //pair<int resourceid, int resourceUsageTimes_in_the_whole_block>
+  
+  //FIXME: number of functional units the target machine can provide should be get from Target
+  //       here I temporary hardcode it
+  
+  for(BasicBlock::const_iterator I=bb->begin(),E=bb->end(); I !=E; I++)
+    {
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){
+       modSched_os<<"machine instruction for llvm instruction( node "<<getGraphNodeForInst(I)->getNodeId()<<")" <<"\n";
+       modSched_os<<"\t"<<*I;
+      }
+      MachineCodeForInstruction& tempMvec=  MachineCodeForInstruction::get(I);
+      if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       modSched_os <<"size =" <<tempMvec.size()<<"\n";
+      for(unsigned i=0;i< tempMvec.size();i++)
+       {
+         MachineInstr* minstr=tempMvec[i];
+         
+         unsigned minDelay=mii.minLatency(minstr->getOpCode());
+         InstrRUsage rUsage=msi.getInstrRUsage(minstr->getOpCode());
+         InstrClassRUsage classRUsage=msi.getClassRUsage(mii.getSchedClass(minstr->getOpCode()));
+         unsigned totCycles= classRUsage.totCycles;
+         
+         std::vector<std::vector<resourceId_t> > resources
+           =rUsage.resourcesByCycle;
+         assert(totCycles == resources.size());
+         if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+           modSched_os <<"resources Usage for this Instr(totCycles=" << totCycles << ",mindLatency="<<mii.minLatency(minstr->getOpCode())<<"): "<< *minstr <<"\n";
+         for(unsigned j=0;j< resources.size();j++){
+           if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+             modSched_os<<"cycle "<<j<<": ";
+           for(unsigned k=0;k< resources[j].size(); k++){
+             if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+               modSched_os<<"\t"<<resources[j][k];
+             addResourceUsage(resourceUsage,resources[j][k]);
+           }
+           if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+             modSched_os<<"\n";
+         }
+       }  
+    }
+  if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)  
+    this->dumpResourceUsage(resourceUsage);
+
+  //compute ResII
+  ResII=0;
+  int issueSlots=msi.maxNumIssueTotal;
+  for(unsigned i=0;i<resourceUsage.size();i++){
+    int resourceNum=msi.getCPUResourceNum(resourceUsage[i].first);
+    int useNum=resourceUsage[i].second;
+    double tempII;
+    if(resourceNum <= issueSlots)
+      tempII=ceil(1.0*useNum/resourceNum);
+    else
+      tempII=ceil(1.0*useNum/issueSlots);
+    ResII=max((int)tempII,ResII);
+  }
+  return ResII;
+}
+
+ModuloSchedGraphSet::ModuloSchedGraphSet(const Function* function, const TargetMachine& target)
+  :method(function)
+{
+  buildGraphsForMethod(method,target);
+  
+}
+
+
+ModuloSchedGraphSet::~ModuloSchedGraphSet()
+{
+  //delete all the graphs
+  for(iterator I=begin(), E=end();I !=E; ++I)
+    delete *I;
+}
+
+void ModuloSchedGraphSet::dump() const 
+{
+  modSched_os << " ====== ModuloSched graphs for function `" <<method->getName()
+       << "' =========\n\n";
+  for(const_iterator I=begin(); I != end(); ++I)
+    (*I)->dump();
+    
+  modSched_os << "\n=========End graphs for funtion` "<<method->getName()
+       << "' ==========\n\n";
+}
+
+void ModuloSchedGraph::dump(const BasicBlock* bb)
+{
+  modSched_os<<"dumping basic block:";
+  modSched_os<<(bb->hasName()?bb->getName(): "block")
+      <<" (" << bb << ")"<<"\n";
+
+}
+
+void ModuloSchedGraph::dump(const BasicBlock* bb, std::ostream& os)
+{
+  os<<"dumping basic block:";
+  os<<(bb->hasName()?bb->getName(): "block")
+    <<" (" << bb << ")"<<"\n";
+}
+
+void 
+ModuloSchedGraph::dump() const
+{
+  modSched_os << " ModuloSchedGraph for basic Blocks:";
+  for(unsigned i=0, N =bbVec.size(); i< N; i++)
+    {
+      modSched_os<<(bbVec[i]->hasName()?bbVec[i]->getName(): "block")
+         <<" (" << bbVec[i] << ")"
+         << ((i==N-1)?"":", ");
+    }
+  
+  modSched_os <<"\n\n    Actual Root nodes : ";
+  for (unsigned i=0, N=graphRoot->outEdges.size(); i < N; i++)
+    modSched_os << graphRoot->outEdges[i]->getSink()->getNodeId()
+        << ((i == N-1)? "" : ", ");
+
+  modSched_os << "\n    Graph Nodes:\n";
+  //for (const_iterator I=begin(); I != end(); ++I)
+  //modSched_os << "\n" << *I->second;
+  unsigned numNodes=bbVec[0]->size();
+  for(unsigned i=2;i< numNodes+2;i++)
+    {
+      ModuloSchedGraphNode* node= getNode(i);
+      modSched_os << "\n" << *node;
+    }
+
+  modSched_os << "\n";
+}
+void 
+ModuloSchedGraph::dumpNodeProperty() const
+{
+  const BasicBlock* bb=bbVec[0];
+  unsigned numNodes=bb->size();
+  for(unsigned i=2;i < numNodes+2;i++)
+    { 
+      ModuloSchedGraphNode* node=getNode(i);
+      modSched_os<<"NodeId "<<node->getNodeId()<<"\t";
+      modSched_os<<"ASAP "<<node->getASAP()<<"\t";
+      modSched_os<<"ALAP "<<node->getALAP()<<"\t";
+      modSched_os<<"mov " <<node->getMov() <<"\t";
+      modSched_os<<"depth "<<node->getDepth()<<"\t";
+      modSched_os<<"height "<<node->getHeight()<<"\t";
+      modSched_os<<"\n";
+    }
+}
+
+void ModuloSchedGraphSet::buildGraphsForMethod (const Function *F, const TargetMachine& target)
+{
+  for(Function::const_iterator BI = F->begin(); BI != F->end(); ++BI)
+    addGraph(new ModuloSchedGraph(BI,target));
+}
+
+std::ostream &operator<<(std::ostream &os, const ModuloSchedGraphNode& node)
+{
+  os << std::string(8, ' ')
+     << "Node " << node.nodeId << " : "
+     << "latency = " << node.latency << "\n" << std::string(12, ' ');
+  
+
+
+  if (node.getInst() == NULL)
+    os << "(Dummy node)\n";
+  else
+    {
+      os << *node.getInst() << "\n" << std::string(12, ' ');
+      os << node.inEdges.size() << " Incoming Edges:\n";
+      for (unsigned i=0, N=node.inEdges.size(); i < N; i++)
+       os << std::string(16, ' ') << *node.inEdges[i];
+  
+      os << std::string(12, ' ') << node.outEdges.size()
+         << " Outgoing Edges:\n";
+      for (unsigned i=0, N=node.outEdges.size(); i < N; i++)
+        os << std::string(16, ' ') << *node.outEdges[i];
+    }
+  
+
+  return os;
+}
diff --git a/lib/Target/SparcV9/ModuloScheduling/ModuloSchedGraph.h b/lib/Target/SparcV9/ModuloScheduling/ModuloSchedGraph.h
new file mode 100644 (file)
index 0000000..07fde3c
--- /dev/null
@@ -0,0 +1,347 @@
+//===- ModuloSchedGraph.h - Represent a collection of data structures ----*- C++ -*-===//
+//
+// This header defines the primative classes that make up a data structure
+// graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MODULO_SCHED_GRAPH_H
+#define LLVM_CODEGEN_MODULO_SCHED_GRAPH_H
+#include "Support/HashExtras.h"
+#include "Support/GraphTraits.h"
+#include "../InstrSched/SchedGraphCommon.h"
+#include "llvm/Instruction.h"
+#include "llvm/Target/TargetMachine.h"
+#include <iostream>
+using std::pair;
+
+//for debug information selecton
+enum ModuloSchedDebugLevel_t{
+  ModuloSched_NoDebugInfo,
+  ModuloSched_Disable,
+  ModuloSched_PrintSchedule,
+  ModuloSched_PrintScheduleProcess,
+};
+
+//===============------------------------------------------------------------------------
+///ModuloSchedGraphNode - Implement a data structure based on the SchedGraphNodeCommon
+///this class stores informtion needed later to order the nodes in modulo scheduling
+///
+class ModuloSchedGraphNode: public SchedGraphNodeCommon {
+private:
+  //the corresponding instruction 
+  const Instruction* inst;
+
+  //whether this node's property(ASAP,ALAP, ...) has been computed
+  bool propertyComputed;
+
+  //ASAP: the earliest time the node could be scheduled
+  //ALAP: the latest time the node couldbe scheduled
+  //depth: the depth of the node
+  //height: the height of the node
+  //mov: the mobility function, computed as ALAP - ASAP
+  //scheTime: the scheduled time if this node has been scheduled 
+  //earlyStart: the earliest time to be tried to schedule the node
+  //lateStart: the latest time to be tried to schedule the node
+  int ASAP, ALAP, depth, height, mov;
+  int schTime;
+  int earlyStart,lateStart;
+
+public:
+  
+  //get the instruction
+  const Instruction* getInst() const { return inst;}
+
+  //get the instruction op-code name
+  const char* getInstOpcodeName() const{ return inst->getOpcodeName();}
+  
+  //get the instruction op-code 
+  const unsigned getInstOpcode() const { return inst->getOpcode();}
+
+  //return whether the node is NULL
+  bool isNullNode() const{ return(inst== NULL);}
+
+  //return whether the property of the node has been computed
+  bool getPropertyComputed() {return propertyComputed;}
+
+  //set the propertyComputed
+  void setPropertyComputed(bool _propertyComputed) {propertyComputed = _propertyComputed;}
+
+  //get the corresponding property
+  int getASAP(){ return ASAP;}
+  int getALAP(){ return ALAP;}
+  int getMov() { return mov;}
+  int getDepth(){return depth;}
+  int getHeight(){return height;}
+  int getSchTime(){return schTime;}
+  int getEarlyStart(){return earlyStart;}
+  int getLateStart() { return lateStart;}
+  void setEarlyStart(int _earlyStart) {earlyStart= _earlyStart;}
+  void setLateStart(int _lateStart) {lateStart= _lateStart;}
+  void setSchTime(int _time){schTime=_time;}
+
+ private:
+  friend class ModuloSchedGraph;
+  friend class SchedGraphNode;
+
+  //constructor:
+  //nodeId: the node id, unique within the each BasicBlock
+  //_bb: which BasicBlock the corresponding instruction belongs to 
+  //_inst: the corresponding instruction
+  //indexInBB: the corresponding instruction's index in the BasicBlock
+  //target: the targetMachine
+  ModuloSchedGraphNode(unsigned int _nodeId,
+                   const BasicBlock* _bb,
+                   const  Instruction* _inst,
+                   int indexInBB,
+                   const TargetMachine& target);
+  
+  
+  friend std::ostream& operator<<(std::ostream& os,const ModuloSchedGraphNode& edge);
+  
+};
+
+
+
+
+
+//FIXME: these two value should not be used
+#define MAXNODE 100
+#define MAXCC   100
+
+
+
+//===----------------------------------------------------------------------===//
+/// ModuloSchedGraph- the data structure to store dependence between nodes
+/// it catches data dependence and constrol dependence
+/// 
+/// 
+
+class ModuloSchedGraph:
+  public SchedGraphCommon,
+  protected hash_map<const Instruction*, ModuloSchedGraphNode*>
+{
+  
+private: 
+  //iteration Interval
+  int MII;
+  
+  //target machine
+  const TargetMachine& target;
+
+  //the circuits in the dependence graph
+  unsigned circuits[MAXCC][MAXNODE];
+
+  //the order nodes
+  std::vector<ModuloSchedGraphNode*> oNodes;
+
+  typedef std::vector<ModuloSchedGraphNode*> NodeVec;
+
+  //the function to compute properties
+  void computeNodeASAP(const BasicBlock* bb);
+  void computeNodeALAP(const BasicBlock* bb); 
+  void computeNodeMov(const BasicBlock* bb);
+  void computeNodeDepth(const BasicBlock* bb);
+  void computeNodeHeight(const BasicBlock* bb);
+
+  //the function to compute node property
+  void computeNodeProperty(const BasicBlock* bb);
+
+  //the function to sort nodes
+  void orderNodes();
+  //add the resource usage 
+  void addResourceUsage(std::vector<pair<int,int> >&, int);
+
+  //debug functions:
+  //dump circuits
+  void dumpCircuits();
+  //dump the input set of nodes
+  void dumpSet(std::vector<ModuloSchedGraphNode*> set);
+  //dump the input resource usage table  
+  void dumpResourceUsage(std::vector<pair<int,int> >&);
+  
+ public:
+  //help functions
+  
+  //get the maxium the delay between two nodes
+  SchedGraphEdge* getMaxDelayEdge(unsigned srcId, unsigned sinkId);  
+
+  //FIXME:
+  //get the predessor Set of the set
+  NodeVec predSet(NodeVec set, unsigned, unsigned);
+  NodeVec predSet(NodeVec set);
+
+  //get the predessor set of the node
+  NodeVec predSet(ModuloSchedGraphNode* node, unsigned,unsigned);
+  NodeVec predSet(ModuloSchedGraphNode* node);
+
+  //get the successor set of the set
+  NodeVec succSet(NodeVec set, unsigned, unsigned);
+  NodeVec succSet(NodeVec set);
+  
+  //get the succssor set of the node
+  NodeVec succSet(ModuloSchedGraphNode* node,unsigned, unsigned);
+  NodeVec succSet(ModuloSchedGraphNode* node);
+
+  //return the uniton of the two vectors
+  NodeVec vectorUnion(NodeVec set1,NodeVec set2 );
+  
+  //return the consjuction of the two vectors
+  NodeVec vectorConj(NodeVec set1,NodeVec set2 );
+  
+  //return all nodes in  set1 but not  set2
+  NodeVec vectorSub(NodeVec set1, NodeVec set2);
+
+  typedef hash_map<const Instruction*, ModuloSchedGraphNode*> map_base;
+public:
+  using map_base::iterator;
+  using map_base::const_iterator;
+  
+public:
+
+  //get target machine
+  const TargetMachine& getTarget(){return target;}
+
+  //get the iteration interval
+  const int getMII(){return MII;} 
+
+  //get the ordered nodes
+  const NodeVec& getONodes(){return oNodes;}
+
+  //get the number of nodes (including the root and leaf)
+  //note: actually root and leaf is not used
+  const unsigned int getNumNodes() const {return size()+2;}
+
+  //return wether the BasicBlock 'bb' contains a loop
+  bool isLoop                         (const BasicBlock* bb);
+
+  //return this basibBlock contains a loop
+  bool isLoop                         ();
+  
+
+  //return the node for the input instruction
+  ModuloSchedGraphNode* getGraphNodeForInst(const Instruction* inst) const{
+    const_iterator onePair = this->find(inst);
+    return (onePair != this->end())?(*onePair).second: NULL;
+  }
+  
+  //Debugging support
+  //dump the graph
+  void dump() const;
+  //dump the basicBlock
+  void dump(const BasicBlock* bb);
+  //dump the basicBlock into 'os' stream
+  void dump(const BasicBlock* bb, std::ostream& os);
+  //dump the node property
+  void dumpNodeProperty() const ;
+
+ private:
+  friend class ModuloSchedGraphSet; //give access to ctor
+  
+ public:
+  /*ctr*/
+  ModuloSchedGraph(const BasicBlock* bb, const TargetMachine& _target)
+    :SchedGraphCommon(bb), target(_target){
+    buildGraph(target);
+  }
+
+  /*dtr*/
+  ~ModuloSchedGraph(){
+    for(const_iterator I=begin(); I!=end(); ++I)
+      delete I->second;
+  }
+  
+  //unorder iterators
+  //return values are pair<const Instruction*, ModuloSchedGraphNode*>
+  using map_base::begin;
+  using map_base::end;
+
+
+  
+  inline void noteModuloSchedGraphNodeForInst(const Instruction* inst,
+                                  ModuloSchedGraphNode* node)
+    {
+      assert((*this)[inst] ==NULL);
+      (*this)[inst]=node;
+    }
+  
+  //Graph builder
+  
+  ModuloSchedGraphNode* getNode          (const unsigned nodeId) const;      
+
+  //build the graph from the basicBlock
+  void buildGraph                     (const TargetMachine& target);
+
+  //Build nodes for BasicBlock
+  void buildNodesforBB                (const TargetMachine& target,
+                                      const BasicBlock* bb,
+                                      NodeVec& memNode,
+                                      RegToRefVecMap& regToRefVecMap,
+                                      ValueToDefVecMap& valueToDefVecMap);
+
+  //find definitiona and use information for all nodes
+  void findDefUseInfoAtInstr           (const TargetMachine& target,
+                                       ModuloSchedGraphNode* node,
+                                      NodeVec& memNode,
+                                      RegToRefVecMap& regToRefVecMap,
+                                      ValueToDefVecMap& valueToDefVecMap);
+
+  //add def-use edge
+  void addDefUseEdges                 (const BasicBlock* bb);
+
+  //add control dependence edges
+  void addCDEdges                     (const BasicBlock* bb);
+
+  //add memory dependence dges
+  void addMemEdges                    (const BasicBlock* bb);
+
+  //add dummy edges
+  void addDummyEdges();
+
+  //computer source restrictoin II
+  int  computeResII                   (const BasicBlock* bb);
+
+  //computer recurrence II
+  int  computeRecII                   (const BasicBlock* bb);
+};
+
+///==================================-
+//gragh set
+
+class ModuloSchedGraphSet:
+  public std::vector<ModuloSchedGraph*>
+{
+private:
+  const Function* method;
+  
+public:
+  typedef std::vector<ModuloSchedGraph*> baseVector;
+  using baseVector::iterator;
+  using baseVector::const_iterator;
+  
+public:
+  /*ctor*/ ModuloSchedGraphSet          (const Function* function, const TargetMachine& target);
+  
+  /*dtor*/ ~ModuloSchedGraphSet          ();
+  
+  //iterators
+  using baseVector::begin;
+  using baseVector::end;
+
+
+  //Debugging support
+  void  dump() const;
+  
+private:
+  inline void addGraph(ModuloSchedGraph* graph){
+    assert(graph !=NULL);
+    this->push_back(graph);
+  }
+  
+  //Graph builder
+  void buildGraphsForMethod (const Function *F, const TargetMachine& target);
+};
+
+#endif 
diff --git a/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp b/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp
new file mode 100644 (file)
index 0000000..3f4002a
--- /dev/null
@@ -0,0 +1,899 @@
+
+//===- SPLInstrScheduling.cpp - Modulo Software Pipelining Instruction Scheduling support -------===//
+//
+// this file implements the llvm/CodeGen/ModuloScheduling.h interface
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineCodeForInstruction.h"
+#include "llvm/CodeGen/MachineCodeForBasicBlock.h"
+#include "llvm/CodeGen/MachineCodeForMethod.h"
+#include "llvm/Analysis/LiveVar/FunctionLiveVarInfo.h" // FIXME: Remove when modularized better
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instruction.h"
+#include "Support/CommandLine.h"
+#include <algorithm>
+#include "ModuloSchedGraph.h"
+#include "ModuloScheduling.h"
+#include "llvm/Target/MachineSchedInfo.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/iTerminators.h"
+#include "llvm/iPHINode.h"
+#include "llvm/Constants.h"
+#include <iostream>
+#include <swig.h>
+#include <fstream>
+#include "llvm/CodeGen/InstrSelection.h"
+
+#define max(x,y) (x>y?x:y)
+#define min(x,y) (x<y?x:y)
+using std::cerr;
+using std::cout;
+using std::ostream;
+using std::ios;
+using std::filebuf;
+
+//************************************************************
+//printing Debug information
+//ModuloSchedDebugLevel stores the value of debug level
+// modsched_os is the ostream to dump debug information, which is written into the file 'moduloSchedDebugInfo.output'
+//see ModuloSchedulingPass::runOnFunction()
+//************************************************************
+
+ModuloSchedDebugLevel_t ModuloSchedDebugLevel;
+static cl::opt<ModuloSchedDebugLevel_t, true>
+SDL_opt("modsched", cl::Hidden, cl::location(ModuloSchedDebugLevel),
+        cl::desc("enable modulo scheduling debugging information"),
+        cl::values(
+ clEnumValN(ModuloSched_NoDebugInfo,      "n", "disable debug output"),
+ clEnumValN(ModuloSched_Disable,        "off", "disable modulo scheduling"),
+ clEnumValN(ModuloSched_PrintSchedule,  "psched", "print original and new schedule"),
+ clEnumValN(ModuloSched_PrintScheduleProcess,"pschedproc", "print how the new schdule is produced"),
+                   0));
+
+filebuf modSched_fb;
+ostream modSched_os(&modSched_fb);
+
+//************************************************************
+
+
+///the method to compute schedule and instert epilogue and prologue
+void ModuloScheduling::instrScheduling(){
+  
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+    modSched_os<<"*************************computing modulo schedule ************************\n";
+  
+  
+  const MachineSchedInfo& msi=target.getSchedInfo();
+
+  //number of issue slots in the in each cycle
+  int numIssueSlots=msi.maxNumIssueTotal;
+
+
+
+  //compute the schedule
+  bool success=false;
+  while(!success)
+    {
+      //clear memory from the last round and initialize if necessary
+      clearInitMem(msi);
+
+      //compute schedule and coreSchedule with the current II
+      success=computeSchedule();
+      
+      if(!success){
+       II++;
+       if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+         modSched_os<<"increase II  to "<<II<<"\n";
+      }
+    }
+  
+  //print the final schedule if necessary
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintSchedule)
+    dumpScheduling();
+  
+
+  //the schedule has been computed
+  //create epilogue, prologue and kernel BasicBlock
+  //find the successor for this BasicBlock
+  BasicBlock* succ_bb= getSuccBB(bb); 
+  
+  //print the original BasicBlock if necessary
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintSchedule){
+    modSched_os<<"dumping the orginal block\n";  
+    graph.dump(bb);
+  }
+
+  //construction of prologue, kernel and epilogue
+  BasicBlock* kernel=bb->splitBasicBlock(bb->begin());
+  BasicBlock* prologue= bb;
+  BasicBlock* epilogue=kernel->splitBasicBlock(kernel->begin());
+  
+  
+  //construct prologue
+  constructPrologue(prologue);
+
+  //construct kernel
+  constructKernel(prologue,kernel,epilogue);
+
+  //construct epilogue
+  constructEpilogue(epilogue,succ_bb);
+
+  //print the BasicBlocks if necessary
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintSchedule){
+    modSched_os<<"dumping the prologue block:\n";
+    graph.dump(prologue);
+    modSched_os<<"dumping the kernel block\n";
+    graph.dump(kernel);
+    modSched_os<<"dumping the epilogue block\n";
+    graph.dump(epilogue);
+  }
+  
+}    
+
+//clear memory from the last round and initialize if necessary
+void ModuloScheduling::clearInitMem(const MachineSchedInfo& msi){
+  
+
+  unsigned numIssueSlots = msi.maxNumIssueTotal;
+  //clear nodeScheduled from the last round
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){
+    modSched_os<< "***** new round  with II= "<<II<<" *******************"<<endl;
+    modSched_os<< " **************clear the vector nodeScheduled**************** \n";
+  }
+  nodeScheduled.clear();
+  
+  
+  //clear resourceTable from the last round and reset it 
+  resourceTable.clear();
+  for(unsigned i=0;i< II;i++)
+    resourceTable.push_back(msi.resourceNumVector);
+  
+  
+  //clear the schdule and coreSchedule from the last round 
+  schedule.clear();
+  coreSchedule.clear();
+  
+  //create a coreSchedule of size II*numIssueSlots
+  //each entry is NULL
+  while( coreSchedule.size() <  II){
+    std::vector<ModuloSchedGraphNode*>* newCycle=new  std::vector<ModuloSchedGraphNode*>();
+    for(unsigned k=0;k<numIssueSlots;k++)
+      newCycle->push_back(NULL);
+    coreSchedule.push_back(*newCycle);
+  }  
+}
+
+
+//compute schedule and coreSchedule with the current II
+bool ModuloScheduling::computeSchedule(){
+  
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+    modSched_os <<"start to compute schedule \n";
+  
+  //loop over the ordered nodes
+  for(NodeVec::const_iterator I=oNodes.begin();I!=oNodes.end();I++)
+    {
+      //try to schedule for node I
+      if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       dumpScheduling();
+      ModuloSchedGraphNode* node=*I;
+
+      //compute whether this node has successor(s)
+      bool succ=true;
+
+      //compute whether this node has predessor(s)
+      bool pred=true;
+
+      NodeVec schSucc=graph.vectorConj(nodeScheduled,graph.succSet(node));
+      if(schSucc.empty())
+       succ=false;
+      NodeVec schPred=graph.vectorConj(nodeScheduled,graph.predSet(node));   
+      if(schPred.empty())
+       pred=false;
+      
+      //startTime: the earliest time we will try to schedule this node
+      //endTime: the latest time we will try to schedule this node
+      int startTime, endTime;
+
+      //node's earlyStart: possible earliest time to schedule this node
+      //node's lateStart: possible latest time to schedule this node
+      node->setEarlyStart(-1);
+      node->setLateStart(9999);
+      
+
+      //this node has predessor but no successor
+      if(!succ && pred){
+
+       //this node's earlyStart is it's predessor's schedule time + the edge delay 
+       // - the iteration difference* II    
+       for(unsigned i=0;i<schPred.size();i++){
+         ModuloSchedGraphNode* predNode=schPred[i];
+         SchedGraphEdge* edge=graph.getMaxDelayEdge(predNode->getNodeId(),node->getNodeId());
+         int temp=predNode->getSchTime()+edge->getMinDelay() - edge->getIteDiff()*II;
+         node->setEarlyStart( max(node->getEarlyStart(),temp));
+       }
+       startTime=node->getEarlyStart();
+       endTime=node->getEarlyStart()+II-1;
+      }
+      
+
+      //this node has successor but no predessor
+      if(succ && !pred){
+       for(unsigned i=0;i<schSucc.size();i++){
+         ModuloSchedGraphNode* succNode=schSucc[i];
+         SchedGraphEdge* edge=graph.getMaxDelayEdge(succNode->getNodeId(),node->getNodeId());
+         int temp=succNode->getSchTime() - edge->getMinDelay() + edge->getIteDiff()*II;
+         node->setLateStart(min(node->getEarlyStart(),temp));
+       }
+       startTime=node->getLateStart()- II+1;
+       endTime=node->getLateStart();
+      }
+
+      //this node has both successors and predessors
+      if(succ && pred)
+       {
+         for(unsigned i=0;i<schPred.size();i++){
+           ModuloSchedGraphNode* predNode=schPred[i];
+           SchedGraphEdge* edge=graph.getMaxDelayEdge(predNode->getNodeId(),node->getNodeId());
+           int temp=predNode->getSchTime()+edge->getMinDelay() - edge->getIteDiff()*II;
+           node->setEarlyStart(max(node->getEarlyStart(),temp));
+         }
+         for(unsigned i=0;i<schSucc.size();i++){
+           ModuloSchedGraphNode* succNode=schSucc[i];
+           SchedGraphEdge* edge=graph.getMaxDelayEdge(succNode->getNodeId(),node->getNodeId());
+           int temp=succNode->getSchTime() - edge->getMinDelay() + edge->getIteDiff()*II;
+           node->setLateStart(min(node->getEarlyStart(),temp));
+         }
+         startTime=node->getEarlyStart();
+         endTime=min(node->getLateStart(),node->getEarlyStart()+((int)II)-1);
+       }
+      
+      //this node has no successor or predessor
+      if(!succ && !pred){
+       node->setEarlyStart(node->getASAP());
+       startTime=node->getEarlyStart();
+       endTime=node->getEarlyStart()+II -1;
+      }
+
+      //try to schedule this node based on the startTime and endTime
+      if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       modSched_os<<"scheduling the node "<<(*I)->getNodeId()<<"\n";      
+
+      bool success= this->ScheduleNode(node,startTime, endTime,nodeScheduled);
+      if(!success)return false;
+    }
+  return true;
+}
+
+
+//get the successor of the BasicBlock
+BasicBlock* ModuloScheduling::getSuccBB(BasicBlock* bb){
+
+  BasicBlock* succ_bb;
+  for(unsigned i=0;i < II; i++)
+    for(unsigned j=0;j< coreSchedule[i].size();j++)
+      if(coreSchedule[i][j]){
+       const Instruction* ist=coreSchedule[i][j]->getInst();
+       
+       //we can get successor from the BranchInst instruction
+       //assume we only have one successor (besides itself) here
+       if(BranchInst::classof(ist)){
+         BranchInst* bi=(BranchInst*)ist;
+         assert(bi->isConditional()&&"the branchInst is not a conditional one");
+         assert(bi->getNumSuccessors() ==2&&" more than two successors?");
+         BasicBlock* bb1=bi->getSuccessor(0);
+         BasicBlock* bb2=bi->getSuccessor(1);
+         assert( (bb1 ==  bb|| bb2 == bb) && " None of its successor is itself?");
+         if(bb1 == bb) succ_bb=bb2;
+         else succ_bb=bb1;
+         return succ_bb;
+       }
+      }
+  assert( 0 && "NO Successor?");
+  return NULL;
+}
+
+
+//get the predecessor of the BasicBlock
+BasicBlock* ModuloScheduling::getPredBB(BasicBlock* bb){
+
+  BasicBlock* pred_bb;
+
+  for(unsigned i=0;i < II; i++)
+    for(unsigned j=0;j< coreSchedule[i].size();j++)
+      if(coreSchedule[i][j]){
+       const Instruction* ist=coreSchedule[i][j]->getInst();
+       
+       //we can get predecessor from the PHINode instruction
+       //assume we only have one predecessor (besides itself) here
+       if(PHINode::classof(ist)){
+         PHINode* phi=(PHINode*) ist;
+         assert(phi->getNumIncomingValues() == 2 &&" the number of incoming value is not equal to two? ");
+         BasicBlock* bb1= phi->getIncomingBlock(0);
+         BasicBlock* bb2= phi->getIncomingBlock(1);
+         assert( (bb1 ==  bb || bb2 == bb) && " None of its predecessor is itself?");
+         if(bb1 == bb) pred_bb=bb2;
+         else pred_bb=bb1;       
+         return pred_bb;
+       }
+      }
+  assert(0 && " no predecessor?");
+  return NULL;
+}
+
+
+//construct the prologue
+void ModuloScheduling::constructPrologue(BasicBlock* prologue){
+  
+  InstListType& prologue_ist = prologue->getInstList();
+  vvNodeType& tempSchedule_prologue= *(new vector< std::vector<ModuloSchedGraphNode*> >(schedule));
+  
+  //compute the schedule for prologue
+  unsigned round=0;
+  unsigned scheduleSize=schedule.size();
+  while(round < scheduleSize/II){
+    round++;
+    for(unsigned i=0;i < scheduleSize ;i++){
+      if(round*II + i >= scheduleSize) break;
+      for(unsigned j=0;j < schedule[i].size(); j++) 
+       if(schedule[i][j]){
+         assert( tempSchedule_prologue[round*II +i ][j] == NULL && "table not consitant with core table");
+         
+         //move  the schedule one iteration ahead and overlap with the original one
+         tempSchedule_prologue[round*II + i][j]=schedule[i][j];
+       }
+    }
+  }
+
+  //clear the clone memory in the core schedule instructions
+  clearCloneMemory();
+  
+  //fill in the prologue
+  for(unsigned i=0;i < ceil(1.0*scheduleSize/II -1)*II ;i++)
+    for(unsigned j=0;j < tempSchedule_prologue[i].size();j++)
+      if(tempSchedule_prologue[i][j]){
+
+       //get the instruction
+       Instruction* orn=(Instruction*)tempSchedule_prologue[i][j]->getInst();
+
+       //made a clone of it
+       Instruction* cln=cloneInstSetMemory(orn);
+
+       //insert the instruction
+       prologue_ist.insert(prologue_ist.back(),cln );
+
+       //if there is PHINode in the prologue, the incoming value from itself should be removed
+       //because it is not a loop any longer
+       if( PHINode::classof(cln)){
+         PHINode* phi=(PHINode*)cln;
+         phi->removeIncomingValue(phi->getParent());
+       }
+      }
+}
+
+
+//construct the kernel BasicBlock
+void ModuloScheduling::constructKernel(BasicBlock* prologue,BasicBlock* kernel,BasicBlock* epilogue){
+
+  //*************fill instructions in the kernel****************
+  InstListType& kernel_ist   = kernel->getInstList();
+  BranchInst* brchInst;
+  PHINode* phiInst, *phiCln;
+
+  for(unsigned i=0;i<coreSchedule.size();i++)
+    for(unsigned j=0;j<coreSchedule[i].size();j++)
+      if(coreSchedule[i][j]){
+       
+       //we should take care of branch instruction differently with normal instructions
+       if(BranchInst::classof(coreSchedule[i][j]->getInst())){
+         brchInst=(BranchInst*)coreSchedule[i][j]->getInst();
+         continue;
+       }
+       
+       //we should take care of PHINode instruction differently with normal instructions
+       if( PHINode::classof(coreSchedule[i][j]->getInst())){
+         phiInst= (PHINode*)coreSchedule[i][j]->getInst();
+         Instruction* cln=cloneInstSetMemory(phiInst);
+         kernel_ist.insert(kernel_ist.back(),cln);
+         phiCln=(PHINode*)cln;
+         continue;
+       }
+       
+       //for normal instructions: made a clone and insert it in the kernel_ist
+       Instruction* cln=cloneInstSetMemory( (Instruction*)coreSchedule[i][j]->getInst());
+       kernel_ist.insert(kernel_ist.back(),cln);
+      }
+
+  //the two incoming BasicBlock for PHINode is the prologue and the kernel (itself)
+  phiCln->setIncomingBlock(0,prologue);
+  phiCln->setIncomingBlock(1,kernel);
+
+  //the incoming value for the kernel (itself) is the new value which is computed in the kernel
+  Instruction* originalVal=(Instruction*)phiInst->getIncomingValue(1);
+  phiCln->setIncomingValue(1, originalVal->getClone());
+  
+
+  //make a clone of the branch instruction and insert it in the end
+  BranchInst* cln=(BranchInst*)cloneInstSetMemory( brchInst);
+  kernel_ist.insert(kernel_ist.back(),cln);
+
+  //delete the unconditional branch instruction, which is generated when splitting the basicBlock
+  kernel_ist.erase( --kernel_ist.end());
+
+  //set the first successor to itself
+  ((BranchInst*)cln)->setSuccessor(0, kernel);
+  //set the second successor to eiplogue
+  ((BranchInst*)cln)->setSuccessor(1,epilogue);
+
+  //*****change the condition*******
+
+  //get the condition instruction
+  Instruction* cond=(Instruction*)cln->getCondition();
+
+  //get the condition's second operand, it should be a constant
+  Value* operand=cond->getOperand(1);
+  assert(ConstantSInt::classof(operand));
+
+  //change the constant in the condtion instruction
+  ConstantSInt* iteTimes=ConstantSInt::get(operand->getType(),((ConstantSInt*)operand)->getValue()-II+1);
+  cond->setOperand(1,iteTimes);
+
+}
+
+
+
+
+
+//construct the epilogue 
+void ModuloScheduling::constructEpilogue(BasicBlock* epilogue, BasicBlock* succ_bb){
+  
+  //compute the schedule for epilogue
+  vvNodeType& tempSchedule_epilogue= *(new vector< std::vector<ModuloSchedGraphNode*> >(schedule));
+  unsigned scheduleSize=schedule.size();
+  int round =0;
+  while(round < ceil(1.0*scheduleSize/II )-1 ){
+    round++;
+    for( unsigned i=0;i < scheduleSize ; i++){
+      if(i + round *II >= scheduleSize) break;
+      for(unsigned j=0;j < schedule[i].size();j++)
+       if(schedule[i + round*II ][j]){
+         assert( tempSchedule_epilogue[i][j] == NULL && "table not consitant with core table");
+
+         //move the schdule one iteration behind and overlap
+         tempSchedule_epilogue[i][j]=schedule[i + round*II][j];
+       }
+    }
+  }
+  
+  //fill in the epilogue
+  InstListType& epilogue_ist = epilogue->getInstList();
+  for(unsigned i=II;i <scheduleSize ;i++)
+    for(unsigned j=0;j < tempSchedule_epilogue[i].size();j++)
+      if(tempSchedule_epilogue[i][j]){
+       Instruction* inst=(Instruction*)tempSchedule_epilogue[i][j]->getInst();
+
+       //BranchInst and PHINode should be treated differently
+       //BranchInst:unecessary, simly omitted
+       //PHINode: omitted
+       if( !BranchInst::classof(inst) && ! PHINode::classof(inst) ){
+         //make a clone instruction and insert it into the epilogue
+         Instruction* cln=cloneInstSetMemory(inst);    
+         epilogue_ist.push_front(cln);
+       }
+      }
+
+
+  //*************delete the original instructions****************//
+  //to delete the original instructions, we have to make sure their use is zero
+  
+  //update original core instruction's uses, using its clone instread
+  for(unsigned i=0;i < II; i++)
+    for(unsigned j=0;j < coreSchedule[i].size() ;j++){
+      if(coreSchedule[i][j])
+       updateUseWithClone((Instruction*)coreSchedule[i][j]->getInst() );
+    }
+  
+  //erase these instructions
+  for(unsigned i=0;i < II; i++)
+    for(unsigned j=0;j < coreSchedule[i].size();j++)
+      if(coreSchedule[i][j]){
+       Instruction* ist=(Instruction*)coreSchedule[i][j]->getInst();
+       ist->getParent()->getInstList().erase(ist);
+      }
+  //**************************************************************//
+
+
+  //finally, insert an unconditional branch instruction at the end
+  epilogue_ist.push_back(new BranchInst(succ_bb));
+  
+}
+
+
+//----------------------------------------------------------------------------------------------
+//this function replace the value(instruction) ist in other instructions with its latest clone
+//i.e. after this function is called, the ist is not used anywhere and it can be erased.
+//----------------------------------------------------------------------------------------------
+void ModuloScheduling::updateUseWithClone(Instruction* ist){
+  
+  while(ist->use_size() >0){
+    bool destroyed=false;
+    
+    //other instruction is using this value ist
+    assert(Instruction::classof(*ist->use_begin()));
+    Instruction *inst=(Instruction*)(* ist->use_begin());
+
+    for(unsigned i=0;i<inst->getNumOperands();i++)
+      if(inst->getOperand(i) == ist && ist->getClone()){
+
+       //if the instruction is TmpInstruction, simly delete it because it has no parent
+       // and it does not belongs to any BasicBlock
+       if(TmpInstruction::classof(inst)) {
+         delete inst;
+         destroyed=true;
+         break;
+       }
+
+
+       //otherwise, set the instruction's operand to the value's clone
+       inst->setOperand(i, ist->getClone());
+
+       //the use from the original value ist is destroyed
+       destroyed=true;
+       break;
+      }
+    if( !destroyed)
+      {
+       //if the use can not be destroyed , something is wrong
+       inst->dump();
+       assert( 0  &&"this use can not be destroyed"); 
+      }
+  }
+  
+}
+
+
+//********************************************************
+//this function clear all clone mememoy
+//i.e. set all instruction's clone memory to NULL
+//*****************************************************
+void ModuloScheduling::clearCloneMemory(){
+for(unsigned i=0; i < coreSchedule.size();i++)
+  for(unsigned j=0;j<coreSchedule[i].size();j++)
+    if(coreSchedule[i][j]) ((Instruction*)coreSchedule[i][j]->getInst())->clearClone();
+}
+
+
+//********************************************************************************
+//this function make a clone of the instruction orn
+//the cloned instruction will use the orn's operands' latest clone as its operands
+//it is done this way because LLVM is in SSA form and we should use the correct value
+//
+//this fuction also update the instruction orn's latest clone memory
+//**********************************************************************************
+Instruction*  ModuloScheduling::cloneInstSetMemory(Instruction* orn) {
+
+//make a clone instruction
+  Instruction* cln=orn->clone();
+  
+
+  //update the operands
+  for(unsigned k=0;k<orn->getNumOperands();k++){
+    const Value* op=orn->getOperand(k);
+    if(Instruction::classof(op) && ((Instruction*)op)->getClone()){      
+      Instruction* op_inst=(Instruction*)op;
+      cln->setOperand(k, op_inst->getClone());
+    }
+  }
+
+  //update clone memory
+  orn->setClone(cln);
+  return cln;
+}
+
+
+
+bool ModuloScheduling::ScheduleNode(ModuloSchedGraphNode* node,unsigned start, unsigned end, NodeVec& nodeScheduled)
+{
+  
+  const MachineSchedInfo& msi=target.getSchedInfo();
+  unsigned int numIssueSlots=msi.maxNumIssueTotal;
+
+  if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+    modSched_os<<"startTime= "<<start<<" endTime= "<<end<<"\n";
+  bool isScheduled=false;
+  for(unsigned i=start;i<= end;i++){
+    if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+      modSched_os<< " now try cycle " <<i<<":"<<"\n"; 
+    for(unsigned j=0;j<numIssueSlots;j++){
+      unsigned int core_i = i%II;
+      unsigned int core_j=j;
+      if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+       modSched_os <<"\t Trying slot "<<j<<"...........";
+      //check the resouce table, make sure there is no resource conflicts
+      const Instruction* instr=node->getInst();
+      MachineCodeForInstruction& tempMvec=  MachineCodeForInstruction::get(instr);
+      bool resourceConflict=false;
+      const MachineInstrInfo &mii=msi.getInstrInfo();
+      
+      if(coreSchedule.size() < core_i+1 || !coreSchedule[core_i][core_j]){
+       //this->dumpResourceUsageTable();
+       int latency=0;
+       for(unsigned k=0;k< tempMvec.size();k++)
+         {
+           MachineInstr* minstr=tempMvec[k];
+           InstrRUsage rUsage=msi.getInstrRUsage(minstr->getOpCode());
+           std::vector<std::vector<resourceId_t> > resources
+             =rUsage.resourcesByCycle;
+           updateResourceTable(resources,i + latency);
+           latency +=max(mii.minLatency(minstr->getOpCode()),1) ;
+         }
+       
+       //this->dumpResourceUsageTable();
+       
+       latency=0;
+       if( resourceTableNegative()){
+         
+         //undo-update the resource table
+         for(unsigned k=0;k< tempMvec.size();k++){
+           MachineInstr* minstr=tempMvec[k];
+           InstrRUsage rUsage=msi.getInstrRUsage(minstr->getOpCode());
+           std::vector<std::vector<resourceId_t> > resources
+             =rUsage.resourcesByCycle;
+           undoUpdateResourceTable(resources,i + latency);
+           latency +=max(mii.minLatency(minstr->getOpCode()),1) ;
+         }
+         resourceConflict=true;
+       }
+      }
+      if( !resourceConflict &&  !coreSchedule[core_i][core_j]){
+       if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){
+         modSched_os <<" OK!"<<"\n";
+         modSched_os<<"Node "<<node->getNodeId()<< " is scheduleed."<<"\n";
+       }
+       //schedule[i][j]=node;
+       while(schedule.size() <= i){
+         std::vector<ModuloSchedGraphNode*>* newCycle=new  std::vector<ModuloSchedGraphNode*>();
+         for(unsigned  k=0;k<numIssueSlots;k++)
+           newCycle->push_back(NULL);
+         schedule.push_back(*newCycle);
+       }
+       vector<ModuloSchedGraphNode*>::iterator startIterator;
+       startIterator = schedule[i].begin();
+       schedule[i].insert(startIterator+j,node);
+       startIterator = schedule[i].begin();
+       schedule[i].erase(startIterator+j+1);
+
+       //update coreSchedule
+       //coreSchedule[core_i][core_j]=node;
+       while(coreSchedule.size() <= core_i){
+         std::vector<ModuloSchedGraphNode*>* newCycle=new  std::vector<ModuloSchedGraphNode*>();
+         for(unsigned k=0;k<numIssueSlots;k++)
+           newCycle->push_back(NULL);
+         coreSchedule.push_back(*newCycle);
+       }
+
+       startIterator = coreSchedule[core_i].begin();
+       coreSchedule[core_i].insert(startIterator+core_j,node);
+       startIterator = coreSchedule[core_i].begin();
+       coreSchedule[core_i].erase(startIterator+core_j+1);
+
+       node->setSchTime(i);
+       isScheduled=true;
+       nodeScheduled.push_back(node);
+       
+       break;
+      }
+      else if( coreSchedule[core_i][core_j]) {
+       if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+         modSched_os <<" Slot not available "<<"\n";
+      }
+      else{
+       if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess)
+         modSched_os <<" Resource conflicts"<<"\n";
+      }
+    }
+    if(isScheduled) break;
+  }
+  //assert(nodeScheduled &&"this node can not be scheduled?");
+  return isScheduled;
+}
+
+void ModuloScheduling::updateResourceTable(std::vector<std::vector<unsigned int> > useResources, int startCycle){
+  for(unsigned i=0;i< useResources.size();i++){
+    int absCycle=startCycle+i;
+    int coreCycle=absCycle % II;
+    std::vector<pair<int,int> >& resourceRemained=resourceTable[coreCycle];
+    std::vector<unsigned int>& resourceUsed= useResources[i];
+    for(unsigned j=0;j< resourceUsed.size();j++){
+      for(unsigned k=0;k< resourceRemained.size();k++)
+       if((int)resourceUsed[j] == resourceRemained[k].first){
+         resourceRemained[k].second--;
+       }
+    }
+  }
+}
+
+void ModuloScheduling::undoUpdateResourceTable(std::vector<std::vector<unsigned int> > useResources, int startCycle){
+  for(unsigned i=0;i< useResources.size();i++){
+    int absCycle=startCycle+i;
+    int coreCycle=absCycle % II;
+    std::vector<pair<int,int> >& resourceRemained=resourceTable[coreCycle];
+    std::vector<unsigned int>& resourceUsed= useResources[i];
+    for(unsigned j=0;j< resourceUsed.size();j++){
+      for(unsigned k=0;k< resourceRemained.size();k++)
+       if((int)resourceUsed[j] == resourceRemained[k].first){
+         resourceRemained[k].second++;
+       }
+    }
+  }
+}
+
+
+//-----------------------------------------------------------------------
+//Function: resouceTableNegative
+//return value:
+//          return false if any element in the resouceTable is negative
+//          otherwise return true
+//Purpose:
+//          this function is used to determine if an instruction is eligible for schedule at certain cycle
+//---------------------------------------------------------------------------------------
+
+bool ModuloScheduling::resourceTableNegative(){
+  assert(resourceTable.size() == (unsigned)II&& "resouceTable size must be equal to II");
+  bool isNegative=false;
+  for(unsigned i=0; i < resourceTable.size();i++)
+    for(unsigned j=0;j < resourceTable[i].size();j++){
+      if(resourceTable[i][j].second <0) {
+       isNegative=true;
+       break;
+      } 
+    }
+  return isNegative;
+}
+
+
+//----------------------------------------------------------------------
+//Function: dumpResouceUsageTable
+//Purpose:
+//          print out ResouceTable for debug
+//
+//------------------------------------------------------------------------
+
+void ModuloScheduling::dumpResourceUsageTable(){
+  modSched_os<<"dumping resource usage table"<<"\n";
+  for(unsigned i=0;i< resourceTable.size();i++){
+    for(unsigned j=0;j < resourceTable[i].size();j++)
+      modSched_os <<resourceTable[i][j].first<<":"<< resourceTable[i][j].second<<" ";
+    modSched_os <<"\n";
+  }
+  
+}
+
+//----------------------------------------------------------------------
+//Function: dumpSchedule
+//Purpose:
+//       print out thisSchedule for debug
+//
+//-----------------------------------------------------------------------
+void ModuloScheduling::dumpSchedule(std::vector< std::vector<ModuloSchedGraphNode*> > thisSchedule){
+  
+  const MachineSchedInfo& msi=target.getSchedInfo();
+  unsigned numIssueSlots=msi.maxNumIssueTotal;
+  for(unsigned i=0;i< numIssueSlots;i++)
+      modSched_os <<"\t#";
+  modSched_os<<"\n";
+  for(unsigned i=0;i < thisSchedule.size();i++)
+    {
+      modSched_os<<"cycle"<<i<<": ";
+      for(unsigned j=0;j<thisSchedule[i].size();j++)
+       if(thisSchedule[i][j]!= NULL)
+         modSched_os<<thisSchedule[i][j]->getNodeId()<<"\t";
+       else
+         modSched_os<<"\t";
+      modSched_os<<"\n";
+    }
+
+}
+
+
+//----------------------------------------------------
+//Function: dumpScheduling
+//Purpose:
+//   print out the schedule and coreSchedule for debug      
+//
+//-------------------------------------------------------
+
+void ModuloScheduling::dumpScheduling(){
+  modSched_os<<"dump schedule:"<<"\n";
+  const MachineSchedInfo& msi=target.getSchedInfo();
+  unsigned numIssueSlots=msi.maxNumIssueTotal;
+  for(unsigned i=0;i< numIssueSlots;i++)
+    modSched_os <<"\t#";
+  modSched_os<<"\n";
+  for(unsigned i=0;i < schedule.size();i++)
+    {
+      modSched_os<<"cycle"<<i<<": ";
+      for(unsigned j=0;j<schedule[i].size();j++)
+       if(schedule[i][j]!= NULL)
+         modSched_os<<schedule[i][j]->getNodeId()<<"\t";
+       else
+         modSched_os<<"\t";
+      modSched_os<<"\n";
+    }
+  
+  modSched_os<<"dump coreSchedule:"<<"\n";
+  for(unsigned i=0;i< numIssueSlots;i++)
+    modSched_os <<"\t#";
+  modSched_os<<"\n";
+  for(unsigned i=0;i < coreSchedule.size();i++){
+    modSched_os<<"cycle"<<i<<": ";
+    for(unsigned j=0;j< coreSchedule[i].size();j++)
+      if(coreSchedule[i][j] !=NULL)
+       modSched_os<<coreSchedule[i][j]->getNodeId()<<"\t";
+      else
+       modSched_os<<"\t";
+    modSched_os<<"\n";
+  }
+}
+
+
+
+//---------------------------------------------------------------------------
+// Function: ModuloSchedulingPass
+// 
+// Purpose:
+//   Entry point for Modulo Scheduling
+//   Schedules LLVM instruction
+//   
+//---------------------------------------------------------------------------
+
+namespace {
+  class ModuloSchedulingPass : public FunctionPass {
+    const TargetMachine &target;
+  public:
+    ModuloSchedulingPass(const TargetMachine &T) : target(T) {}
+    const char *getPassName() const { return "Modulo Scheduling"; }
+    
+    // getAnalysisUsage - We use LiveVarInfo...
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      //AU.addRequired(FunctionLiveVarInfo::ID);
+    }
+    bool runOnFunction(Function &F);
+  };
+} // end anonymous namespace
+
+
+
+bool ModuloSchedulingPass::runOnFunction(Function &F)
+{
+  //if necessary , open the output for debug purpose
+  if(ModuloSchedDebugLevel== ModuloSched_Disable)
+    return false;
+  
+  if(ModuloSchedDebugLevel>= ModuloSched_PrintSchedule){
+    modSched_fb.open("moduloSchedDebugInfo.output", ios::out);
+    modSched_os<<"******************Modula Scheduling debug information*************************"<<endl;
+  }
+  
+  ModuloSchedGraphSet* graphSet = new ModuloSchedGraphSet(&F,target);
+  ModuloSchedulingSet ModuloSchedulingSet(*graphSet);
+  
+  if(ModuloSchedDebugLevel>= ModuloSched_PrintSchedule)
+    modSched_fb.close();
+  
+  return false;
+}
+
+
+Pass *createModuloSchedulingPass(const TargetMachine &tgt) {
+  return new ModuloSchedulingPass(tgt);
+}
+
diff --git a/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h b/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h
new file mode 100644 (file)
index 0000000..9f9fcaa
--- /dev/null
@@ -0,0 +1,160 @@
+//// - head file for the classes ModuloScheduling and ModuloScheduling ----*- C++ -*-===//
+//
+// This header defines the the classes ModuloScheduling  and ModuloSchedulingSet 's structure
+// 
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_CODEGEN_MODULOSCHEDULING_H
+#define LLVM_CODEGEN_MODULOSCHEDULING_H
+
+#include "ModuloSchedGraph.h"
+#include <iostream>
+
+class ModuloScheduling:NonCopyable {
+ private:
+  typedef std::vector<ModuloSchedGraphNode*> NodeVec;
+  
+  /// the graph to feed in
+  ModuloSchedGraph& graph;
+  const TargetMachine& target;
+  
+  //the BasicBlock to be scheduled
+  BasicBlock* bb;
+
+  ///Iteration Intervel
+  ///FIXME: II may be a better name for its meaning
+  unsigned II;
+
+  //the vector containing the nodes which have been scheduled
+  NodeVec nodeScheduled;
+  
+  ///the remaining unscheduled nodes 
+  const NodeVec& oNodes;
+  
+  ///the machine resource table
+  std::vector< std::vector<pair<int,int> > >  resourceTable ;
+  
+  ///the schedule( with many schedule stage)
+  std::vector<std::vector<ModuloSchedGraphNode*> > schedule;
+  
+  ///the kernel(core) schedule(length = II)
+  std::vector<std::vector<ModuloSchedGraphNode*> > coreSchedule;
+
+ typedef   BasicBlock::InstListType InstListType;
+ typedef   std::vector <std::vector<ModuloSchedGraphNode*> > vvNodeType;
+
+
+public:
+  
+  ///constructor
+  ModuloScheduling(ModuloSchedGraph& _graph): 
+    graph(_graph), 
+    target(graph.getTarget()),
+    oNodes(graph.getONodes())
+    {
+      II = graph.getMII();
+      bb=(BasicBlock*)graph.getBasicBlocks()[0];
+
+      instrScheduling();
+    };
+
+  ///destructor
+  ~ModuloScheduling(){};
+
+  ///the method to compute schedule and instert epilogue and prologue
+  void instrScheduling();
+
+  ///debug functions:
+  ///dump the schedule and core schedule
+  void dumpScheduling();
+  
+  ///dump the input vector of nodes
+  //sch: the input vector of nodes
+  void dumpSchedule( std::vector<std::vector<ModuloSchedGraphNode*> > sch);
+
+  ///dump the resource usage table
+  void dumpResourceUsageTable();
+
+
+  //*******************internel functions*******************************
+private:
+  //clear memory from the last round and initialize if necessary
+  void clearInitMem(const MachineSchedInfo& );
+
+  //compute schedule and coreSchedule with the current II
+  bool computeSchedule();
+
+  BasicBlock* getSuccBB(BasicBlock*);
+  BasicBlock* getPredBB(BasicBlock*);
+  void constructPrologue(BasicBlock* prologue);
+  void constructKernel(BasicBlock* prologue,BasicBlock* kernel,BasicBlock* epilogue);
+  void constructEpilogue(BasicBlock* epilogue,BasicBlock* succ_bb);
+
+  ///update the resource table at the startCycle
+  //vec: the resouce usage
+  //startCycle: the start cycle the resouce usage is
+  void updateResourceTable(std::vector<vector<unsigned int> > vec,int startCycle);
+
+  ///un-do the update in the resource table in the startCycle
+  //vec: the resouce usage
+  //startCycle: the start cycle the resouce usage is
+  void undoUpdateResourceTable(std::vector<vector<unsigned int> > vec,int startCycle);
+
+  ///return whether the resourcetable has negative element
+  ///this function is called after updateResouceTable() to determine whether a node can
+  /// be scheduled at certain cycle
+  bool resourceTableNegative();
+
+
+  ///try to Schedule the node starting from start to end cycle(inclusive)
+  //if it can be scheduled, put it in the schedule and update nodeScheduled
+  //node: the node to be scheduled
+  //start: start cycle
+  //end : end cycle
+  //nodeScheduled: a vector storing nodes which has been scheduled
+  bool ScheduleNode(ModuloSchedGraphNode* node,unsigned start, unsigned end, NodeVec& nodeScheduled);
+
+  //each instruction has a memory of the latest clone instruction
+  //the clone instruction can be get using getClone() 
+  //this function clears the memory, i.e. getClone() after calling this function returns null
+  void clearCloneMemory();
+
+  //this fuction make a clone of this input Instruction and update the clone memory
+  //inst: the instrution to be cloned
+  Instruction* cloneInstSetMemory(Instruction* inst);
+
+  //this function update each instrutions which uses ist as its operand
+  //after update, each instruction will use ist's clone as its operand
+  void updateUseWithClone(Instruction* ist);
+
+};
+
+
+class ModuloSchedulingSet:NonCopyable{
+ private:
+  
+  //the graphSet to feed in
+  ModuloSchedGraphSet& graphSet;
+ public:
+
+  //constructor
+  //Scheduling graph one by one
+  ModuloSchedulingSet(ModuloSchedGraphSet _graphSet):graphSet(_graphSet){
+    for(unsigned i=0;i<graphSet.size();i++){
+      ModuloSchedGraph& graph=*(graphSet[i]);
+      if(graph.isLoop())ModuloScheduling ModuloScheduling(graph);
+    }
+  };
+  
+  //destructor
+  ~ModuloSchedulingSet(){};
+};
+
+
+
+#endif
+
+