fix bugs in memory model in multi-core version
authorjzhou <jzhou>
Mon, 20 Jul 2009 17:16:42 +0000 (17:16 +0000)
committerjzhou <jzhou>
Mon, 20 Jul 2009 17:16:42 +0000 (17:16 +0000)
13 files changed:
Robust/src/Analysis/Scheduling/MCImplSynthesis.java
Robust/src/Analysis/Scheduling/ObjectSimulator.java
Robust/src/Analysis/Scheduling/Schedule.java
Robust/src/Analysis/Scheduling/ScheduleAnalysis.java
Robust/src/Analysis/Scheduling/ScheduleSimulator.java
Robust/src/IR/Flat/BuildCodeMultiCore.java
Robust/src/IR/State.java
Robust/src/Main/Main.java
Robust/src/Runtime/mem.c
Robust/src/Runtime/mem.h
Robust/src/Runtime/multicoreruntime.h
Robust/src/Runtime/multicoretask.c
Robust/src/buildscript

index 06836479f19f1e4e39e96aa8007f6a10b01c558f..65f0ea6c0a49294c36cba95d7e4fd7d90af327c3 100644 (file)
@@ -6,6 +6,8 @@ import java.io.PrintStream;
 import java.io.PrintWriter;
 import java.util.Hashtable;
 import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Queue;
 import java.util.Random;
 import java.util.Vector;
 
@@ -20,1254 +22,1316 @@ import IR.TypeUtil;
 
 public class MCImplSynthesis {
 
-    State state;
-    ScheduleAnalysis scheduleAnalysis;
-    TaskAnalysis taskAnalysis;
-    OwnershipAnalysis ownershipAnalysis;
-    ScheduleSimulator scheduleSimulator;
-    
-    int coreNum;
-    int scheduleThreshold; // # of starting points generated by schedule analysis
-    int probThreshold; // the probability to stop when no accelaration achieved  
-                       // in the directed simulated annealing
-    int generateThreshold; // how many optimized implementation generated in 
-                           // each iteration of the directed simulated annealing
+  State state;
+  ScheduleAnalysis scheduleAnalysis;
+  TaskAnalysis taskAnalysis;
+  OwnershipAnalysis ownershipAnalysis;
+  ScheduleSimulator scheduleSimulator;
+
+  int coreNum;
+  int scheduleThreshold; // # of starting points generated by schedule analysis
+  int probThreshold; // the probability to stop when no accelaration achieved  
+                     // in the directed simulated annealing
+  int generateThreshold; // how many optimized implementation generated in 
+                         // each iteration of the directed simulated annealing
+
+  public MCImplSynthesis(State state, 
+                         TaskAnalysis ta,
+                         OwnershipAnalysis oa) {
+    this.state = state;
+    this.coreNum = state.CORENUM;
+    this.taskAnalysis = ta;
+    this.ownershipAnalysis = oa;
+    this.scheduleAnalysis = new ScheduleAnalysis(state,
+        ta);
+    this.scheduleAnalysis.setCoreNum(this.coreNum);
+    this.scheduleSimulator = new ScheduleSimulator(this.coreNum,
+        state,
+        ta);
+    this.scheduleThreshold = 1000;
+    this.probThreshold = 0;
+    this.generateThreshold = 30;
+  }
+
+  public int getCoreNum() {
+    return this.scheduleAnalysis.getCoreNum();
+  }
+
+  public int getScheduleThreshold() {
+    return scheduleThreshold;
+  }
+
+  public void setScheduleThreshold(int scheduleThreshold) {
+    this.scheduleThreshold = scheduleThreshold;
+  }
+
+  public int getProbThreshold() {
+    return probThreshold;
+  }
+
+  public void setProbThreshold(int probThreshold) {
+    this.probThreshold = probThreshold;
+  }
+
+  public int getGenerateThreshold() {
+    return generateThreshold;
+  }
+
+  public void setGenerateThreshold(int generateThreshold) {
+    this.generateThreshold = generateThreshold;
+  }
+
+  public Vector<Schedule> synthesis() {
+    // Print stuff to the original output and error streams.
+    // The stuff printed through the 'origOut' and 'origErr' references
+    // should go to the console on most systems while the messages
+    // printed through the 'System.out' and 'System.err' will end up in
+    // the files we created for them.
+    //origOut.println ("\nRedirect:  Round #2");
+    //System.out.println ("Test output via 'SimulatorResult.out'.");
+    //origOut.println ("Test output via 'origOut' reference.");
+
+    // Save the current standard input, output, and error streams
+    // for later restoration.
+    PrintStream origOut = System.out;
+
+    // Create a new output stream for the stcriticalPathandard output.
+    PrintStream stdout  = null;
+    try {
+      stdout = new PrintStream(
+          new FileOutputStream(this.state.outputdir + "SimulatorResult_" 
+              + this.coreNum + ".out"));
+    } catch (Exception e) {
+      // Sigh.  Couldn't open the file.
+      System.out.println("Redirect:  Unable to open output file!");
+      System.exit(1);
+    }
+
+    // Print stuff to the original output and error streams.
+    // On most systems all of this will end up on your console when you
+    // run this application.
+    //origOut.println ("\nRedirect:  Round #1");
+    //System.out.println ("Test output via 'System.out'.");
+    //origOut.println ("Test output via 'origOut' reference.");
+
+    // Set the System out and err streams to use our replacements.
+    System.setOut(stdout);
+
+    Vector<Schedule> scheduling = null;
+    Vector<ScheduleNode> schedulinggraph = null;
+    int gid = 1;
     
-    public MCImplSynthesis(State state, 
-                          TaskAnalysis ta,
-                          OwnershipAnalysis oa) {
-       this.state = state;
-       this.coreNum = state.CORENUM;
-       this.taskAnalysis = ta;
-       this.ownershipAnalysis = oa;
-       this.scheduleAnalysis = new ScheduleAnalysis(state,
-                                                    ta);
-       this.scheduleAnalysis.setCoreNum(this.coreNum);
-       this.scheduleSimulator = new ScheduleSimulator(this.coreNum,
-                                                      state,
-                                                      ta);
-       this.scheduleThreshold = 1000;
-       this.probThreshold = 0;
-       this.generateThreshold = 30;
+    // check all multi-parameter tasks
+    Vector<TaskDescriptor> multiparamtds = new Vector<TaskDescriptor>();
+    Iterator it_tasks = 
+      this.state.getTaskSymbolTable().getDescriptorsIterator();
+    while(it_tasks.hasNext()) {
+      TaskDescriptor td = (TaskDescriptor)it_tasks.next();
+      if(td.numParameters() > 1) {
+        multiparamtds.addElement(td);
+      }
+    }
+    it_tasks = null;
+
+    // generate multiple schedulings
+    this.scheduleAnalysis.setScheduleThreshold(this.scheduleThreshold);
+    boolean tooptimize = 
+      this.scheduleAnalysis.schedule(this.generateThreshold, multiparamtds);
+    if(this.generateThreshold > 5) {
+      this.generateThreshold = 5;
     }
 
-    public int getCoreNum() {
-       return this.scheduleAnalysis.getCoreNum();
+    Vector<Vector<ScheduleNode>> scheduleGraphs = null;
+    Vector<Vector<ScheduleNode>> newscheduleGraphs = 
+      this.scheduleAnalysis.getScheduleGraphs();
+    Hashtable<TaskDescriptor, ClassDescriptor> td2maincd = 
+      this.scheduleAnalysis.getTd2maincd();
+    Vector<Vector<Schedule>> schedulings = new Vector<Vector<Schedule>>();
+    Vector<Integer> selectedSchedulings = new Vector<Integer>();
+    Vector<SimExecutionNode> selectedSimExeGraphs = 
+      new Vector<SimExecutionNode>();
+
+    int tryindex = 1;
+    long bestexetime = Long.MAX_VALUE;
+    Random rand = new Random();
+    // simulate the generated schedulings and try to optimize it
+    do {
+      System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
+      System.out.print("Simulate and optimize round: #" + tryindex + ": \n");
+      gid += newscheduleGraphs.size();
+      if(scheduleGraphs != null) {
+        for(int i = 0; i < scheduleGraphs.size(); i++) {
+          Vector<ScheduleNode> tmpgraph = scheduleGraphs.elementAt(i);
+          for(int j = 0; j < tmpgraph.size(); j++) {
+            ScheduleNode snode = tmpgraph.elementAt(j);
+            snode.getEdgeVector().clear();
+            snode.getInedgeVector().clear();
+            snode.getScheduleEdges().clear();
+            snode.getClassNodes().clear();
+          }
+          tmpgraph.clear();
+          tmpgraph = null;
+        }
+        scheduleGraphs.clear();
+      }
+      scheduleGraphs = newscheduleGraphs;
+      schedulings.clear();
+      // get scheduling layouts from schedule graphs
+      for(int i = 0; i < scheduleGraphs.size(); i++) {
+        Vector<ScheduleNode> scheduleGraph = scheduleGraphs.elementAt(i);
+        Vector<Schedule> tmpscheduling = 
+          generateScheduling(scheduleGraph, td2maincd);
+        schedulings.add(tmpscheduling);
+        scheduleGraph = null;
+        tmpscheduling = null;
+      }
+      selectedSchedulings.clear();
+      selectedSimExeGraphs.clear();
+      long tmpexetime = this.scheduleSimulator.simulate(schedulings, 
+          selectedSchedulings, 
+          selectedSimExeGraphs);
+      boolean remove = false;
+      if(tmpexetime < bestexetime) {
+        remove = true;
+        bestexetime = tmpexetime;
+        if(scheduling != null) {
+          scheduling.clear();
+          for(int j = 0; j < schedulinggraph.size(); j++) {
+            ScheduleNode snode = schedulinggraph.elementAt(j);
+            snode.getEdgeVector().clear();
+            snode.getInedgeVector().clear();
+            snode.getScheduleEdges().clear();
+            snode.getClassNodes().clear();
+          }
+          schedulinggraph.clear();
+        }
+        scheduling = schedulings.elementAt(selectedSchedulings.elementAt(0));
+        schedulinggraph = scheduleGraphs.elementAt(
+            selectedSchedulings.elementAt(0));
+        System.out.print("end of: #" + tryindex + " (bestexetime: " 
+            + bestexetime + ")\n");
+        System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
+        tryindex++;
+      } else if(tmpexetime == bestexetime) {
+        System.out.print("end of: #" + tryindex + " (bestexetime: " 
+            + bestexetime + ")\n");
+        System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
+        tryindex++;
+        if((Math.abs(rand.nextInt()) % 100) < this.probThreshold) {
+          break;
+        }
+      } else {
+        break;
+      }
+
+      //if(tooptimize) {
+      // try to optimize the best one scheduling
+      newscheduleGraphs = optimizeScheduling(scheduleGraphs, 
+          selectedSchedulings, 
+          selectedSimExeGraphs,
+          gid,
+          this.scheduleThreshold);
+      if(remove) {
+        scheduleGraphs.removeElementAt(selectedSchedulings.elementAt(0));
+      }
+      /*} else {
+        break;
+      }*/
+    }while(newscheduleGraphs != null); // TODO: could it possibly lead to endless loop?
+
+    if(scheduleGraphs != null) {
+      scheduleGraphs.clear();
+    }
+    scheduleGraphs = null;
+    newscheduleGraphs = null;
+    schedulings.clear();
+    schedulings = null;
+    selectedSchedulings.clear();
+    selectedSchedulings = null;
+    selectedSimExeGraphs.clear();
+    selectedSimExeGraphs = null;
+
+    multiparamtds.clear();
+    multiparamtds = null;
+    td2maincd.clear();
+    td2maincd = null;
+
+    System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
+    System.out.print("selected bestexetime: " + bestexetime + "\n");
+    String path = this.state.outputdir + "scheduling_selected.dot";
+    SchedulingUtil.printScheduleGraph(path, schedulinggraph);
+
+    // Close the streams.
+    try {
+      stdout.close();
+      stdout = null;
+      System.setOut(origOut);
+    } catch (Exception e) {
+      origOut.println("Redirect:  Unable to close files!");
     }
 
-    public int getScheduleThreshold() {
-        return scheduleThreshold;
+    schedulinggraph.clear();
+    schedulinggraph = null;
+
+    return scheduling;
+  }
+
+  // for test
+  // get the distribution info of new search algorithm
+  public void distribution(boolean isall, int startnum) {
+    // Print stuff to the original output and error streams.
+    // The stuff printed through the 'origOut' and 'origErr' references
+    // should go to the console on most systems while the messages
+    // printed through the 'System.out' and 'System.err' will end up in
+    // the files we created for them.
+    //origOut.println ("\nRedirect:  Round #2");
+    //System.out.println ("Test output via 'SimulatorResult.out'.");
+    //origOut.println ("Test output via 'origOut' reference.");
+
+    // Save the current standard input, output, and error streams
+    // for later restoration.
+    PrintStream origOut = System.out;
+
+    // Create a new output stream for the standard output.
+    PrintStream stdout  = null;
+    try {
+      stdout = new PrintStream(
+          new FileOutputStream(this.state.outputdir + "SimulatorResult_" 
+              + this.coreNum + ".out"));
+    } catch (Exception e) {
+      // Sigh.  Couldn't open the file.
+      System.out.println("Redirect:  Unable to open output file!");
+      System.exit(1);
     }
 
-    public void setScheduleThreshold(int scheduleThreshold) {
-        this.scheduleThreshold = scheduleThreshold;
+    // Print stuff to the original output and error streams.
+    // On most systems all of this will end up on your console when you
+    // run this application.
+    //origOut.println ("\nRedirect:  Round #1");
+    //System.out.println ("Test output via 'System.out'.");
+    //origOut.println ("Test output via 'origOut' reference.");
+
+    // Set the System out and err streams to use our replacements.
+    System.setOut(stdout);
+
+    if(isall) {
+      // check all multi-parameter tasks
+      Vector<TaskDescriptor> multiparamtds = new Vector<TaskDescriptor>();
+      Iterator it_tasks = 
+        this.state.getTaskSymbolTable().getDescriptorsIterator();
+      while(it_tasks.hasNext()) {
+        TaskDescriptor td = (TaskDescriptor)it_tasks.next();
+        if(td.numParameters() > 1) {
+          multiparamtds.addElement(td);
+        }
+      }
+      it_tasks = null;
+      
+      // Generate all possible schedulings
+      this.scheduleAnalysis.setScheduleThreshold(Integer.MAX_VALUE);
+      this.scheduleAnalysis.schedule(-1, multiparamtds);
+
+      Vector<Vector<ScheduleNode>> totestscheduleGraphs = 
+        this.scheduleAnalysis.getScheduleGraphs();
+      Hashtable<TaskDescriptor, ClassDescriptor> td2maincd = 
+        this.scheduleAnalysis.getTd2maincd();
+      Vector<Vector<Schedule>> schedulings = new Vector<Vector<Schedule>>();
+      Vector<Integer> selectedSchedulings = new Vector<Integer>();
+      Vector<SimExecutionNode> selectedSimExeGraphs = 
+        new Vector<SimExecutionNode>();
+
+      File file=new File(this.state.outputdir+"distributeinfo_s_"+this.coreNum
+                         +".out");
+      FileOutputStream dotstream = null; 
+      try {
+        dotstream = new FileOutputStream(file,false);
+      } catch (Exception e) {
+        e.printStackTrace();
+        System.exit(-1);
+      }
+      PrintWriter output = new java.io.PrintWriter(dotstream, true);
+      output.println("start time(1,000,000 cycles): " 
+                     + totestscheduleGraphs.size());
+      for(int ii = 0; ii < totestscheduleGraphs.size(); ii++) {
+        Vector<Vector<ScheduleNode>> newscheduleGraphs = 
+          new Vector<Vector<ScheduleNode>>();
+        newscheduleGraphs.add(totestscheduleGraphs.elementAt(ii));
+        // simulate the generated schedulings and try to optimize it   
+        schedulings.clear();
+        // get scheduling layouts from schedule graphs
+        for(int i = 0; i < newscheduleGraphs.size(); i++) {
+          Vector<ScheduleNode> scheduleGraph = newscheduleGraphs.elementAt(i);
+          Vector<Schedule> tmpscheduling = 
+            generateScheduling(scheduleGraph, td2maincd);
+          schedulings.add(tmpscheduling);
+          scheduleGraph = null;
+          tmpscheduling = null;
+        }
+        selectedSchedulings.clear();
+        selectedSimExeGraphs.clear();
+        long tmpexetime = this.scheduleSimulator.simulate(schedulings, 
+            selectedSchedulings, 
+            selectedSimExeGraphs);
+        output.println(((float)tmpexetime/100000000));
+      }
+
+    } else {
+      // check all multi-parameter tasks
+      Vector<TaskDescriptor> multiparamtds = new Vector<TaskDescriptor>();
+      Iterator it_tasks = 
+        this.state.getTaskSymbolTable().getDescriptorsIterator();
+      while(it_tasks.hasNext()) {
+        TaskDescriptor td = (TaskDescriptor)it_tasks.next();
+        if(td.numParameters() > 1) {
+          multiparamtds.addElement(td);
+        }
+      }
+      it_tasks = null;
+      
+      // generate multiple schedulings
+      this.scheduleThreshold = 20;
+      this.generateThreshold = 30;
+      this.probThreshold = 0;
+      this.scheduleAnalysis.setScheduleThreshold(1000);
+      boolean tooptimize = 
+        this.scheduleAnalysis.schedule(this.generateThreshold, multiparamtds);
+
+      Vector<Vector<ScheduleNode>> scheduleGraphs = null;
+      Vector<Vector<ScheduleNode>> totestscheduleGraphs = 
+        this.scheduleAnalysis.getScheduleGraphs();
+      Hashtable<TaskDescriptor, ClassDescriptor> td2maincd = 
+        this.scheduleAnalysis.getTd2maincd();
+      Vector<Vector<Schedule>> schedulings = new Vector<Vector<Schedule>>();
+      Vector<Integer> selectedSchedulings = new Vector<Integer>();
+      Vector<SimExecutionNode> selectedSimExeGraphs = 
+        new Vector<SimExecutionNode>();
+
+      File file=new File(this.state.outputdir + "distributeinfo_s_" 
+                         + this.coreNum + ".out");
+      FileOutputStream dotstream = null; 
+      File file2=new File(this.state.outputdir + "distributeinfo_o_" 
+                          + this.coreNum + ".out");
+      FileOutputStream dotstream2 = null; 
+      try {
+        dotstream = new FileOutputStream(file,false);
+        dotstream2 = new FileOutputStream(file2,false);
+      } catch (Exception e) {
+        e.printStackTrace();
+        System.exit(-1);
+      }
+      PrintWriter output = new java.io.PrintWriter(dotstream, true);
+      PrintWriter output2 = new java.io.PrintWriter(dotstream2, true);
+      output.println("start time(1,000,000 cycles): " 
+                     + totestscheduleGraphs.size());
+      output2.println("optimized time(1,000,000 cycles): " 
+                      + totestscheduleGraphs.size());
+      for(int ii = startnum; ii < totestscheduleGraphs.size(); ii++) {
+        Vector<Vector<ScheduleNode>> newscheduleGraphs = 
+          new Vector<Vector<ScheduleNode>>();
+        newscheduleGraphs.add(totestscheduleGraphs.elementAt(ii));
+        int tryindex = 1;
+        long bestexetime = Long.MAX_VALUE;
+        int gid = 1;
+        Vector<Schedule> scheduling = null;
+        Vector<ScheduleNode> schedulinggraph = null;
+        boolean isfirst = true;
+        Random rand = new Random();
+        // simulate the generated schedulings and try to optimize it
+        System.out.print("=========================================================\n");
+        System.out.print("# " + ii + ": \n");
+        do {
+          System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
+          System.out.print("Simulate and optimize round: #" + tryindex + ": \n");    
+          gid += newscheduleGraphs.size();
+          if(scheduleGraphs != null) {
+            for(int i = 0; i < scheduleGraphs.size(); i++) {
+              Vector<ScheduleNode> tmpgraph = scheduleGraphs.elementAt(i);
+              for(int j = 0; j < tmpgraph.size(); j++) {
+                ScheduleNode snode = tmpgraph.elementAt(j);
+                snode.getEdgeVector().clear();
+                snode.getInedgeVector().clear();
+                snode.getScheduleEdges().clear();
+                snode.getClassNodes().clear();
+              }
+              tmpgraph.clear();
+              tmpgraph = null;
+            }
+            scheduleGraphs.clear();
+          }
+          scheduleGraphs = newscheduleGraphs;
+          schedulings.clear();
+          // get scheduling layouts from schedule graphs
+          for(int i = 0; i < scheduleGraphs.size(); i++) {
+            Vector<ScheduleNode> scheduleGraph = scheduleGraphs.elementAt(i);
+            Vector<Schedule> tmpscheduling = 
+              generateScheduling(scheduleGraph, td2maincd);
+            schedulings.add(tmpscheduling);
+            scheduleGraph = null;
+            tmpscheduling = null;
+          }
+          selectedSchedulings.clear();
+          selectedSimExeGraphs.clear();
+          long tmpexetime = this.scheduleSimulator.simulate(schedulings, 
+              selectedSchedulings, 
+              selectedSimExeGraphs);
+          if(isfirst) {
+            output.println(((float)tmpexetime/100000000));
+            isfirst = false;
+          }
+          if(tmpexetime < bestexetime) {
+            bestexetime = tmpexetime;
+            if(scheduling != null) {
+              scheduling.clear();
+              for(int j = 0; j < schedulinggraph.size(); j++) {
+                ScheduleNode snode = schedulinggraph.elementAt(j);
+                snode.getEdgeVector().clear();
+                snode.getInedgeVector().clear();
+                snode.getScheduleEdges().clear();
+                snode.getClassNodes().clear();
+              }
+              schedulinggraph.clear();
+            }
+            scheduling = schedulings.elementAt(selectedSchedulings.elementAt(0));
+            schedulinggraph = 
+              scheduleGraphs.elementAt(selectedSchedulings.elementAt(0));
+            tryindex++;
+            System.out.print("end of: #" + tryindex + " (bestexetime: " 
+                             + bestexetime + ")\n");
+            System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
+          } else if(tmpexetime == bestexetime) {
+            System.out.print("end of: #" + tryindex + " (bestexetime: " 
+                             + bestexetime + ")\n");
+            System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
+            tryindex++;
+            if((Math.abs(rand.nextInt()) % 100) < this.probThreshold) {
+              break;
+            }
+          } else {
+            break;
+          }
+
+          if(tooptimize) {
+          // try to optimize theschedulings best one scheduling
+          newscheduleGraphs = optimizeScheduling(scheduleGraphs, 
+              selectedSchedulings, 
+              selectedSimExeGraphs,
+              gid,
+              this.scheduleThreshold);
+          if(tmpexetime < bestexetime) {
+            scheduleGraphs.remove(selectedSchedulings.elementAt(0));
+          }
+          } else {
+            break;
+          }
+        }while(newscheduleGraphs != null); // TODO: could it possibly lead to endless loop?
+
+        scheduleGraphs.clear();
+        scheduleGraphs = null;
+        scheduling = null;
+        schedulinggraph = null;
+        if(newscheduleGraphs != null) {
+          newscheduleGraphs.clear();
+        }
+        newscheduleGraphs = null;
+        totestscheduleGraphs.elementAt(ii).clear();
+        for(int i = 0; i < schedulings.size(); i++) {
+          schedulings.elementAt(i).clear();
+        }
+        schedulings.clear();
+        selectedSchedulings.clear();
+        selectedSimExeGraphs.clear();
+
+        output2.println(((float)bestexetime/100000000));
+        System.out.print("=========================================================\n");
+      }
+
+      if(scheduleGraphs != null) {
+        scheduleGraphs.clear();
+      }
+      scheduleGraphs = null;
+      totestscheduleGraphs = null;
+      for(int i = 0; i < schedulings.size(); i++) {
+        schedulings.elementAt(i).clear();
+      }
+      schedulings.clear();
+      schedulings = null;
+      selectedSchedulings.clear();
+      selectedSchedulings = null;
+      selectedSimExeGraphs.clear();
+      selectedSimExeGraphs = null;
+      multiparamtds.clear();
+      multiparamtds = null;
+      td2maincd.clear();
+      td2maincd = null;
+
+
+      // Close the streams.
+      try {
+        output.close();
+        stdout.close();
+        output = null;
+        stdout = null;
+        System.setOut(origOut);
+      } catch (Exception e) {
+        origOut.println("Redirect:  Unable to close files!");
+      }
     }
 
-    public int getProbThreshold() {
-        return probThreshold;
+    return;
+  }
+
+  private Vector<Vector<ScheduleNode>> 
+  optimizeScheduling(Vector<Vector<ScheduleNode>> scheduleGraphs,
+                     Vector<Integer> selectedScheduleGraphs,
+                     Vector<SimExecutionNode> selectedSimExeGraphs,
+                     int gid,
+                     int count) {
+    if(this.coreNum == 1) {
+      // single core
+      return null;
     }
 
-    public void setProbThreshold(int probThreshold) {
-        this.probThreshold = probThreshold;
+    Vector<Vector<ScheduleNode>> optimizeschedulegraphs = null;
+    int lgid = gid;
+    int left = count;
+
+    for(int i = 0; i < selectedScheduleGraphs.size(); i++) {
+      Vector<ScheduleNode> schedulegraph = scheduleGraphs.elementAt(
+          selectedScheduleGraphs.elementAt(i));
+      SimExecutionNode startnode = selectedSimExeGraphs.elementAt(i);
+      Vector<SimExecutionEdge> criticalPath = analyzeCriticalPath(startnode); 
+      Vector<Vector<ScheduleNode>> tmposchedulegraphs = 
+        optimizeCriticalPath(schedulegraph, 
+                             criticalPath,
+                             lgid,
+                             left);
+      if(tmposchedulegraphs != null) {
+        if(optimizeschedulegraphs == null) {
+          optimizeschedulegraphs = new Vector<Vector<ScheduleNode>>();
+        }
+        optimizeschedulegraphs.addAll(tmposchedulegraphs);
+        lgid += tmposchedulegraphs.size();
+        left -= tmposchedulegraphs.size();
+        if(left == 0) {
+          schedulegraph = null;
+          criticalPath = null;
+          tmposchedulegraphs = null;
+          break;
+        }
+      }
+      schedulegraph = null;
+      criticalPath = null;
+      tmposchedulegraphs = null;
     }
 
-    public int getGenerateThreshold() {
-        return generateThreshold;
+    return optimizeschedulegraphs;
+  }
+
+  private Vector<SimExecutionEdge> 
+  analyzeCriticalPath(SimExecutionNode startnode) {
+    // first figure out the critical path
+    Vector<SimExecutionEdge> criticalPath = new Vector<SimExecutionEdge>();
+    getCriticalPath(startnode, criticalPath);
+    computeBestStartPoint(criticalPath);
+
+    return criticalPath;
+  }
+
+  // TODO: currently only get one critical path. It's possible that there are
+  // multiple critical paths and some of them can not be optimized while others
+  // can. Need to fix up for this situation.
+  private long getCriticalPath(SimExecutionNode startnode, 
+                               Vector<SimExecutionEdge> criticalPath) {
+    long sum = 0;
+    SimExecutionNode snode = startnode;
+    // go reversely to find the critical path
+    while(snode != null) {
+      SimExecutionNode nsnode = null;
+      Iterator<SimExecutionEdge> it_iedges = 
+        (Iterator<SimExecutionEdge>)snode.inedges();
+      while(it_iedges.hasNext()) {
+        SimExecutionEdge sedge = it_iedges.next();
+        if(sedge.getWeight() != 0) {    
+          SimExecutionNode tsnode = (SimExecutionNode)(sedge.getSource());
+          if(tsnode.getTimepoint() + sedge.getWeight() == snode.getTimepoint()) {
+            nsnode = tsnode;
+            criticalPath.insertElementAt(sedge, 0);
+            sum += sedge.getWeight();
+            break;
+          }
+        }
+      }
+      it_iedges = null;
+      snode = nsnode;
+    }
+    return sum;
+  }
+
+  private void computeBestStartPoint(Vector<SimExecutionEdge> criticalPath) {
+    // calculate the earliest start time of each task on the critial path
+    for(int i = 0; i < criticalPath.size(); i++) {
+      SimExecutionEdge seedge = criticalPath.elementAt(i);
+      Vector<SimExecutionEdge> predicates = seedge.getPredicates();
+      if(predicates != null) {
+        // have predicates
+        long starttime = 0;
+        // check the latest finish time of all the predicates
+        for(int j = 0; j < predicates.size(); j++) {
+          SimExecutionEdge predicate = predicates.elementAt(j);
+          long tmptime = predicate.getBestStartPoint() + predicate.getWeight();
+          if(tmptime > starttime) {
+            starttime = tmptime;
+            seedge.setLastpredicateEdge(predicate);
+            if(predicate.getTd() != null) {
+              seedge.setLastpredicateNode(
+                  (SimExecutionNode)predicate.getTarget());
+            } else {
+              // transfer edge
+              seedge.setLastpredicateNode(
+                  (SimExecutionNode)predicate.getSource());
+            }
+          }
+        }
+        seedge.setBestStartPoint(starttime);
+      } else if(seedge.getSource().getInedgeVector().size() > 0) {
+        // should have only one in edge
+        long starttime = ((SimExecutionNode)seedge.getSource()).getTimepoint();
+        seedge.setBestStartPoint(starttime);
+      } else {
+        // no predicates
+        seedge.setBestStartPoint(0);
+      }
+      predicates = null;
+    }
+  }
+
+  private Vector<Vector<ScheduleNode>> 
+  optimizeCriticalPath(Vector<ScheduleNode> scheduleGraph,
+                       Vector<SimExecutionEdge> criticalPath,
+                       int gid,
+                       int count) {
+    Vector<Vector<ScheduleNode>> optimizeschedulegraphs = null;
+    int lgid = gid;
+    int left = count;
+
+    // for test, print out the criticalPath
+    if(this.state.PRINTCRITICALPATH) {
+      SchedulingUtil.printCriticalPath(this.state.outputdir + "criticalpath_" 
+                                       + lgid + ".dot",  criticalPath);
     }
 
-    public void setGenerateThreshold(int generateThreshold) {
-        this.generateThreshold = generateThreshold;
+    // first check all seedges whose real start point is late than predicted
+    // earliest start time and group them
+    long opcheckpoint = Long.MAX_VALUE;
+    Vector<Integer> sparecores = null;
+    // group according to core index
+    Hashtable<Long, Hashtable<Integer, Vector<SimExecutionEdge>>> toselects = 
+      new Hashtable<Long, Hashtable<Integer, Vector<SimExecutionEdge>>>();
+    Random rand = new Random();
+    for(int i = 0; i < criticalPath.size(); i++) {
+      SimExecutionEdge seedge = criticalPath.elementAt(i);
+      long starttime = seedge.getBestStartPoint();
+      if(starttime < ((SimExecutionNode)seedge.getSource()).getTimepoint()) {
+        // no restrictions due to data dependencies
+        // have potential to be parallelled and start execution earlier
+        seedge.setFixedTime(false);
+        // consider to optimize it only when its predicates can NOT 
+        // be optimized, otherwise first considering optimize its predicates
+        SimExecutionEdge lastpredicateedge = seedge.getLastpredicateEdge();
+        if(lastpredicateedge.isFixedTime()) {                  
+          int corenum = seedge.getCoreNum();
+          if(!toselects.containsKey(starttime)) {
+            toselects.put(starttime, 
+                new Hashtable<Integer, Vector<SimExecutionEdge>>());
+          }
+          if(!toselects.get(starttime).containsKey(corenum)) {
+            toselects.get(starttime).put(corenum, 
+                new Vector<SimExecutionEdge>());
+          }
+          toselects.get(starttime).get(corenum).add(seedge);
+        }
+      }
     }
 
-    public Vector<Schedule> synthesis() {
-       // Print stuff to the original output and error streams.
-       // The stuff printed through the 'origOut' and 'origErr' references
-       // should go to the console on most systems while the messages
-       // printed through the 'System.out' and 'System.err' will end up in
-       // the files we created for them.
-       //origOut.println ("\nRedirect:  Round #2");
-       //System.out.println ("Test output via 'SimulatorResult.out'.");
-       //origOut.println ("Test output via 'origOut' reference.");
-
-       // Save the current standard input, output, and error streams
-       // for later restoration.
-       PrintStream origOut = System.out;
-
-       // Create a new output stream for the stcriticalPathandard output.
-       PrintStream stdout  = null;
-       try {
-           stdout = new PrintStream(
-                   new FileOutputStream(this.state.outputdir + "SimulatorResult_" 
-                                        + this.coreNum + ".out"));
-       } catch (Exception e) {
-           // Sigh.  Couldn't open the file.
-           System.out.println("Redirect:  Unable to open output file!");
-           System.exit(1);
-       }
-
-       // Print stuff to the original output and error streams.
-       // On most systems all of this will end up on your console when you
-       // run this application.
-       //origOut.println ("\nRedirect:  Round #1");
-       //System.out.println ("Test output via 'System.out'.");
-       //origOut.println ("Test output via 'origOut' reference.");
-
-       // Set the System out and err streams to use our replacements.
-       System.setOut(stdout);
-             
-       Vector<Schedule> scheduling = null;
-       Vector<ScheduleNode> schedulinggraph = null;
-       int gid = 1;
-
-       // generate multiple schedulings
-       this.scheduleAnalysis.setScheduleThreshold(this.scheduleThreshold);
-       this.scheduleAnalysis.schedule(this.generateThreshold);
-       if(this.generateThreshold > 5) {
-           this.generateThreshold = 5;
-       }
-
-       Vector<Vector<ScheduleNode>> scheduleGraphs = null;
-       Vector<Vector<ScheduleNode>> newscheduleGraphs = 
-           this.scheduleAnalysis.getScheduleGraphs();
-       Vector<Vector<Schedule>> schedulings = new Vector<Vector<Schedule>>();
-       Vector<Integer> selectedSchedulings = new Vector<Integer>();
-       Vector<Vector<SimExecutionEdge>> selectedSimExeGraphs = 
-           new Vector<Vector<SimExecutionEdge>>();
-       
-       // check all multi-parameter tasks
-       Vector<TaskDescriptor> multiparamtds = new Vector<TaskDescriptor>();
-       Iterator it_tasks = 
-           this.state.getTaskSymbolTable().getDescriptorsIterator();
-       while(it_tasks.hasNext()) {
-           TaskDescriptor td = (TaskDescriptor)it_tasks.next();
-           if(td.numParameters() > 1) {
-               multiparamtds.addElement(td);
-           }
-       }
-       it_tasks = null;
-
-       int tryindex = 1;
-       long bestexetime = Long.MAX_VALUE;
-       Random rand = new Random();
-       // simulate the generated schedulings and try to optimize it
-       do {
-           System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
-           System.out.print("Simulate and optimize round: #" + tryindex + ": \n");
-           gid += newscheduleGraphs.size();
-           if(scheduleGraphs != null) {
-               for(int i = 0; i < scheduleGraphs.size(); i++) {
-                   Vector<ScheduleNode> tmpgraph = scheduleGraphs.elementAt(i);
-                   for(int j = 0; j < tmpgraph.size(); j++) {
-                       ScheduleNode snode = tmpgraph.elementAt(j);
-                       snode.getEdgeVector().clear();
-                       snode.getInedgeVector().clear();
-                       snode.getScheduleEdges().clear();
-                       snode.getClassNodes().clear();
-                   }
-                   tmpgraph.clear();
-                   tmpgraph = null;
-               }
-               scheduleGraphs.clear();
-           }
-           scheduleGraphs = newscheduleGraphs;
-           schedulings.clear();
-           // get scheduling layouts from schedule graphs
-           for(int i = 0; i < scheduleGraphs.size(); i++) {
-               Vector<ScheduleNode> scheduleGraph = scheduleGraphs.elementAt(i);
-               Vector<Schedule> tmpscheduling = 
-                   generateScheduling(scheduleGraph, multiparamtds);
-               schedulings.add(tmpscheduling);
-               scheduleGraph = null;
-               tmpscheduling = null;
-           }
-           selectedSchedulings.clear();
-           for(int i = 0; i < selectedSimExeGraphs.size(); i++) {
-               selectedSimExeGraphs.elementAt(i).clear();
-           }
-           selectedSimExeGraphs.clear();
-           long tmpexetime = this.scheduleSimulator.simulate(schedulings, 
-                                                            selectedSchedulings, 
-                                                            selectedSimExeGraphs);
-           boolean remove = false;
-           if(tmpexetime < bestexetime) {
-               remove = true;
-               bestexetime = tmpexetime;
-               if(scheduling != null) {
-                   scheduling.clear();
-                   for(int j = 0; j < schedulinggraph.size(); j++) {
-                       ScheduleNode snode = schedulinggraph.elementAt(j);
-                       snode.getEdgeVector().clear();
-                       snode.getInedgeVector().clear();
-                       snode.getScheduleEdges().clear();
-                       snode.getClassNodes().clear();
-                   }
-                   schedulinggraph.clear();
-               }
-               scheduling = schedulings.elementAt(selectedSchedulings.elementAt(0));
-               schedulinggraph = scheduleGraphs.elementAt(
-                       selectedSchedulings.elementAt(0));
-               System.out.print("end of: #" + tryindex + " (bestexetime: " 
-                                + bestexetime + ")\n");
-               System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
-               tryindex++;
-           } else if(tmpexetime == bestexetime) {
-               System.out.print("end of: #" + tryindex + " (bestexetime: " 
-                                + bestexetime + ")\n");
-               System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
-               tryindex++;
-               if((Math.abs(rand.nextInt()) % 100) < this.probThreshold) {
-                   break;
-               }
-           } else {
-               break;
-           }
-
-           // try to optimize the best one scheduling
-           newscheduleGraphs = optimizeScheduling(scheduleGraphs, 
-                                                  selectedSchedulings, 
-                                                  selectedSimExeGraphs,
-                                                  gid,
-                                                  this.scheduleThreshold);
-           if(remove) {
-               scheduleGraphs.removeElementAt(selectedSchedulings.elementAt(0));
-           }
-       }while(newscheduleGraphs != null); // TODO: could it possibly lead to endless loop?
-
-       if(scheduleGraphs != null) {
-           scheduleGraphs.clear();
-       }
-       scheduleGraphs = null;
-       newscheduleGraphs = null;
-       schedulings.clear();
-       schedulings = null;
-       selectedSchedulings.clear();
-       selectedSchedulings = null;
-       for(int i = 0; i < selectedSimExeGraphs.size(); i++) {
-           selectedSimExeGraphs.elementAt(i).clear();
-       }
-       selectedSimExeGraphs.clear();
-       selectedSimExeGraphs = null;
-       
-       multiparamtds.clear();
-       multiparamtds = null;
-
-       System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
-       System.out.print("selected bestexetime: " + bestexetime + "\n");
-       String path = this.state.outputdir + "scheduling_selected.dot";
-       SchedulingUtil.printScheduleGraph(path, schedulinggraph);
-
-       // Close the streams.
-       try {
-           stdout.close();
-           stdout = null;
-           System.setOut(origOut);
-       } catch (Exception e) {
-           origOut.println("Redirect:  Unable to close files!");
-       }
-       
-       schedulinggraph.clear();
-       schedulinggraph = null;
-
-       return scheduling;
+    // Randomly choose the tasks to optimize(previously only 
+    // consider the tasks with smallest best start time)
+    Vector<Long> keys = new Vector<Long>(toselects.keySet());
+    do{
+      int length = keys.size();
+      if(length == 0) {
+        return optimizeschedulegraphs;
+      }
+      int tochoose = Math.abs(rand.nextInt()) % length;
+      opcheckpoint = (keys.elementAt(tochoose)).longValue();
+      keys.removeElementAt(tochoose);
+      Hashtable<Integer, Vector<SimExecutionEdge>> tooptimize = 
+        toselects.get(opcheckpoint);
+      SimExecutionEdge seedge = 
+        tooptimize.values().iterator().next().elementAt(0);
+      SimExecutionNode lastpredicatenode = seedge.getLastpredicateNode();
+      SimExecutionEdge lastpredicateedge = seedge.getLastpredicateEdge();
+      long timepoint = lastpredicatenode.getTimepoint();
+      if(lastpredicateedge.getTd() == null) {
+        // transfer edge
+        timepoint += lastpredicateedge.getWeight();
+      }
+      // mapping to critical path
+      for(int index = 0; index < criticalPath.size(); index++) {
+        SimExecutionEdge tmpseedge = criticalPath.elementAt(index);
+        SimExecutionNode tmpsenode = 
+          (SimExecutionNode)tmpseedge.getTarget();
+        if(tmpsenode.getTimepoint() > timepoint) {
+          // get the spare core info
+          sparecores = tmpsenode.getSpareCores();
+          break;
+        }
+      }
+
+      if(tooptimize.size() > 0) {
+        Iterator<Integer> it_cores = tooptimize.keySet().iterator();
+        // check if it is possible to optimize these tasks
+        if((sparecores == null) || (sparecores.size() == 0)) {
+          // lack of spare cores
+          while(it_cores.hasNext()) {
+            int corenum = it_cores.next();
+            Vector<SimExecutionEdge> tmptasks = tooptimize.get(corenum);
+            // group the task instantiations according to whether it 
+            // has backward data dependences or not
+            Vector<SimExecutionEdge> candidatetasks = new Vector();
+            for(int ii= 0; ii < tmptasks.size(); ii++) {
+              SimExecutionEdge tmpseedge = tmptasks.elementAt(ii);
+              SimExecutionNode target = (SimExecutionNode)tmpseedge.getTarget();
+              Vector<SimExecutionEdge> children = 
+                (Vector<SimExecutionEdge>)target.getEdgeVector();
+              int jj = 0;
+              for(; jj < children.size(); jj++) {
+                SimExecutionEdge tmpedge = children.elementAt(jj);
+                if(tmpedge.getTd() != null) {
+                  Vector<SimExecutionEdge> predicates = 
+                    tmpedge.getPredicates();
+                  if((predicates != null) && 
+                      (predicates.contains(tmpseedge))) {
+                    break;
+                  }
+                  predicates = null;
+                } else if(tmpedge.getWeight() != 0) {
+                  // transfer edge
+                  if(((SimExecutionNode)tmpedge.getTarget()).getTimepoint() 
+                      == tmpedge.getWeight() + target.getTimepoint()) {
+                    break;
+                  }
+                }
+              }
+              if(jj == children.size()) {
+                candidatetasks.add(tmpseedge);
+              }
+            }
+            if((candidatetasks.size() > 0) && 
+                (candidatetasks.size() < tmptasks.size())) {
+              // there are less important tasks which have no backward
+              // data dependences at this timepoint, try to change 
+              // original execution order
+              Hashtable<Integer, Vector<SimExecutionEdge>> tooptimize2 = 
+                new Hashtable<Integer, Vector<SimExecutionEdge>>();
+              tooptimize2.put(corenum, candidatetasks);
+              Vector<Vector<ScheduleNode>> ops = 
+                innerOptimizeCriticalPath(scheduleGraph,
+                                          tooptimize2,
+                                          null,
+                                          lgid,
+                                          left);
+              if(ops != null) {
+                if(optimizeschedulegraphs == null) {
+                  optimizeschedulegraphs = new Vector<Vector<ScheduleNode>>();
+                }
+                optimizeschedulegraphs.addAll(ops);
+                lgid += ops.size();
+                left -= ops.size();
+              }
+              tooptimize2 = null;
+              ops = null;
+            }
+            tmptasks = null;
+            candidatetasks = null;
+          }
+
+          if(left == 0) {
+            it_cores = null;
+            return optimizeschedulegraphs;
+          }
+
+          // flush the dependences and earliest start time
+          it_cores = tooptimize.keySet().iterator();
+          while(it_cores.hasNext()) {
+            int corenum = it_cores.next();
+            Vector<SimExecutionEdge> edgevec = 
+              tooptimize.get(corenum);
+            for(int j = 0; j < edgevec.size(); j++) {
+              SimExecutionEdge edge = edgevec.elementAt(j);
+              lastpredicateedge = edge.getLastpredicateEdge();
+              lastpredicatenode = edge.getLastpredicateNode();
+              // if(edge.getCoreNum() != lastpredicate.getCoreNum()) // should never hit this
+              timepoint = lastpredicatenode.getTimepoint();
+              if(lastpredicateedge.getTd() == null) {
+                // transfer edge
+                timepoint += lastpredicateedge.getWeight();
+              }
+              // mapping to critical path
+              for(int index = 0; index < criticalPath.size(); index++) {
+                SimExecutionEdge tmpseedge = criticalPath.elementAt(index);
+                SimExecutionNode tmpsenode = 
+                  (SimExecutionNode)tmpseedge.getTarget();
+                if(tmpsenode.getTimepoint() > timepoint) {
+                  // update the predicate info
+                  if(edge.getPredicates() != null) {
+                    edge.getPredicates().remove(lastpredicateedge);
+                  }
+                  edge.addPredicate(criticalPath.elementAt(index));
+                  break;
+                }
+              }
+            }
+            edgevec = null;
+          }
+          it_cores = null;
+          computeBestStartPoint(criticalPath);
+          Vector<Vector<ScheduleNode>> ops = optimizeCriticalPath(scheduleGraph, 
+                                                                  criticalPath, 
+                                                                  lgid,
+                                                                  left);
+          if(ops != null) {
+            if(optimizeschedulegraphs == null) {
+              optimizeschedulegraphs = new Vector<Vector<ScheduleNode>>();
+            }
+            optimizeschedulegraphs.addAll(ops);
+            lgid += ops.size();
+            left -= ops.size();
+          }
+          ops = null;
+        } else {
+          // there are spare cores, try to reorganize the tasks to the spare 
+          // cores
+          Vector<Vector<ScheduleNode>> ops = 
+            innerOptimizeCriticalPath(scheduleGraph,
+                                      tooptimize,
+                                      sparecores,
+                                      lgid,
+                                      left);
+          if(ops != null) {
+            if(optimizeschedulegraphs == null) {
+              optimizeschedulegraphs = new Vector<Vector<ScheduleNode>>();
+            }
+            optimizeschedulegraphs.addAll(ops);
+            lgid += ops.size();
+            left -= ops.size();
+          }
+          ops = null;
+        }
+      }
+      sparecores = null;
+      tooptimize.clear();
+      tooptimize = null;
+    }while(left > 0);
+    toselects.clear();
+    toselects = null;
+
+    return optimizeschedulegraphs;
+  }
+
+  private Vector<Vector<ScheduleNode>> 
+  innerOptimizeCriticalPath(Vector<ScheduleNode> scheduleGraph,
+                            Hashtable<Integer, Vector<SimExecutionEdge>> tooptimize,
+                            Vector<Integer> sparecores,
+                            int gid,
+                            int count) {
+    int lgid = gid;
+    int left = count;
+    Vector<Vector<ScheduleNode>> optimizeschedulegraphs = null;
+
+    // first clone the whole graph
+    Vector<ScheduleNode> newscheduleGraph = 
+      cloneScheduleGraph(scheduleGraph, lgid);
+
+    // these nodes are root nodes
+    Vector<ScheduleNode> roots = new Vector<ScheduleNode>();
+    for(int i = 0; i < newscheduleGraph.size(); i++) {
+      if((sparecores == null) || (sparecores.contains(i))) {
+        roots.add(newscheduleGraph.elementAt(i));
+      }
     }
-    
-    // for test
-    // get the distribution info of new search algorithm
-    public void distribution() {
-       // Print stuff to the original output and error streams.
-       // The stuff printed through the 'origOut' and 'origErr' references
-       // should go to the console on most systems while the messages
-       // printed through the 'System.out' and 'System.err' will end up in
-       // the files we created for them.
-       //origOut.println ("\nRedirect:  Round #2");
-       //System.out.println ("Test output via 'SimulatorResult.out'.");
-       //origOut.println ("Test output via 'origOut' reference.");
-
-       // Save the current standard input, output, and error streams
-       // for later restoration.
-       PrintStream origOut = System.out;
-
-       // Create a new output stream for the standard output.
-       PrintStream stdout  = null;
-       try {
-           stdout = new PrintStream(
-                   new FileOutputStream(this.state.outputdir + "SimulatorResult_" 
-                                        + this.coreNum + ".out"));
-       } catch (Exception e) {
-           // Sigh.  Couldn't open the file.
-           System.out.println("Redirect:  Unable to open output file!");
-           System.exit(1);
-       }
-
-       // Print stuff to the original output and error streams.
-       // On most systems all of this will end up on your console when you
-       // run this application.
-       //origOut.println ("\nRedirect:  Round #1");
-       //System.out.println ("Test output via 'System.out'.");
-       //origOut.println ("Test output via 'origOut' reference.");
-
-       // Set the System out and err streams to use our replacements.
-       System.setOut(stdout);
-
-       if(false) {
-      // Generate all possible schedulings
-           this.scheduleAnalysis.setScheduleThreshold(Integer.MAX_VALUE);
-           this.scheduleAnalysis.schedule(-1);
-
-           Vector<Vector<ScheduleNode>> totestscheduleGraphs = 
-               this.scheduleAnalysis.getScheduleGraphs();
-           Vector<Vector<Schedule>> schedulings = new Vector<Vector<Schedule>>();
-           Vector<Integer> selectedSchedulings = new Vector<Integer>();
-           Vector<Vector<SimExecutionEdge>> selectedSimExeGraphs = 
-               new Vector<Vector<SimExecutionEdge>>();
-
-           // check all multi-parameter tasks
-           Vector<TaskDescriptor> multiparamtds = new Vector<TaskDescriptor>();
-           Iterator it_tasks = this.state.getTaskSymbolTable().getDescriptorsIterator();
-           while(it_tasks.hasNext()) {
-               TaskDescriptor td = (TaskDescriptor)it_tasks.next();
-               if(td.numParameters() > 1) {
-                   multiparamtds.addElement(td);
-               }
-           }
-           it_tasks = null;
-
-           File file=new File(this.state.outputdir + "distributeinfo_s_" + this.coreNum + ".out");
-           FileOutputStream dotstream = null; 
-           try {
-               dotstream = new FileOutputStream(file,false);
-           } catch (Exception e) {
-               e.printStackTrace();
-               System.exit(-1);
-           }
-           PrintWriter output = new java.io.PrintWriter(dotstream, true);
-           output.println("start time(1,000,000 cycles): " + totestscheduleGraphs.size());
-           for(int ii = 0; ii < totestscheduleGraphs.size(); ii++) {
-               Vector<Vector<ScheduleNode>> newscheduleGraphs = 
-                   new Vector<Vector<ScheduleNode>>();
-               newscheduleGraphs.add(totestscheduleGraphs.elementAt(ii));
-               // simulate the generated schedulings and try to optimize it   
-               schedulings.clear();
-               // get scheduling layouts from schedule graphs
-               for(int i = 0; i < newscheduleGraphs.size(); i++) {
-                   Vector<ScheduleNode> scheduleGraph = newscheduleGraphs.elementAt(i);
-                   Vector<Schedule> tmpscheduling = 
-                       generateScheduling(scheduleGraph, multiparamtds);
-                   schedulings.add(tmpscheduling);
-                   scheduleGraph = null;
-                   tmpscheduling = null;
-               }
-               selectedSchedulings.clear();
-               for(int i = 0; i < selectedSimExeGraphs.size(); i++) {
-                   selectedSimExeGraphs.elementAt(i).clear();
-               }
-               selectedSimExeGraphs.clear();
-               long tmpexetime = this.scheduleSimulator.simulate(schedulings, 
-                       selectedSchedulings, 
-                       selectedSimExeGraphs);
-               output.println(((float)tmpexetime/1000000));
-           }
-        
-       } else {
-           // generate multiple schedulings
-           this.scheduleThreshold = 20;
-           this.generateThreshold = 30;
-           this.probThreshold = 0;
-           this.scheduleAnalysis.setScheduleThreshold(1000);
-           this.scheduleAnalysis.schedule(this.generateThreshold);
-
-           Vector<Vector<ScheduleNode>> scheduleGraphs = null;
-           Vector<Vector<ScheduleNode>> totestscheduleGraphs = 
-               this.scheduleAnalysis.getScheduleGraphs();
-           Vector<Vector<Schedule>> schedulings = new Vector<Vector<Schedule>>();
-           Vector<Integer> selectedSchedulings = new Vector<Integer>();
-           Vector<Vector<SimExecutionEdge>> selectedSimExeGraphs = 
-               new Vector<Vector<SimExecutionEdge>>();
-
-           // check all multi-parameter tasks
-           Vector<TaskDescriptor> multiparamtds = new Vector<TaskDescriptor>();
-           Iterator it_tasks = this.state.getTaskSymbolTable().getDescriptorsIterator();
-           while(it_tasks.hasNext()) {
-               TaskDescriptor td = (TaskDescriptor)it_tasks.next();
-               if(td.numParameters() > 1) {
-                   multiparamtds.addElement(td);
-               }
-           }
-           it_tasks = null;
-
-           File file=new File(this.state.outputdir + "distributeinfo_s_" + this.coreNum + ".out");
-           FileOutputStream dotstream = null; 
-           File file2=new File(this.state.outputdir + "distributeinfo_o_" + this.coreNum + ".out");
-           FileOutputStream dotstream2 = null; 
-           try {
-               dotstream = new FileOutputStream(file,false);
-               dotstream2 = new FileOutputStream(file2,false);
-           } catch (Exception e) {
-               e.printStackTrace();
-               System.exit(-1);
-           }
-           PrintWriter output = new java.io.PrintWriter(dotstream, true);
-           PrintWriter output2 = new java.io.PrintWriter(dotstream2, true);
-           output.println("start time(1,000,000 cycles): " + totestscheduleGraphs.size());
-           output2.println("optimized time(1,000,000 cycles): " + totestscheduleGraphs.size());
-           for(int ii = 0; ii < totestscheduleGraphs.size(); ii++) {
-               Vector<Vector<ScheduleNode>> newscheduleGraphs = 
-                   new Vector<Vector<ScheduleNode>>();
-               newscheduleGraphs.add(totestscheduleGraphs.elementAt(ii));
-               int tryindex = 1;
-               long bestexetime = Long.MAX_VALUE;
-               int gid = 1;
-               Vector<Schedule> scheduling = null;
-               Vector<ScheduleNode> schedulinggraph = null;
-               boolean isfirst = true;
-               Random rand = new Random();
-               // simulate the generated schedulings and try to optimize it
-               System.out.print("=========================================================\n");
-               System.out.print("# " + ii + ": \n");
-               do {
-                   System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
-                   System.out.print("Simulate and optimize round: #" + tryindex + ": \n");    
-                   gid += newscheduleGraphs.size();
-                   if(scheduleGraphs != null) {
-                       for(int i = 0; i < scheduleGraphs.size(); i++) {
-                           Vector<ScheduleNode> tmpgraph = scheduleGraphs.elementAt(i);
-                           for(int j = 0; j < tmpgraph.size(); j++) {
-                               ScheduleNode snode = tmpgraph.elementAt(j);
-                               snode.getEdgeVector().clear();
-                               snode.getInedgeVector().clear();
-                               snode.getScheduleEdges().clear();
-                               snode.getClassNodes().clear();
-                           }
-                           tmpgraph.clear();
-                           tmpgraph = null;
-                       }
-                       scheduleGraphs.clear();
-                   }
-                   scheduleGraphs = newscheduleGraphs;
-                   schedulings.clear();
-                   // get scheduling layouts from schedule graphs
-                   for(int i = 0; i < scheduleGraphs.size(); i++) {
-                       Vector<ScheduleNode> scheduleGraph = scheduleGraphs.elementAt(i);
-                       Vector<Schedule> tmpscheduling = 
-                           generateScheduling(scheduleGraph, multiparamtds);
-                       schedulings.add(tmpscheduling);
-                       scheduleGraph = null;
-                       tmpscheduling = null;
-                   }
-                   selectedSchedulings.clear();
-                   for(int i = 0; i < selectedSimExeGraphs.size(); i++) {
-                       Vector<SimExecutionEdge> tmpgraph = 
-                           selectedSimExeGraphs.elementAt(i);
-                       for(int j = 0; j < tmpgraph.size(); j++) {
-                           tmpgraph.elementAt(j).destroy();
-                       }
-                       tmpgraph.clear();
-                   }
-                   selectedSimExeGraphs.clear();
-                   long tmpexetime = this.scheduleSimulator.simulate(schedulings, 
-                           selectedSchedulings, 
-                           selectedSimExeGraphs);
-                   if(isfirst) {
-                       output.println(((float)tmpexetime/1000000));
-                       isfirst = false;
-                   }
-                   if(tmpexetime < bestexetime) {
-                       bestexetime = tmpexetime;
-                       if(scheduling != null) {
-                           scheduling.clear();
-                           for(int j = 0; j < schedulinggraph.size(); j++) {
-                               ScheduleNode snode = schedulinggraph.elementAt(j);
-                               snode.getEdgeVector().clear();
-                               snode.getInedgeVector().clear();
-                               snode.getScheduleEdges().clear();
-                               snode.getClassNodes().clear();
-                           }
-                           schedulinggraph.clear();
-                       }
-                       scheduling = schedulings.elementAt(selectedSchedulings.elementAt(0));
-                       schedulinggraph = scheduleGraphs.elementAt(selectedSchedulings.elementAt(0));
-                       tryindex++;
-                       System.out.print("end of: #" + tryindex + " (bestexetime: " + bestexetime + ")\n");
-                       System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
-                   } else if(tmpexetime == bestexetime) {
-                       System.out.print("end of: #" + tryindex + " (bestexetime: " + bestexetime + ")\n");
-                       System.out.print("+++++++++++++++++++++++++++++++++++++++++++++++++++\n");
-                       tryindex++;
-                       if((Math.abs(rand.nextInt()) % 100) < this.probThreshold) {
-                           break;
-                       }
-                   } else {
-                       break;
-                   }
-
-                   // try to optimize theschedulings best one scheduling
-                   newscheduleGraphs = optimizeScheduling(scheduleGraphs, 
-                           selectedSchedulings, 
-                           selectedSimExeGraphs,
-                           gid,
-                           this.scheduleThreshold);
-                   if(tmpexetime < bestexetime) {
-                       scheduleGraphs.remove(selectedSchedulings.elementAt(0));
-                   }
-               }while(newscheduleGraphs != null); // TODO: could it possibly lead to endless loop?
-
-               scheduleGraphs.clear();
-               scheduleGraphs = null;
-               scheduling = null;
-               schedulinggraph = null;
-               if(newscheduleGraphs != null) {
-                   newscheduleGraphs.clear();
-               }
-               newscheduleGraphs = null;
-               totestscheduleGraphs.elementAt(ii).clear();
-               for(int i = 0; i < schedulings.size(); i++) {
-                   schedulings.elementAt(i).clear();
-               }
-               schedulings.clear();
-               selectedSchedulings.clear();
-               for(int i = 0; i < selectedSimExeGraphs.size(); i++) {
-                   selectedSimExeGraphs.elementAt(i).clear();
-               }
-               selectedSimExeGraphs.clear();
-
-               output2.println(((float)bestexetime/1000000));
-               System.out.print("=========================================================\n");
-           }
-
-           if(scheduleGraphs != null) {
-               scheduleGraphs.clear();
-           }
-           scheduleGraphs = null;
-           totestscheduleGraphs = null;
-           for(int i = 0; i < schedulings.size(); i++) {
-               schedulings.elementAt(i).clear();
-           }
-           schedulings.clear();
-           schedulings = null;
-           selectedSchedulings.clear();
-           selectedSchedulings = null;
-           selectedSimExeGraphs.clear();
-           selectedSimExeGraphs = null;
-           multiparamtds.clear();
-           multiparamtds = null;
-
-
-           // Close the streams.
-           try {
-               output.close();
-               stdout.close();
-               output = null;
-               stdout = null;
-               System.setOut(origOut);
-           } catch (Exception e) {
-               origOut.println("Redirect:  Unable to close files!");
-           }
-       }
-
-       return;
+
+    // map the tasks associated to SimExecutionedges to original 
+    // ClassNode in the ScheduleGraph and split them from previous 
+    // ScheduleGraph
+    Vector<ScheduleNode> tocombines = new Vector<ScheduleNode>();
+    Iterator<Integer> it_cores = tooptimize.keySet().iterator();
+    while(it_cores.hasNext()) {
+      int corenum = it_cores.next();
+      Vector<SimExecutionEdge> candidatetasks = 
+        tooptimize.get(corenum);
+      for(int i = 0; i < candidatetasks.size(); i++) {
+        TaskDescriptor td = candidatetasks.elementAt(i).getTd();
+        // TODO: currently do not consider multi-param tasks
+        if(td.numParameters() == 1) {
+          ClassDescriptor cd = td.getParamType(0).getClassDesc();
+          ScheduleNode snode = newscheduleGraph.elementAt(corenum); // corresponding ScheduleNode
+          Iterator<ClassNode> it_cnodes = snode.getClassNodesIterator();
+          ClassNode tosplit = null;
+          while(it_cnodes.hasNext()) {
+            ClassNode cnode = it_cnodes.next();
+            if(cnode.getClassDescriptor().equals(cd)) {
+              tosplit= cnode;
+              break;
+            }
+          }
+          it_cnodes = null;
+          // split the node
+          ScheduleNode splitnode = snode.spliteClassNode(tosplit);
+          newscheduleGraph.add(splitnode);
+          tocombines.add(splitnode);
+          tosplit = null;
+        }
+      }
+      candidatetasks = null;
     }
+    it_cores = null;
 
-    private Vector<Vector<ScheduleNode>> optimizeScheduling(Vector<Vector<ScheduleNode>> scheduleGraphs,
-                                                           Vector<Integer> selectedScheduleGraphs,
-                                                           Vector<Vector<SimExecutionEdge>> selectedSimExeGraphs,
-                                                           int gid,
-                                                           int count) {
-       if(this.coreNum == 1) {
-           // single core
-           return null;
-       }
-       
-       Vector<Vector<ScheduleNode>> optimizeschedulegraphs = null;
-       int lgid = gid;
-       int left = count;
-
-       for(int i = 0; i < selectedScheduleGraphs.size(); i++) {
-           Vector<ScheduleNode> schedulegraph = scheduleGraphs.elementAt(
-                   selectedScheduleGraphs.elementAt(i));
-           Vector<SimExecutionEdge> simexegraph = selectedSimExeGraphs.elementAt(i);
-           Vector<SimExecutionEdge> criticalPath = analyzeCriticalPath(simexegraph); 
-           Vector<Vector<ScheduleNode>> tmposchedulegraphs = optimizeCriticalPath(schedulegraph, 
-                                                                                  criticalPath,
-                                                                                  lgid,
-                                                                                  left);
-           if(tmposchedulegraphs != null) {
-               if(optimizeschedulegraphs == null) {
-                   optimizeschedulegraphs = new Vector<Vector<ScheduleNode>>();
-               }
-               optimizeschedulegraphs.addAll(tmposchedulegraphs);
-               lgid += tmposchedulegraphs.size();
-               left -= tmposchedulegraphs.size();
-               if(left == 0) {
-                   schedulegraph = null;
-                   simexegraph = null;
-                   criticalPath = null;
-                   tmposchedulegraphs = null;
-                   break;
-               }
-           }
-           schedulegraph = null;
-           simexegraph = null;
-           criticalPath = null;
-           tmposchedulegraphs = null;
-       }
-
-       return optimizeschedulegraphs;
+    if(tocombines.size() == 0) {
+      return optimizeschedulegraphs;
     }
 
-    private Vector<SimExecutionEdge> analyzeCriticalPath(Vector<SimExecutionEdge> simexegraph) {
-       // first figure out the critical path
-       Vector<SimExecutionEdge> criticalPath = new Vector<SimExecutionEdge>();
-       SimExecutionNode senode = (SimExecutionNode)simexegraph.elementAt(0).getSource();
-       getCriticalPath(senode, criticalPath);
-       computeBestStartPoint(criticalPath);
-       
-       return criticalPath;
+    SchedulingUtil.assignCids(newscheduleGraph);
+
+    // get all the ScheduleEdge
+    Vector<ScheduleEdge> scheduleEdges = new Vector<ScheduleEdge>();
+    for(int i= 0; i < newscheduleGraph.size(); i++) {
+      scheduleEdges.addAll(
+          (Vector<ScheduleEdge>)newscheduleGraph.elementAt(i).getEdgeVector());
     }
-    
-    // TODO: currently only get one critical path. It's possible that there are
-    // multiple critical paths and some of them can not be optimized while others
-    // can. Need to fix up for this situation.
-    private long getCriticalPath(SimExecutionNode senode, 
-                                Vector<SimExecutionEdge> criticalPath) {
-       Vector<SimExecutionEdge> edges = (Vector<SimExecutionEdge>)senode.getEdgeVector();
-       if((edges == null) || (edges.size() == 0)) {
-           edges = null;
-           return 0;
-       }
-       Vector<SimExecutionEdge> subcriticalpath = new Vector<SimExecutionEdge>();
-       SimExecutionEdge edge = edges.elementAt(0);
-       long sum = edge.getWeight() + getCriticalPath((SimExecutionNode)edge.getTarget(),
-                                                     subcriticalpath);
-       criticalPath.clear();
-       criticalPath.add(edge);
-       criticalPath.addAll(subcriticalpath);
-       for(int i = 1; i < edges.size(); i++) {
-           edge = edges.elementAt(i);
-           subcriticalpath.clear();
-           long tmpsum = edge.getWeight() 
-                      + getCriticalPath((SimExecutionNode)edge.getTarget(),
-                                         subcriticalpath);
-           if(tmpsum > sum) {
-               // find a longer path
-               sum = tmpsum;
-               criticalPath.clear();
-               criticalPath.add(edge);
-               criticalPath.addAll(subcriticalpath);
-           }
-       }
-       edges = null;
-       subcriticalpath.clear();
-       subcriticalpath = null;
-       return sum;
+
+    Vector<Vector<ScheduleNode>> rootNodes =  
+      SchedulingUtil.rangeScheduleNodes(roots);
+    Vector<Vector<ScheduleNode>> nodes2combine = 
+      SchedulingUtil.rangeScheduleNodes(tocombines);
+
+    CombinationUtil.CombineGenerator cGen = 
+      CombinationUtil.allocateCombineGenerator(rootNodes, nodes2combine);
+    Random rand = new Random();
+    while ((left > 0) && (cGen.nextGen())) {
+      if(Math.abs(rand.nextInt()) % 100 > this.generateThreshold) {
+        Vector<Vector<CombinationUtil.Combine>> combine = cGen.getCombine();
+        Vector<ScheduleNode> sNodes = 
+          SchedulingUtil.generateScheduleGraph(this.state,
+                                               newscheduleGraph,
+                                               scheduleEdges,
+                                               rootNodes, 
+                                               combine, 
+                                               lgid++);
+        if(optimizeschedulegraphs == null) {
+          optimizeschedulegraphs = new Vector<Vector<ScheduleNode>>();
+        }
+        optimizeschedulegraphs.add(sNodes);
+        combine = null;
+        sNodes = null;
+        left--;
+      }
     }
-    
-    private void computeBestStartPoint(Vector<SimExecutionEdge> criticalPath) {
-       // calculate the earliest start time of each task on the critial path
-       for(int i = 0; i < criticalPath.size(); i++) {
-           SimExecutionEdge seedge = criticalPath.elementAt(i);
-           Vector<SimExecutionEdge> predicates = seedge.getPredicates();
-           if(predicates != null) {
-               // have predicates
-               long starttime = 0;
-               // check the latest finish time of all the predicates
-               for(int j = 0; j < predicates.size(); j++) {
-                   SimExecutionEdge predicate = predicates.elementAt(j);
-                   long tmptime = predicate.getBestStartPoint() + predicate.getWeight();
-                   if(tmptime > starttime) {
-                       starttime = tmptime;
-                       seedge.setLastpredicateEdge(predicate);
-                       if(predicate.getTd() != null) {
-                           seedge.setLastpredicateNode((SimExecutionNode)predicate.getTarget());
-                       } else {
-                           // transfer edge
-                           seedge.setLastpredicateNode((SimExecutionNode)predicate.getSource());
-                       }
-                   }
-               }
-               seedge.setBestStartPoint(starttime);
-           } else if(seedge.getSource().getInedgeVector().size() > 0) {
-               // should have only one in edge
-               long starttime = ((SimExecutionNode)seedge.getSource()).getTimepoint();
-               seedge.setBestStartPoint(starttime);
-           } else {
-               // no predicates
-               seedge.setBestStartPoint(0);
-           }
-           predicates = null;
-       }
+    cGen.clear();
+    rootNodes = null;
+    nodes2combine = null;
+    newscheduleGraph = null;
+    scheduleEdges.clear();
+    scheduleEdges = null;
+    roots = null;
+    tocombines = null;
+
+    return optimizeschedulegraphs;
+  }
+
+  private Vector<ScheduleNode> 
+  cloneScheduleGraph(Vector<ScheduleNode> scheduleGraph,
+                     int gid) {
+    Vector<ScheduleNode> result = new Vector<ScheduleNode>();
+
+    // get all the ScheduleEdge
+    Vector<ScheduleEdge> scheduleEdges = new Vector<ScheduleEdge>();
+    for(int i= 0; i < scheduleGraph.size(); i++) {
+      scheduleEdges.addAll(
+          (Vector<ScheduleEdge>)scheduleGraph.elementAt(i).getEdgeVector());
     }
-
-    private Vector<Vector<ScheduleNode>> optimizeCriticalPath(Vector<ScheduleNode> scheduleGraph,
-                                                             Vector<SimExecutionEdge> criticalPath,
-                                                             int gid,
-                                                             int count) {
-       Vector<Vector<ScheduleNode>> optimizeschedulegraphs = null;
-       int lgid = gid;
-       int left = count;
-       
-       // for test, print out the criticalPath
-       if(this.state.PRINTCRITICALPATH) {
-           SchedulingUtil.printCriticalPath(this.state.outputdir + "criticalpath_" + lgid + ".dot", 
-                                            criticalPath);
-       }
-       
-       // first check all seedges whose real start point is late than predicted
-       // earliest start time and group them
-       long opcheckpoint = Long.MAX_VALUE;
-       Vector<Integer> sparecores = null;
-       // group according to core index
-       Hashtable<Long, Hashtable<Integer, Vector<SimExecutionEdge>>> toselects = 
-           new Hashtable<Long, Hashtable<Integer, Vector<SimExecutionEdge>>>();
-       Random rand = new Random();
-       for(int i = 0; i < criticalPath.size(); i++) {
-           SimExecutionEdge seedge = criticalPath.elementAt(i);
-           long starttime = seedge.getBestStartPoint();
-           if(starttime < ((SimExecutionNode)seedge.getSource()).getTimepoint()) {
-               // no restrictions due to data dependencies
-               // have potential to be parallelled and start execution earlier
-               seedge.setFixedTime(false);
-               // consider to optimize it only when its predicates can NOT 
-               // be optimized, otherwise first considering optimize its predicates
-               SimExecutionEdge lastpredicateedge = seedge.getLastpredicateEdge();
-               if(lastpredicateedge.isFixedTime()) {                   
-                   int corenum = seedge.getCoreNum();
-                   if(!toselects.containsKey(starttime)) {
-                       toselects.put(starttime, 
-                               new Hashtable<Integer, Vector<SimExecutionEdge>>());
-                   }
-                   if(!toselects.get(starttime).containsKey(corenum)) {
-                       toselects.get(starttime).put(corenum, 
-                               new Vector<SimExecutionEdge>());
-                   }
-                   toselects.get(starttime).get(corenum).add(seedge);
-               }
-           }
-       }
-       
-       // Randomly choose the tasks to optimize(previously only 
-       // consider the tasks with smallest best start time)
-       Vector<Long> keys = new Vector<Long>(toselects.keySet());
-       do{
-           int length = keys.size();
-           if(length == 0) {
-               return optimizeschedulegraphs;
-           }
-           int tochoose = Math.abs(rand.nextInt()) % length;
-           opcheckpoint = (keys.elementAt(tochoose)).longValue();
-           keys.removeElementAt(tochoose);
-           Hashtable<Integer, Vector<SimExecutionEdge>> tooptimize = 
-               toselects.get(opcheckpoint);
-           SimExecutionEdge seedge = tooptimize.values().iterator().next().elementAt(0);
-           SimExecutionNode lastpredicatenode = seedge.getLastpredicateNode();
-           SimExecutionEdge lastpredicateedge = seedge.getLastpredicateEdge();
-           long timepoint = lastpredicatenode.getTimepoint();
-           if(lastpredicateedge.getTd() == null) {
-               // transfer edge
-               timepoint += lastpredicateedge.getWeight();
-           }
-           // mapping to critical path
-           for(int index = 0; index < criticalPath.size(); index++) {
-               SimExecutionEdge tmpseedge = criticalPath.elementAt(index);
-               SimExecutionNode tmpsenode = 
-                   (SimExecutionNode)tmpseedge.getTarget();
-               if(tmpsenode.getTimepoint() > timepoint) {
-                   // get the spare core info
-                   sparecores = tmpsenode.getSpareCores();
-                   break;
-               }
-           }
-
-           if(tooptimize.size() > 0) {
-               Iterator<Integer> it_cores = tooptimize.keySet().iterator();
-               // check if it is possible to optimize these tasks
-               if((sparecores == null) || (sparecores.size() == 0)) {
-                   // lack of spare cores
-                   while(it_cores.hasNext()) {
-                       int corenum = it_cores.next();
-                       Vector<SimExecutionEdge> tmptasks = tooptimize.get(corenum);
-                       // group the task instantiations according to whether it 
-                       // has backward data dependences or not
-                       Vector<SimExecutionEdge> candidatetasks = new Vector();
-                       for(int ii= 0; ii < tmptasks.size(); ii++) {
-                           SimExecutionEdge tmpseedge = tmptasks.elementAt(ii);
-                           SimExecutionNode target = (SimExecutionNode)tmpseedge.getTarget();
-                           Vector<SimExecutionEdge> children = 
-                               (Vector<SimExecutionEdge>)target.getEdgeVector();
-                           int jj = 0;
-                           for(; jj < children.size(); jj++) {
-                               SimExecutionEdge tmpedge = children.elementAt(jj);
-                               if(tmpedge.getTd() != null) {
-                                   Vector<SimExecutionEdge> predicates = 
-                                       tmpedge.getPredicates();
-                                   if((predicates != null) && 
-                                           (predicates.contains(tmpseedge))) {
-                                       break;
-                                   }
-                                   predicates = null;
-                               } else if(tmpedge.getWeight() != 0) {
-                                   // transfer edge
-                                   if(((SimExecutionNode)tmpedge.getTarget()).getTimepoint() 
-                                           == tmpedge.getWeight() + target.getTimepoint()) {
-                                       break;
-                                   }
-                               }
-                           }
-                           if(jj == children.size()) {
-                               candidatetasks.add(tmpseedge);
-                           }
-                       }
-                       if((candidatetasks.size() > 0) && 
-                               (candidatetasks.size() < tmptasks.size())) {
-                           // there are less important tasks which have no backward
-                           // data dependences at this timepoint, try to change 
-                           // original execution order
-                           Hashtable<Integer, Vector<SimExecutionEdge>> tooptimize2 = 
-                               new Hashtable<Integer, Vector<SimExecutionEdge>>();
-                           tooptimize2.put(corenum, candidatetasks);
-                           Vector<Vector<ScheduleNode>> ops = innerOptimizeCriticalPath(scheduleGraph,
-                                   tooptimize2,
-                                   null,
-                                   lgid,
-                                   left);
-                           if(ops != null) {
-                               if(optimizeschedulegraphs == null) {
-                                   optimizeschedulegraphs = new Vector<Vector<ScheduleNode>>();
-                               }
-                               optimizeschedulegraphs.addAll(ops);
-                               lgid += ops.size();
-                               left -= ops.size();
-                           }
-                           tooptimize2 = null;
-                           ops = null;
-                       }
-                       tmptasks = null;
-                       candidatetasks = null;
-                   }
-
-                   if(left == 0) {
-                       it_cores = null;
-                       return optimizeschedulegraphs;
-                   }
-
-                   // flush the dependences and earliest start time
-                   it_cores = tooptimize.keySet().iterator();
-                   while(it_cores.hasNext()) {
-                       int corenum = it_cores.next();
-                       Vector<SimExecutionEdge> edgevec = 
-                           tooptimize.get(corenum);
-                       for(int j = 0; j < edgevec.size(); j++) {
-                           SimExecutionEdge edge = edgevec.elementAt(j);
-                           lastpredicateedge = edge.getLastpredicateEdge();
-                           lastpredicatenode = edge.getLastpredicateNode();
-                           // if(edge.getCoreNum() != lastpredicate.getCoreNum()) // should never hit this
-                           timepoint = lastpredicatenode.getTimepoint();
-                           if(lastpredicateedge.getTd() == null) {
-                               // transfer edge
-                               timepoint += lastpredicateedge.getWeight();
-                           }
-                           // mapping to critical path
-                           for(int index = 0; index < criticalPath.size(); index++) {
-                               SimExecutionEdge tmpseedge = criticalPath.elementAt(index);
-                               SimExecutionNode tmpsenode = 
-                                   (SimExecutionNode)tmpseedge.getTarget();
-                               if(tmpsenode.getTimepoint() > timepoint) {
-                                   // update the predicate info
-                                   if(edge.getPredicates() != null) {
-                                       edge.getPredicates().remove(lastpredicateedge);
-                                   }
-                                   edge.addPredicate(criticalPath.elementAt(index));
-                                   break;
-                               }
-                           }
-                       }
-                       edgevec = null;
-                   }
-                   it_cores = null;
-                   computeBestStartPoint(criticalPath);
-                   Vector<Vector<ScheduleNode>> ops = optimizeCriticalPath(scheduleGraph, 
-                           criticalPath, 
-                           lgid,
-                           left);
-                   if(ops != null) {
-                       if(optimizeschedulegraphs == null) {
-                           optimizeschedulegraphs = new Vector<Vector<ScheduleNode>>();
-                       }
-                       optimizeschedulegraphs.addAll(ops);
-                       lgid += ops.size();
-                       left -= ops.size();
-                   }
-                   ops = null;
-               } else {
-                   // there are spare cores, try to reorganize the tasks to the spare 
-                   // cores
-                   Vector<Vector<ScheduleNode>> ops = innerOptimizeCriticalPath(scheduleGraph,
-                           tooptimize,
-                           sparecores,
-                           lgid,
-                           left);
-                   if(ops != null) {
-                       if(optimizeschedulegraphs == null) {
-                           optimizeschedulegraphs = new Vector<Vector<ScheduleNode>>();
-                       }
-                       optimizeschedulegraphs.addAll(ops);
-                       lgid += ops.size();
-                       left -= ops.size();
-                   }
-                   ops = null;
-               }
-           }
-           sparecores = null;
-           tooptimize.clear();
-           tooptimize = null;
-       }while(left > 0);
-       toselects.clear();
-       toselects = null;
-
-       return optimizeschedulegraphs;
+    Hashtable<ScheduleNode, Hashtable<ClassNode, ClassNode>> sn2hash = 
+      new Hashtable<ScheduleNode, Hashtable<ClassNode, ClassNode>>();
+    Hashtable<ScheduleNode, ScheduleNode> sn2sn = 
+      new Hashtable<ScheduleNode, ScheduleNode>();
+    SchedulingUtil.cloneScheduleGraph(scheduleGraph,
+        scheduleEdges,
+        sn2hash,
+        sn2sn,
+        result,
+        gid);
+
+    SchedulingUtil.assignCids(result);
+    scheduleEdges.clear();
+    scheduleEdges = null;
+    sn2hash.clear();
+    sn2hash = null;
+    sn2sn.clear();
+    sn2sn = null;
+
+    return result;
+  }
+
+  private Vector<Schedule> 
+  generateScheduling(Vector<ScheduleNode> scheduleGraph,
+                     Hashtable<TaskDescriptor, ClassDescriptor> td2maincd) {
+    Hashtable<TaskDescriptor, Vector<Schedule>> td2cores = 
+      new Hashtable<TaskDescriptor, Vector<Schedule>>(); // multiparam tasks reside on which cores
+    Vector<Schedule> scheduling = new Vector<Schedule>(scheduleGraph.size());
+    // for each ScheduleNode create a schedule node representing a core
+    Hashtable<ScheduleNode, Integer> sn2coreNum = 
+      new Hashtable<ScheduleNode, Integer>();
+    Hashtable<TaskDescriptor, Integer> td2maincore = 
+      new Hashtable<TaskDescriptor, Integer>();
+    Hashtable<TaskDescriptor, Vector<Schedule>> td2allycores = 
+      new Hashtable<TaskDescriptor, Vector<Schedule>>();
+    int j = 0;
+    for(j = 0; j < scheduleGraph.size(); j++) {
+      sn2coreNum.put(scheduleGraph.elementAt(j), j);
     }
-    
-    private Vector<Vector<ScheduleNode>> innerOptimizeCriticalPath(Vector<ScheduleNode> scheduleGraph,
-                                                                  Hashtable<Integer, Vector<SimExecutionEdge>> tooptimize,
-                                                                  Vector<Integer> sparecores,
-                                                                  int gid,
-                                                                  int count) {
-       int lgid = gid;
-       int left = count;
-       Vector<Vector<ScheduleNode>> optimizeschedulegraphs = null;
-       
-       // first clone the whole graph
-       Vector<ScheduleNode> newscheduleGraph = 
-           cloneScheduleGraph(scheduleGraph, lgid);
-
-       // these nodes are root nodes
-       Vector<ScheduleNode> roots = new Vector<ScheduleNode>();
-       for(int i = 0; i < newscheduleGraph.size(); i++) {
-           if((sparecores == null) || (sparecores.contains(i))) {
-               roots.add(newscheduleGraph.elementAt(i));
-           }
-       }
-
-       // map the tasks associated to SimExecutionedges to original 
-       // ClassNode in the ScheduleGraph and split them from previous 
-       // ScheduleGraph
-       Vector<ScheduleNode> tocombines = new Vector<ScheduleNode>();
-       Iterator<Integer> it_cores = tooptimize.keySet().iterator();
-       while(it_cores.hasNext()) {
-           int corenum = it_cores.next();
-           Vector<SimExecutionEdge> candidatetasks = 
-               tooptimize.get(corenum);
-           for(int i = 0; i < candidatetasks.size(); i++) {
-               TaskDescriptor td = candidatetasks.elementAt(i).getTd();
-               // TODO: currently do not consider multi-param tasks
-               if(td.numParameters() == 1) {
-                   ClassDescriptor cd = td.getParamType(0).getClassDesc();
-                   ScheduleNode snode = newscheduleGraph.elementAt(corenum); // corresponding ScheduleNode
-                   Iterator<ClassNode> it_cnodes = snode.getClassNodesIterator();
-                   ClassNode tosplit = null;
-                   while(it_cnodes.hasNext()) {
-                       ClassNode cnode = it_cnodes.next();
-                       if(cnode.getClassDescriptor().equals(cd)) {
-                           tosplit= cnode;
-                           break;
-                       }
-                   }
-                   it_cnodes = null;
-                   // split the node
-                   ScheduleNode splitnode = snode.spliteClassNode(tosplit);
-                   newscheduleGraph.add(splitnode);
-                   tocombines.add(splitnode);
-                   tosplit = null;
-               }
-           }
-           candidatetasks = null;
-       }
-       it_cores = null;
-       
-       if(tocombines.size() == 0) {
-           return optimizeschedulegraphs;
-       }
-
-       SchedulingUtil.assignCids(newscheduleGraph);
-
-       // get all the ScheduleEdge
-       Vector<ScheduleEdge> scheduleEdges = new Vector<ScheduleEdge>();
-       for(int i= 0; i < newscheduleGraph.size(); i++) {
-           scheduleEdges.addAll((Vector<ScheduleEdge>)newscheduleGraph.elementAt(i).getEdgeVector());
-       }
-
-       Vector<Vector<ScheduleNode>> rootNodes =  
-           SchedulingUtil.rangeScheduleNodes(roots);
-       Vector<Vector<ScheduleNode>> nodes2combine = 
-           SchedulingUtil.rangeScheduleNodes(tocombines);
-
-       CombinationUtil.CombineGenerator cGen = 
-           CombinationUtil.allocateCombineGenerator(rootNodes, nodes2combine);
-       Random rand = new Random();
-       while ((left > 0) && (cGen.nextGen())) {
-           if(Math.abs(rand.nextInt()) % 100 > this.generateThreshold) {
-               Vector<Vector<CombinationUtil.Combine>> combine = cGen.getCombine();
-               Vector<ScheduleNode> sNodes = SchedulingUtil.generateScheduleGraph(this.state,
-                                                                                  newscheduleGraph,
-                                                                                  scheduleEdges,
-                                                                                  rootNodes, 
-                                                                                  combine, 
-                                                                                  lgid++);
-               if(optimizeschedulegraphs == null) {
-                   optimizeschedulegraphs = new Vector<Vector<ScheduleNode>>();
-               }
-               optimizeschedulegraphs.add(sNodes);
-               combine = null;
-               sNodes = null;
-               left--;
-           }
-       }
-       cGen.clear();
-       rootNodes = null;
-       nodes2combine = null;
-       newscheduleGraph = null;
-       scheduleEdges.clear();
-       scheduleEdges = null;
-       roots = null;
-       tocombines = null;
-       
-       return optimizeschedulegraphs;
+    int startupcore = 0;
+    boolean setstartupcore = false;
+    Schedule startup = null;
+    int gid = scheduleGraph.elementAt(0).getGid();
+    for(j = 0; j < scheduleGraph.size(); j++) {
+      Schedule tmpSchedule = new Schedule(j, gid);
+      ScheduleNode sn = scheduleGraph.elementAt(j);
+
+      Vector<ClassNode> cNodes = sn.getClassNodes();
+      for(int k = 0; k < cNodes.size(); k++) {
+        ClassNode cNode = cNodes.elementAt(k);
+        ClassDescriptor cd = cNode.getClassDescriptor();
+        Iterator it_flags = cNode.getFlags();
+        while(it_flags.hasNext()) {
+          FlagState fs = (FlagState)it_flags.next();
+          Iterator it_edges = fs.edges();
+          while(it_edges.hasNext()) {
+            FEdge tmpfe = (FEdge)it_edges.next();
+            TaskDescriptor td = (tmpfe).getTask();
+            boolean contain = true;
+            if(td.numParameters() > 1) {
+              // td is a multi-param task, check if this core contains the 
+              // main cd of it
+              if(td2maincd.get(td).equals(cd)) {
+                contain = true;
+                td2maincore.put(td, tmpSchedule.getCoreNum());
+              } else {
+                contain = false;
+                if(!td2allycores.containsKey(td)) {
+                  td2allycores.put(td, new Vector<Schedule>());
+                }
+                Vector<Schedule> allycores = td2allycores.get(td);
+                if(!allycores.contains(tmpSchedule)) {
+                  allycores.addElement(tmpSchedule);
+                }
+                allycores = null;
+              }
+              // If the FlagState can be fed to some multi-param tasks,
+              // need to record corresponding ally cores later.  
+              tmpSchedule.addFState4TD(td, fs);
+            }
+            if(contain) {
+              tmpSchedule.addTask(td);
+              if(!td2cores.containsKey(td)) {
+                td2cores.put(td, new Vector<Schedule>());
+              }
+              Vector<Schedule> tmpcores = td2cores.get(td);
+              if(!tmpcores.contains(tmpSchedule)) {
+                tmpcores.add(tmpSchedule);
+              }
+              tmpcores = null;
+            }
+            if(td.getParamType(0).getClassDesc().getSymbol().equals(
+                TypeUtil.StartupClass)) {
+              assert(!setstartupcore);
+              startupcore = j;
+              startup = tmpSchedule;
+              setstartupcore = true;
+            }
+          }
+          it_edges = null;
+        }
+        it_flags = null;
+      }
+      cNodes = null;
+
+      // For each of the ScheduleEdge out of this ScheduleNode, add the 
+      // target ScheduleNode into the queue inside sn
+      Iterator it_edges = sn.edges();
+      while(it_edges.hasNext()) {
+        ScheduleEdge se = (ScheduleEdge)it_edges.next();
+        ScheduleNode target = (ScheduleNode)se.getTarget();
+        Integer targetcore = sn2coreNum.get(target);
+        switch(se.getType()) {
+        case ScheduleEdge.NEWEDGE: {
+          FlagState fs = se.getFstate();
+          // Check if the new obj could be fed to some 
+          // multi-parameter task, if so, add for ally cores 
+          // checking
+          Iterator it = fs.edges();
+          boolean canTriggerSTask = false; // Flag indicates if fs can trigger
+                                           // some single-param task
+          while(it.hasNext()) {
+            TaskDescriptor td = ((FEdge)it.next()).getTask();
+            if(td.numParameters() > 1) {
+              tmpSchedule.addFState4TD(td, fs);  // TODO    
+            } else {
+              canTriggerSTask = true;
+            }
+          }
+          for(int k = 0; k < se.getNewRate(); k++) {
+            if(canTriggerSTask) {
+              // Only transfer the obj when it can trigger some single-parm task
+              // TODO: ensure that multi-param tasks have these objects
+              tmpSchedule.addTargetCore(fs, targetcore);
+            }
+          }
+          break;
+        }
+
+        case ScheduleEdge.TRANSEDGE: {
+          // 'transmit' edge
+          tmpSchedule.addTargetCore(se.getFstate(), 
+                                    targetcore, 
+                                    se.getTargetFState());
+          // check if missed some FlagState associated with some 
+          // multi-parameter task, which has been cloned when 
+          // splitting a ClassNode
+          FlagState fs = se.getSourceFState();
+          FlagState tfs = se.getTargetFState();
+          Iterator it = tfs.edges();
+          while(it.hasNext()) {
+            TaskDescriptor td = ((FEdge)it.next()).getTask();
+            if(td.numParameters() > 1) {
+              tmpSchedule.addFState4TD(td, fs);
+            }
+          }
+          break;
+        }
+        }
+      }
+      it_edges = sn.getScheduleEdgesIterator();
+      while(it_edges.hasNext()) {
+        ScheduleEdge se = (ScheduleEdge)it_edges.next();
+        switch(se.getType()) {
+        case ScheduleEdge.NEWEDGE: {
+          // TODO, added 09/07/06
+          FlagState fs = se.getFstate();
+          // Check if the new obj could be fed to some 
+          // multi-parameter task, if so, add for ally cores 
+          // checking
+          Iterator it = fs.edges();
+          boolean canTriggerSTask = false; // Flag indicates if fs can trigger
+                                           // some single-param task
+          while(it.hasNext()) {
+            TaskDescriptor td = ((FEdge)it.next()).getTask();
+            if(td.numParameters() > 1) {
+              tmpSchedule.addFState4TD(td, fs);  // TODO    
+            } else {
+              canTriggerSTask = true;
+            }
+          }
+          for(int k = 0; k < se.getNewRate(); k++) {
+            if(canTriggerSTask) {
+              tmpSchedule.addTargetCore(se.getFstate(), j);
+            }
+          }
+          break;
+        }
+
+        case ScheduleEdge.TRANSEDGE: {
+          // 'transmit' edge
+          tmpSchedule.addTargetCore(se.getFstate(), 
+                                    j, 
+                                    se.getTargetFState());
+          break;
+        }
+        }
+      }
+      it_edges = null;
+      scheduling.add(tmpSchedule);
     }
-    
-    private Vector<ScheduleNode> cloneScheduleGraph(Vector<ScheduleNode> scheduleGraph,
-                                                   int gid) {
-       Vector<ScheduleNode> result = new Vector<ScheduleNode>();
-       
-       // get all the ScheduleEdge
-       Vector<ScheduleEdge> scheduleEdges = new Vector<ScheduleEdge>();
-       for(int i= 0; i < scheduleGraph.size(); i++) {
-           scheduleEdges.addAll((Vector<ScheduleEdge>)scheduleGraph.elementAt(i).getEdgeVector());
-       }
-       Hashtable<ScheduleNode, Hashtable<ClassNode, ClassNode>> sn2hash = 
-           new Hashtable<ScheduleNode, Hashtable<ClassNode, ClassNode>>();
-       Hashtable<ScheduleNode, ScheduleNode> sn2sn = 
-           new Hashtable<ScheduleNode, ScheduleNode>();
-       SchedulingUtil.cloneScheduleGraph(scheduleGraph,
-                                         scheduleEdges,
-                                         sn2hash,
-                                         sn2sn,
-                                         result,
-                                         gid);
-       
-       SchedulingUtil.assignCids(result);
-       scheduleEdges.clear();
-       scheduleEdges = null;
-       sn2hash.clear();
-       sn2hash = null;
-       sn2sn.clear();
-       sn2sn = null;
-       
-       return result;
+
+    int number = this.coreNum;
+    if(scheduling.size() < number) {
+      number = scheduling.size();
     }
 
-    private Vector<Schedule> generateScheduling(Vector<ScheduleNode> scheduleGraph,
-                                               Vector<TaskDescriptor> multiparamtds) {
-       Hashtable<TaskDescriptor, Vector<Schedule>> td2cores = 
-           new Hashtable<TaskDescriptor, Vector<Schedule>>(); // multiparam tasks reside on which cores
-       Vector<Schedule> scheduling = new Vector<Schedule>(scheduleGraph.size());
-       // for each ScheduleNode create a schedule node representing a core
-       Hashtable<ScheduleNode, Integer> sn2coreNum = 
-           new Hashtable<ScheduleNode, Integer>();
-       int j = 0;
-       for(j = 0; j < scheduleGraph.size(); j++) {
-           sn2coreNum.put(scheduleGraph.elementAt(j), j);
-       }
-       int startupcore = 0;
-       boolean setstartupcore = false;
-       Schedule startup = null;
-       int gid = scheduleGraph.elementAt(0).getGid();
-       for(j = 0; j < scheduleGraph.size(); j++) {
-           Schedule tmpSchedule = new Schedule(j, gid);
-           ScheduleNode sn = scheduleGraph.elementAt(j);
-
-           Vector<ClassNode> cNodes = sn.getClassNodes();
-           for(int k = 0; k < cNodes.size(); k++) {
-               Iterator it_flags = cNodes.elementAt(k).getFlags();
-               Vector rootnodes = this.taskAnalysis.getRootNodes(
-                       cNodes.elementAt(k).getClassDescriptor());
-               while(it_flags.hasNext()) {
-                   FlagState fs = (FlagState)it_flags.next();
-                   Iterator it_edges = fs.edges();
-                   while(it_edges.hasNext()) {
-                       FEdge tmpfe = (FEdge)it_edges.next();
-                       TaskDescriptor td = (tmpfe).getTask();
-                       tmpSchedule.addTask(td);
-                       if(!td2cores.containsKey(td)) {
-                           td2cores.put(td, new Vector<Schedule>());
-                       }
-                       Vector<Schedule> tmpcores = td2cores.get(td);
-                       if(!tmpcores.contains(tmpSchedule)) {
-                           tmpcores.add(tmpSchedule);
-                       }
-                       tmpcores = null;
-                       // If the FlagState can be fed to some multi-param tasks,
-                       // need to record corresponding ally cores later.  
-                       if(td.numParameters() > 1) {
-                           tmpSchedule.addFState4TD(td, fs);
-                       }
-                       if(td.getParamType(0).getClassDesc().getSymbol().equals(
-                               TypeUtil.StartupClass)) {
-                           assert(!setstartupcore);
-                           startupcore = j;
-                           startup = tmpSchedule;
-                           setstartupcore = true;
-                       }
-                   }
-                   it_edges = null;
-               }
-               it_flags = null;
-           }
-           cNodes = null;
-
-           // For each of the ScheduleEdge out of this ScheduleNode, add the 
-           // target ScheduleNode into the queue inside sn
-           Iterator it_edges = sn.edges();
-           while(it_edges.hasNext()) {
-               ScheduleEdge se = (ScheduleEdge)it_edges.next();
-               ScheduleNode target = (ScheduleNode)se.getTarget();
-               Integer targetcore = sn2coreNum.get(target);
-               switch(se.getType()) {
-               case ScheduleEdge.NEWEDGE: {
-                   for(int k = 0; k < se.getNewRate(); k++) {
-                       FlagState fs = se.getFstate();
-                       tmpSchedule.addTargetCore(fs, targetcore);
-                       // Check if the new obj could be fed to some 
-                       // multi-parameter task, if so, add for ally cores 
-                       // checking
-                       Iterator it = fs.edges();
-                       while(it.hasNext()) {
-                           TaskDescriptor td = ((FEdge)it.next()).getTask();
-                           if(td.numParameters() > 1) {
-                               tmpSchedule.addFState4TD(td, fs);       
-                           }
-                       }
-                   }
-                   break;
-               }
-
-               case ScheduleEdge.TRANSEDGE: {
-                   // 'transmit' edge
-                   tmpSchedule.addTargetCore(se.getFstate(), 
-                                             targetcore, 
-                                             se.getTargetFState());
-                   // check if missed some FlagState associated with some 
-                   // multi-parameter task, which has been cloned when 
-                   // splitting a ClassNode
-                   FlagState fs = se.getSourceFState();
-                   FlagState tfs = se.getTargetFState();
-                   Iterator it = tfs.edges();
-                   while(it.hasNext()) {
-                       TaskDescriptor td = ((FEdge)it.next()).getTask();
-                       if(td.numParameters() > 1) {
-                           tmpSchedule.addFState4TD(td, fs);
-                       }
-                   }
-                   break;
-               }
-               }
-           }
-           it_edges = sn.getScheduleEdgesIterator();
-           while(it_edges.hasNext()) {
-               ScheduleEdge se = (ScheduleEdge)it_edges.next();
-               switch(se.getType()) {
-               case ScheduleEdge.NEWEDGE: {
-                   for(int k = 0; k < se.getNewRate(); k++) {
-                       tmpSchedule.addTargetCore(se.getFstate(), j);
-                   }
-                   break;
-               }
-
-               case ScheduleEdge.TRANSEDGE: {
-                   // 'transmit' edge
-                   tmpSchedule.addTargetCore(se.getFstate(), 
-                                             j, 
-                                             se.getTargetFState());
-                   break;
-               }
-               }
-           }
-           it_edges = null;
-           scheduling.add(tmpSchedule);
-       }
-
-       int number = this.coreNum;
-       if(scheduling.size() < number) {
-           number = scheduling.size();
-       }
-
-       // set up all the associate ally cores
-       if(multiparamtds.size() > 0) {
-           for(j = 0; j < multiparamtds.size(); ++j) {
-               TaskDescriptor td = multiparamtds.elementAt(j);
-               Vector<FEdge> fes = 
-                   (Vector<FEdge>) this.taskAnalysis.getFEdgesFromTD(td);
-               Vector<Schedule> cores = td2cores.get(td);
-               for(int k = 0; k < cores.size(); ++k) {
-                   Schedule tmpSchedule = cores.elementAt(k);
-
-                   // Make sure all the parameter objs of a multi-parameter 
-                   // task would be send to right place
-                   for(int h = 0; h < fes.size(); ++h) {
-                       FEdge tmpfe = fes.elementAt(h);
-                       FlagState tmpfs = (FlagState)tmpfe.getTarget();
-                       Vector<TaskDescriptor> tmptds = 
-                           new Vector<TaskDescriptor>();
-                       if((tmpSchedule.getTargetCoreTable() == null) 
-                               || (!tmpSchedule.getTargetCoreTable().containsKey(tmpfs))) {
-                           // add up all possible cores' info
-                           Iterator it_edges = tmpfs.edges();
-                           while(it_edges.hasNext()) {
-                               TaskDescriptor tmptd = ((FEdge)it_edges.next()).getTask();
-                               if(!tmptds.contains(tmptd)) {
-                                   tmptds.add(tmptd);
-                                   Vector<Schedule> tmpcores = td2cores.get(tmptd);
-                                   for(int m = 0; m < tmpcores.size(); ++m) {
-                                       if(m != tmpSchedule.getCoreNum()) {
-                                           // if the FlagState can be fed to some multi-param tasks,
-                                           // need to record corresponding ally cores later
-                                           if(tmptd.numParameters() > 1) {
-                                               tmpSchedule.addAllyCore(tmpfs, 
-                                                                       tmpcores.elementAt(m).getCoreNum());
-                                           } else {
-                                               tmpSchedule.addTargetCore(tmpfs, 
-                                                                         tmpcores.elementAt(m).getCoreNum());
-                                           }
-                                       }
-                                   }
-                                   tmpcores = null;
-                               }
-                           }
-                           it_edges = null;
-                       }
-                       tmptds = null;
-                   }
-
-                   // Make sure all objs which could be feed to a multi-parameter
-                   // task would be send to all the possible task instances
-                   if(cores.size() > 1) {
-                       Vector<FlagState> tmpfss = tmpSchedule.getFStates4TD(td);
-                       for(int h = 0; h < tmpfss.size(); ++h) {
-                           for(int l = 0; l < cores.size(); ++l) {
-                               if(l != k) {
-                                   tmpSchedule.addAllyCore(tmpfss.elementAt(h), 
-                                                           cores.elementAt(l).getCoreNum());
-                               }
-                           }
-                       }
-                       tmpfss = null;
-                   }
-               }
-               fes = null;
-               cores = null;
-           }
-       }
-       td2cores = null;
-       sn2coreNum = null;
-
-       return scheduling;
+    Iterator<TaskDescriptor> it_mptds = td2maincd.keySet().iterator();
+    // set up all the associate ally cores
+    while(it_mptds.hasNext()) {
+      TaskDescriptor td = it_mptds.next();
+      Vector<FEdge> fes = (Vector<FEdge>) this.taskAnalysis.getFEdgesFromTD(td);
+      Vector<Schedule> cores = td2cores.get(td); 
+      assert(cores.size() == 1); // should have only one core
+      for(int k = 0; k < cores.size(); ++k) {
+        Schedule tmpSchedule = cores.elementAt(k);
+
+        // Make sure all the parameter objs of a multi-parameter 
+        // task would be send to right place after the task finished
+        for(int h = 0; h < fes.size(); ++h) {
+          FEdge tmpfe = fes.elementAt(h);
+          FlagState tmpfs = (FlagState)tmpfe.getTarget();
+          Vector<TaskDescriptor> tmptds = new Vector<TaskDescriptor>();
+          if((tmpSchedule.getTargetCoreTable() == null) 
+              || (!tmpSchedule.getTargetCoreTable().containsKey(tmpfs))) {
+            // add up all possible cores' info
+            Iterator it_edges = tmpfs.edges();
+            while(it_edges.hasNext()) {
+              TaskDescriptor tmptd = ((FEdge)it_edges.next()).getTask();
+              if(!tmptds.contains(tmptd)) {
+                tmptds.add(tmptd);
+                Vector<Schedule> tmpcores = td2cores.get(tmptd);
+                for(int m = 0; m < tmpcores.size(); ++m) {
+                  Schedule target = tmpcores.elementAt(m);
+                  int targetcore = target.getCoreNum();
+                  int num = target.getTaskNum(tmptd);
+                  for(int n = 0; n < num; n++) {
+                    tmpSchedule.addTargetCore(tmpfs, targetcore);
+                  }
+                }
+                tmpcores = null;
+              }
+            }
+            it_edges = null;
+          }
+          tmptds = null;
+        }
+      }
+      fes = null;
+      cores = null;
+    }
+    // Make sure all objs which could be feed to a multi-parameter
+    // task would be send to all the possible task instances
+    it_mptds = td2allycores.keySet().iterator();
+    while(it_mptds.hasNext()) {
+      TaskDescriptor td = it_mptds.next();
+      Vector<Schedule> allycores = td2allycores.get(td);
+      for(int i = 0; i < allycores.size(); i++) {
+        Schedule tSchedule = allycores.elementAt(i);
+        Vector<FlagState> tmpfss = tSchedule.getFStates4TD(td);
+        int targetcore = td2maincore.get(td).intValue();
+        for(int h = 0; h < tmpfss.size(); ++h) {
+          tSchedule.addAllyCore(tmpfss.elementAt(h), targetcore);
+        }
+        tmpfss = null;
+      }
     }
+    it_mptds = null;
+    td2cores = null;
+    sn2coreNum = null;
+    td2maincore = null;
+    td2allycores = null;
+
+    return scheduling;
+  }
 }
index 27b2d7e16a568bba126938e1db773abb1f9afae4..100b85389d86ca8b49107ee13feb7ab19014094f 100644 (file)
@@ -14,6 +14,9 @@ public class ObjectSimulator {
   boolean shared;
   boolean hold;
   int version;
+  
+  // TODO, crack for KMeans
+  int counter;
 
   public ObjectSimulator(ClassDescriptor cd, 
                         FlagState currentFS) {
@@ -25,12 +28,25 @@ public class ObjectSimulator {
     this.shared = false;
     this.hold = false;
     this.version = 0;
+    if(this.cd.getSymbol().equals("Cluster")) {
+      this.counter = 83 * 2 + 1; //102 * 2 + 1; //83 * 2 + 1;
+    } else {
+      this.counter = -1;
+    }
   }
 
   public void applyEdge(FEdge fedge) {
     if(!currentFS.equals((FlagState)fedge.getTarget())) {
       this.changed = true;
       currentFS = (FlagState)fedge.getTarget();
+      if(this.counter > 0) {
+        //System.err.println(this.counter);
+        this.counter--;
+      }
+      if((this.cd.getSymbol().equals("Cluster")) && (this.counter == 0)) {
+        // go to end state
+        this.currentFS = new FlagState(this.cd);
+      }
     } else {
       this.changed = false;
     }
@@ -80,4 +96,4 @@ public class ObjectSimulator {
   public void increaseVersion() {
     this.version++;
   }
-}
\ No newline at end of file
+}
index 7adfc80eaffae9c587764f9120abb553cd31d0f5..6bd28bb4377603eb051b967b694aeaffceaa5060 100644 (file)
@@ -14,6 +14,7 @@ public class Schedule {
   private int gid;
   private int coreNum;
   private Vector<TaskDescriptor> tasks;
+  private Hashtable<TaskDescriptor, Integer> td2num;
   private Hashtable<FlagState, Queue<Integer>> targetCores;
   private Hashtable<FlagState, FlagState> targetFState;   // only affected by transimit edges
   private Hashtable<FlagState, Vector<Integer>> allyCores;
@@ -24,6 +25,7 @@ public class Schedule {
     this.gid = gid;
     this.coreNum = coreNum;
     this.tasks = null;
+    this.td2num = null;
     this.targetCores = null;
     this.targetFState = null;
     this.allyCores = null;
@@ -143,9 +145,17 @@ public class Schedule {
   public void addTask(TaskDescriptor task) {
     if(this.tasks == null) {
       this.tasks = new Vector<TaskDescriptor>();
+      this.td2num = new Hashtable<TaskDescriptor, Integer>();
     }
     if(!this.tasks.contains(task)) {
       this.tasks.add(task);
+      this.td2num.put(task, 1);
+    } else {
+      this.td2num.put(task, this.td2num.get(task).intValue()+1);
     }
   }
+  
+  public int getTaskNum(TaskDescriptor task) {
+    return this.td2num.get(task);
+  }
 }
index 6889c8f9a7cbd9ae497e4cc9f3e8dcc5ec79c580..52bd071c57a9c8c7ddd4f05317cdf26e16425376 100644 (file)
@@ -12,1216 +12,1284 @@ import java.util.*;
  */
 public class ScheduleAnalysis {
 
-    State state;
-    TaskAnalysis taskanalysis;
-    
-    Vector<ScheduleNode> scheduleNodes;
-    Vector<ClassNode> classNodes;
-    Vector<ScheduleEdge> scheduleEdges;
-    Hashtable<ClassDescriptor, ClassNode> cd2ClassNode;
-    boolean sorted = false;
-
-    int transThreshold;
-
-    int scheduleThreshold;
-    int coreNum;
-    Vector<Vector<ScheduleNode>> scheduleGraphs;
-
-    public ScheduleAnalysis(State state, 
-                           TaskAnalysis taskanalysis) {
-       this.state = state;
-       this.taskanalysis = taskanalysis;
-       this.scheduleNodes = new Vector<ScheduleNode>();
-       this.classNodes = new Vector<ClassNode>();
-       this.scheduleEdges = new Vector<ScheduleEdge>();
-       this.cd2ClassNode = new Hashtable<ClassDescriptor, ClassNode>();
-       this.transThreshold = 45; // defaultly 45
-       this.scheduleThreshold = 1000; // defaultly 1000
-       this.coreNum = -1;
-       this.scheduleGraphs = null;
+  State state;
+  TaskAnalysis taskanalysis;
+
+  Vector<ScheduleNode> scheduleNodes;
+  Vector<ClassNode> classNodes;
+  Vector<ScheduleEdge> scheduleEdges;
+  Hashtable<ClassDescriptor, ClassNode> cd2ClassNode;
+  boolean sorted = false;
+
+  int transThreshold;
+
+  int scheduleThreshold;
+  int coreNum;
+  Vector<Vector<ScheduleNode>> scheduleGraphs;
+  
+  // Main CD table for multi-param tasks
+  Hashtable<TaskDescriptor, ClassDescriptor> td2maincd;
+
+  public ScheduleAnalysis(State state, 
+                          TaskAnalysis taskanalysis) {
+    this.state = state;
+    this.taskanalysis = taskanalysis;
+    this.scheduleNodes = new Vector<ScheduleNode>();
+    this.classNodes = new Vector<ClassNode>();
+    this.scheduleEdges = new Vector<ScheduleEdge>();
+    this.cd2ClassNode = new Hashtable<ClassDescriptor, ClassNode>();
+    this.transThreshold = 45; // defaultly 45
+    this.scheduleThreshold = 1000; // defaultly 1000
+    this.coreNum = -1;
+    this.scheduleGraphs = null;
+    this.td2maincd = null;
+  }
+
+  public void setTransThreshold(int tt) {
+    this.transThreshold = tt;
+  }
+
+  public void setScheduleThreshold(int stt) {
+    this.scheduleThreshold = stt;
+  }
+
+  public int getCoreNum() {
+    return coreNum;
+  }
+
+  public void setCoreNum(int coreNum) {
+    this.coreNum = coreNum;
+  }
+
+  public Vector<Vector<ScheduleNode>> getScheduleGraphs() {
+    return this.scheduleGraphs;
+  }
+
+  // for test
+  public Vector<ScheduleEdge> getSEdges4Test() {
+    return scheduleEdges;
+  }
+
+  public Hashtable<TaskDescriptor, ClassDescriptor> getTd2maincd() {
+    return td2maincd;
+  }
+
+  public boolean schedule(int generateThreshold,
+                          Vector<TaskDescriptor> multiparamtds) {
+    boolean tooptimize = true;
+    try {
+      Vector<ScheduleEdge> toBreakDown = new Vector<ScheduleEdge>();
+      ScheduleNode startupNode = null;
+      
+      if((multiparamtds != null) || (multiparamtds.size() > 0)) {
+        this.td2maincd = new Hashtable<TaskDescriptor, ClassDescriptor>();
+      }
+
+      // necessary preparation such as read profile info etc.
+      preSchedule();
+      // build the CFSTG
+      startupNode = buildCFSTG(toBreakDown, multiparamtds);
+      // do Tree transform
+      treeTransform(toBreakDown, startupNode);
+      // do CFSTG transform to explore the potential parallelism as much
+      // as possible
+      CFSTGTransform();
+      // mappint to real multi-core processor
+      tooptimize = coreMapping(generateThreshold);
+      toBreakDown = null;
+    } catch (Exception e) {
+      e.printStackTrace();
+      System.exit(-1);
     }
+    return tooptimize;
+  }
+
+  private void preSchedule() {
+    this.checkBackEdge();
 
-    public void setTransThreshold(int tt) {
-       this.transThreshold = tt;
+    // set up profiling data
+    if(state.USEPROFILE) {
+      java.util.Hashtable<String, TaskInfo> taskinfos = 
+        new java.util.Hashtable<String, TaskInfo>();
+      this.readProfileInfo(taskinfos);
+
+      long tint = 0;
+      Iterator it_classes = state.getClassSymbolTable().getDescriptorsIterator();
+      while(it_classes.hasNext()) {
+        ClassDescriptor cd = (ClassDescriptor) it_classes.next();
+        if(cd.hasFlags()) {
+          Vector rootnodes = this.taskanalysis.getRootNodes(cd);
+          if(rootnodes!=null) {
+            Iterator it_rootnodes = rootnodes.iterator();
+            while(it_rootnodes.hasNext()) {
+              FlagState root = (FlagState)it_rootnodes.next();
+              Vector allocatingTasks = root.getAllocatingTasks();
+              if(allocatingTasks != null) {
+                for(int k = 0; k < allocatingTasks.size(); k++) {
+                  TaskDescriptor td =
+                    (TaskDescriptor)allocatingTasks.elementAt(k);
+                  Vector<FEdge> fev = this.taskanalysis.getFEdgesFromTD(td);
+                  int numEdges = fev.size();
+                  for(int j = 0; j < numEdges; j++) {
+                    FEdge pfe = fev.elementAt(j);
+                    TaskInfo taskinfo = taskinfos.get(td.getSymbol());
+                    tint = taskinfo.m_exetime[pfe.getTaskExitIndex()];
+                    pfe.setExeTime(tint);
+                    double idouble = 
+                      taskinfo.m_probability[pfe.getTaskExitIndex()];
+                    pfe.setProbability(idouble);
+                    int newRate = 0;
+                    int tindex = pfe.getTaskExitIndex();
+                    if((taskinfo.m_newobjinfo.elementAt(tindex) != null)
+                        && (taskinfo.m_newobjinfo.elementAt(tindex).containsKey(
+                            cd.getSymbol()))) {
+                      newRate = taskinfo.m_newobjinfo.elementAt(tindex).get(
+                          cd.getSymbol());
+                    }
+                    pfe.addNewObjInfo(cd, newRate, idouble);
+                    if(taskinfo.m_byObj != -1) {
+                      ((FlagState)pfe.getSource()).setByObj(taskinfo.m_byObj);
+                    }
+                  }
+                  fev = null;
+                }
+              }
+            }
+            it_rootnodes = null;
+          }
+          Iterator it_flags = this.taskanalysis.getFlagStates(cd).iterator();
+          while(it_flags.hasNext()) {
+            FlagState fs = (FlagState)it_flags.next();
+            Iterator it_edges = fs.edges();
+            while(it_edges.hasNext()) {
+              FEdge edge = (FEdge)it_edges.next();
+              TaskInfo taskinfo = taskinfos.get(edge.getTask().getSymbol());
+              tint = taskinfo.m_exetime[edge.getTaskExitIndex()];
+              edge.setExeTime(tint);
+              double idouble = taskinfo.m_probability[edge.getTaskExitIndex()];
+              edge.setProbability(idouble);
+              if(taskinfo.m_byObj != -1) {
+                ((FlagState)edge.getSource()).setByObj(taskinfo.m_byObj);
+              }
+            }
+            it_edges = null;
+          }
+          it_flags = null;
+        }
+      }
+      taskinfos = null;
+      it_classes = null;
+    } else {
+      randomProfileSetting();
     }
-    
-    public void setScheduleThreshold(int stt) {
-       this.scheduleThreshold = stt;
+  }
+
+  private void checkBackEdge() {
+    // Indentify backedges
+    Iterator it_classes=state.getClassSymbolTable().getDescriptorsIterator();
+    while(it_classes.hasNext()) {
+      ClassDescriptor cd=(ClassDescriptor) it_classes.next();
+      if(cd.hasFlags()) {
+        Set<FlagState> fss = this.taskanalysis.getFlagStates(cd);
+        SCC scc=GraphNode.DFS.computeSCC(fss);
+        if (scc.hasCycles()) {
+          for(int i=0; i<scc.numSCC(); i++) {
+            if (scc.hasCycle(i)) {
+              Set cycleset = scc.getSCC(i);
+              Iterator it_fs = cycleset.iterator();
+              while(it_fs.hasNext()) {
+                FlagState fs = (FlagState)it_fs.next();
+                Iterator it_edges = fs.edges();
+                while(it_edges.hasNext()) {
+                  FEdge edge = (FEdge)it_edges.next();
+                  if(cycleset.contains(edge.getTarget())) {
+                    // a backedge
+                    edge.setisbackedge(true);
+                  }
+                }
+                it_edges = null;
+              }
+              it_fs = null;
+            }
+          }
+        }
+        fss = null;
+      }
     }
+    it_classes = null;
+  }
+
+  private void readProfileInfo(java.util.Hashtable<String, TaskInfo> taskinfos){
+    try {
+      // read in profile data and set
+      //FileInputStream inStream = new FileInputStream("/scratch/profile.rst");
+      FileInputStream inStream = 
+        new FileInputStream("/scratch/" + this.state.profilename);
+      byte[] b = new byte[1024 * 100];
+      int length = inStream.read(b);
+      if(length < 0) {
+        System.out.print("No content in input file: /scratch/" 
+            + this.state.profilename + "\n");
+        System.exit(-1);
+      }
+      String profiledata = new String(b, 0, length);
+
+      // profile data format:
+      //   taskname, numoftaskexits(; exetime, probability, numofnewobjtypes(, 
+      //   newobj type, num of objs)+)+
+      int inindex = profiledata.indexOf('\n');
+      while((inindex != -1) ) {
+        String inline = profiledata.substring(0, inindex);
+        profiledata = profiledata.substring(inindex + 1);
+        //System.printString(inline + "\n");
+        int tmpinindex = inline.indexOf(',');
+        if(tmpinindex == -1) {
+          break;
+        }
+        String inname = inline.substring(0, tmpinindex);
+        String inint = inline.substring(tmpinindex + 1);
+        while(inint.startsWith(" ")) {
+          inint = inint.substring(1);
+        }
+        tmpinindex = inint.indexOf(',');
+        if(tmpinindex == -1) {
+          break;
+        }
+        int numofexits = Integer.parseInt(inint.substring(0, tmpinindex));
+        TaskInfo tinfo = new TaskInfo(numofexits);
+        inint = inint.substring(tmpinindex + 1);
+        while(inint.startsWith(" ")) {
+          inint = inint.substring(1);
+        }
+        tmpinindex = inint.indexOf(';');
+        int byObj = Integer.parseInt(inint.substring(0, tmpinindex));
+        if(byObj != -1) {
+          tinfo.m_byObj = byObj;
+        }
+        inint = inint.substring(tmpinindex + 1);
+        while(inint.startsWith(" ")) {
+          inint = inint.substring(1);
+        }
+        for(int i = 0; i < numofexits; i++) {
+          String tmpinfo = null;
+          if(i < numofexits - 1) {
+            tmpinindex = inint.indexOf(';');
+            tmpinfo = inint.substring(0, tmpinindex);
+            inint = inint.substring(tmpinindex + 1);
+            while(inint.startsWith(" ")) {
+              inint = inint.substring(1);
+            }
+          } else {
+            tmpinfo = inint;
+          }
 
-    public int getCoreNum() {
-       return coreNum;
+          tmpinindex = tmpinfo.indexOf(',');
+          tinfo.m_exetime[i] = Long.parseLong(tmpinfo.substring(0, tmpinindex));
+          tmpinfo = tmpinfo.substring(tmpinindex + 1);
+          while(tmpinfo.startsWith(" ")) {
+            tmpinfo = tmpinfo.substring(1);
+          }
+          tmpinindex = tmpinfo.indexOf(',');
+          tinfo.m_probability[i] = Double.parseDouble(
+              tmpinfo.substring(0,tmpinindex));
+          tmpinfo = tmpinfo.substring(tmpinindex + 1);
+          while(tmpinfo.startsWith(" ")) {
+            tmpinfo = tmpinfo.substring(1);
+          }
+          tmpinindex = tmpinfo.indexOf(',');
+          int numofnobjs = 0;
+          if(tmpinindex == -1) {
+            numofnobjs = Integer.parseInt(tmpinfo);
+            if(numofnobjs != 0) {
+              System.err.println("Error profile data format!");
+              System.exit(-1);
+            }
+          } else {
+            tinfo.m_newobjinfo.setElementAt(new Hashtable<String,Integer>(), i);
+            numofnobjs = Integer.parseInt(tmpinfo.substring(0, tmpinindex));
+            tmpinfo = tmpinfo.substring(tmpinindex + 1);
+            while(tmpinfo.startsWith(" ")) {
+              tmpinfo = tmpinfo.substring(1);
+            }
+            for(int j = 0; j < numofnobjs; j++) {
+              tmpinindex = tmpinfo.indexOf(',');
+              String nobjtype = tmpinfo.substring(0, tmpinindex);
+              tmpinfo = tmpinfo.substring(tmpinindex + 1);
+              while(tmpinfo.startsWith(" ")) {
+                tmpinfo = tmpinfo.substring(1);
+              }
+              int objnum = 0;
+              if(j < numofnobjs - 1) {
+                tmpinindex = tmpinfo.indexOf(',');
+                objnum  = Integer.parseInt(tmpinfo.substring(0, tmpinindex));
+                tmpinfo = tmpinfo.substring(tmpinindex + 1);
+                while(tmpinfo.startsWith(" ")) {
+                  tmpinfo = tmpinfo.substring(1);
+                }
+              } else {
+                objnum = Integer.parseInt(tmpinfo);
+              }
+              tinfo.m_newobjinfo.elementAt(i).put(nobjtype, objnum);
+            }
+          }
+        }
+        taskinfos.put(inname, tinfo);
+        inindex = profiledata.indexOf('\n');
+      }
+      inStream.close();
+      inStream = null;
+      b = null;
+    } catch(Exception e) {
+      e.printStackTrace();
     }
+  }
 
-    public void setCoreNum(int coreNum) {
-       this.coreNum = coreNum;
+  // for test
+  private void randomProfileSetting() {
+    // Randomly set the newRate and probability of FEdges
+    java.util.Random r=new java.util.Random();
+    int tint = 0;
+    Iterator it_classes=state.getClassSymbolTable().getDescriptorsIterator();
+    for(; it_classes.hasNext();) {
+      ClassDescriptor cd=(ClassDescriptor) it_classes.next();
+      if(cd.hasFlags()) {
+        Vector rootnodes=this.taskanalysis.getRootNodes(cd);
+        if(rootnodes!=null) {
+          Iterator it_rootnodes=rootnodes.iterator();
+          for(; it_rootnodes.hasNext();) {
+            FlagState root=(FlagState)it_rootnodes.next();
+            Vector allocatingTasks = root.getAllocatingTasks();
+            if(allocatingTasks != null) {
+              for(int k = 0; k < allocatingTasks.size(); k++) {
+                TaskDescriptor td = (TaskDescriptor)allocatingTasks.elementAt(k);
+                Vector<FEdge> fev = 
+                  (Vector<FEdge>)this.taskanalysis.getFEdgesFromTD(td);
+                int numEdges = fev.size();
+                int total = 100;
+                for(int j = 0; j < numEdges; j++) {
+                  FEdge pfe = fev.elementAt(j);
+                  if(numEdges - j == 1) {
+                    pfe.setProbability(total);
+                  } else {
+                    if((total != 0) && (total != 1)) {
+                      do {
+                        tint = r.nextInt()%total;
+                      } while(tint <= 0);
+                    }
+                    pfe.setProbability(tint);
+                    total -= tint;
+                  }
+                  //do {
+                  //   tint = r.nextInt()%10;
+                  //  } while(tint <= 0);
+                  //int newRate = tint;
+                  //int newRate = (j+1)%2+1;
+                  int newRate = 1;
+                  String cdname = cd.getSymbol();
+                  if((cdname.equals("SeriesRunner")) ||
+                      (cdname.equals("MDRunner")) ||
+                      (cdname.equals("Stage")) ||
+                      (cdname.equals("AppDemoRunner")) ||
+                      (cdname.equals("FilterBankAtom")) ||
+                      (cdname.equals("Grid")) ||
+                      (cdname.equals("Fractal")) ||
+                      (cdname.equals("KMeans")) || 
+                      (cdname.equals("ZTransform"))) {
+                    newRate = this.coreNum;
+                  } else if(cdname.equals("SentenceParser")) {
+                    newRate = 4;
+                  }
+                  //do {
+                  //    tint = r.nextInt()%100;
+                  //   } while(tint <= 0);
+                  //   int probability = tint;
+                  int probability = 100;
+                  pfe.addNewObjInfo(cd, newRate, probability);
+                }
+                fev = null;
+              }
+            }
+          }
+          it_rootnodes = null;
+        }
+
+        Iterator it_flags = this.taskanalysis.getFlagStates(cd).iterator();
+        while(it_flags.hasNext()) {
+          FlagState fs = (FlagState)it_flags.next();
+          Iterator it_edges = fs.edges();
+          int total = 100;
+          while(it_edges.hasNext()) {
+            //do {
+            //    tint = r.nextInt()%10;
+            //   } while(tint <= 0);
+            tint = 3;
+            FEdge edge = (FEdge)it_edges.next();
+            edge.setExeTime(tint);
+            if((fs.getClassDescriptor().getSymbol().equals("MD")) 
+                && (edge.getTask().getSymbol().equals("t6"))) {
+              if(edge.isbackedge()) {
+                if(edge.getTarget().equals(edge.getSource())) {
+                  edge.setProbability(93.75);
+                } else {
+                  edge.setProbability(3.125);
+                }
+              } else {
+                edge.setProbability(3.125);
+              }
+              continue;
+            }
+            if(!it_edges.hasNext()) {
+              edge.setProbability(total);
+            } else {
+              if((total != 0) && (total != 1)) {
+                do {
+                  tint = r.nextInt()%total;
+                } while(tint <= 0);
+              }
+              edge.setProbability(tint);
+              total -= tint;
+            }
+          }
+          it_edges = null;
+        }
+        it_flags = null;
+      }
     }
+    it_classes = null;
+  }
+
+  private ScheduleNode buildCFSTG(Vector<ScheduleEdge> toBreakDown,
+                                  Vector<TaskDescriptor> multiparamtds) {
+    Hashtable<ClassDescriptor, ClassNode> cdToCNodes = 
+      new Hashtable<ClassDescriptor, ClassNode>();
+    // Build the combined flag transition diagram
+    // First, for each class create a ClassNode
+    Iterator it_classes = state.getClassSymbolTable().getDescriptorsIterator();
+    while(it_classes.hasNext()) {
+      ClassDescriptor cd = (ClassDescriptor) it_classes.next();
+      Set<FlagState> fStates = taskanalysis.getFlagStates(cd);
 
-    public Vector<Vector<ScheduleNode>> getScheduleGraphs() {
-       return this.scheduleGraphs;
+      //Sort flagState nodes inside this ClassNode
+      Vector<FlagState> sFStates = FlagState.DFS.topology(fStates, null);
+
+      Vector rootnodes  = taskanalysis.getRootNodes(cd);
+      if(((rootnodes != null) && (rootnodes.size() > 0)) 
+          || (cd.getSymbol().equals(TypeUtil.StartupClass))) {
+        ClassNode cNode = new ClassNode(cd, sFStates);
+        cNode.setSorted(true);
+        classNodes.add(cNode);
+        cd2ClassNode.put(cd, cNode);
+        cdToCNodes.put(cd, cNode);
+        cNode.calExeTime();
+      }
+      rootnodes = null;
+      fStates = null;
+      sFStates = null;
+    }
+    it_classes = null;
+
+    ScheduleNode startupNode = null;
+    // For each ClassNode create a ScheduleNode containing it
+    int i = 0;
+    for(i = 0; i < classNodes.size(); i++) {
+      ClassNode cn = classNodes.elementAt(i);
+      ScheduleNode sn = new ScheduleNode(cn, 0);
+      if(cn.getClassDescriptor().getSymbol().equals(TypeUtil.StartupClass)) {
+        startupNode = sn;
+      }
+      cn.setScheduleNode(sn);
+      scheduleNodes.add(sn);
+      try {
+        sn.calExeTime();
+      } catch (Exception e) {
+        e.printStackTrace();
+      }
     }
 
-    // for test
-    public Vector<ScheduleEdge> getSEdges4Test() {
-       return scheduleEdges;
+    // Create 'new' edges between the ScheduleNodes.
+    Vector<ClassDescriptor> singleCDs = new Vector<ClassDescriptor>();
+    for(i = 0; i < classNodes.size(); i++) {
+      ClassNode cNode = classNodes.elementAt(i);
+      ClassDescriptor cd = cNode.getClassDescriptor();
+      Vector rootnodes  = taskanalysis.getRootNodes(cd);
+      boolean isSingle = false;
+      if(rootnodes != null) {
+        for(int h = 0; h < rootnodes.size(); h++) {
+          FlagState root=(FlagState)rootnodes.elementAt(h);
+          Vector allocatingTasks = root.getAllocatingTasks();
+          if(allocatingTasks != null) {
+            for(int k = 0; k < allocatingTasks.size(); k++) {
+              TaskDescriptor td = (TaskDescriptor)allocatingTasks.elementAt(k);
+              Vector<FEdge> fev = 
+                (Vector<FEdge>)taskanalysis.getFEdgesFromTD(td);
+              int numEdges = fev.size();
+              ScheduleNode sNode = cNode.getScheduleNode();
+              for(int j = 0; j < numEdges; j++) {
+                FEdge pfe = fev.elementAt(j);
+                FEdge.NewObjInfo noi = pfe.getNewObjInfo(cd);
+                if ((noi == null) || (noi.getNewRate() == 0) 
+                    || (noi.getProbability() == 0)) {
+                  // fake creating edge, do not need to create corresponding 
+                  // 'new' edge
+                  continue;
+                }
+                if(noi.getNewRate() == 1) {
+                  isSingle = true;
+                }
+                if(noi.getRoot() == null) {
+                  // set root FlagState
+                  noi.setRoot(root);
+                }
+                FlagState pfs = (FlagState)pfe.getTarget();
+                ClassDescriptor pcd = pfs.getClassDescriptor();
+                ClassNode pcNode = cdToCNodes.get(pcd);
+
+                ScheduleEdge sEdge = new ScheduleEdge(sNode, 
+                                                      "new", 
+                                                      root, 
+                                                      ScheduleEdge.NEWEDGE, 
+                                                      0);
+                sEdge.setFEdge(pfe);
+                sEdge.setSourceCNode(pcNode);
+                sEdge.setTargetCNode(cNode);
+                sEdge.setTargetFState(root);
+                sEdge.setNewRate(noi.getNewRate());
+                sEdge.setProbability(noi.getProbability());
+                pcNode.getScheduleNode().addEdge(sEdge);
+                scheduleEdges.add(sEdge);
+                if((j !=0 ) || (k != 0) || (h != 0)) {
+                  toBreakDown.add(sEdge);
+                }
+              }
+              fev = null;
+            }
+            allocatingTasks = null;
+          }
+          if(isSingle) {
+            singleCDs.add(cd);
+          }
+        }
+        rootnodes = null;
+      }
     }
+    cdToCNodes = null;
     
-    public void schedule(int generateThreshold) {
-       try {
-           Vector<ScheduleEdge> toBreakDown = new Vector<ScheduleEdge>();
-           ScheduleNode startupNode = null;
-
-           // necessary preparation such as read profile info etc.
-           preSchedule();
-           // build the CFSTG
-           startupNode = buildCFSTG(toBreakDown);
-           // do Tree transform
-           treeTransform(toBreakDown, startupNode);
-           // do CFSTG transform to explore the potential parallelism as much
-           // as possible
-           CFSTGTransform();
-           // mappint to real multi-core processor
-           coreMapping(generateThreshold);
-           toBreakDown = null;
-       } catch (Exception e) {
-           e.printStackTrace();
-           System.exit(-1);
-       }
+    for(i = 0; i < multiparamtds.size(); i++) {
+      TaskDescriptor td = multiparamtds.elementAt(i);
+      for(int j = 0; j < td.numParameters(); j++) {
+        ClassDescriptor cd = td.getParamType(j).getClassDesc();
+        if(singleCDs.contains(cd)) {
+          // set the first parameter which has single creation rate as main cd
+          // TODO: may have bug when cd has multiple new flag states
+          this.td2maincd.put(td, cd);
+          break;
+        }
+      }
     }
 
-    private void preSchedule() {
-       this.checkBackEdge();
-
-       // set up profiling data
-       if(state.USEPROFILE) {
-           java.util.Hashtable<String, TaskInfo> taskinfos = 
-               new java.util.Hashtable<String, TaskInfo>();
-           this.readProfileInfo(taskinfos);
-
-           long tint = 0;
-           Iterator it_classes =
-               state.getClassSymbolTable().getDescriptorsIterator();
-           while(it_classes.hasNext()) {
-               ClassDescriptor cd = (ClassDescriptor) it_classes.next();
-               if(cd.hasFlags()) {
-                   Vector rootnodes = this.taskanalysis.getRootNodes(cd);
-                   if(rootnodes!=null) {
-                       Iterator it_rootnodes = rootnodes.iterator();
-                       while(it_rootnodes.hasNext()) {
-                           FlagState root = (FlagState)it_rootnodes.next();
-                           Vector allocatingTasks = root.getAllocatingTasks();
-                           if(allocatingTasks != null) {
-                               for(int k = 0; k < allocatingTasks.size(); k++) {
-                                   TaskDescriptor td = 
-                                       (TaskDescriptor)allocatingTasks.elementAt(k);
-                                   Vector<FEdge> fev = 
-                                       this.taskanalysis.getFEdgesFromTD(td);
-                                   int numEdges = fev.size();
-                                   for(int j = 0; j < numEdges; j++) {
-                                       FEdge pfe = fev.elementAt(j);
-                                       TaskInfo taskinfo = 
-                                           taskinfos.get(td.getSymbol());
-                                       tint = taskinfo.m_exetime[pfe.getTaskExitIndex()];
-                                       pfe.setExeTime(tint);
-                                       double idouble = 
-                                           taskinfo.m_probability[pfe.getTaskExitIndex()];
-                                       pfe.setProbability(idouble);
-                                       int newRate = 0;
-                                       if((taskinfo.m_newobjinfo.elementAt(pfe.getTaskExitIndex()) != null)
-                                               && (taskinfo.m_newobjinfo.elementAt(pfe.getTaskExitIndex()).containsKey(cd.getSymbol()))) {
-                                           newRate = taskinfo.m_newobjinfo.elementAt(pfe.getTaskExitIndex()).get(cd.getSymbol());
-                                       }
-                                       pfe.addNewObjInfo(cd, newRate, idouble);
-                                       if(taskinfo.m_byObj != -1) {
-                                           ((FlagState)pfe.getSource()).setByObj(taskinfo.m_byObj);
-                                       }
-                                   }
-                                   fev = null;
-                               }
-                           }
-                       }
-                       it_rootnodes = null;
-                   }
-                   Iterator it_flags = this.taskanalysis.getFlagStates(cd).iterator();
-                   while(it_flags.hasNext()) {
-                       FlagState fs = (FlagState)it_flags.next();
-                       Iterator it_edges = fs.edges();
-                       while(it_edges.hasNext()) {
-                           FEdge edge = (FEdge)it_edges.next();
-                           TaskInfo taskinfo = taskinfos.get(edge.getTask().getSymbol());
-                           tint = taskinfo.m_exetime[edge.getTaskExitIndex()];
-                           edge.setExeTime(tint);
-                           double idouble = taskinfo.m_probability[edge.getTaskExitIndex()];
-                           edge.setProbability(idouble);
-                           if(taskinfo.m_byObj != -1) {
-                               ((FlagState)edge.getSource()).setByObj(taskinfo.m_byObj);
-                           }
-                       }
-                       it_edges = null;
-                   }
-                   it_flags = null;
-               }
-           }
-           taskinfos = null;
-           it_classes = null;
-       } else {
-           randomProfileSetting();
-       }
+    return startupNode;
+  }
+
+  private void treeTransform(Vector<ScheduleEdge> toBreakDown, 
+                             ScheduleNode startupNode) {
+    int i = 0;
+
+    // Break down the 'cycle's
+    try {
+      for(i = 0; i < toBreakDown.size(); i++ ) {
+        cloneSNodeList(toBreakDown.elementAt(i), false);
+      }
+    } catch (Exception e) {
+      e.printStackTrace();
+      System.exit(-1);
     }
-    
-    private void checkBackEdge() {
-       // Indentify backedges
-       Iterator it_classes=state.getClassSymbolTable().getDescriptorsIterator();
-       while(it_classes.hasNext()) {
-           ClassDescriptor cd=(ClassDescriptor) it_classes.next();
-           if(cd.hasFlags()) {
-               Set<FlagState> fss = this.taskanalysis.getFlagStates(cd);
-               SCC scc=GraphNode.DFS.computeSCC(fss);
-               if (scc.hasCycles()) {
-                   for(int i=0; i<scc.numSCC(); i++) {
-                       if (scc.hasCycle(i)) {
-                           Set cycleset = scc.getSCC(i);
-                           Iterator it_fs = cycleset.iterator();
-                           while(it_fs.hasNext()) {
-                               FlagState fs = (FlagState)it_fs.next();
-                               Iterator it_edges = fs.edges();
-                               while(it_edges.hasNext()) {
-                                   FEdge edge = (FEdge)it_edges.next();
-                                   if(cycleset.contains(edge.getTarget())) {
-                                       // a backedge
-                                       edge.setisbackedge(true);
-                                   }
-                               }
-                               it_edges = null;
-                           }
-                           it_fs = null;
-                       }
-                   }
-               }
-               fss = null;
-           }
-       }
-       it_classes = null;
+
+    // Remove fake 'new' edges
+    for(i = 0; i < scheduleEdges.size(); i++) {
+      ScheduleEdge se = (ScheduleEdge)scheduleEdges.elementAt(i);
+      if((0 == se.getNewRate()) || (0 == se.getProbability())) {
+        scheduleEdges.removeElement(se);
+        scheduleNodes.removeElement(se.getTarget());
+      }
     }
-    
-    private void readProfileInfo(java.util.Hashtable<String, TaskInfo> taskinfos) {
-       try {
-           // read in profile data and set
-           FileInputStream inStream = new FileInputStream("/scratch/profile.rst");
-           byte[] b = new byte[1024 * 100];
-           int length = inStream.read(b);
-           if(length < 0) {
-               System.out.print("No content in input file: /scratch/profile.rst\n");
-               System.exit(-1);
-           }
-           String profiledata = new String(b, 0, length);
-           
-           // profile data format:
-           //   taskname, numoftaskexits(; exetime, probability, numofnewobjtypes(, newobj type, num of objs)+)+
-           int inindex = profiledata.indexOf('\n');
-           while((inindex != -1) ) {
-               String inline = profiledata.substring(0, inindex);
-               profiledata = profiledata.substring(inindex + 1);
-               //System.printString(inline + "\n");
-               int tmpinindex = inline.indexOf(',');
-               if(tmpinindex == -1) {
-                   break;
-               }
-               String inname = inline.substring(0, tmpinindex);
-               String inint = inline.substring(tmpinindex + 1);
-               while(inint.startsWith(" ")) {
-                   inint = inint.substring(1);
-               }
-               tmpinindex = inint.indexOf(',');
-               if(tmpinindex == -1) {
-                   break;
-               }
-               int numofexits = Integer.parseInt(inint.substring(0, tmpinindex));
-               TaskInfo tinfo = new TaskInfo(numofexits);
-               inint = inint.substring(tmpinindex + 1);
-               while(inint.startsWith(" ")) {
-                   inint = inint.substring(1);
-               }
-               tmpinindex = inint.indexOf(';');
-               int byObj = Integer.parseInt(inint.substring(0, tmpinindex));
-               if(byObj != -1) {
-                   tinfo.m_byObj = byObj;
-               }
-               inint = inint.substring(tmpinindex + 1);
-               while(inint.startsWith(" ")) {
-                   inint = inint.substring(1);
-               }
-               for(int i = 0; i < numofexits; i++) {
-                   String tmpinfo = null;
-                   if(i < numofexits - 1) {
-                       tmpinindex = inint.indexOf(';');
-                       tmpinfo = inint.substring(0, tmpinindex);
-                       inint = inint.substring(tmpinindex + 1);
-                       while(inint.startsWith(" ")) {
-                           inint = inint.substring(1);
-                       }
-                   } else {
-                       tmpinfo = inint;
-                   }
-
-                   tmpinindex = tmpinfo.indexOf(',');
-                   tinfo.m_exetime[i] = Long.parseLong(tmpinfo.substring(0, tmpinindex));
-                   tmpinfo = tmpinfo.substring(tmpinindex + 1);
-                   while(tmpinfo.startsWith(" ")) {
-                       tmpinfo = tmpinfo.substring(1);
-                   }
-                   tmpinindex = tmpinfo.indexOf(',');
-                   tinfo.m_probability[i] = Double.parseDouble(tmpinfo.substring(0, tmpinindex));
-                   tmpinfo = tmpinfo.substring(tmpinindex + 1);
-                   while(tmpinfo.startsWith(" ")) {
-                       tmpinfo = tmpinfo.substring(1);
-                   }
-                   tmpinindex = tmpinfo.indexOf(',');
-                   int numofnobjs = 0;
-                   if(tmpinindex == -1) {
-                       numofnobjs = Integer.parseInt(tmpinfo);
-                       if(numofnobjs != 0) {
-                           System.err.println("Error profile data format!");
-                           System.exit(-1);
-                       }
-                   } else {
-                       tinfo.m_newobjinfo.setElementAt(new Hashtable<String,Integer>(), i);
-                       numofnobjs = Integer.parseInt(tmpinfo.substring(0, tmpinindex));
-                       tmpinfo = tmpinfo.substring(tmpinindex + 1);
-                       while(tmpinfo.startsWith(" ")) {
-                           tmpinfo = tmpinfo.substring(1);
-                       }
-                       for(int j = 0; j < numofnobjs; j++) {
-                           tmpinindex = tmpinfo.indexOf(',');
-                           String nobjtype = tmpinfo.substring(0, tmpinindex);
-                           tmpinfo = tmpinfo.substring(tmpinindex + 1);
-                           while(tmpinfo.startsWith(" ")) {
-                               tmpinfo = tmpinfo.substring(1);
-                           }
-                           int objnum = 0;
-                           if(j < numofnobjs - 1) {
-                               tmpinindex = tmpinfo.indexOf(',');
-                               objnum  = Integer.parseInt(tmpinfo.substring(0, tmpinindex));
-                               tmpinfo = tmpinfo.substring(tmpinindex + 1);
-                               while(tmpinfo.startsWith(" ")) {
-                                   tmpinfo = tmpinfo.substring(1);
-                               }
-                           } else {
-                               objnum = Integer.parseInt(tmpinfo);
-                           }
-                           tinfo.m_newobjinfo.elementAt(i).put(nobjtype, objnum);
-                       }
-                   }
-               }
-               taskinfos.put(inname, tinfo);
-               inindex = profiledata.indexOf('\n');
-           }
-           inStream.close();
-           inStream = null;
-           b = null;
-       } catch(Exception e) {
-           e.printStackTrace();
-       }
+
+    // Do topology sort of the ClassNodes and ScheduleEdges.
+    Vector<ScheduleEdge> ssev = new Vector<ScheduleEdge>();
+    Vector<ScheduleNode> tempSNodes = 
+      ClassNode.DFS.topology(scheduleNodes, ssev);
+    scheduleNodes.removeAllElements();
+    scheduleNodes = tempSNodes;
+    tempSNodes = null;
+    scheduleEdges.removeAllElements();
+    scheduleEdges = ssev;
+    ssev = null;
+    sorted = true;
+
+    // Set the cid of these ScheduleNode
+    Queue<ScheduleNode> toVisit = new LinkedList<ScheduleNode>();
+    toVisit.add(startupNode);
+    while(!toVisit.isEmpty()) {
+      ScheduleNode sn = toVisit.poll();
+      if(sn.getCid() == -1) {
+        // not visited before
+        sn.setCid(ScheduleNode.colorID++);
+        Iterator it_edge = sn.edges();
+        while(it_edge.hasNext()) {
+          toVisit.add((ScheduleNode)((ScheduleEdge)it_edge.next()).getTarget());
+        }
+        it_edge = null;
+      }
     }
+    toVisit = null;
 
-    // for test
-    private void randomProfileSetting() {
-       // Randomly set the newRate and probability of FEdges
-       java.util.Random r=new java.util.Random();
-       int tint = 0;
-       Iterator it_classes=state.getClassSymbolTable().getDescriptorsIterator();
-       for(; it_classes.hasNext();) {
-           ClassDescriptor cd=(ClassDescriptor) it_classes.next();
-           if(cd.hasFlags()) {
-               Vector rootnodes=this.taskanalysis.getRootNodes(cd);
-               if(rootnodes!=null) {
-                   Iterator it_rootnodes=rootnodes.iterator();
-                   for(; it_rootnodes.hasNext();) {
-                       FlagState root=(FlagState)it_rootnodes.next();
-                       Vector allocatingTasks = root.getAllocatingTasks();
-                       if(allocatingTasks != null) {
-                           for(int k = 0; k < allocatingTasks.size(); k++) {
-                               TaskDescriptor td = (TaskDescriptor)allocatingTasks.elementAt(k);
-                               Vector<FEdge> fev = (Vector<FEdge>)this.taskanalysis.getFEdgesFromTD(td);
-                               int numEdges = fev.size();
-                               int total = 100;
-                               for(int j = 0; j < numEdges; j++) {
-                                   FEdge pfe = fev.elementAt(j);
-                                   if(numEdges - j == 1) {
-                                       pfe.setProbability(total);
-                                   } else {
-                                       if((total != 0) && (total != 1)) {
-                                           do {
-                                               tint = r.nextInt()%total;
-                                           } while(tint <= 0);
-                                       }
-                                       pfe.setProbability(tint);
-                                       total -= tint;
-                                   }
-                                   //do {
-                                       //   tint = r.nextInt()%10;
-                                   //  } while(tint <= 0);
-                                   //int newRate = tint;
-                                   //int newRate = (j+1)%2+1;
-                                   int newRate = 1;
-                                   String cdname = cd.getSymbol();
-                                   if((cdname.equals("SeriesRunner")) ||
-                                           (cdname.equals("MDRunner")) ||
-                                           (cdname.equals("Stage")) ||
-                                           (cdname.equals("AppDemoRunner")) ||
-                                           (cdname.equals("FilterBankAtom")) ||
-                                           (cdname.equals("Grid")) ||
-                                           (cdname.equals("Fractal"))) {
-                                       newRate = this.coreNum;
-                                   } else if(cdname.equals("SentenceParser")) {
-                                       newRate = 4;
-                                   }
-                                   //do {
-                                   //    tint = r.nextInt()%100;
-                                   //   } while(tint <= 0);
-                                   //   int probability = tint;
-                                   int probability = 100;
-                                   pfe.addNewObjInfo(cd, newRate, probability);
-                               }
-                               fev = null;
-                           }
-                       }
-                   }
-                   it_rootnodes = null;
-               }
-
-               Iterator it_flags = this.taskanalysis.getFlagStates(cd).iterator();
-               while(it_flags.hasNext()) {
-                   FlagState fs = (FlagState)it_flags.next();
-                   Iterator it_edges = fs.edges();
-                   int total = 100;
-                   while(it_edges.hasNext()) {
-                       //do {
-                       //    tint = r.nextInt()%10;
-                       //   } while(tint <= 0);
-                       tint = 3;
-                       FEdge edge = (FEdge)it_edges.next();
-                       edge.setExeTime(tint);
-                       if((fs.getClassDescriptor().getSymbol().equals("MD")) 
-                               && (edge.getTask().getSymbol().equals("t6"))) {
-                           if(edge.isbackedge()) {
-                               if(edge.getTarget().equals(edge.getSource())) {
-                                   edge.setProbability(93.75);
-                               } else {
-                                   edge.setProbability(3.125);
-                               }
-                           } else {
-                               edge.setProbability(3.125);
-                           }
-                           continue;
-                       }
-                       if(!it_edges.hasNext()) {
-                           edge.setProbability(total);
-                       } else {
-                           if((total != 0) && (total != 1)) {
-                               do {
-                                   tint = r.nextInt()%total;
-                               } while(tint <= 0);
-                           }
-                           edge.setProbability(tint);
-                           total -= tint;
-                       }
-                   }
-                   it_edges = null;
-               }
-               it_flags = null;
-           }
-       }
-       it_classes = null;
+    if(this.state.PRINTSCHEDULING) {
+      SchedulingUtil.printScheduleGraph(
+          this.state.outputdir + "scheduling_ori.dot", this.scheduleNodes);
     }
+  }
+
+  private void CFSTGTransform() {
+    // First iteration
+    int i = 0;
+    //Access the ScheduleEdges in reverse topology order
+    Hashtable<FEdge, Vector<ScheduleEdge>> fe2ses = 
+      new Hashtable<FEdge, Vector<ScheduleEdge>>();
+    Hashtable<ScheduleNode, Vector<FEdge>> sn2fes = 
+      new Hashtable<ScheduleNode, Vector<FEdge>>();
+    ScheduleNode preSNode = null;
+    for(i = scheduleEdges.size(); i > 0; i--) {
+      ScheduleEdge se = (ScheduleEdge)scheduleEdges.elementAt(i-1);
+      if(ScheduleEdge.NEWEDGE == se.getType()) {
+        if(preSNode == null) {
+          preSNode = (ScheduleNode)se.getSource();
+        }
 
-    private ScheduleNode buildCFSTG(Vector<ScheduleEdge> toBreakDown) {
-       Hashtable<ClassDescriptor, ClassNode> cdToCNodes = 
-           new Hashtable<ClassDescriptor, ClassNode>();
-       // Build the combined flag transition diagram
-       // First, for each class create a ClassNode
-       Iterator it_classes = state.getClassSymbolTable().getDescriptorsIterator();
-       while(it_classes.hasNext()) {
-           ClassDescriptor cd = (ClassDescriptor) it_classes.next();
-           Set<FlagState> fStates = taskanalysis.getFlagStates(cd);
-
-           //Sort flagState nodes inside this ClassNode
-           Vector<FlagState> sFStates = FlagState.DFS.topology(fStates, null);
-
-           Vector rootnodes  = taskanalysis.getRootNodes(cd);
-           if(((rootnodes != null) && (rootnodes.size() > 0)) 
-                   || (cd.getSymbol().equals(TypeUtil.StartupClass))) {
-               ClassNode cNode = new ClassNode(cd, sFStates);
-               cNode.setSorted(true);
-               classNodes.add(cNode);
-               cd2ClassNode.put(cd, cNode);
-               cdToCNodes.put(cd, cNode);
-               cNode.calExeTime();
-
-               // for test
-               if(cd.getSymbol().equals("C")) {
-                   cNode.setTransTime(45);
-               }
-           }
-           rootnodes = null;
-           fStates = null;
-           sFStates = null;
-       }
-       it_classes = null;
-
-       ScheduleNode startupNode = null;
-       // For each ClassNode create a ScheduleNode containing it
-       int i = 0;
-       for(i = 0; i < classNodes.size(); i++) {
-           ClassNode cn = classNodes.elementAt(i);
-           ScheduleNode sn = new ScheduleNode(cn, 0);
-           if(cn.getClassDescriptor().getSymbol().equals(TypeUtil.StartupClass)) {
-               startupNode = sn;
-           }
-           cn.setScheduleNode(sn);
-           scheduleNodes.add(sn);
-           try {
-               sn.calExeTime();
-           } catch (Exception e) {
-               e.printStackTrace();
-           }
-       }
-
-       // Create 'new' edges between the ScheduleNodes.
-       //Vector<ScheduleEdge> toBreakDown = new Vector<ScheduleEdge>();
-       for(i = 0; i < classNodes.size(); i++) {
-           ClassNode cNode = classNodes.elementAt(i);
-           ClassDescriptor cd = cNode.getClassDescriptor();
-           Vector rootnodes  = taskanalysis.getRootNodes(cd);
-           if(rootnodes != null) {
-               for(int h = 0; h < rootnodes.size(); h++) {
-                   FlagState root=(FlagState)rootnodes.elementAt(h);
-                   Vector allocatingTasks = root.getAllocatingTasks();
-                   if(allocatingTasks != null) {
-                       for(int k = 0; k < allocatingTasks.size(); k++) {
-                           TaskDescriptor td = (TaskDescriptor)allocatingTasks.elementAt(k);
-                           Vector<FEdge> fev = (Vector<FEdge>)taskanalysis.getFEdgesFromTD(td);
-                           int numEdges = fev.size();
-                           ScheduleNode sNode = cNode.getScheduleNode();
-                           for(int j = 0; j < numEdges; j++) {
-                               FEdge pfe = fev.elementAt(j);
-                               FEdge.NewObjInfo noi = pfe.getNewObjInfo(cd);
-                               if ((noi == null) || (noi.getNewRate() == 0) || (noi.getProbability() == 0)) {
-                                   // fake creating edge, do not need to create corresponding 'new' edge
-                                   continue;
-                               }
-                               if(noi.getRoot() == null) {
-                                   // set root FlagState
-                                   noi.setRoot(root);
-                               }
-                               FlagState pfs = (FlagState)pfe.getTarget();
-                               ClassDescriptor pcd = pfs.getClassDescriptor();
-                               ClassNode pcNode = cdToCNodes.get(pcd);
-
-                               ScheduleEdge sEdge = new ScheduleEdge(sNode, 
-                                                                     "new", 
-                                                                     root, 
-                                                                     ScheduleEdge.NEWEDGE, 
-                                                                     0);
-                               sEdge.setFEdge(pfe);
-                               sEdge.setSourceCNode(pcNode);
-                               sEdge.setTargetCNode(cNode);
-                               sEdge.setTargetFState(root);
-                               sEdge.setNewRate(noi.getNewRate());
-                               sEdge.setProbability(noi.getProbability());
-                               pcNode.getScheduleNode().addEdge(sEdge);
-                               scheduleEdges.add(sEdge);
-                               if((j !=0 ) || (k != 0) || (h != 0)) {
-                                   toBreakDown.add(sEdge);
-                               }
-                           }
-                           fev = null;
-                       }
-                       allocatingTasks = null;
-                   }
-               }
-               rootnodes = null;
-           }
-       }
-       cdToCNodes = null;
-       
-       return startupNode;
+        boolean split = false;
+        FEdge fe = se.getFEdge();
+        if(fe.getSource() == fe.getTarget()) {
+          // back edge
+          try {
+            int repeat = (int)Math.ceil(se.getNewRate()*se.getProbability()/100);
+            int rate = 0;
+            if(repeat > 1) {
+              for(int j = 1; j< repeat; j++ ) {
+                cloneSNodeList(se, true);
+              }
+              se.setNewRate(1);
+              se.setProbability(100);
+            }
+            try {
+              rate = (int)Math.ceil(
+                  se.getListExeTime()/calInExeTime(se.getSourceFState()));
+            } catch (Exception e) {
+              e.printStackTrace();
+            }
+            for(int j = rate - 1; j > 0; j--) {
+              for(int k = repeat; k > 0; k--) {
+                cloneSNodeList(se, true);
+              }
+            }
+          } catch (Exception e) {
+            e.printStackTrace();
+            System.exit(-1);
+          }
+        } else {
+          // if preSNode is not the same as se's source ScheduleNode
+          // handle any ScheduleEdges previously put into fe2ses whose source 
+          // ScheduleNode is preSNode
+          boolean same = (preSNode == se.getSource());
+          if(!same) {
+            // check the topology sort, only process those after se.getSource()
+            if(preSNode.getFinishingTime() < se.getSource().getFinishingTime()){
+              if(sn2fes.containsKey(preSNode)) {
+                Vector<FEdge> fes = sn2fes.remove(preSNode);
+                for(int j = 0; j < fes.size(); j++) {
+                  FEdge tempfe = fes.elementAt(j);
+                  Vector<ScheduleEdge> ses = fe2ses.get(tempfe);
+                  ScheduleEdge tempse = ses.elementAt(0);
+                  long temptime = tempse.getListExeTime();
+                  // find out the ScheduleEdge with least exeTime
+                  for(int k = 1; k < ses.size(); k++) {
+                    long ttemp = ses.elementAt(k).getListExeTime();
+                    if(ttemp < temptime) {
+                      tempse = ses.elementAt(k);
+                      temptime = ttemp;
+                    }
+                  }
+                  // handle the tempse
+                  handleScheduleEdge(tempse, true);
+                  ses.removeElement(tempse);
+                  // handle other ScheduleEdges
+                  for(int k = 0; k < ses.size(); k++) {
+                    handleScheduleEdge(ses.elementAt(k), false);
+                  }
+                  ses = null;
+                  fe2ses.remove(tempfe);
+                }
+                fes = null;
+              }
+            }
+            preSNode = (ScheduleNode)se.getSource();
+          }
+
+          // if fe is the last task inside this ClassNode, delay the expanding 
+          // and merging until we find all such 'new' edges
+          // associated with a last task inside this ClassNode
+          if(!fe.getTarget().edges().hasNext()) {
+            if(fe2ses.get(fe) == null) {
+              fe2ses.put(fe, new Vector<ScheduleEdge>());
+            }
+            if(sn2fes.get((ScheduleNode)se.getSource()) == null) {
+              sn2fes.put((ScheduleNode)se.getSource(), new Vector<FEdge>());
+            }
+            if(!fe2ses.get(fe).contains(se)) {
+              fe2ses.get(fe).add(se);
+            }
+            if(!sn2fes.get((ScheduleNode)se.getSource()).contains(fe)) {
+              sn2fes.get((ScheduleNode)se.getSource()).add(fe);
+            }
+          } else {
+            // As this is not a last task, first handle available ScheduleEdges 
+            // previously put into fe2ses
+            if((same) && (sn2fes.containsKey(preSNode))) {
+              Vector<FEdge> fes = sn2fes.remove(preSNode);
+              for(int j = 0; j < fes.size(); j++) {
+                FEdge tempfe = fes.elementAt(j);
+                Vector<ScheduleEdge> ses = fe2ses.get(tempfe);
+                ScheduleEdge tempse = ses.elementAt(0);
+                long temptime = tempse.getListExeTime();
+                // find out the ScheduleEdge with least exeTime
+                for(int k = 1; k < ses.size(); k++) {
+                  long ttemp = ses.elementAt(k).getListExeTime();
+                  if(ttemp < temptime) {
+                    tempse = ses.elementAt(k);
+                    temptime = ttemp;
+                  }
+                }
+                // handle the tempse
+                handleScheduleEdge(tempse, true);
+                ses.removeElement(tempse);
+                // handle other ScheduleEdges
+                for(int k = 0; k < ses.size(); k++) {
+                  handleScheduleEdge(ses.elementAt(k), false);
+                }
+                ses = null;
+                fe2ses.remove(tempfe);
+              }
+              fes = null;
+            }
+
+            if((!(se.getTransTime() < this.transThreshold)) 
+                && (se.getSourceCNode().getTransTime() < se.getTransTime())) {
+              split = true;
+              splitSNode(se, true);
+            } else {
+              // handle this ScheduleEdge
+              handleScheduleEdge(se, true);
+            }
+          }
+        }
+      }
     }
-    
-    private void treeTransform(Vector<ScheduleEdge> toBreakDown, 
-                             ScheduleNode startupNode) {
-       int i = 0;
-       
-       // Break down the 'cycle's
-       try {
-           for(i = 0; i < toBreakDown.size(); i++ ) {
-               cloneSNodeList(toBreakDown.elementAt(i), false);
-           }
-       } catch (Exception e) {
-           e.printStackTrace();
-           System.exit(-1);
-       }
-
-       // Remove fake 'new' edges
-       for(i = 0; i < scheduleEdges.size(); i++) {
-           ScheduleEdge se = (ScheduleEdge)scheduleEdges.elementAt(i);
-           if((0 == se.getNewRate()) || (0 == se.getProbability())) {
-               scheduleEdges.removeElement(se);
-               scheduleNodes.removeElement(se.getTarget());
-           }
-       }
-
-       // Do topology sort of the ClassNodes and ScheduleEdges.
-       Vector<ScheduleEdge> ssev = new Vector<ScheduleEdge>();
-       Vector<ScheduleNode> tempSNodes = ClassNode.DFS.topology(scheduleNodes, ssev);
-       scheduleNodes.removeAllElements();
-       scheduleNodes = tempSNodes;
-       tempSNodes = null;
-       scheduleEdges.removeAllElements();
-       scheduleEdges = ssev;
-       ssev = null;
-       sorted = true;
-
-       // Set the cid of these ScheduleNode
-       Queue<ScheduleNode> toVisit = new LinkedList<ScheduleNode>();
-       toVisit.add(startupNode);
-       while(!toVisit.isEmpty()) {
-           ScheduleNode sn = toVisit.poll();
-           if(sn.getCid() == -1) {
-               // not visited before
-               sn.setCid(ScheduleNode.colorID++);
-               Iterator it_edge = sn.edges();
-               while(it_edge.hasNext()) {
-                   toVisit.add((ScheduleNode)((ScheduleEdge)it_edge.next()).getTarget());
-               }
-               it_edge = null;
-           }
-       }
-       toVisit = null;
-
-       if(this.state.PRINTSCHEDULING) {
-           SchedulingUtil.printScheduleGraph(this.state.outputdir + "scheduling_ori.dot", 
-                                             this.scheduleNodes);
-       }
+    if(!fe2ses.isEmpty()) {
+      Set<FEdge> keys = fe2ses.keySet();
+      Iterator it_keys = keys.iterator();
+      while(it_keys.hasNext()) {
+        FEdge tempfe = (FEdge)it_keys.next();
+        Vector<ScheduleEdge> ses = fe2ses.get(tempfe);
+        ScheduleEdge tempse = ses.elementAt(0);
+        long temptime = tempse.getListExeTime();
+        // find out the ScheduleEdge with least exeTime
+        for(int k = 1; k < ses.size(); k++) {
+          long ttemp = ses.elementAt(k).getListExeTime();
+          if(ttemp < temptime) {
+            tempse = ses.elementAt(k);
+            temptime = ttemp;
+          }
+        }
+        // handle the tempse
+        handleScheduleEdge(tempse, true);
+        ses.removeElement(tempse);
+        // handle other ScheduleEdges
+        for(int k = 0; k < ses.size(); k++) {
+          handleScheduleEdge(ses.elementAt(k), false);
+        }
+        ses = null;
+      }
+      keys = null;
+      it_keys = null;
     }
+    fe2ses.clear();
+    sn2fes.clear();
+    fe2ses = null;
+    sn2fes = null;
 
-    private void CFSTGTransform() {
-       // First iteration
-       int i = 0;
-       //Access the ScheduleEdges in reverse topology order
-       Hashtable<FEdge, Vector<ScheduleEdge>> fe2ses = new Hashtable<FEdge, Vector<ScheduleEdge>>();
-       Hashtable<ScheduleNode, Vector<FEdge>> sn2fes = new Hashtable<ScheduleNode, Vector<FEdge>>();
-       ScheduleNode preSNode = null;
-       for(i = scheduleEdges.size(); i > 0; i--) {
-           ScheduleEdge se = (ScheduleEdge)scheduleEdges.elementAt(i-1);
-           if(ScheduleEdge.NEWEDGE == se.getType()) {
-               if(preSNode == null) {
-                   preSNode = (ScheduleNode)se.getSource();
-               }
-
-               boolean split = false;
-               FEdge fe = se.getFEdge();
-               if(fe.getSource() == fe.getTarget()) {
-                   // back edge
-                   try {
-                       int repeat = (int)Math.ceil(se.getNewRate() * se.getProbability() / 100);
-                       int rate = 0;
-                       if(repeat > 1) {
-                           for(int j = 1; j< repeat; j++ ) {
-                               cloneSNodeList(se, true);
-                           }
-                           se.setNewRate(1);
-                           se.setProbability(100);
-                       }
-                       try {
-                           rate = (int)Math.ceil(se.getListExeTime()/ calInExeTime(se.getSourceFState()));
-                       } catch (Exception e) {
-                           e.printStackTrace();
-                       }
-                       for(int j = rate - 1; j > 0; j--) {
-                           for(int k = repeat; k > 0; k--) {
-                               cloneSNodeList(se, true);
-                           }
-                       }
-                   } catch (Exception e) {
-                       e.printStackTrace();
-                       System.exit(-1);
-                   }
-               } else {
-                   // if preSNode is not the same as se's source ScheduleNode
-                   // handle any ScheduleEdges previously put into fe2ses whose source ScheduleNode is preSNode
-                   boolean same = (preSNode == se.getSource());
-                   if(!same) {
-                       // check the topology sort, only process those after se.getSource()
-                       if(preSNode.getFinishingTime() < se.getSource().getFinishingTime()) {
-                           if(sn2fes.containsKey(preSNode)) {
-                               Vector<FEdge> fes = sn2fes.remove(preSNode);
-                               for(int j = 0; j < fes.size(); j++) {
-                                   FEdge tempfe = fes.elementAt(j);
-                                   Vector<ScheduleEdge> ses = fe2ses.get(tempfe);
-                                   ScheduleEdge tempse = ses.elementAt(0);
-                                   long temptime = tempse.getListExeTime();
-                                   // find out the ScheduleEdge with least exeTime
-                                   for(int k = 1; k < ses.size(); k++) {
-                                       long ttemp = ses.elementAt(k).getListExeTime();
-                                       if(ttemp < temptime) {
-                                           tempse = ses.elementAt(k);
-                                           temptime = ttemp;
-                                       }
-                                   }
-                                   // handle the tempse
-                                   handleScheduleEdge(tempse, true);
-                                   ses.removeElement(tempse);
-                                   // handle other ScheduleEdges
-                                   for(int k = 0; k < ses.size(); k++) {
-                                       handleScheduleEdge(ses.elementAt(k), false);
-                                   }
-                                   ses = null;
-                                   fe2ses.remove(tempfe);
-                               }
-                               fes = null;
-                           }
-                       }
-                       preSNode = (ScheduleNode)se.getSource();
-                   }
-
-                   // if fe is the last task inside this ClassNode, delay the expanding and merging until we find all such 'new' edges
-                   // associated with a last task inside this ClassNode
-                   if(!fe.getTarget().edges().hasNext()) {
-                       if(fe2ses.get(fe) == null) {
-                           fe2ses.put(fe, new Vector<ScheduleEdge>());
-                       }
-                       if(sn2fes.get((ScheduleNode)se.getSource()) == null) {
-                           sn2fes.put((ScheduleNode)se.getSource(), new Vector<FEdge>());
-                       }
-                       if(!fe2ses.get(fe).contains(se)) {
-                           fe2ses.get(fe).add(se);
-                       }
-                       if(!sn2fes.get((ScheduleNode)se.getSource()).contains(fe)) {
-                           sn2fes.get((ScheduleNode)se.getSource()).add(fe);
-                       }
-                   } else {
-                       // As this is not a last task, first handle available ScheduleEdges previously put into fe2ses
-                       if((same) && (sn2fes.containsKey(preSNode))) {
-                           Vector<FEdge> fes = sn2fes.remove(preSNode);
-                           for(int j = 0; j < fes.size(); j++) {
-                               FEdge tempfe = fes.elementAt(j);
-                               Vector<ScheduleEdge> ses = fe2ses.get(tempfe);
-                               ScheduleEdge tempse = ses.elementAt(0);
-                               long temptime = tempse.getListExeTime();
-                               // find out the ScheduleEdge with least exeTime
-                               for(int k = 1; k < ses.size(); k++) {
-                                   long ttemp = ses.elementAt(k).getListExeTime();
-                                   if(ttemp < temptime) {
-                                       tempse = ses.elementAt(k);
-                                       temptime = ttemp;
-                                   }
-                               }
-                               // handle the tempse
-                               handleScheduleEdge(tempse, true);
-                               ses.removeElement(tempse);
-                               // handle other ScheduleEdges
-                               for(int k = 0; k < ses.size(); k++) {
-                                   handleScheduleEdge(ses.elementAt(k), false);
-                               }
-                               ses = null;
-                               fe2ses.remove(tempfe);
-                           }
-                           fes = null;
-                       }
-
-                       if((!(se.getTransTime() < this.transThreshold)) && (se.getSourceCNode().getTransTime() < se.getTransTime())) {
-                           split = true;
-                           splitSNode(se, true);
-                       } else {
-                           // handle this ScheduleEdge
-                           handleScheduleEdge(se, true);
-                       }
-                   }
-               }
-           }
-       }
-       if(!fe2ses.isEmpty()) {
-           Set<FEdge> keys = fe2ses.keySet();
-           Iterator it_keys = keys.iterator();
-           while(it_keys.hasNext()) {
-               FEdge tempfe = (FEdge)it_keys.next();
-               Vector<ScheduleEdge> ses = fe2ses.get(tempfe);
-               ScheduleEdge tempse = ses.elementAt(0);
-               long temptime = tempse.getListExeTime();
-               // find out the ScheduleEdge with least exeTime
-               for(int k = 1; k < ses.size(); k++) {
-                   long ttemp = ses.elementAt(k).getListExeTime();
-                   if(ttemp < temptime) {
-                       tempse = ses.elementAt(k);
-                       temptime = ttemp;
-                   }
-               }
-               // handle the tempse
-               handleScheduleEdge(tempse, true);
-               ses.removeElement(tempse);
-               // handle other ScheduleEdges
-               for(int k = 0; k < ses.size(); k++) {
-                   handleScheduleEdge(ses.elementAt(k), false);
-               }
-               ses = null;
-           }
-           keys = null;
-           it_keys = null;
-       }
-       fe2ses.clear();
-       sn2fes.clear();
-       fe2ses = null;
-       sn2fes = null;
-
-       if(this.state.PRINTSCHEDULING) {
-           SchedulingUtil.printScheduleGraph(this.state.outputdir + "scheduling_extend.dot", 
-                                             this.scheduleNodes);
-       }
+    if(this.state.PRINTSCHEDULING) {
+      SchedulingUtil.printScheduleGraph(
+          this.state.outputdir + "scheduling_extend.dot", this.scheduleNodes);
     }
+  }
 
-    private void handleScheduleEdge(ScheduleEdge se, 
-                                   boolean merge) {
-       try {
-           int rate = 0;
-           int repeat = (int)Math.ceil(se.getNewRate() * se.getProbability() / 100);
-           if(merge) {
-               try {
-                   if(se.getListExeTime() == 0) {
-                       rate = repeat;
-                   } else {
-                       rate = (int)Math.ceil((se.getTransTime() - calInExeTime(se.getSourceFState()))/ se.getListExeTime());
-                   }
-                   if(rate < 0 ) {
-                       rate = 0;
-                   }
-               } catch (Exception e) {
-                   e.printStackTrace();
-               }
-               if(0 == rate) {
-                   // clone the whole ScheduleNode lists starting with se's target
-                   for(int j = 1; j < repeat; j++ ) {
-                       cloneSNodeList(se, true);
-                   }
-                   se.setNewRate(1);
-                   se.setProbability(100);
-               } else {
-                   repeat -= rate;
-                   if(repeat > 0) {
-                       // clone the whole ScheduleNode lists starting with se's target
-                       for(int j = 0; j < repeat; j++ ) {
-                           cloneSNodeList(se, true);
-                       }
-                       se.setNewRate(rate);
-                       se.setProbability(100);
-                   }
-               }
-               // merge the original ScheduleNode to the source ScheduleNode
-               ((ScheduleNode)se.getSource()).mergeSEdge(se);
-               scheduleNodes.remove(se.getTarget());
-               scheduleEdges.remove(se);
-               // As se has been changed into an internal edge inside a ScheduleNode,
-               // change the source and target of se from original ScheduleNodes 
-               // into ClassNodes.
-               if(se.getType() == ScheduleEdge.NEWEDGE) {
-                   se.setTarget(se.getTargetCNode());
-                   //se.setSource(se.getSourceCNode());
-                   //se.getTargetCNode().addEdge(se);
-                   se.getSourceCNode().addEdge(se);
-               }
-           } else {
-               // clone the whole ScheduleNode lists starting with se's target
-               for(int j = 1; j < repeat; j++ ) {
-                   cloneSNodeList(se, true);
-               }
-               se.setNewRate(1);
-               se.setProbability(100);
-           }
-       } catch (Exception e) {
-           e.printStackTrace();
-           System.exit(-1);
-       }
+  private void handleScheduleEdge(ScheduleEdge se, 
+      boolean merge) {
+    try {
+      int rate = 0;
+      int repeat = (int)Math.ceil(se.getNewRate() * se.getProbability() / 100);
+      if(merge) {
+        try {
+          if(se.getListExeTime() == 0) {
+            rate = repeat;
+          } else {
+            rate = (int)Math.ceil(
+                (se.getTransTime()-calInExeTime(se.getSourceFState()))
+                /se.getListExeTime());
+          }
+          if(rate < 0 ) {
+            rate = 0;
+          }
+        } catch (Exception e) {
+          e.printStackTrace();
+        }
+        if(0 == rate) {
+          // clone the whole ScheduleNode lists starting with se's target
+          for(int j = 1; j < repeat; j++ ) {
+            cloneSNodeList(se, true);
+          }
+          se.setNewRate(1);
+          se.setProbability(100);
+        } else {
+          repeat -= rate;
+          if(repeat > 0) {
+            // clone the whole ScheduleNode lists starting with se's target
+            for(int j = 0; j < repeat; j++ ) {
+              cloneSNodeList(se, true);
+            }
+            se.setNewRate(rate);
+            se.setProbability(100);
+          }
+        }
+        // merge the original ScheduleNode to the source ScheduleNode
+        ((ScheduleNode)se.getSource()).mergeSEdge(se);
+        scheduleNodes.remove(se.getTarget());
+        scheduleEdges.remove(se);
+        // As se has been changed into an internal edge inside a ScheduleNode,
+        // change the source and target of se from original ScheduleNodes 
+        // into ClassNodes.
+        if(se.getType() == ScheduleEdge.NEWEDGE) {
+          se.setTarget(se.getTargetCNode());
+          //se.setSource(se.getSourceCNode());
+          //se.getTargetCNode().addEdge(se);
+          se.getSourceCNode().addEdge(se);
+        }
+      } else {
+        // clone the whole ScheduleNode lists starting with se's target
+        for(int j = 1; j < repeat; j++ ) {
+          cloneSNodeList(se, true);
+        }
+        se.setNewRate(1);
+        se.setProbability(100);
+      }
+    } catch (Exception e) {
+      e.printStackTrace();
+      System.exit(-1);
     }
+  }
+
+  private void cloneSNodeList(ScheduleEdge sEdge, 
+      boolean copyIE) throws Exception {
+    Hashtable<ClassNode, ClassNode> cn2cn = 
+      new Hashtable<ClassNode, ClassNode>(); // hashtable from classnode in 
+                                             // orignal se's targe to cloned one
+    ScheduleNode csNode = 
+      (ScheduleNode)((ScheduleNode)sEdge.getTarget()).clone(cn2cn, 0);
+    scheduleNodes.add(csNode);
+
+    // Clone all the external in ScheduleEdges
+    int i;
+    if(copyIE) {
+      Vector inedges = sEdge.getTarget().getInedgeVector();
+      for(i = 0; i < inedges.size(); i++) {
+        ScheduleEdge tse = (ScheduleEdge)inedges.elementAt(i);
+        ScheduleEdge se;
+        switch(tse.getType()) {
+        case ScheduleEdge.NEWEDGE: {
+          se = new ScheduleEdge(csNode,"new",tse.getFstate(),tse.getType(),0);
+          se.setProbability(100);
+          se.setNewRate(1);
+          break;
+        }
 
-    private void cloneSNodeList(ScheduleEdge sEdge, 
-                               boolean copyIE) throws Exception {
-       Hashtable<ClassNode, ClassNode> cn2cn = new Hashtable<ClassNode, ClassNode>();     // hashtable from classnode in orignal se's targe to cloned one
-       ScheduleNode csNode = (ScheduleNode)((ScheduleNode)sEdge.getTarget()).clone(cn2cn, 0);
-       scheduleNodes.add(csNode);
-
-       // Clone all the external in ScheduleEdges
-       int i;
-       if(copyIE) {
-           Vector inedges = sEdge.getTarget().getInedgeVector();
-           for(i = 0; i < inedges.size(); i++) {
-               ScheduleEdge tse = (ScheduleEdge)inedges.elementAt(i);
-               ScheduleEdge se;
-               switch(tse.getType()) {
-               case ScheduleEdge.NEWEDGE: {
-                   se = new ScheduleEdge(csNode, "new", tse.getFstate(), tse.getType(), 0);
-                   se.setProbability(100);
-                   se.setNewRate(1);
-                   break;
-               }
-
-               case ScheduleEdge.TRANSEDGE: {
-                   se = new ScheduleEdge(csNode, "transmit", tse.getFstate(), tse.getType(), 0);
-                   se.setProbability(tse.getProbability());
-                   se.setNewRate(tse.getNewRate());
-                   break;
-               }
-
-               default: {
-                   throw new Exception("Error: not valid ScheduleEdge here");
-               }
-               }
-               se.setSourceCNode(tse.getSourceCNode());
-               se.setTargetCNode(cn2cn.get(tse.getTargetCNode()));
-               se.setFEdge(tse.getFEdge());
-               se.setTargetFState(tse.getTargetFState());
-               se.setIsclone(true);
-               tse.getSource().addEdge(se);
-               scheduleEdges.add(se);
-           }
-           inedges = null;
-       } else {
-           sEdge.getTarget().removeInedge(sEdge);
-           sEdge.setTarget(csNode);
-           csNode.getInedgeVector().add(sEdge);
-           sEdge.setTargetCNode(cn2cn.get(sEdge.getTargetCNode()));
-           sEdge.setIsclone(true);
-       }
-
-       Queue<ScheduleNode> toClone = new LinkedList<ScheduleNode>();     // all nodes to be cloned
-       Queue<ScheduleNode> clone = new LinkedList<ScheduleNode>();     //clone nodes
-       Queue<Hashtable> qcn2cn = new LinkedList<Hashtable>();     // queue of the mappings of classnodes inside cloned ScheduleNode
-       Vector<ScheduleNode> origins = new Vector<ScheduleNode>();     // queue of source ScheduleNode cloned
-       Hashtable<ScheduleNode, ScheduleNode> sn2sn = new Hashtable<ScheduleNode, ScheduleNode>();     // mapping from cloned ScheduleNode to clone ScheduleNode
-       clone.add(csNode);
-       toClone.add((ScheduleNode)sEdge.getTarget());
-       origins.addElement((ScheduleNode)sEdge.getTarget());
-       sn2sn.put((ScheduleNode)sEdge.getTarget(), csNode);
-       qcn2cn.add(cn2cn);
-       while(!toClone.isEmpty()) {
-           Hashtable<ClassNode, ClassNode> tocn2cn = new Hashtable<ClassNode, ClassNode>();
-           csNode = clone.poll();
-           ScheduleNode osNode = toClone.poll();
-           cn2cn = qcn2cn.poll();
-           // Clone all the external ScheduleEdges and the following ScheduleNodes
-           Vector edges = osNode.getEdgeVector();
-           for(i = 0; i < edges.size(); i++) {
-               ScheduleEdge tse = (ScheduleEdge)edges.elementAt(i);
-               ScheduleNode tSNode = (ScheduleNode)((ScheduleNode)tse.getTarget()).clone(tocn2cn, 0);
-               scheduleNodes.add(tSNode);
-               clone.add(tSNode);
-               toClone.add((ScheduleNode)tse.getTarget());
-               origins.addElement((ScheduleNode)tse.getTarget());
-               sn2sn.put((ScheduleNode)tse.getTarget(), tSNode);
-               qcn2cn.add(tocn2cn);
-               ScheduleEdge se = null;
-               switch(tse.getType()) {
-               case ScheduleEdge.NEWEDGE: {
-                   se = new ScheduleEdge(tSNode, "new", tse.getFstate(), tse.getType(), 0);
-                   break;
-               }
-
-               case ScheduleEdge.TRANSEDGE: {
-                   se = new ScheduleEdge(tSNode, "transmit", tse.getFstate(), tse.getType(), 0);
-                   break;
-               }
-
-               default: {
-                   throw new Exception("Error: not valid ScheduleEdge here");
-               }
-               }
-               se.setSourceCNode(cn2cn.get(tse.getSourceCNode()));
-               se.setTargetCNode(tocn2cn.get(tse.getTargetCNode()));
-               se.setFEdge(tse.getFEdge());
-               se.setTargetFState(tse.getTargetFState());
-               se.setProbability(tse.getProbability());
-               se.setNewRate(tse.getNewRate());
-               se.setIsclone(true);
-               csNode.addEdge(se);
-               scheduleEdges.add(se);
-           }
-           tocn2cn = null;
-           edges = null;
-       }
-
-       toClone = null;
-       clone = null;
-       qcn2cn = null;
-       cn2cn.clear();
-       cn2cn = null;
-       origins = null;
-       sn2sn = null;
+        case ScheduleEdge.TRANSEDGE: {
+          se = new ScheduleEdge(csNode,"transmit",tse.getFstate(),tse.getType(),0);
+          se.setProbability(tse.getProbability());
+          se.setNewRate(tse.getNewRate());
+          break;
+        }
+
+        default: {
+          throw new Exception("Error: not valid ScheduleEdge here");
+        }
+        }
+        se.setSourceCNode(tse.getSourceCNode());
+        se.setTargetCNode(cn2cn.get(tse.getTargetCNode()));
+        se.setFEdge(tse.getFEdge());
+        se.setTargetFState(tse.getTargetFState());
+        se.setIsclone(true);
+        tse.getSource().addEdge(se);
+        scheduleEdges.add(se);
+      }
+      inedges = null;
+    } else {
+      sEdge.getTarget().removeInedge(sEdge);
+      sEdge.setTarget(csNode);
+      csNode.getInedgeVector().add(sEdge);
+      sEdge.setTargetCNode(cn2cn.get(sEdge.getTargetCNode()));
+      sEdge.setIsclone(true);
     }
 
-    private long calInExeTime(FlagState fs) throws Exception {
-       long exeTime = 0;
-       ClassDescriptor cd = fs.getClassDescriptor();
-       ClassNode cNode = cd2ClassNode.get(cd);
-       exeTime = cNode.getFlagStates().elementAt(0).getExeTime() - fs.getExeTime();
-       while(true) {
-           Vector inedges = cNode.getInedgeVector();
-           // Now that there are associate ScheduleEdges, there may be multiple inedges of a ClassNode
-           if(inedges.size() > 1) {
-               throw new Exception("Error: ClassNode's inedges more than one!");
-           }
-           if(inedges.size() > 0) {
-               ScheduleEdge sEdge = (ScheduleEdge)inedges.elementAt(0);
-               cNode = (ClassNode)sEdge.getSource();
-               exeTime += cNode.getFlagStates().elementAt(0).getExeTime();
-           } else {
-               break;
-           }
-           inedges = null;
-       }
-       exeTime = cNode.getScheduleNode().getExeTime() - exeTime;
-       return exeTime;
+    Queue<ScheduleNode> toClone = new LinkedList<ScheduleNode>(); // all nodes to be cloned
+    Queue<ScheduleNode> clone = new LinkedList<ScheduleNode>();  //clone nodes
+    Queue<Hashtable> qcn2cn = new LinkedList<Hashtable>(); // queue of the mappings of classnodes inside cloned ScheduleNode
+    Vector<ScheduleNode> origins = new Vector<ScheduleNode>();  // queue of source ScheduleNode cloned
+    Hashtable<ScheduleNode, ScheduleNode> sn2sn = 
+      new Hashtable<ScheduleNode, ScheduleNode>(); // mapping from cloned ScheduleNode to clone ScheduleNode
+    clone.add(csNode);
+    toClone.add((ScheduleNode)sEdge.getTarget());
+    origins.addElement((ScheduleNode)sEdge.getTarget());
+    sn2sn.put((ScheduleNode)sEdge.getTarget(), csNode);
+    qcn2cn.add(cn2cn);
+    while(!toClone.isEmpty()) {
+      Hashtable<ClassNode, ClassNode> tocn2cn = 
+        new Hashtable<ClassNode, ClassNode>();
+      csNode = clone.poll();
+      ScheduleNode osNode = toClone.poll();
+      cn2cn = qcn2cn.poll();
+      // Clone all the external ScheduleEdges and the following ScheduleNodes
+      Vector edges = osNode.getEdgeVector();
+      for(i = 0; i < edges.size(); i++) {
+        ScheduleEdge tse = (ScheduleEdge)edges.elementAt(i);
+        ScheduleNode tSNode = 
+          (ScheduleNode)((ScheduleNode)tse.getTarget()).clone(tocn2cn, 0);
+        scheduleNodes.add(tSNode);
+        clone.add(tSNode);
+        toClone.add((ScheduleNode)tse.getTarget());
+        origins.addElement((ScheduleNode)tse.getTarget());
+        sn2sn.put((ScheduleNode)tse.getTarget(), tSNode);
+        qcn2cn.add(tocn2cn);
+        ScheduleEdge se = null;
+        switch(tse.getType()) {
+        case ScheduleEdge.NEWEDGE: {
+          se = new ScheduleEdge(tSNode,"new",tse.getFstate(),tse.getType(),0);
+          break;
+        }
+
+        case ScheduleEdge.TRANSEDGE: {
+          se = new ScheduleEdge(tSNode,"transmit",tse.getFstate(),tse.getType(),0);
+          break;
+        }
+
+        default: {
+          throw new Exception("Error: not valid ScheduleEdge here");
+        }
+        }
+        se.setSourceCNode(cn2cn.get(tse.getSourceCNode()));
+        se.setTargetCNode(tocn2cn.get(tse.getTargetCNode()));
+        se.setFEdge(tse.getFEdge());
+        se.setTargetFState(tse.getTargetFState());
+        se.setProbability(tse.getProbability());
+        se.setNewRate(tse.getNewRate());
+        se.setIsclone(true);
+        csNode.addEdge(se);
+        scheduleEdges.add(se);
+      }
+      tocn2cn = null;
+      edges = null;
     }
 
-    private ScheduleNode splitSNode(ScheduleEdge se, 
-                                   boolean copy) {
-       assert(ScheduleEdge.NEWEDGE == se.getType());
-
-       FEdge fe = se.getFEdge();
-       FlagState fs = (FlagState)fe.getTarget();
-       FlagState nfs = (FlagState)fs.clone();
-       fs.getEdgeVector().removeAllElements();
-       nfs.getInedgeVector().removeAllElements();
-       ClassNode sCNode = se.getSourceCNode();
-
-       // split the subtree whose root is nfs from the whole flag transition tree
-       Vector<FlagState> sfss = sCNode.getFlagStates();
-       Vector<FlagState> fStates = new Vector<FlagState>();
-       Queue<FlagState> toiterate = new LinkedList<FlagState>();
-       toiterate.add(nfs);
-       fStates.add(nfs);
-       while(!toiterate.isEmpty()) {
-           FlagState tfs = toiterate.poll();
-           Iterator it_edges = tfs.edges();
-           while(it_edges.hasNext()) {
-               FlagState temp = (FlagState)((FEdge)it_edges.next()).getTarget();
-               if(!fStates.contains(temp)) {
-                   fStates.add(temp);
-                   toiterate.add(temp);
-                   sfss.removeElement(temp);
-               }
-           }
-           it_edges = null;
-       }
-       sfss = null;
-       Vector<FlagState> sFStates = FlagState.DFS.topology(fStates, null);
-       fStates = null;
-       // create a ClassNode and ScheduleNode for this subtree
-       ClassNode cNode = new ClassNode(sCNode.getClassDescriptor(), sFStates);
-       ScheduleNode sNode = new ScheduleNode(cNode, 0);
-       cNode.setScheduleNode(sNode);
-       cNode.setSorted(true);
-       cNode.setTransTime(sCNode.getTransTime());
-       classNodes.add(cNode);
-       scheduleNodes.add(sNode);
-       try {
-           sNode.calExeTime();
-       } catch (Exception e) {
-           e.printStackTrace();
-       }
-       // flush the exeTime of fs and its ancestors
-       fs.setExeTime(0);
-       toiterate.add(fs);
-       while(!toiterate.isEmpty()) {
-           FlagState tfs = toiterate.poll();
-           long ttime = tfs.getExeTime();
-           Iterator it_inedges = tfs.inedges();
-           while(it_inedges.hasNext()) {
-               FEdge fEdge = (FEdge)it_inedges.next();
-               FlagState temp = (FlagState)fEdge.getSource();
-               long time = fEdge.getExeTime() + ttime;
-               if(temp.getExeTime() > time) {
-                   temp.setExeTime(time);
-                   toiterate.add(temp);
-               }
-           }
-           it_inedges = null;
-       }
-       toiterate = null;
-
-       // create a 'trans' ScheudleEdge between this new ScheduleNode and se's source ScheduleNode
-       ScheduleEdge sEdge = new ScheduleEdge(sNode, "transmit", fs, ScheduleEdge.TRANSEDGE, 0);   //new ScheduleEdge(sNode, "transmit", cNode.getClassDescriptor(), false, 0);
-       sEdge.setFEdge(fe);
-       sEdge.setSourceCNode(sCNode);
-       sEdge.setTargetCNode(cNode);
-       sEdge.setTargetFState(nfs);
-       // TODO
-       // Add calculation codes for calculating transmit time of an object
-       sEdge.setTransTime(cNode.getTransTime());
-       se.getSource().addEdge(sEdge);
-       scheduleEdges.add(sEdge);
-       // remove the ClassNodes and internal ScheduleEdges out of this subtree 
-       // to the new ScheduleNode
-       ScheduleNode oldSNode = (ScheduleNode)se.getSource();
-       Iterator it_isEdges = oldSNode.getScheduleEdgesIterator();
-       Vector<ScheduleEdge> toremove = new Vector<ScheduleEdge>();
-       Vector<ClassNode> rCNodes = new Vector<ClassNode>();
-       rCNodes.addElement(sCNode);
-       if(it_isEdges != null) {
-           while(it_isEdges.hasNext()) {
-               ScheduleEdge tse = (ScheduleEdge)it_isEdges.next();
-               if(rCNodes.contains(tse.getSourceCNode())) {
-                   if(sCNode.equals(tse.getSourceCNode())) {
-                       if (!(tse.getSourceFState().equals(fs)) 
-                               && (sFStates.contains(tse.getSourceFState()))) {
-                           tse.setSource(cNode);
-                           tse.setSourceCNode(cNode);
-                       } else {
-                           continue;
-                       }
-                   }
-                   sNode.getScheduleEdges().addElement(tse);
-                   sNode.getClassNodes().addElement(tse.getTargetCNode());
-                   rCNodes.addElement(tse.getTargetCNode());
-                   oldSNode.getClassNodes().removeElement(tse.getTargetCNode());
-                   toremove.addElement(tse);
-               }
-           }
-       }
-       it_isEdges = null;
-       oldSNode.getScheduleEdges().removeAll(toremove);
-       toremove.clear();
-       // redirect ScheudleEdges out of this subtree to the new ScheduleNode
-       Iterator it_sEdges = se.getSource().edges();
-       while(it_sEdges.hasNext()) {
-           ScheduleEdge tse = (ScheduleEdge)it_sEdges.next();
-           if(!(tse.equals(se)) && !(tse.equals(sEdge)) 
-                   && (tse.getSourceCNode().equals(sCNode))) {
-               if(!(tse.getSourceFState().equals(fs)) 
-                       && (sFStates.contains(tse.getSourceFState()))) {
-                   tse.setSource(sNode);
-                   tse.setSourceCNode(cNode);
-                   sNode.getEdgeVector().addElement(tse);
-                   toremove.add(tse);
-               }
-           }
-       }
-       it_sEdges = null;
-       se.getSource().getEdgeVector().removeAll(toremove);
-       toremove = null;
-       rCNodes = null;
-       sFStates = null;
-
-       try {
-           if(!copy) {
-               //merge se into its source ScheduleNode
-               sNode.setCid(((ScheduleNode)se.getSource()).getCid());
-               ((ScheduleNode)se.getSource()).mergeSEdge(se);
-               scheduleNodes.remove(se.getTarget());
-               scheduleEdges.removeElement(se);
-               // As se has been changed into an internal edge inside a ScheduleNode,
-               // change the source and target of se from original ScheduleNodes 
-               // into ClassNodes.
-               if(se.getType() == ScheduleEdge.NEWEDGE) {
-                   se.setTarget(se.getTargetCNode());
-                   //se.setSource(se.getSourceCNode());
-                   //se.getTargetCNode().addEdge(se);
-                   se.getSourceCNode().addEdge(se);
-               }
-           } else {
-               sNode.setCid(ScheduleNode.colorID++);
-               handleScheduleEdge(se, true);
-           }
-       } catch (Exception e) {
-           e.printStackTrace();
-           System.exit(-1);
-       }
-
-       return sNode;
+    toClone = null;
+    clone = null;
+    qcn2cn = null;
+    cn2cn.clear();
+    cn2cn = null;
+    origins = null;
+    sn2sn = null;
+  }
+
+  private long calInExeTime(FlagState fs) throws Exception {
+    long exeTime = 0;
+    ClassDescriptor cd = fs.getClassDescriptor();
+    ClassNode cNode = cd2ClassNode.get(cd);
+    exeTime = cNode.getFlagStates().elementAt(0).getExeTime() - fs.getExeTime();
+    while(true) {
+      Vector inedges = cNode.getInedgeVector();
+      // Now that there are associate ScheduleEdges, there may be 
+      // multiple inedges of a ClassNode
+      if(inedges.size() > 1) {
+        throw new Exception("Error: ClassNode's inedges more than one!");
+      }
+      if(inedges.size() > 0) {
+        ScheduleEdge sEdge = (ScheduleEdge)inedges.elementAt(0);
+        cNode = (ClassNode)sEdge.getSource();
+        exeTime += cNode.getFlagStates().elementAt(0).getExeTime();
+      } else {
+        break;
+      }
+      inedges = null;
     }
+    exeTime = cNode.getScheduleNode().getExeTime() - exeTime;
+    return exeTime;
+  }
 
-    // TODO: restrict the number of generated scheduling according to the setted
-    // scheduleThreshold
-    private void coreMapping(int generateThreshold) throws Exception {
-       if(this.coreNum == -1) {
-           throw new Exception("Error: un-initialized coreNum when doing scheduling.");
-       }
-
-       if(this.scheduleGraphs == null) {
-           this.scheduleGraphs = new Vector<Vector<ScheduleNode>>();
-       }
-
-       int reduceNum = this.scheduleNodes.size() - this.coreNum;
-
-       // Combine some ScheduleNode if necessary
-       // May generate multiple graphs suggesting candidate schedulings
-       if(!(reduceNum > 0)) {
-           // Enough cores, no need to combine any ScheduleNode
-           this.scheduleGraphs.addElement(this.scheduleNodes);
-           int gid = 1;
-           if(this.state.PRINTSCHEDULING) {
-               String path = this.state.outputdir + "scheduling_" + gid + ".dot";
-               SchedulingUtil.printScheduleGraph(path, this.scheduleNodes);
-           }
-       } else {
-           SchedulingUtil.assignCids(this.scheduleNodes);
-           
-           // Go through all the Schedule Nodes, organize them in order of their cid
-           Vector<Vector<ScheduleNode>> sNodeVecs = 
-               SchedulingUtil.rangeScheduleNodes(this.scheduleNodes);
-
-           CombinationUtil.RootsGenerator rGen = 
-               CombinationUtil.allocateRootsGenerator(sNodeVecs, 
-                                                      this.coreNum);
-
-           int gid = 1;
-           Random rand = new Random();
-           while((gid <= this.scheduleThreshold) && (rGen.nextGen())) {
-               // first get the chosen rootNodes
-               Vector<Vector<ScheduleNode>> rootNodes = rGen.getRootNodes();
-               Vector<Vector<ScheduleNode>> nodes2combine = rGen.getNode2Combine();
-
-               CombinationUtil.CombineGenerator cGen = 
-                   CombinationUtil.allocateCombineGenerator(rootNodes, 
-                                                            nodes2combine);
-               while((gid <= this.scheduleThreshold) && cGen.nextGen()) {      
-                   if(Math.abs(rand.nextInt()) % 100 > generateThreshold) {
-                       Vector<Vector<CombinationUtil.Combine>> combine = cGen.getCombine();
-                       Vector<ScheduleNode> sNodes = 
-                           SchedulingUtil.generateScheduleGraph(this.state,
-                                                                this.scheduleNodes,
-                                                                this.scheduleEdges,
-                                                                rootNodes, 
-                                                                combine, 
-                                                                gid++);
-                       this.scheduleGraphs.add(sNodes);
-                       combine = null;
-                       sNodes = null;
-                   }
-               }
-               cGen.clear();
-               rootNodes = null;
-               nodes2combine = null;
-           }
-           rGen.clear();
-           sNodeVecs = null;
-       }
+  private ScheduleNode splitSNode(ScheduleEdge se, 
+      boolean copy) {
+    assert(ScheduleEdge.NEWEDGE == se.getType());
+
+    FEdge fe = se.getFEdge();
+    FlagState fs = (FlagState)fe.getTarget();
+    FlagState nfs = (FlagState)fs.clone();
+    fs.getEdgeVector().removeAllElements();
+    nfs.getInedgeVector().removeAllElements();
+    ClassNode sCNode = se.getSourceCNode();
+
+    // split the subtree whose root is nfs from the whole flag transition tree
+    Vector<FlagState> sfss = sCNode.getFlagStates();
+    Vector<FlagState> fStates = new Vector<FlagState>();
+    Queue<FlagState> toiterate = new LinkedList<FlagState>();
+    toiterate.add(nfs);
+    fStates.add(nfs);
+    while(!toiterate.isEmpty()) {
+      FlagState tfs = toiterate.poll();
+      Iterator it_edges = tfs.edges();
+      while(it_edges.hasNext()) {
+        FlagState temp = (FlagState)((FEdge)it_edges.next()).getTarget();
+        if(!fStates.contains(temp)) {
+          fStates.add(temp);
+          toiterate.add(temp);
+          sfss.removeElement(temp);
+        }
+      }
+      it_edges = null;
     }
-    
-    static class TaskInfo {
-       public int m_numofexits;
-       public long[] m_exetime;
-       public double[] m_probability;
-       public Vector<Hashtable<String, Integer>> m_newobjinfo;
-       public int m_byObj;
-
-       public TaskInfo(int numofexits) {
-           this.m_numofexits = numofexits;
-           this.m_exetime = new long[this.m_numofexits];
-           this.m_probability = new double[this.m_numofexits];
-           this.m_newobjinfo = new Vector<Hashtable<String, Integer>>();
-           for(int i = 0; i < this.m_numofexits; i++) {
-               this.m_newobjinfo.add(null);
-           }
-           this.m_byObj = -1;
-       }
+    sfss = null;
+    Vector<FlagState> sFStates = FlagState.DFS.topology(fStates, null);
+    fStates = null;
+    // create a ClassNode and ScheduleNode for this subtree
+    ClassNode cNode = new ClassNode(sCNode.getClassDescriptor(), sFStates);
+    ScheduleNode sNode = new ScheduleNode(cNode, 0);
+    cNode.setScheduleNode(sNode);
+    cNode.setSorted(true);
+    cNode.setTransTime(sCNode.getTransTime());
+    classNodes.add(cNode);
+    scheduleNodes.add(sNode);
+    try {
+      sNode.calExeTime();
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+    // flush the exeTime of fs and its ancestors
+    fs.setExeTime(0);
+    toiterate.add(fs);
+    while(!toiterate.isEmpty()) {
+      FlagState tfs = toiterate.poll();
+      long ttime = tfs.getExeTime();
+      Iterator it_inedges = tfs.inedges();
+      while(it_inedges.hasNext()) {
+        FEdge fEdge = (FEdge)it_inedges.next();
+        FlagState temp = (FlagState)fEdge.getSource();
+        long time = fEdge.getExeTime() + ttime;
+        if(temp.getExeTime() > time) {
+          temp.setExeTime(time);
+          toiterate.add(temp);
+        }
+      }
+      it_inedges = null;
+    }
+    toiterate = null;
+
+    // create a 'trans' ScheudleEdge between this new ScheduleNode and se's 
+    // source ScheduleNode
+    ScheduleEdge sEdge = 
+      new ScheduleEdge(sNode, "transmit", fs, ScheduleEdge.TRANSEDGE, 0);
+    sEdge.setFEdge(fe);
+    sEdge.setSourceCNode(sCNode);
+    sEdge.setTargetCNode(cNode);
+    sEdge.setTargetFState(nfs);
+    // TODO
+    // Add calculation codes for calculating transmit time of an object
+    sEdge.setTransTime(cNode.getTransTime());
+    se.getSource().addEdge(sEdge);
+    scheduleEdges.add(sEdge);
+    // remove the ClassNodes and internal ScheduleEdges out of this subtree 
+    // to the new ScheduleNode
+    ScheduleNode oldSNode = (ScheduleNode)se.getSource();
+    Iterator it_isEdges = oldSNode.getScheduleEdgesIterator();
+    Vector<ScheduleEdge> toremove = new Vector<ScheduleEdge>();
+    Vector<ClassNode> rCNodes = new Vector<ClassNode>();
+    rCNodes.addElement(sCNode);
+    if(it_isEdges != null) {
+      while(it_isEdges.hasNext()) {
+        ScheduleEdge tse = (ScheduleEdge)it_isEdges.next();
+        if(rCNodes.contains(tse.getSourceCNode())) {
+          if(sCNode.equals(tse.getSourceCNode())) {
+            if (!(tse.getSourceFState().equals(fs)) 
+                && (sFStates.contains(tse.getSourceFState()))) {
+              tse.setSource(cNode);
+              tse.setSourceCNode(cNode);
+            } else {
+              continue;
+            }
+          }
+          sNode.getScheduleEdges().addElement(tse);
+          sNode.getClassNodes().addElement(tse.getTargetCNode());
+          rCNodes.addElement(tse.getTargetCNode());
+          oldSNode.getClassNodes().removeElement(tse.getTargetCNode());
+          toremove.addElement(tse);
+        }
+      }
+    }
+    it_isEdges = null;
+    oldSNode.getScheduleEdges().removeAll(toremove);
+    toremove.clear();
+    // redirect ScheudleEdges out of this subtree to the new ScheduleNode
+    Iterator it_sEdges = se.getSource().edges();
+    while(it_sEdges.hasNext()) {
+      ScheduleEdge tse = (ScheduleEdge)it_sEdges.next();
+      if(!(tse.equals(se)) && !(tse.equals(sEdge)) 
+          && (tse.getSourceCNode().equals(sCNode))) {
+        if(!(tse.getSourceFState().equals(fs)) 
+            && (sFStates.contains(tse.getSourceFState()))) {
+          tse.setSource(sNode);
+          tse.setSourceCNode(cNode);
+          sNode.getEdgeVector().addElement(tse);
+          toremove.add(tse);
+        }
+      }
+    }
+    it_sEdges = null;
+    se.getSource().getEdgeVector().removeAll(toremove);
+    toremove = null;
+    rCNodes = null;
+    sFStates = null;
+
+    try {
+      if(!copy) {
+        //merge se into its source ScheduleNode
+        sNode.setCid(((ScheduleNode)se.getSource()).getCid());
+        ((ScheduleNode)se.getSource()).mergeSEdge(se);
+        scheduleNodes.remove(se.getTarget());
+        scheduleEdges.removeElement(se);
+        // As se has been changed into an internal edge inside a ScheduleNode,
+        // change the source and target of se from original ScheduleNodes 
+        // into ClassNodes.
+        if(se.getType() == ScheduleEdge.NEWEDGE) {
+          se.setTarget(se.getTargetCNode());
+          //se.setSource(se.getSourceCNode());
+          //se.getTargetCNode().addEdge(se);
+          se.getSourceCNode().addEdge(se);
+        }
+      } else {
+        sNode.setCid(ScheduleNode.colorID++);
+        handleScheduleEdge(se, true);
+      }
+    } catch (Exception e) {
+      e.printStackTrace();
+      System.exit(-1);
+    }
+
+    return sNode;
+  }
+
+  // TODO: restrict the number of generated scheduling according to the setted
+  // scheduleThreshold
+  private boolean coreMapping(int generateThreshold) throws Exception {
+    if(this.coreNum == -1) {
+      throw new Exception("Error: un-initialized coreNum when doing scheduling.");
+    }
+
+    if(this.scheduleGraphs == null) {
+      this.scheduleGraphs = new Vector<Vector<ScheduleNode>>();
+    }
+
+    int reduceNum = this.scheduleNodes.size() - this.coreNum;
+
+    // Combine some ScheduleNode if necessary
+    // May generate multiple graphs suggesting candidate schedulings
+    if(!(reduceNum > 0)) {
+      // Enough cores, no need to combine any ScheduleNode
+      this.scheduleGraphs.addElement(this.scheduleNodes);
+      int gid = 1;
+      if(this.state.PRINTSCHEDULING) {
+        String path = this.state.outputdir + "scheduling_" + gid + ".dot";
+        SchedulingUtil.printScheduleGraph(path, this.scheduleNodes);
+      }
+      return false;
+    } else {
+      SchedulingUtil.assignCids(this.scheduleNodes);
+
+      // Go through all the Schedule Nodes, organize them in order of their cid
+      Vector<Vector<ScheduleNode>> sNodeVecs = 
+        SchedulingUtil.rangeScheduleNodes(this.scheduleNodes);
+
+      CombinationUtil.RootsGenerator rGen = 
+        CombinationUtil.allocateRootsGenerator(sNodeVecs, 
+            this.coreNum);
+
+      int gid = 1;
+      Random rand = new Random();
+      boolean isBig = Math.pow(this.coreNum, reduceNum) > 1000;
+      while((!isBig || (gid <= this.scheduleThreshold)) && (rGen.nextGen())) {
+        // first get the chosen rootNodes
+        Vector<Vector<ScheduleNode>> rootNodes = rGen.getRootNodes();
+        Vector<Vector<ScheduleNode>> nodes2combine = rGen.getNode2Combine();
+
+        CombinationUtil.CombineGenerator cGen = 
+          CombinationUtil.allocateCombineGenerator(rootNodes, 
+              nodes2combine);
+        while((!isBig || (gid <= this.scheduleThreshold)) && (cGen.nextGen())) {  
+          boolean implement = true;
+          if(isBig) {
+            implement = Math.abs(rand.nextInt()) % 100 > generateThreshold;
+          }
+          if(implement) {
+            Vector<Vector<CombinationUtil.Combine>> combine = cGen.getCombine();
+            Vector<ScheduleNode> sNodes = 
+              SchedulingUtil.generateScheduleGraph(this.state,
+                  this.scheduleNodes,
+                  this.scheduleEdges,
+                  rootNodes, 
+                  combine, 
+                  gid++);
+            this.scheduleGraphs.add(sNodes);
+            combine = null;
+            sNodes = null;
+          }
+        }
+        cGen.clear();
+        rootNodes = null;
+        nodes2combine = null;
+      }
+      rGen.clear();
+      sNodeVecs = null;
+      return isBig;
+    }
+  }
+
+  static class TaskInfo {
+    public int m_numofexits;
+    public long[] m_exetime;
+    public double[] m_probability;
+    public Vector<Hashtable<String, Integer>> m_newobjinfo;
+    public int m_byObj;
+
+    public TaskInfo(int numofexits) {
+      this.m_numofexits = numofexits;
+      this.m_exetime = new long[this.m_numofexits];
+      this.m_probability = new double[this.m_numofexits];
+      this.m_newobjinfo = new Vector<Hashtable<String, Integer>>();
+      for(int i = 0; i < this.m_numofexits; i++) {
+        this.m_newobjinfo.add(null);
+      }
+      this.m_byObj = -1;
     }
+  }
 }
index 9544928e55704248a904255ec948a43f50259d31..f2eb67330ade3d84d44bea61968e3224861fd95a 100644 (file)
@@ -61,7 +61,7 @@ public class ScheduleSimulator {
   
   public long simulate(Vector<Vector<Schedule>> schedulings,
                      Vector<Integer> selectedScheduling,
-                     Vector<Vector<SimExecutionEdge>> selectedSimExeGraphs) {      
+                     Vector<SimExecutionNode> selectedSimExeGraphs) {      
       long processTime = Long.MAX_VALUE;
       /*if(schedulings.size() > 1500) {
          int index = 0;
@@ -99,19 +99,19 @@ public class ScheduleSimulator {
                  (Vector<Schedule>)it_scheduling.next();
              System.out.println("Scheduling index:" + scheduling.elementAt(0).getGid());
              this.setScheduling(scheduling);
-             Vector<SimExecutionEdge> simexegraph = new Vector<SimExecutionEdge>();
+             Vector<SimExecutionNode> simexegraph = new Vector<SimExecutionNode>();
              Vector<CheckPoint> checkpoints = new Vector<CheckPoint>();
              long tmpTime = process(checkpoints, simexegraph);
              if(tmpTime < processTime) {
                  selectedScheduling.clear();
                  selectedScheduling.add(index);
                  selectedSimExeGraphs.clear();
-                 selectedSimExeGraphs.add(simexegraph);
+                 selectedSimExeGraphs.add(simexegraph.elementAt(0));
                  processTime = tmpTime;
              } else if(tmpTime == processTime) {
                  if(!selectedScheduling.contains(index)) {
                      selectedScheduling.add(index);
-                     selectedSimExeGraphs.add(simexegraph);
+                     selectedSimExeGraphs.add(simexegraph.elementAt(0));
                  }
              }
              scheduling = null;
@@ -207,7 +207,7 @@ public class ScheduleSimulator {
   }
 
   public long process(Vector<CheckPoint> checkpoints,
-                     Vector<SimExecutionEdge> simexegraph) {
+                     Vector<SimExecutionNode> simexegraph) {
     assert(this.scheduling != null);
 
     this.invoketime++;
@@ -303,7 +303,6 @@ public class ScheduleSimulator {
            // handle TransTaskSimulator task's completion
            finishTransTaskSimulator(task,
                                      cp,
-                                     simexegraph,
                                      senode2action,
                                      lastseNodes,
                                      action2exetime,
@@ -345,7 +344,6 @@ public class ScheduleSimulator {
                          cp,
                          transObjs,
                          tttasks,
-                         simexegraph,
                          senode2action,
                          lastseNodes,
                          action2exetime,
@@ -363,6 +361,7 @@ public class ScheduleSimulator {
       
     // add the end node into the SimExecutionGraph
     SimExecutionNode seNode = new SimExecutionNode(this.coreNum, this.processTime);
+    simexegraph.addElement(seNode);
     for(int j = 0; j < lastseNodes.length; j++) {
        SimExecutionNode lastsenode = lastseNodes[j];
        // create edges between previous senode on this core to this node
@@ -398,7 +397,6 @@ public class ScheduleSimulator {
                                System.exit(-1);
                            }
                            lastedge.getTarget().addEdge(transseEdge);
-                           simexegraph.add(transseEdge);
                            transseEdge.addPredicate(lastedge);
                            seEdge.addPredicate(transseEdge);
                        } else {
@@ -410,7 +408,6 @@ public class ScheduleSimulator {
                }
            }
            taskparams = null;
-           simexegraph.add(seEdge); // add the seEdge afger all corresponding transfer edges
        }         
        lastseNodes[j] = null;
     }
@@ -446,7 +443,6 @@ public class ScheduleSimulator {
   
   private void finishTransTaskSimulator(TaskSimulator task,
                                        CheckPoint cp,
-                                       Vector<SimExecutionEdge> simexegraph,
                                        Hashtable<SimExecutionNode, Action> senode2action,
                                        SimExecutionNode[] lastseNodes,
                                        Hashtable<Action, Long> action2exetime,
@@ -507,7 +503,6 @@ public class ScheduleSimulator {
                                                        tmpaction.getTaskParams());
                      }
                      lastsenode.addEdge(seEdge);
-                     simexegraph.add(seEdge);
                  }
                  lastseNodes[targetCoreNum] = seNode;        
              }
@@ -683,7 +678,6 @@ public class ScheduleSimulator {
                               CheckPoint cp,
                               Vector<ObjectSimulator> nobjs,
                               Vector<TransTaskSimulator> tttasks,
-                              Vector<SimExecutionEdge> simexegraph,
                               Hashtable<SimExecutionNode, Action> senode2action,
                               SimExecutionNode[] lastseNodes,
                               Hashtable<Action, Long> action2exetime,
@@ -767,7 +761,6 @@ public class ScheduleSimulator {
                              System.exit(-1);
                          }
                          lastedge.getTarget().addEdge(transseEdge);
-                         simexegraph.add(transseEdge);
                          transseEdge.addPredicate(lastedge);
                          seEdge.addPredicate(transseEdge);
                      } else {
@@ -779,7 +772,6 @@ public class ScheduleSimulator {
              }
          }
          taskparams = null;
-         simexegraph.add(seEdge); // add the seEdge afger all corresponding transfer edges
          
          // set seEdge as the last execution edge for all newly created objs
          if(nobjs != null) {
index e6e9da63a1b4bf5a37a1355a5df10baffba05c08..8e7061c6d9d8203b9be2c4a5c535f5f8e04d808d 100644 (file)
@@ -964,9 +964,9 @@ public class BuildCodeMultiCore extends BuildCode {
   }
 
   protected void generateObjectDistribute(FlatFlagActionNode ffan, 
-                                         FlatMethod fm, 
-                                         LocalityBinding lb, 
-                                         TempDescriptor temp,
+                                             FlatMethod fm, 
+                                             LocalityBinding lb, 
+                                             TempDescriptor temp,
                                           PrintWriter output) {
     ClassDescriptor cd = temp.getType().getClassDesc();
     Vector<FlagState> initfstates = null;
@@ -1085,19 +1085,19 @@ public class BuildCodeMultiCore extends BuildCode {
                  output.println("enqueueObject("+super.generateTemp(fm, temp, lb)+", " + qinfo.qname +
                                 ", " + qinfo.length + ");");
                  output.println("}");
-               } else {
+               } /*else {
                  // TODO
                  // really needed?
-                 output.println("/* possibly needed by multi-parameter tasks on this core*/");
+                 output.println("/* possibly needed by multi-parameter tasks on this core*//*");
                  output.println("enqueueObject("+super.generateTemp(fm, temp, lb)+", NULL, 0);");
-               }
+               }*/  // deleted 09/07/06, multi-param tasks are pinned to one core now
              } else {
-               if(!isolate) {
+               /*if(!isolate) {
                  // TODO
                  // Is it possible to decide the actual queues?
-                 output.println("/* possibly needed by multi-parameter tasks on this core*/");
+                 output.println("/* possibly needed by multi-parameter tasks on this core*//*");
                  output.println("enqueueObject("+super.generateTemp(fm, temp, lb)+", NULL, 0);");
-               }
+               }*/ // deleted 09/07/06, multi-param tasks are pinned to one core now
                output.println("/* transfer to core " + targetcore.toString() + "*/");
                output.println("{");
                // enqueue this object and its destinations for later process
@@ -1125,12 +1125,12 @@ public class BuildCodeMultiCore extends BuildCode {
            }
            output.println("}");
          } else {
-           if(!isolate) {
+           /*if(!isolate) {
              // TODO
              // Is it possible to decide the actual queues?
-             output.println("/* possibly needed by multi-parameter tasks on this core*/");
+             output.println("/* possibly needed by multi-parameter tasks on this core*//*");
              output.println("enqueueObject("+super.generateTemp(fm, temp, lb)+", NULL, 0);");
-           }
+           }*/ // deleted 09/07/06, multi-param tasks are pinned to one core now
            output.println("/* transfer to core " + targetcore.toString() + "*/");
            output.println("{");
            // enqueue this object and its destinations for later process
@@ -1165,11 +1165,11 @@ public class BuildCodeMultiCore extends BuildCode {
            output.println("enqueueObject("+super.generateTemp(fm, temp, lb)+", " + qinfo.qname +
                           ", " + qinfo.length + ");");
            output.println("}");
-         } else {
+         } /*else {
            // TODO
            // really needed?
            output.println("enqueueObject("+super.generateTemp(fm, temp, lb)+", NULL, 0);");
-         }
+         }*/ // deleted 09/07/06, multi-param tasks are pinned to one core now
        }
 
        // codes for multi-params tasks
index 92eddf90480da13a8fa4fcf9ada91f4aed7811d8..1d52a75097fce82cd9fdf2caafadb6c3cdf79f10 100644 (file)
@@ -71,6 +71,7 @@ public class State {
   public boolean ARRAYPAD=false;
   public boolean SCHEDULING=false;
   public boolean USEPROFILE=false;
+  public String profilename = null;
   public boolean THREAD=false;
   public boolean CONSCHECK=false;
   public boolean INSTRUCTIONFAILURE=false;
index 6d8be64a21ab8af04711c8e9b152c2c6a38feb57..340df9d83c74f66365996ec0f47e2b7a37cf28a6 100644 (file)
@@ -62,6 +62,8 @@ public class Main {
 
     String outputdir = null;
     boolean isDistributeInfo = false;
+    boolean isDisAll = false;
+    int startnum = 0;
 
     for(int i=0; i<args.length; i++) {
       String option=args[i];
@@ -151,8 +153,14 @@ public class Main {
        state.SCHEDULING=true;
       else if (option.equals("-distributioninfo"))
        isDistributeInfo=true;
-      else if (option.equals("-useprofile"))
+      else if (option.equals("-disall"))
+        isDisAll=true;
+      else if (option.equals("-disstart"))
+        startnum = Integer.parseInt(args[++i]);
+      else if (option.equals("-useprofile")) {
        state.USEPROFILE=true;
+    state.profilename = args[++i];
+      }
       else if (option.equals("-thread"))
        state.THREAD=true;
       else if (option.equals("-dsm"))
@@ -386,17 +394,18 @@ public class Main {
                                                              ta,
                                                              oa);
        if(isDistributeInfo) {
-           mcImplSynthesis.distribution();
+           mcImplSynthesis.distribution(isDisAll, startnum);
        } else {
-           //double timeStartAnalysis = (double) System.nanoTime();
+           double timeStartAnalysis = (double) System.nanoTime();
            mcImplSynthesis.setScheduleThreshold(20);
            mcImplSynthesis.setProbThreshold(0);
            mcImplSynthesis.setGenerateThreshold(30);
            Vector<Schedule> scheduling = mcImplSynthesis.synthesis();
            
-           //double timeEndAnalysis = (double) System.nanoTime();
-           //double dt = (timeEndAnalysis - timeStartAnalysis)/(Math.pow( 10.0, 9.0 ) );
-           //System.err.println("The analysis took" + dt +  "sec.");
+           double timeEndAnalysis = (double) System.nanoTime();
+           double dt = (timeEndAnalysis - timeStartAnalysis)/(Math.pow( 10.0, 9.0 ) );
+           System.err.println("The analysis took" + dt +  "sec.");
+        System.exit(0);
 
            // generate multicore codes
            if(state.MULTICORE) {
index be74ff420514aa0703989ff5d238c47b44db9baf..a52ef83446cdd5b4e6858fc9cd98b47e67447b14 100644 (file)
 
 void * mycalloc(int m, int size) {
   void * p = NULL;
-  int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
+  int isize = size; //2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
   BAMBOO_START_CRITICAL_SECTION_MEM();
   p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize);
   if(p == NULL) {
-         exit(0xa024);
+         BAMBOO_EXIT(0xa024);
   }
   BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
-  return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK));
+  //return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK));
+  return p;
 }
 
 void * mycalloc_share(int m, int size) {
@@ -31,7 +32,7 @@ void * mycalloc_share(int m, int size) {
   BAMBOO_START_CRITICAL_SECTION_MEM();
   p = BAMBOO_SHARE_MEM_CALLOC_I(m, isize); // calloc(m, isize);
   if(p == NULL) {
-         exit(0xa025);
+         BAMBOO_EXIT(0xa025);
   }
   BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
   return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK));
@@ -39,15 +40,17 @@ void * mycalloc_share(int m, int size) {
 
 void * mycalloc_i(int m, int size) {
   void * p = NULL;
-  int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
+  int isize = size; //2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
   p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize);
   if(p == NULL) {
-         exit(0xa026);
+         BAMBOO_EXIT(0xa026);
   }
-  return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK));
+  return p;
+  //return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK));
 }
 
 void myfree(void * ptr) {
+  BAMBOO_LOCAL_MEM_FREE(ptr);
   return;
 }
 
index 8e033a789608710888c9a54235ed82b3d1d6942f..f6e1e3507853aebf1b5381bbb4b76e6445acc4d5 100644 (file)
@@ -30,7 +30,7 @@ void myfree(void * ptr);
 #define FREEMALLOC(x) mycalloc_share(1,x)
 #define RUNMALLOC(x) mycalloc(1,x) // handle interruption inside
 #define RUNMALLOC_I(x) mycalloc_i(1,x) // with interruption blocked beforehand
-#define RUNFREE(x) myfree(x);
+#define RUNFREE(x) myfree(x)
 //#define PTR(x) (32+(x-1)&~31)
 #endif  // #ifdef THREADSIMULATE
 #endif  // #ifdef MULTICORE
index 1601f536afe6e945d838672646315fd2f21a8732..80108f3ca84ce8c7be85e4dc309a819b27407484 100644 (file)
@@ -54,22 +54,17 @@ struct Queue objqueue;
 #define BAMBOO_SMEM_SIZE 16 * BAMBOO_PAGE_SIZE
 
 bool smemflag;
-struct bamboo_shared_mem {
-       mspace msp;
-       struct bamboo_shared_mem * next;
-};
-struct bamboo_smem_list {
-       struct bamboo_shared_mem * head;
-       struct bamboo_shared_mem * tail;
-};
-struct bamboo_smem_list * bamboo_free_msps;
+mspace bamboo_free_msp;
 mspace bamboo_cur_msp;
 int bamboo_smem_size;
 
+// for test TODO
+int total_num_t6;
+
 // data structures for profile mode
 #ifdef PROFILE
 
-#define TASKINFOLENGTH 10000
+#define TASKINFOLENGTH 30000
 //#define INTERRUPTINFOLENGTH 500
 
 bool stall;
@@ -146,6 +141,7 @@ void outputProfileData();
 //  BAMBOO_DEBUGPRINT_REG(x): print out value of variable x                        //
 //  BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of whose   //
 //                                 size in bytes is y on local memory              //
+//  BAMBOO_LOCAL_MEM_FREE(x): free space with ptr x on local memory                //
 //  BAMBOO_SHARE_MEM_CALLOC(x, y): allocate an array of x elements each of whose   //
 //                                 size in bytes is y on shared memory             //
 //  BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE()                                      //
index 76e945b7162fcf4f473a1397dd23d7aa5c6d5e1f..f5ed2cc94214227b285e46620694b3888ca9d7fc 100644 (file)
@@ -78,6 +78,8 @@ inline void run(void * arg) {
       profilestatus[i] = 1;
     }  
 #endif
+       // TODO for test
+       total_num_t6 = 0;
   }
   busystatus = true;
   self_numsendobjs = 0;
@@ -127,8 +129,8 @@ inline void run(void * arg) {
   //isInterrupt = true;
   totalexetime = -1;
   taskInfoIndex = 0;
-  /*interruptInfoIndex = 0;
   taskInfoOverflow = false;
+  /*interruptInfoIndex = 0;
   interruptInfoOverflow = false;*/
 #endif
 
@@ -203,12 +205,14 @@ inline void run(void * arg) {
                  // check if there are some pending objects, if yes, enqueue them and executetasks again
                  tocontinue = false;
 #ifdef PROFILE
+#ifdef ACCURATEPROFILE
                  {
                          bool isChecking = false;
                          if(!isEmpty(&objqueue)) {
                                  profileTaskStart("objqueue checking");
                                  isChecking = true;
                          }
+#endif
 #endif
                  while(!isEmpty(&objqueue)) {
                          void * obj = NULL;
@@ -310,11 +314,13 @@ objqueuebreak:
 #endif
                  }
 #ifdef PROFILE
+#ifdef ACCURATEPROFILE
                      if(isChecking) {
                                  profileTaskEnd();
                          }
                  }
 #endif
+#endif
 #ifdef DEBUG
                  BAMBOO_DEBUGPRINT(0xee02);
 #endif
@@ -399,6 +405,7 @@ objqueuebreak:
                                                                  totalexetime = BAMBOO_GET_EXE_TIME();
 #else
                                                                  BAMBOO_DEBUGPRINT(BAMBOO_GET_EXE_TIME());
+                                                                 BAMBOO_DEBUGPRINT_REG(total_num_t6); // TODO for test
                                                                  BAMBOO_DEBUGPRINT(0xbbbbbbbb);
 #endif
                                                                  // profile mode, send msgs to other cores to request pouring
@@ -1611,30 +1618,10 @@ msg:
                  BAMBOO_DEBUGPRINT(0xe88a);
 #endif
 #endif
-                 struct bamboo_shared_mem * cur_mem = bamboo_free_msps->head;
-                 void * mem = mspace_calloc(cur_mem->msp, 1, msgdata[1]);
-                 struct bamboo_shared_mem * failmem = cur_mem;
-                 while(mem == NULL) {
-                         if(msgdata[1] > BAMBOO_SMEM_SIZE) {
-                                 // move current head to the tail
-                                 bamboo_free_msps->tail->next = cur_mem;
-                                 bamboo_free_msps->tail = cur_mem;
-                                 bamboo_free_msps->head = cur_mem->next;
-                                 cur_mem->next = NULL;
-                                 cur_mem = bamboo_free_msps->head;
-                                 if(cur_mem == failmem) {
-                                         BAMBOO_EXIT(0xa016);
-                                 }
-                         } else {
-                                 // remove the head
-                                 bamboo_free_msps->head = cur_mem->next;
-                                 RUNFREE(cur_mem);
-                                 cur_mem = bamboo_free_msps->head;
-                                 if(cur_mem == NULL) {
-                                         BAMBOO_EXIT(0xa017);
-                                 }
-                         }
-                         mem = mspace_calloc(cur_mem->msp, 1, msgdata[1]);
+                 void * mem = mspace_calloc(bamboo_free_msp, 1, msgdata[1]);
+                 if(mem == NULL) {
+                         BAMBOO_DEBUGPRINT(0xa016);
+                         BAMBOO_EXIT(0xa016);
                  }
                  // send the start_va to request core
                 if(isMsgSending) {
@@ -1717,7 +1704,7 @@ int processlockrequest(int locktype, int lock, int obj, int requestcore, int roo
          BAMBOO_DEBUGPRINT_REG(lock);
          BAMBOO_DEBUGPRINT_REG(corenum);
 #endif
-         BAMBOO_EXIT(0xa018);
+         BAMBOO_EXIT(0xa017);
   }
   /*if((corenum == STARTUPCORE) && waitconfirm) {
          waitconfirm = false;
@@ -1840,7 +1827,7 @@ bool getreadlock(void * ptr) {
 #endif
                } else {
                        // conflicts on lockresults
-                       BAMBOO_EXIT(0xa019);
+                       BAMBOO_EXIT(0xa018);
                }
        }
     return true;
@@ -1870,7 +1857,7 @@ void releasereadlock(void * ptr) {
     // reside on this core
     if(!RuntimeHashcontainskey(locktbl, reallock)) {
       // no locks for this object, something is wrong
-      BAMBOO_EXIT(0xa01a);
+      BAMBOO_EXIT(0xa019);
     } else {
       int rwlock_obj = 0;
          struct LockValue * lockvalue = NULL;
@@ -1926,7 +1913,7 @@ bool getreadlock_I_r(void * ptr, void * redirectlock, int core, bool cache) {
 #endif
                        } else {
                                // conflicts on lockresults
-                               BAMBOO_EXIT(0xa01b);
+                               BAMBOO_EXIT(0xa01a);
                        }
                        return true;
                } else {
@@ -2010,7 +1997,7 @@ bool getwritelock(void * ptr) {
 #endif
                } else {
                        // conflicts on lockresults
-                       BAMBOO_EXIT(0xa01c);
+                       BAMBOO_EXIT(0xa01b);
                }
        }
     return true;
@@ -2047,7 +2034,7 @@ void releasewritelock(void * ptr) {
     // reside on this core
     if(!RuntimeHashcontainskey(locktbl, reallock)) {
       // no locks for this object, something is wrong
-      BAMBOO_EXIT(0xa01d);
+      BAMBOO_EXIT(0xa01c);
     } else {
       int rwlock_obj = 0;
          struct LockValue * lockvalue = NULL;
@@ -2087,7 +2074,7 @@ void releasewritelock_r(void * lock, void * redirectlock) {
     // reside on this core
     if(!RuntimeHashcontainskey(locktbl, reallock)) {
       // no locks for this object, something is wrong
-      BAMBOO_EXIT(0xa01e);
+      BAMBOO_EXIT(0xa01d);
     } else {
       int rwlock_obj = 0;
          struct LockValue * lockvalue = NULL;
@@ -2164,7 +2151,7 @@ bool getwritelock_I(void * ptr) {
 #endif
                } else {
                        // conflicts on lockresults
-                       BAMBOO_EXIT(0xa01f);
+                       BAMBOO_EXIT(0xa01e);
                }
                return true;
        }
@@ -2222,7 +2209,7 @@ bool getwritelock_I_r(void * ptr, void * redirectlock, int core, bool cache) {
 #endif
                        } else {
                                // conflicts on lockresults
-                               BAMBOO_EXIT(0xa020);
+                               BAMBOO_EXIT(0xa01f);
                        }
                        return true;
                } else {
@@ -2268,7 +2255,7 @@ void releasewritelock_I(void * ptr) {
     // reside on this core
     if(!RuntimeHashcontainskey(locktbl, reallock)) {
       // no locks for this object, something is wrong
-      BAMBOO_EXIT(0xa021);
+      BAMBOO_EXIT(0xa020);
     } else {
       int rwlock_obj = 0;
          struct LockValue * lockvalue = NULL;
@@ -2300,7 +2287,7 @@ void releasewritelock_I_r(void * lock, void * redirectlock) {
     // reside on this core
     if(!RuntimeHashcontainskey(locktbl, reallock)) {
       // no locks for this object, something is wrong
-      BAMBOO_EXIT(0xa022);
+      BAMBOO_EXIT(0xa021);
     } else {
       int rwlock_obj = 0;
          struct LockValue * lockvalue = NULL;
@@ -2617,7 +2604,9 @@ newtask:
     if (hashsize(activetasks)>0) {
       int i;
 #ifdef PROFILE
+#ifdef ACCURATEPROFILE
          profileTaskStart("tpd checking");
+#endif
 #endif
          busystatus = true;
       currtpd=(struct taskparamdescriptor *) getfirstkey(activetasks);
@@ -2735,8 +2724,10 @@ newtask:
                                  }
                          }
 #ifdef PROFILE
+#ifdef ACCURATEPROFILE
                          // fail, set the end of the checkTaskInfo
                          profileTaskEnd();
+#endif
 #endif
                          goto newtask;
                  } // line 2794: if(grount == 0)
@@ -2821,8 +2812,10 @@ newtask:
            RUNFREE(currtpd->parameterArray);
            RUNFREE(currtpd);
 #ifdef PROFILE
+#ifdef ACCURATEPROFILE
            // fail, set the end of the checkTaskInfo
                profileTaskEnd();
+#endif
 #endif
            goto newtask;
          } // line 2878: if (!ismet)
@@ -2887,7 +2880,7 @@ parameterpresent:
             reverse=NULL;
 #endif  // #if 0: for recovery
          BAMBOO_DEBUGPRINT_REG(x);
-         BAMBOO_EXIT(0xa023);
+         BAMBOO_EXIT(0xa022);
        } else {
 #endif // #ifndef MULTICORE
 #if 0 
@@ -2907,8 +2900,10 @@ parameterpresent:
 #endif  // #if 0: for garbage collection
 execute:
 #ifdef PROFILE
+#ifdef ACCURATEPROFILE
          // check finish, set the end of the checkTaskInfo
          profileTaskEnd();
+#endif
          profileTaskStart(currtpd->task->name);
 #endif
 
@@ -2927,11 +2922,13 @@ execute:
            ((void(*) (void **))currtpd->task->taskptr)(taskpointerarray);
          } // line 2990: if(debugtask)
 #ifdef PROFILE
+#ifdef ACCURATEPROFILE
          // task finish, set the end of the checkTaskInfo
          profileTaskEnd();
          // new a PostTaskInfo for the post-task execution
          profileTaskStart("post task execution");
 #endif
+#endif
 #ifdef DEBUG
          BAMBOO_DEBUGPRINT(0xe998);
          BAMBOO_DEBUGPRINT_REG(islock);
index 444bc73b1b05940756c23a59c45755301b1d7ebc..44eeadaccc40dca95a8c391a9f3cd8eac51ba49b 100755 (executable)
@@ -58,9 +58,13 @@ echo -o binary
 echo -nojava do not run bristlecone compiler
 echo -instructionfailures inject code for instructionfailures
 echo -profile build with profile options
+echo -accurateprofile build with accurate profile information including pre/post task processing info
 echo "-useio use standard io to output profiling data (should be used together with -raw and -profile), it only works with single core version"
 echo "-enable-assertions execute assert statements during compilation"
 echo -justanalyze exit after compiler analyses complete
+echo "-distributioninfo  execute to collect distribution info for simulated annealing in multi-core version"
+echo "-disall  execute to collect whole distribution"
+echo "-disstart specify the start number of distribution information collection"
 echo -assembly generate assembly
 echo -help help
 }
@@ -87,6 +91,7 @@ RAWCONFIG=''
 DEBUGFLAG=false
 RAWPATHFLAG=false
 PROFILEFLAG=false
+ACCURATEPROFILEFLAG=false
 USEIOFLAG=false
 INTERRUPTFLAG=false
 THREADSIMULATEFLAG=false;
@@ -213,6 +218,9 @@ elif [[ $1 = '-profile' ]]
 then
 PROFILEFLAG=true
 EXTRAOPTIONS="$EXTRAOPTIONS -pg"
+elif [[ $1 = '-accurateprofile' ]]
+then
+ACCURATEPROFILEFLAG=true
 elif [[ $1 = '-useio' ]]
 then
 USEIOFLAG=true
@@ -273,7 +281,8 @@ RECOVERFLAG=true
 JAVAOPTS="$JAVAOPTS -task"
 elif [[ $1 = '-useprofile' ]]
 then
-JAVAOPTS="$JAVAOPTS -useprofile"
+JAVAOPTS="$JAVAOPTS -useprofile $2"
+shift
 elif [[ $1 = '-webinterface' ]]
 then
 JAVAOPTS="$JAVAOPTS -webinterface"
@@ -344,6 +353,16 @@ THREADFLAG=true
 elif [[ $1 = '-distributioninfo' ]]
 then
 JAVAOPTS="$JAVAOPTS -distributioninfo"
+elif [[ $1 = '-disall' ]]
+then
+JAVAOPTS="$JAVAOPTS -disall"
+elif [[ $1 = '-disstart' ]]
+then
+JAVAOPTS="$JAVAOPTS -disstart $2"
+shift
+elif [[ $1 = '-noc' ]]
+then
+CCOMPILEFLAG=false
 elif [[ $1 = '-curdir' ]]
 then
 CURDIR=$2
@@ -404,7 +423,7 @@ fi
 
 if $MULTICOREFLAG
 then
-if ! ${ROBUSTROOT}/ourjava -Xms50m -Xmx800m $JAVAFORWARDOPTS -classpath $ROBUSTROOT/../cup/:$ROBUSTROOT Main.Main -classlibrary \
+if ! ${ROBUSTROOT}/ourjava -Xms50m -Xmx1500m $JAVAFORWARDOPTS -classpath $ROBUSTROOT/../cup/:$ROBUSTROOT Main.Main -classlibrary \
 $ROBUSTROOT/ClassLibrary/ -classlibrary $ROBUSTROOT/ClassLibrary/gnu/ \
 -dir $BUILDDIR $JAVAOPTS $SRCFILES
 then exit $?
@@ -495,6 +514,11 @@ then # profile version
 RAWRGCCFLAGS="${RAWRGCCFLAGS} -DPROFILE"
 fi
 
+if $ACCURATEPROFILEFLAG
+then # accurateprofile version
+RAWRGCCFLAGS="${RAWRGCCFLAGS} -DACCURATEPROFILE"
+fi
+
 if $USEIOFLAG
 then # useio version
 RAWRGCCFLAGS="${RAWRGCCFLAGS} -DUSEIO"
@@ -560,6 +584,11 @@ then # profile version
 TILERACFLAGS="${TILERACFLAGS} -DPROFILE"
 fi
 
+if $ACCURATEPROFILEFLAG
+then # accurateprofile version
+TILERACFLAGS="${TILERACFLAGS} -DACCURATEPROFILE"
+fi
+
 if $USEIOFLAG
 then # useio version
 TILERACFLAGS="${TILERACFLAGS} -DUSEIO"