From c40847d45aa5e4cc0820e6a526c9f326e6a0b118 Mon Sep 17 00:00:00 2001
From: jzhou <jzhou>
Date: Wed, 5 Aug 2009 00:22:33 +0000
Subject: [PATCH] add load balance module for multicore gc, fix message
 handling and memory allocation in multicore gc

---
 Robust/src/IR/Flat/BuildCode.java          |  80 +++--
 Robust/src/IR/Flat/BuildCodeMultiCore.java |  20 +-
 Robust/src/Runtime/mem.c                   |  30 +-
 Robust/src/Runtime/mem.h                   |   9 +-
 Robust/src/Runtime/multicoregarbage.c      | 375 +++++++++++++++------
 Robust/src/Runtime/multicoregarbage.h      | 147 ++++++--
 Robust/src/Runtime/multicoreruntime.c      |  25 +-
 Robust/src/Runtime/multicoreruntime.h      |   7 +-
 Robust/src/Runtime/multicoretask.c         | 115 ++++---
 9 files changed, 568 insertions(+), 240 deletions(-)
diff --git a/Robust/src/IR/Flat/BuildCode.java b/Robust/src/IR/Flat/BuildCode.java
index a7dc6644..28653932 100644
--- a/Robust/src/IR/Flat/BuildCode.java
+++ b/Robust/src/IR/Flat/BuildCode.java
@@ -299,13 +299,13 @@ public class BuildCode {
     }
     if (state.DSM) {
       outmethod.println("if (dstmStartup(argv[1])) {");
-      if (GENERATEPRECISEGC) {
+      if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	outmethod.println("  struct ArrayObject * stringarray=allocate_newarray(NULL, STRINGARRAYTYPE, argc-2);");
       } else {
 	outmethod.println("  struct ArrayObject * stringarray=allocate_newarray(STRINGARRAYTYPE, argc-2);");
       }
     } else {
-      if (GENERATEPRECISEGC) {
+      if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	outmethod.println("  struct ArrayObject * stringarray=allocate_newarray(NULL, STRINGARRAYTYPE, argc-1);");
       } else {
 	outmethod.println("  struct ArrayObject * stringarray=allocate_newarray(STRINGARRAYTYPE, argc-1);");
@@ -316,7 +316,7 @@ public class BuildCode {
     } else
       outmethod.println("  for(i=1;i<argc;i++) {");
     outmethod.println("    int length=strlen(argv[i]);");
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       outmethod.println("    struct ___String___ *newstring=NewString(NULL, argv[i], length);");
     } else {
       outmethod.println("    struct ___String___ *newstring=NewString(argv[i], length);");
@@ -331,7 +331,7 @@ public class BuildCode {
     ClassDescriptor cd=typeutil.getMainClass();
 
     outmethod.println("   {");
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       if (state.DSM||state.SINGLETM) {
 	outmethod.print("       struct "+cd.getSafeSymbol()+locality.getMain().getSignature()+md.getSafeSymbol()+"_"+md.getSafeMethodDescriptor()+"_params __parameterlist__={");
       } else
@@ -1067,7 +1067,7 @@ public class BuildCode {
     for(int i=0; i<fm.numParameters(); i++) {
       TempDescriptor temp=fm.getParameter(i);
       TypeDescriptor type=temp.getType();
-      if (type.isPtr()&&GENERATEPRECISEGC)
+      if (type.isPtr()&&((GENERATEPRECISEGC) || (this.state.MULTICOREGC)))
 	objectparams.addPtr(temp);
       else
 	objectparams.addPrim(temp);
@@ -1078,7 +1078,7 @@ public class BuildCode {
 
     for(int i=0; i<fm.numTags(); i++) {
       TempDescriptor temp=fm.getTag(i);
-      if (GENERATEPRECISEGC)
+      if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC))
 	objectparams.addPtr(temp);
       else
 	objectparams.addPrim(temp);
@@ -1098,7 +1098,7 @@ public class BuildCode {
       for(int i=0; i<writes.length; i++) {
 	TempDescriptor temp=writes[i];
 	TypeDescriptor type=temp.getType();
-	if (type.isPtr()&&GENERATEPRECISEGC)
+	if (type.isPtr()&&((GENERATEPRECISEGC) || (this.state.MULTICOREGC)))
 	  objecttemps.addPtr(temp);
 	else
 	  objecttemps.addPrim(temp);
@@ -1113,7 +1113,7 @@ public class BuildCode {
       for(Iterator<TempDescriptor> tmpit=backuptable.get(lb).values().iterator(); tmpit.hasNext();) {
 	TempDescriptor tmp=tmpit.next();
 	TypeDescriptor type=tmp.getType();
-	if (type.isPtr()&&GENERATEPRECISEGC)
+	if (type.isPtr()&&((GENERATEPRECISEGC) || (this.state.MULTICOREGC)))
 	  objecttemps.addPtr(tmp);
 	else
 	  objecttemps.addPrim(tmp);
@@ -1121,7 +1121,7 @@ public class BuildCode {
       /* Create temp to hold revert table */
       if (state.DSM&&(lb.getHasAtomic()||lb.isAtomic())) {
 	TempDescriptor reverttmp=new TempDescriptor("revertlist", typeutil.getClass(TypeUtil.ObjectClass));
-	if (GENERATEPRECISEGC)
+	if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC))
 	  objecttemps.addPtr(reverttmp);
 	else
 	  objecttemps.addPrim(reverttmp);
@@ -1358,7 +1358,7 @@ public class BuildCode {
 
   private void generateMethodParam(ClassDescriptor cn, MethodDescriptor md, LocalityBinding lb, PrintWriter output) {
     /* Output parameter structure */
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       ParamsObject objectparams=(ParamsObject) paramstable.get(lb!=null ? lb : md);
       if ((state.DSM||state.SINGLETM)&&lb!=null)
 	output.println("struct "+cn.getSafeSymbol()+lb.getSignature()+md.getSafeSymbol()+"_"+md.getSafeMethodDescriptor()+"_params {");
@@ -1384,7 +1384,7 @@ public class BuildCode {
     generateMethodParam(cn, md, lb, output);
 
     /* Output temp structure */
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       if (state.DSM||state.SINGLETM)
 	output.println("struct "+cn.getSafeSymbol()+lb.getSignature()+md.getSafeSymbol()+"_"+md.getSafeMethodDescriptor()+"_locals {");
       else
@@ -1424,7 +1424,7 @@ public class BuildCode {
       headersout.print(cn.getSafeSymbol()+md.getSafeSymbol()+"_"+md.getSafeMethodDescriptor()+"(");
     }
     boolean printcomma=false;
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       if (state.DSM||state.SINGLETM) {
 	headersout.print("struct "+cn.getSafeSymbol()+lb.getSignature()+md.getSafeSymbol()+"_"+md.getSafeMethodDescriptor()+"_params * "+paramsprefix);
       } else
@@ -1465,7 +1465,7 @@ public class BuildCode {
       TempObject objecttemps=(TempObject) tempstable.get(task);
 
       /* Output parameter structure */
-      if (GENERATEPRECISEGC) {
+      if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	output.println("struct "+task.getSafeSymbol()+"_params {");
 
 	output.println("  INTPTR size;");
@@ -1482,7 +1482,7 @@ public class BuildCode {
       }
 
       /* Output temp structure */
-      if (GENERATEPRECISEGC) {
+      if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	output.println("struct "+task.getSafeSymbol()+"_locals {");
 	output.println("  INTPTR size;");
 	output.println("  void * next;");
@@ -1503,7 +1503,7 @@ public class BuildCode {
       headersout.print("void " + task.getSafeSymbol()+"(");
 
       boolean printcomma=false;
-      if (GENERATEPRECISEGC) {
+      if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	headersout.print("struct "+task.getSafeSymbol()+"_params * "+paramsprefix);
       } else
 	headersout.print("void * parameterarray[]");
@@ -1630,7 +1630,7 @@ public class BuildCode {
       }
     }
 
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       if (md!=null&&(state.DSM||state.SINGLETM))
 	output.print("   struct "+cn.getSafeSymbol()+lb.getSignature()+md.getSafeSymbol()+"_"+md.getSafeMethodDescriptor()+"_locals "+localsprefix+"={");
       else if (md!=null&&!(state.DSM||state.SINGLETM))
@@ -1659,12 +1659,15 @@ public class BuildCode {
     /* Check to see if we need to do a GC if this is a
      * multi-threaded program...*/
 
-    if ((state.THREAD||state.DSM||state.SINGLETM)&&GENERATEPRECISEGC) {
+    if (((state.THREAD||state.DSM||state.SINGLETM)&&GENERATEPRECISEGC) 
+        || this.state.MULTICOREGC) {
       //Don't bother if we aren't in recursive methods...The loops case will catch it
       if (callgraph.getAllMethods(md).contains(md)) {
 	if (state.DSM&&lb.isAtomic())
 	  output.println("if (needtocollect) checkcollect2("+localsprefixaddr+");");
-	else
+	else if (this.state.MULTICOREGC) {
+      output.println("if(gcflag) gc("+localsprefixaddr+");");
+    } else
 	  output.println("if (needtocollect) checkcollect("+localsprefixaddr+");");
       }
     }
@@ -1839,7 +1842,7 @@ public class BuildCode {
 
     TempObject objecttemp=(TempObject) tempstable.get(md);
 
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       output.print("   struct "+cn.getSafeSymbol()+md.getSafeSymbol()+"_"+md.getSafeMethodDescriptor()+"_locals "+localsprefix+"={");
       output.print(objecttemp.numPointers()+",");
       output.print("(void*) &("+paramsprefix+"->size)");
@@ -1900,10 +1903,14 @@ public class BuildCode {
 
     // Check to see if we need to do a GC if this is a
     // multi-threaded program...    
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       //Don't bother if we aren't in recursive methods...The loops case will catch it
       if (callgraph.getAllMethods(md).contains(md)) {
-        output.println("if (needtocollect) checkcollect("+localsprefixaddr+");");
+        if(this.state.MULTICOREGC) {
+          output.println("if(gcflag) gc("+localsprefixaddr+");");
+        } else {
+          output.println("if (needtocollect) checkcollect("+localsprefixaddr+");");
+        }
       }
     }    
 
@@ -2327,10 +2334,13 @@ public class BuildCode {
       break;
 
     case FKind.FlatBackEdge:
-      if ((state.THREAD||state.DSM||state.SINGLETM)&&GENERATEPRECISEGC) {
+      if (((state.THREAD||state.DSM||state.SINGLETM)&&GENERATEPRECISEGC)
+          || (this.state.MULTICOREGC)) {
 	if(state.DSM&&locality.getAtomic(lb).get(fn).intValue()>0) {
 	  output.println("if (needtocollect) checkcollect2("+localsprefixaddr+");");
-	} else
+	} else if(this.state.MULTICOREGC) {
+      output.println("if (gcflag) gc("+localsprefixaddr+");");
+    } else
 	  output.println("if (needtocollect) checkcollect("+localsprefixaddr+");");
       } else
 	output.println("/* nop */");
@@ -2865,7 +2875,7 @@ public class BuildCode {
     ParamsObject objectparams=(ParamsObject)paramstable.get(lb!=null ? locality.getBinding(lb, fc) : md);
     ClassDescriptor cn=md.getClassDesc();
     output.println("{");
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       if (lb!=null) {
 	LocalityBinding fclb=locality.getBinding(lb, fc);
 	output.print("       struct "+cn.getSafeSymbol()+fclb.getSignature()+md.getSafeSymbol()+"_"+md.getSafeMethodDescriptor()+"_params __parameterlist__={");
@@ -2924,7 +2934,7 @@ public class BuildCode {
       output.print("(*)(");
 
       boolean printcomma=false;
-      if (GENERATEPRECISEGC) {
+      if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	if (lb!=null) {
 	  LocalityBinding fclb=locality.getBinding(lb, fc);
 	  output.print("struct "+cn.getSafeSymbol()+fclb.getSignature()+md.getSafeSymbol()+"_"+md.getSafeMethodDescriptor()+"_params * ");
@@ -2954,7 +2964,7 @@ public class BuildCode {
 
     output.print("(");
     boolean needcomma=false;
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       output.print("&__parameterlist__");
       needcomma=true;
     }
@@ -3280,8 +3290,8 @@ public class BuildCode {
 	/* Link object into list */
 	String revertptr=generateTemp(fm, reverttable.get(lb),lb);
 	output.println(revertptr+"=revertlist;");
-	if (GENERATEPRECISEGC)
-	  output.println("COPY_OBJ((struct garbagelist *)"+localsprefixaddr+",(struct ___Object___ *)"+dst+");");
+	if ((GENERATEPRECISEGC))
+        output.println("COPY_OBJ((struct garbagelist *)"+localsprefixaddr+",(struct ___Object___ *)"+dst+");");
 	else
 	  output.println("COPY_OBJ("+dst+");");
 	output.println(dst+"->"+nextobjstr+"="+revertptr+";");
@@ -3343,7 +3353,7 @@ public class BuildCode {
       int arrayid=state.getArrayNumber(fn.getType())+state.numClasses();
       if (fn.isGlobal()&&(state.DSM||state.SINGLETM)) {
 	output.println(generateTemp(fm,fn.getDst(),lb)+"=allocate_newarrayglobal("+arrayid+", "+generateTemp(fm, fn.getSize(),lb)+");");
-      } else if (GENERATEPRECISEGC) {
+      } else if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	output.println(generateTemp(fm,fn.getDst(),lb)+"=allocate_newarray("+localsprefixaddr+", "+arrayid+", "+generateTemp(fm, fn.getSize(),lb)+");");
       } else {
 	output.println(generateTemp(fm,fn.getDst(),lb)+"=allocate_newarray("+arrayid+", "+generateTemp(fm, fn.getSize(),lb)+");");
@@ -3351,7 +3361,7 @@ public class BuildCode {
     } else {
       if (fn.isGlobal()&&(state.DSM||state.SINGLETM)) {
 	output.println(generateTemp(fm,fn.getDst(),lb)+"=allocate_newglobal("+fn.getType().getClassDesc().getId()+");");
-      } else if (GENERATEPRECISEGC) {
+      } else if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	output.println(generateTemp(fm,fn.getDst(),lb)+"=allocate_new("+localsprefixaddr+", "+fn.getType().getClassDesc().getId()+");");
       } else {
 	output.println(generateTemp(fm,fn.getDst(),lb)+"=allocate_new("+fn.getType().getClassDesc().getId()+");");
@@ -3373,7 +3383,7 @@ public class BuildCode {
   }
 
   private void generateFlatTagDeclaration(FlatMethod fm, LocalityBinding lb, FlatTagDeclaration fn, PrintWriter output) {
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       output.println(generateTemp(fm,fn.getDst(),lb)+"=allocate_tag("+localsprefixaddr+", "+state.getTagId(fn.getType())+");");
     } else {
       output.println(generateTemp(fm,fn.getDst(),lb)+"=allocate_tag("+state.getTagId(fn.getType())+");");
@@ -3431,7 +3441,7 @@ public class BuildCode {
     if (fln.getValue()==null)
       output.println(generateTemp(fm, fln.getDst(),lb)+"=0;");
     else if (fln.getType().getSymbol().equals(TypeUtil.StringClass)) {
-      if (GENERATEPRECISEGC) {
+      if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	if (state.DSM && locality.getAtomic(lb).get(fln).intValue()>0) {
 	  //Stash pointer in case of GC
 	  String revertptr=generateTemp(fm, reverttable.get(lb),lb);
@@ -3520,7 +3530,7 @@ public class BuildCode {
     */
 
     boolean printcomma=false;
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       if (md!=null) {
 	if (state.DSM||state.SINGLETM) {
 	  output.print("struct "+cn.getSafeSymbol()+lb.getSignature()+md.getSafeSymbol()+"_"+md.getSafeMethodDescriptor()+"_params * "+paramsprefix);
@@ -3641,7 +3651,7 @@ public class BuildCode {
 	Iterator tagit=tagtmps.iterator();
 	while(tagit.hasNext()) {
 	  TempDescriptor tagtmp=(TempDescriptor)tagit.next();
-	  if (GENERATEPRECISEGC)
+	  if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC))
 	    output.println("tagclear("+localsprefixaddr+", (struct ___Object___ *)"+generateTemp(fm, temp,lb)+", "+generateTemp(fm,tagtmp,lb)+");");
 	  else
 	    output.println("tagclear((struct ___Object___ *)"+generateTemp(fm, temp,lb)+", "+generateTemp(fm,tagtmp,lb)+");");
@@ -3653,7 +3663,7 @@ public class BuildCode {
 	Iterator tagit=tagtmps.iterator();
 	while(tagit.hasNext()) {
 	  TempDescriptor tagtmp=(TempDescriptor)tagit.next();
-	  if (GENERATEPRECISEGC)
+	  if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC))
 	    output.println("tagset("+localsprefixaddr+", (struct ___Object___ *)"+generateTemp(fm, temp,lb)+", "+generateTemp(fm,tagtmp,lb)+");");
 	  else
 	    output.println("tagset((struct ___Object___ *)"+generateTemp(fm, temp, lb)+", "+generateTemp(fm,tagtmp, lb)+");");
diff --git a/Robust/src/IR/Flat/BuildCodeMultiCore.java b/Robust/src/IR/Flat/BuildCodeMultiCore.java
index 4e2a8f72..f1ba3a12 100644
--- a/Robust/src/IR/Flat/BuildCodeMultiCore.java
+++ b/Robust/src/IR/Flat/BuildCodeMultiCore.java
@@ -342,7 +342,7 @@ public class BuildCodeMultiCore extends BuildCode {
 	TempObject objecttemps=(TempObject) tempstable.get(task);
 
 	/* Output parameter structure */
-	if (GENERATEPRECISEGC) {
+	if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	  output.println("struct "+task.getCoreSafeSymbol(num)+"_params {");
 	  output.println("  int size;");
 	  output.println("  void * next;");
@@ -358,7 +358,7 @@ public class BuildCodeMultiCore extends BuildCode {
 	}
 
 	/* Output temp structure */
-	if (GENERATEPRECISEGC) {
+	if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	  output.println("struct "+task.getCoreSafeSymbol(num)+"_locals {");
 	  output.println("  int size;");
 	  output.println("  void * next;");
@@ -378,7 +378,7 @@ public class BuildCodeMultiCore extends BuildCode {
 	/* Output task declaration */
 	headersout.print("void " + task.getCoreSafeSymbol(num)+"(");
 
-	if (GENERATEPRECISEGC) {
+	if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	  headersout.print("struct "+task.getCoreSafeSymbol(num)+"_params * "+paramsprefix);
 	} else
 	  headersout.print("void * parameterarray[]");
@@ -587,12 +587,18 @@ public class BuildCodeMultiCore extends BuildCode {
 
     //ParamsObject objectparams=(ParamsObject)paramstable.get(lb!=null?lb:task);
     generateTaskHeader(fm, lb, task,output);
+    // output code to check if need to do gc
+    if(state.MULTICOREGC) {
+      output.println("#ifdef MULTICORE_GC");
+      output.println("gc(NULL);");  // gc() takes the stack root list; no locals struct has been emitted yet at this point
+      output.println("#endif");
+    }
     TempObject objecttemp=(TempObject) tempstable.get(lb!=null ? lb : task);
     /*if (state.DSM&&lb.getHasAtomic()) {
         output.println("transrecord_t * trans;");
        }*/
 
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       output.print("   struct "+task.getCoreSafeSymbol(num)+"_locals "+localsprefix+"={");
 
       output.print(objecttemp.numPointers()+",");
@@ -901,7 +907,7 @@ public class BuildCodeMultiCore extends BuildCode {
     output.print(task.getCoreSafeSymbol(num)+"(");
 
     boolean printcomma=false;
-    if (GENERATEPRECISEGC) {
+    if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
       output.print("struct "+task.getCoreSafeSymbol(num)+"_params * "+paramsprefix);
       printcomma=true;
     }
@@ -1636,7 +1642,7 @@ public class BuildCodeMultiCore extends BuildCode {
 	output.println(super.generateTemp(fm, fn.getDst(), lb)
 	               + "=allocate_newarrayglobal(trans, " + arrayid + ", "
 	               + super.generateTemp(fm, fn.getSize(), lb) + ");");
-      } else if (GENERATEPRECISEGC) {
+      } else if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	output.println(super.generateTemp(fm, fn.getDst(), lb)
 	               + "=allocate_newarray(&" + localsprefix + ", "
 	               + arrayid + ", " + super.generateTemp(fm, fn.getSize(), lb)
@@ -1651,7 +1657,7 @@ public class BuildCodeMultiCore extends BuildCode {
 	output.println(super.generateTemp(fm, fn.getDst(), lb)
 	               + "=allocate_newglobal(trans, "
 	               + fn.getType().getClassDesc().getId() + ");");
-      } else if (GENERATEPRECISEGC) {
+      } else if ((GENERATEPRECISEGC) || (this.state.MULTICOREGC)) {
 	output.println(super.generateTemp(fm, fn.getDst(), lb)
 	               + "=allocate_new(&" + localsprefix + ", "
 	               + fn.getType().getClassDesc().getId() + ");");
diff --git a/Robust/src/Runtime/mem.c b/Robust/src/Runtime/mem.c
index 0e1d8e59..e3995299 100644
--- a/Robust/src/Runtime/mem.c
+++ b/Robust/src/Runtime/mem.c
@@ -6,38 +6,58 @@
 
 void * mycalloc(int m, int size) {
   void * p = NULL;
-  int isize = size; //2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
+  int isize = size; 
   BAMBOO_START_CRITICAL_SECTION_MEM();
   p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize);
   if(p == NULL) {
 	  BAMBOO_EXIT(0xa024);
   }
   BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
-  //return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK));
   return p;
 }
 
+#ifdef MULTICORE_GC
+void * mycalloc_share(struct garbagelist * stackptr, int m, int size) { // shared-memory allocation that runs gc(stackptr) and retries on failure
+	void * p = NULL;
+  int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK); // '+'/'-' bind tighter than '&': the whole sum is masked
+memalloc:
+  BAMBOO_START_CRITICAL_SECTION_MEM();
+  p = BAMBOO_SHARE_MEM_CALLOC_I(m, isize); // calloc(m, isize);
+  if(p == NULL) {
+		// no more global shared memory: leave the critical section before collecting
+		BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
+		// start gc, handing it the caller's root list
+		gc(stackptr);
+		// NOTE(review): nothing here bounds the number of retries if gc() frees no memory
+		// try to malloc again
+		goto memalloc;
+  }
+  BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
+  return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK)); // (p + CACHE_LINE_SIZE - 1) masked; the pointer passes through int
+}
+#else
 void * mycalloc_share(int m, int size) {
   void * p = NULL;
   int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
   BAMBOO_START_CRITICAL_SECTION_MEM();
   p = BAMBOO_SHARE_MEM_CALLOC_I(m, isize); // calloc(m, isize);
   if(p == NULL) {
-	  BAMBOO_EXIT(0xa025);
+		// no more global shared memory
+		BAMBOO_EXIT(0xa025);
   }
   BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
   return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK));
 }
+#endif
 
 void * mycalloc_i(int m, int size) {
   void * p = NULL;
-  int isize = size; //2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
+  int isize = size; 
   p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize);
   if(p == NULL) {
 	  BAMBOO_EXIT(0xa026);
   }
   return p;
-  //return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK));
 }
 
 void myfree(void * ptr) {
diff --git a/Robust/src/Runtime/mem.h b/Robust/src/Runtime/mem.h
index f6e1e350..9fa64726 100644
--- a/Robust/src/Runtime/mem.h
+++ b/Robust/src/Runtime/mem.h
@@ -24,13 +24,18 @@
 //#define PTR(x) (x)
 #else
 void * mycalloc(int m, int size);
-void * mycalloc_share(int m, int size);
 void * mycalloc_i(int m, int size);
 void myfree(void * ptr);
-#define FREEMALLOC(x) mycalloc_share(1,x)
 #define RUNMALLOC(x) mycalloc(1,x) // handle interruption inside
 #define RUNMALLOC_I(x) mycalloc_i(1,x) // with interruption blocked beforehand
 #define RUNFREE(x) myfree(x)
+#ifdef MULTICORE_GC
+void * mycalloc_share(struct garbagelist * stackptr, int m, int size);
+#define FREEMALLOC(s, x) mycalloc_share((s),1,(x))
+#else
+void * mycalloc_share(int m, int size);
+#define FREEMALLOC(x) mycalloc_share(1,x)
+#endif // #ifdef MULTICORE_GC
 //#define PTR(x) (32+(x-1)&~31)
 #endif  // #ifdef THREADSIMULATE
 #endif  // #ifdef MULTICORE
diff --git a/Robust/src/Runtime/multicoregarbage.c b/Robust/src/Runtime/multicoregarbage.c
index 407361d5..6c206203 100644
--- a/Robust/src/Runtime/multicoregarbage.c
+++ b/Robust/src/Runtime/multicoregarbage.c
@@ -7,7 +7,67 @@
 
 extern struct genhashtable * activetasks;
 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
-extern struct taskparamdescriptor *currtpd;
+extern struct taskparamdescriptor *currtpd;
+
+struct largeObjList { // linked list of large-object records collected during marking
+	struct largeObjItem * head; // presumably the first record of the list — confirm against mark()
+	struct largeObjItem * tail; // last record; mark() appends through tail->next and then moves tail
+};
+
+struct largeObjList lObjList;
+
+#define NUMPTRS 100
+
+void gc_enqueue(void *ptr) { // append ptr at the head end of the block-linked pointer queue
+  if (gcheadindex==NUMPTRS) { // the current head block is full: chain another block
+    struct pointerblock * tmp;
+    if (gcspare!=NULL) { // reuse the cached spare block before allocating
+      tmp=gcspare;
+      gcspare=NULL;
+    } else
+      tmp=malloc(sizeof(struct pointerblock)); // NOTE(review): the result is not checked for NULL
+    gchead->next=tmp;
+    gchead=tmp;
+    gcheadindex=0;
+  }
+  gchead->ptrs[gcheadindex++]=ptr; // relies on gchead being non-NULL; tomark() allocates the first block
+}
+
+// dequeue through the gctail cursor; blocks that have been drained are recycled or freed
+void * gc_dequeue() {
+  if (gctailindex==NUMPTRS) { // the current tail block is fully consumed
+    struct pointerblock *tmp=gctail; // remember the drained block before stepping past it
+    gctail=gctail->next;
+    gctailindex=0;
+    if (gcspare!=NULL) // a spare is already cached, so release this block
+      free(tmp);
+    else // keep one drained block for gc_enqueue() to reuse
+      gcspare=tmp;
+  }
+  return gctail->ptrs[gctailindex++]; // no emptiness check is performed here
+}
+
+// dequeue through the second cursor (gctail2) without releasing any block
+void * gc_dequeue2() {
+  if (gctailindex2==NUMPTRS) { // this block is exhausted: step to the next one
+    // the block left behind is neither freed nor recycled here
+    gctail2=gctail2->next;
+    gctailindex2=0;
+  }
+  return gctail2->ptrs[gctailindex2++]; // no emptiness check is performed here
+}
+
+// 1 while the gctail cursor has not caught up with the head position, 0 otherwise
+int gc_moreItems() {
+  int caughtup = (gchead==gctail) && (gctailindex==gcheadindex);
+  return caughtup ? 0 : 1;
+}
+
+// 1 while the gctail2 cursor has not caught up with the head position, 0 otherwise
+int gc_moreItems2() {
+  int caughtup = (gchead==gctail2) && (gctailindex2==gcheadindex);
+  return caughtup ? 0 : 1;
+}
 
 INTPTR curr_heaptop = 0;
 
@@ -52,7 +112,7 @@ void transferMarkResults() {
 
 void checkMarkStatue() {
 	if((!gcwaitconfirm) || 
-			(gcwaitconfirm && (gcnumconfirm == 0))) {
+			(waitconfirm && (numconfirm == 0))) {
 		BAMBOO_START_CRITICAL_SECTION_STATUS();  
 		gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 		gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
@@ -76,13 +136,13 @@ void checkMarkStatue() {
 				sumsendobj -= gcnumreceiveobjs[i];
 			}
 			if(0 == sumsendobj) {
-				if(!gcwaitconfirm) {
+				if(!waitconfirm) {
 					// the first time found all cores stall
 					// send out status confirm msg to all other cores
 					// reset the corestatus array too
 					gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
-					gcwaitconfirm = true;
-					gcnumconfirm = NUMCORES - 1;
+					waitconfirm = true;
+					numconfirm = NUMCORES - 1;
 					for(i = 1; i < NUMCORES; ++i) {	
 						gccorestatus[i] = 1;
 						// send mark phase finish confirm request msg to core i
@@ -103,19 +163,150 @@ void checkMarkStatue() {
 	} // if((!gcwaitconfirm)...
 }
 
-void gc() {
+bool preGC() {
+	// preparation for gc: returns true only when the send and receive counts of all cores cancel out
+	// make sure to clear all incoming msgs especially transfer obj msgs
+	int i;
+	if((!waitconfirm) || 
+						  (waitconfirm && (numconfirm == 0))) {
+		// send out status confirm msgs to all cores to check if there are
+		// transfer obj msgs on-the-fly
+		waitconfirm = true;
+		numconfirm = NUMCORES - 1;
+		for(i = 1; i < NUMCORES; ++i) {	
+			corestatus[i] = 1;
+			// send status confirm msg to core i
+			send_msg_1(i, STATUSCONFIRM);
+		}
+		// NOTE(review): the loop below spins; presumably a message handler decrements numconfirm — confirm
+		while(numconfirm != 0) {} // wait for confirmations
+		numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs; // publish this core's own counters
+		numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
+		int sumsendobj = 0; // total sends minus total receives over all cores
+		for(i = 0; i < NUMCORES; ++i) {
+			sumsendobj += numsendobjs[i];
+		}		
+		for(i = 0; i < NUMCORES; ++i) {
+			sumsendobj -= numreceiveobjs[i];
+		}
+		if(0 == sumsendobj) {
+			return true; // every sent object has been received
+		} else {
+			// still have some transfer obj msgs on-the-fly, can not start gc
+			return false;
+		}
+	} else {
+		// previously asked for status confirmation and do not have all the 
+		// confirmations yet, can not start gc
+		return false;
+	}
+}
+
+// compute load balance for all cores
+void loadbalance() {
+	// first loop: rebalance the estimates in gcreloads[] between neighbours;
+	// second loop: derive gcdeltal[]/gcdeltar[] from gcloads[] - gcreloads[]
+	int i;
+	int delta = 0x7fffffff; // overwritten with deltanew at the top of every round
+	int deltanew = 0x7fffffff; // largest 32-bit int: round one can never look "unchanged"
+	int lcore = 0;
+	int rcore = 0;
+	bool stop = true;
+	for(i = 0; i < NUMCORES; i++) {
+		gcdeltal[i] = gcdeltar[i] = 0;
+		gcreloads[i] = gcloads[i];
+	}
+	do {
+		stop = true;
+		delta = deltanew;
+		deltanew = 0; // total this round's imbalance from zero instead of piling onto the last
+		for(i = 0; i < NUMCORES; i++) {
+			if(gcreloads[i] > BAMBOO_SMEM_SIZE_L) {
+				// too much load, try to redirect some of it to its neighbours
+				LEFTNEIGHBOUR(i, &lcore);
+				RIGHTNEIGHBOUR(i, &rcore);
+				if(lcore != -1) {
+					int tmp = (gcreloads[lcore] - gcreloads[i]) / 2;
+					gcdeltal[i] = tmp;
+					gcdeltar[lcore] = 0-tmp;
+					deltanew += abs(gcreloads[lcore] - gcreloads[i]);
+				}
+				if(rcore != -1) {
+					int tmp = (gcreloads[rcore] - gcreloads[i]) / 2;
+					gcdeltar[i] = tmp;
+					gcdeltal[rcore] = 0-tmp;
+					deltanew += abs(gcreloads[rcore] - gcreloads[i]);
+				}
+			}
+		}
+		deltanew /= 2;
+		if((deltanew == 0) || (delta == deltanew)) {
+			break; // balanced, or no change compared with the previous round
+		}
+		// flush for new loads
+		for(i = 0; i < NUMCORES; i++) {
+			if((gcdeltal[i] != 0) || (gcdeltar[i] != 0)) {
+				stop = false;
+				gcreloads[i] += gcdeltal[i] + gcdeltar[i];
+				gcdeltal[i] = gcdeltar[i] = 0;
+			}
+		}
+	} while(!stop);
+	for(i = 0; i < NUMCORES; i++) {
+		gcdeltal[i] = gcdeltar[i] = 0;
+	}
+	// decide how to do load balance
+	for(i = 0; i < NUMCORES; i++) {
+		int tomove = (gcloads[i] - gcreloads[i]);
+		if(tomove > 0) {
+			LEFTNEIGHBOUR(i, &lcore);
+			RIGHTNEIGHBOUR(i, &rcore);
+			int lmove = 0;
+			int rmove = 0;
+			if(lcore != -1) {
+				lmove = (gcreloads[lcore] - gcloads[lcore] - gcdeltal[lcore]);
+				if(lmove < 0) {
+					lmove = 0;
+				}
+			}
+			if(rcore != -1) {
+				rmove = (gcreloads[rcore] - gcloads[rcore] - gcdeltar[rcore]);
+				if(rmove < 0) {
+					rmove = 0;
+				}
+			}
+			// the one with bigger gap has higher priority
+			if(lmove > rmove) {
+				int ltomove = (lmove > tomove)? tomove:lmove;
+				gcdeltar[lcore] = ltomove; // lmove > 0 here, which only happens when lcore != -1
+				gcdeltal[i] = 0-ltomove;
+				if(rcore != -1) gcdeltal[rcore] = tomove - ltomove; // never index with a missing neighbour
+				gcdeltar[i] = ltomove - tomove;
+			} else {
+				int rtomove = (rmove > tomove)? tomove:rmove;
+				if(rcore != -1) gcdeltal[rcore] = rtomove; // rmove may be 0 because rcore is missing
+				gcdeltar[i] = 0-rtomove;
+				if(lcore != -1) gcdeltar[lcore] = tomove - rtomove; // never index with a missing neighbour
+				gcdeltal[i] = rtomove - tomove;
+			}
+		}
+	}
+}
+
+void gc(struct garbagelist * stackptr) {
 	// check if do gc
 	if(!gcflag) {
 		return;
-	} else {
-		// do gc
-		gcflag = false;
 	}
 
-	// TODO, preparation
-
 	// core coordinator routine
 	if(0 == BAMBOO_NUM_OF_CORE) {
+		if(!preGC()) {
+			// not ready to do gc
+			gcflag = true;
+			return;
+		}
+
 		int i = 0;
 		gcwaitconfirm = false;
 		gcwaitconfirm = 0;
@@ -129,7 +320,7 @@ void gc() {
 
 		// mark phase
 		while(MARKPHASE == gcphase) {
-			mark(isfirst);
+			mark(isfirst, stackptr);
 			if(isfirst) {
 				isfirst = false;
 			}
@@ -138,14 +329,16 @@ void gc() {
 			checkMarkStatue(); 
 		}  // while(MARKPHASE == gcphase)
 		// send msgs to all cores requiring large objs info
-		gcnumconfirm = NUMCORES - 1;
+		numconfirm = NUMCORES - 1;
 		for(i = 1; i < NUMCORES; ++i) {
 			send_msg_1(i, GCLOBJREQUEST);
 		}	
-		while(gcnumconfirm != 0) {} // wait for responses
-		// TODO compute load balance
+		while(numconfirm != 0) {} // wait for responses
+		loadbalance();
+		// TODO need to decide where to put large objects
 
 		// TODO cache all large objects
+
 		for(i = 1; i < NUMCORES; ++i) {
 			//TODO send start compact messages to all cores
 
@@ -202,66 +395,84 @@ void gc() {
 		}
 		return;
 	} else {
-		gc_collect();
+		gc_collect(stackptr);
 	}
+	gcflag = false;
 }
 
-void mark(bool isfirst) {
-	if(isfirst) {
-		if(MARKPHASE != gcphase) {
-			BAMBOO_EXIT(0xb002);
-		}
-		gcbusystatus = 1;
-		// initialize gctomark queue
-		while(!isEmpty(gctomark)) {
-			getItem(gctomark);
+// enqueue root objs
+void tomark(struct garbagelist * stackptr) {
+	if(MARKPHASE != gcphase) {
+		BAMBOO_EXIT(0xb002);
+	}
+	gcbusystatus = 1;
+	// initialize queue
+	if (gchead==NULL) {
+		gcheadindex=0;
+		gctailindex=0;
+		gctailindex2 = 0;
+		gchead=gctail=gctail2=malloc(sizeof(struct pointerblock));
+	}
+	int i, j; // j indexes the per-class parameter queues in the objectsets loop
+	// enqueue current stack 
+	while(stackptr!=NULL) {
+		for(i=0; i<stackptr->size; i++) {
+			gc_enqueue(stackptr->array[i]);
 		}
-		// enqueue current stack  TODO
-		
-		// enqueue objectsets
-		int i;
-		for(i=0; i<NUMCLASSES; i++) {
-			struct parameterwrapper ** queues=objectqueues[BAMBOO_NUM_OF_CORE][i];
-			int length = numqueues[BAMBOO_NUM_OF_CORE][i];
-			for(j = 0; j < length; ++j) {
-				struct parameterwrapper * parameter = queues[j];
-				struct ObjectHash * set=parameter->objectset;
-				struct ObjectNode * ptr=set->listhead;
-				while(ptr!=NULL) {
-					void *orig=(void *)ptr->key;
-					addNewItem(gctomark, orig); 
-					ptr=ptr->lnext;
-				}
+		stackptr=stackptr->next;
+	}
+	// enqueue objectsets
+	for(i=0; i<NUMCLASSES; i++) {
+		struct parameterwrapper ** queues=objectqueues[BAMBOO_NUM_OF_CORE][i];
+		int length = numqueues[BAMBOO_NUM_OF_CORE][i];
+		for(j = 0; j < length; ++j) {
+			struct parameterwrapper * parameter = queues[j];
+			struct ObjectHash * set=parameter->objectset;
+			struct ObjectNode * ptr=set->listhead;
+			while(ptr!=NULL) {
+				gc_enqueue((void *)ptr->key);
+				ptr=ptr->lnext;
 			}
 		}
-		// euqueue current task descriptor
-		for(i=0; i<currtpd->numParameters; i++) {
-			void *orig=currtpd->parameterArray[i];
-			addNewItem(gctomark, orig);  
-		}
-		// euqueue active tasks
-		struct genpointerlist * ptr=activetasks->list;
-		while(ptr!=NULL) {
-			struct taskparamdescriptor *tpd=ptr->src;
-			int i;
-			for(i=0; i<tpd->numParameters; i++) {
-				void * orig=tpd->parameterArray[i];
-				addNewItem(gctomark, orig); 
-			}
-			ptr=ptr->inext;
+	}
+	// enqueue current task descriptor
+	for(i=0; i<currtpd->numParameters; i++) {
+		gc_enqueue(currtpd->parameterArray[i]);
+	}
+	// enqueue active tasks
+	struct genpointerlist * ptr=activetasks->list;
+	while(ptr!=NULL) {
+		struct taskparamdescriptor *tpd=ptr->src;
+		int i;
+		for(i=0; i<tpd->numParameters; i++) {
+			gc_enqueue(tpd->parameterArray[i]);
 		}
+		ptr=ptr->inext;
+	}
+	// enqueue cached transferred obj
+	struct QueueItem * tmpobjptr =  getHead(&objqueue);
+	while(tmpobjptr != NULL) {
+		struct transObjInfo * objInfo = (struct transObjInfo *)(tmpobjptr->objectptr); 
+		gc_enqueue(objInfo->objptr);
+		tmpobjptr = getNextQueueItem(tmpobjptr); // advance; the argument is passed by value, so the result must be stored
+	}
+}
+
+void mark(bool isfirst, struct garbagelist * stackptr) {
+	if(isfirst) {
+		// enqueue root objs
+		tomark(stackptr);
 	}
 
 	// mark phase
 	while(MARKPHASE == gcphase) {
-		while(!isEmpty(gctomark)) {
-			voit * ptr = getItem(gctomark);
+		while(gc_moreItems2()) {
+			void * ptr = gc_dequeue2();
 			int size = 0;
 			int type = 0;
 			if(isLarge(ptr, &type, &size)) {
 				// ptr is a large object
-				// TODO
-/*				struct largeObjItem * loi = 
+				struct largeObjItem * loi = 
 					(struct largeObjItem *)RUNMALLOC(sizeof(struct largeObjItem));  
 				loi->orig = (INTPTR)ptr;
 				loi->dst = (INTPTR)0;
@@ -271,14 +482,15 @@ void mark(bool isfirst) {
 				} else {
 					lObjList.tail->next = loi;
 					lObjList.tail = loi;
-				}*/
+				}
 			} else if (isLocal(ptr)) {
 				// ptr is an active object on this core
 				if(type == -1) {
 					// nothing to do 
 				}
 				curr_heaptop += size;
-
+				// mark this obj
+				((int *)ptr)[6] = 1;
 			}
 			// scan all pointers in ptr
 			unsigned INTPTR * pointer;
@@ -296,7 +508,7 @@ void mark(bool isfirst) {
 					int host = hostcore(objptr);
 					if(BAMBOO_NUM_OF_CORE == host) {
 						// on this core
-						addNewItem(gctomark, objptr);  
+						gc_enqueue(objptr);  
 					} else {
 						// send a msg to host informing that objptr is active
 						send_msg_2(host, GCMARKEDOBJ, objptr);
@@ -312,7 +524,7 @@ void mark(bool isfirst) {
 					int host = hostcore(objptr);
 					if(BAMBOO_NUM_OF_CORE == host) {
 						// on this core
-						addNewItem(gctomark, objptr);  
+						gc_enqueue(objptr);  
 					} else {
 						// send a msg to host informing that objptr is active
 						send_msg_2(host, GCMARKEDOBJ, objptr);
@@ -323,7 +535,8 @@ void mark(bool isfirst) {
 		} // while(!isEmpty(gctomark))
 		gcbusystatus = false;
 		// send mark finish msg to core coordinator
-		send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE, gcself_numsendobjs, gcself_numreceiveobjs); 
+		send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
+				       gcself_numsendobjs, gcself_numreceiveobjs); 
 
 		if(BAMBOO_NUM_OF_CORE == 0) {
 			return;
@@ -507,43 +720,15 @@ void flush() {
 	
 } // flush()
 
-void gc_collect() {
+void gc_collect(struct garbagelist * stackptr) {
 	// core collector routine
-	// change to UDN1
-	bme_install_interrupt_handler(INT_UDN_AVAIL, gc_msghandler);
-#ifdef DEBUG
-	tprintf("Process %x(%d): change udn interrupt handler\n", BAMBOO_NUM_OF_CORE, 
-			BAMBOO_NUM_OF_CORE);
-#endif
-	__insn_mtspr(SPR_UDN_TAG_1, UDN1_DEMUX_TAG);
-	// enable udn interrupts
-	//__insn_mtspr(SPR_INTERRUPT_MASK_RESET_2_1, INT_MASK_HI(INT_UDN_AVAIL));
-	__insn_mtspr(SPR_UDN_AVAIL_EN, (1<<1));
-	BAMBOO_CLOSE_CRITICAL_SECTION_MSG();
-
-	lObjList.head = NULL;
-	lObjList.tail = NULL;
-	mObjList.head = NULL;
-	mObjList.tail = NULL;
-	mark(true);
+	mark(true, stackptr);
 	compact();
 	while(FLUSHPHASE != gcphase) {}
 	flush();
 	
 	while(true) {
 		if(FINISHPHASE == gcphase) {
-			// change to UDN0
-			bme_install_interrupt_handler(INT_UDN_AVAIL, udn_inter_handle);
-#ifdef DEBUG
-			tprintf("Process %x(%d): change back udn interrupt handler\n", BAMBOO_NUM_OF_CORE, 
-					BAMBOO_NUM_OF_CORE);
-#endif
-			__insn_mtspr(SPR_UDN_TAG_0, UDN0_DEMUX_TAG);
-			// enable udn interrupts
-			//__insn_mtspr(SPR_INTERRUPT_MASK_RESET_2_1, INT_MASK_HI(INT_UDN_AVAIL));
-			__insn_mtspr(SPR_UDN_AVAIL_EN, (1<<0));
-			BAMBOO_START_CRITICAL_SECTION_MSG();
-
 			return;
 		}
 	}
diff --git a/Robust/src/Runtime/multicoregarbage.h b/Robust/src/Runtime/multicoregarbage.h
index e2f8b695..851ec0da 100644
--- a/Robust/src/Runtime/multicoregarbage.h
+++ b/Robust/src/Runtime/multicoregarbage.h
@@ -11,19 +11,38 @@
 #define BAMBOO_SMEM_SIZE_L 512 * BAMBOO_PAGE_SIZE
 #define BAMBOO_LARGE_SMEM_BOUND BAMBOO_SMEM_SIZE_L * NUMCORES // NUMCORES = 62
 
-struct markedObjItem {
-	INTPTR orig;
-	INTPTR dst;
-	struct markedObjItem * next;
+struct garbagelist {
+  int size;
+  struct garbagelist *next;
+  void * array[];
+};
+
+struct listitem {
+  struct listitem * prev;
+  struct listitem * next;
+  struct garbagelist * stackptr;
 };
 
+struct pointerblock {
+  void * ptrs[NUMPTRS];
+  struct pointerblock *next;
+};
+
+struct pointerblock *gchead=NULL;
+int gcheadindex=0;
+struct pointerblock *gctail=NULL;
+int gctailindex=0;
+struct pointerblock *gctail2=NULL;
+int gctailindex2=0;
+struct pointerblock *gcspare=NULL;
+
 struct largeObjItem {
 	INTPTR orig;
 	INTPTR dst;
 	int length;
 	struct largeObjItem * next;
 };
-
+/*
 struct moveObj {
 	INTPTR * starts;
 	INTPTR * ends;
@@ -37,7 +56,7 @@ struct compactInstr {
 	struct moveObj * incomingobjs;
 	struct largeObjItem * largeobjs;
 };
-
+*/
 enum GCPHASETYPE {
 	MARKPHASE = 0x0,   // 0x0
 	COMPACTPHASE,      // 0x1
@@ -46,21 +65,24 @@ enum GCPHASETYPE {
 };
 
 volatile bool gcflag;
+volatile bool gcprocessing;
 GCPHASETYPE gcphase; // indicating GC phase
 bool gctomove; // flag indicating if can start moving objects to other cores
 struct Queue * gcdsts;
-struct Queue gctomark; // global queue of objects to mark
 // for mark phase termination
 int gccorestatus[NUMCORES]; // records status of each core
-                           // 1: running gc
-                           // 0: stall
+                            // 1: running gc
+                            // 0: stall
 int gcnumsendobjs[NUMCORES]; // records how many objects a core has sent out
 int gcnumreceiveobjs[NUMCORES]; // records how many objects a core has received
-int gcnumconfirm;
-bool gcwaitconfirm;
-bool gcbusystatus;
 int gcself_numsendobjs;
 int gcself_numreceiveobjs;
+// for load balancing
+int gcloads[NUMCORES];
+int gcreloads[NUMCORES];
+int gcdeltal[NUMCORES];
+int gcdeltar[NUMCORES];
+
 // compact instruction
 struct compactInstr * cinstruction;
 // mapping of old address to new address
@@ -69,7 +91,7 @@ int obj2map;
 int mappedobj;
 bool ismapped;
 
-#define BLOCKNUM(p, b) \
+#define BLOCKINDEX(p, b) \
 	if((p) < BAMBOO_LARGE_SMEM_BOUND) { \
 		(*((int*)b)) = (p) / BAMBOO_SMEM_SIZE_L; \
 	} else { \
@@ -78,33 +100,112 @@ bool ismapped;
 
 #define RESIDECORE(p, x, y) \
 	int b; \
-	BLOCKNUM((p), &b); \
+	BLOCKINDEX((p), &b); \
 	bool reverse = (b / NUMCORES) % 2; \
 	int l = b % NUMCORES; \
 	if(reverse) { \
-		if(l < 16) { \
+		if(l < 14) { \
 			l += 1; \
 		} else { \
 			l += 2; \
 		} \
-		(*((int*)y)) = 7 - l / 8; \
+		(*((int*)y)) = bamboo_width - 1 - l / bamboo_width; \
 	} else { \
 		if(l > 54) { \
 			l += 2; \
 		} else if(l > 47) {\
 			l += 1; \
 		} \
-		(*((int*)y)) = l / 8; \
+		(*((int*)y)) = l / bamboo_width; \
 	} \
-	if((l/8)%2) { \
-		(*((int*)x)) = 1 - l % 8; \
+	if((l/bamboo_width)%2) { \
+		(*((int*)x)) = bamboo_width - 1 - l % bamboo_width; \
 	} else { \
-		(*((int*)x)) = l % 8; \
+		(*((int*)x)) = l % bamboo_width; \
 	}
 
-void gc(); // core coordinator routine
-void gc_collect(); // core collector routine
-void transferMarkResults(); 
+// NOTE: n starts from 0
+#define NUMBLOCKS(s, n) \
+	if(s < BAMBOO_SMEM_SIZE_L) { \
+		(*((int*)n)) = 0; \
+	} else { \
+		(*((int*)n)) = 1 + (s - BAMBOO_SMEM_SIZE_L) / BAMBOO_SMEM_SIZE; \
+	}
+
+#define BASEPTR(c, n, p) \
+	int x; \
+  int y; \
+	int b; \
+  if(c > 5) c += 2; \
+  x = c / bamboo_height; \
+	y = c % bamboo_height; \
+	if(n%2) { \
+		if(y % 2) { \
+			b = bamboo_width - 1 - x + (bamboo_width - 1 - y) * bamboo_width; \
+		} else { \
+			b = x + (bamboo_width - 1 - y) * bamboo_width; \
+		} \
+		if(y>5) { \
+			b--; \
+		} else { \
+			b -= 2; \
+		} \
+		b += NUMCORES * n; \
+	} else { \
+		if(y % 2) { \
+			b = bamboo_width - 1 - x + y * bamboo_width; \
+		} else { \
+			b = x + y * bamboo_width; \
+		} \
+		if(y>5) b--; \
+		b += NUMCORES * n; \
+	} \
+	if(b < NUMCORES) { \
+		(*((int*)p)) = b * BAMBOO_SMEM_SIZE_L; \
+	} else { \
+		(*((int*)p)) = BAMBOO_LARGE_SMEM_BOUND + (b - NUMCORES) * BAMBOO_SMEM_SIZE; \
+	} 
+
+#define LEFTNEIGHBOUR(n, c) \
+	int x; \
+  int y; \
+  if(n > 5) n += 2; \
+  x = n / bamboo_height; \
+	y = n % bamboo_height; \
+	if((0 == n) || (15 == n)) { \
+		(*((int*)c)) = -1; \
+	} else if(n < 5) { \
+		if( 0 == y % 2) { \
+			(*((int*)c)) = y - 1; \
+		} else { \
+			(*((int*)c)) = y + 1; \
+		} \
+	} else if(5 == n) { \
+		(*((int*)c)) = (x + 1) * bamboo_height + y + 1 - 2; \
+	} else if(14 == n) { \
+		(*((int*)c)) = 5; \
+	} else { \
+		(*((int*)c)) = (x - 1) * bamboo_height + y - 2; \
+	} 
+
+#define RIGHTNEIGHBOUR(n, c) \
+	int x; \
+  int y; \
+  if(n > 5) n += 2; \
+  x = n / bamboo_height; \
+	y = n % bamboo_height; \
+	if(n < 56) { \
+		(*((int*)c)) = (x + 1) * bamboo_height + y - 2; \
+	} else if( 0 == y % 2) { \
+		(*((int*)c)) = x * bamboo_height + y + 1 - 2; \
+	} else { \
+		(*((int*)c)) = x * bamboo_height + y - 1 - 2; \
+	} 
+
+void gc(struct garbagelist * stackptr); // core coordinator routine
+void gc_collect(struct garbagelist * stackptr); // core collector routine
+void transferMarkResults();
+void gc_enqueue(void *ptr);
 
 #endif
 
diff --git a/Robust/src/Runtime/multicoreruntime.c b/Robust/src/Runtime/multicoreruntime.c
index 414e5f7e..0e84d7e3 100644
--- a/Robust/src/Runtime/multicoreruntime.c
+++ b/Robust/src/Runtime/multicoreruntime.c
@@ -188,41 +188,26 @@ void CALL01(___System______printString____L___String___,struct ___String___ * __
 
 /* Object allocation function */
 
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
 void * allocate_new(void * ptr, int type) {
-  struct ___Object___ * v=(struct ___Object___ *) mygcmalloc((struct garbagelist *) ptr, classsize[type]);
+  struct ___Object___ * v=(struct ___Object___ *)FREEMALLOC((struct garbagelist *) ptr, classsize[type]);
   v->type=type;
   v->version = 0;
-  //v->numlocks = 0;
   v->lock = NULL;
-#ifdef THREADS
-  v->tid=0;
-  v->lockentry=0;
-  v->lockcount=0;
-#endif
   return v;
 }
 
 /* Array allocation function */
 
 struct ArrayObject * allocate_newarray(void * ptr, int type, int length) {
-  struct ArrayObject * v=mygcmalloc((struct garbagelist *) ptr, sizeof(struct ArrayObject)+length*classsize[type]);
+  struct ArrayObject * v=(struct ArrayObject *)FREEMALLOC((struct garbagelist *) ptr, sizeof(struct ArrayObject)+length*classsize[type]);
   v->type=type;
   v->version = 0;
-  //v->numlocks = 0;
   v->lock = NULL;
   if (length<0) {
-#ifndef MULTICORE
-    printf("ERROR: negative array\n");
-#endif
     return NULL;
   }
   v->___length___=length;
-#ifdef THREADS
-  v->tid=0;
-  v->lockentry=0;
-  v->lockcount=0;
-#endif
   return v;
 }
 
@@ -251,13 +236,13 @@ struct ArrayObject * allocate_newarray(int type, int length) {
 
 
 /* Converts C character arrays into Java strings */
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
 struct ___String___ * NewString(void * ptr, const char *str,int length) {
 #else
 struct ___String___ * NewString(const char *str,int length) {
 #endif
   int i;
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
   struct ArrayObject * chararray=allocate_newarray((struct garbagelist *)ptr, CHARARRAYTYPE, length);
   int ptrarray[]={1, (int) ptr, (int) chararray};
   struct ___String___ * strobj=allocate_new((struct garbagelist *) &ptrarray, STRINGTYPE);
diff --git a/Robust/src/Runtime/multicoreruntime.h b/Robust/src/Runtime/multicoreruntime.h
index 591cad1c..1715d86d 100644
--- a/Robust/src/Runtime/multicoreruntime.h
+++ b/Robust/src/Runtime/multicoreruntime.h
@@ -66,7 +66,7 @@ volatile bool isMsgSending;
  * ProfileMsg: 6 + totalexetime (size is always 2 * sizeof(int))
  *             7 + corenum (size is always 2 * sizeof(int))
  * StatusMsg: c (size is always 1 * sizeof(int))
- *            d + status + corenum (size is always 3 * sizeof(int))
+ *            d + status + corenum + sendobjs + receiveobjs (size is always 5 * sizeof(int))
  *            status: 0 -- stall; 1 -- busy
  * TerminateMsg: e (size is always 1 * sizeof(int)
  * MemoryMsg: f + size + corenum (size is always 3 * sizeof(int))
@@ -84,7 +84,7 @@ volatile bool isMsgSending;
  *        1c + obj's address + corenum (size is always 3 * sizeof(int))
  *        1d + obj's address + dst address (size if always 3 * sizeof(int))
  *        1e (size is always 1 * sizeof(int))
- *        1f + size of msg + corenum + (num of large obj lists + (start address + length)+)?
+ *        1f + size of msg + corenum + current heap size + (num of large obj lists + (start address + length)+)?
  */
 enum MSGTYPE {
 	TRANSOBJ = 0x0,  // 0x0
@@ -136,12 +136,15 @@ bool busystatus;
 int self_numsendobjs;
 int self_numreceiveobjs;
 
+// get rid of lock msgs for GC version
+#ifndef MULTICORE_GC
 // data structures for locking
 struct RuntimeHash * objRedirectLockTbl;
 int lockobj;
 int lock2require;
 int lockresult;
 bool lockflag;
+#endif
 
 // data structures for waiting objs
 struct Queue objqueue;
diff --git a/Robust/src/Runtime/multicoretask.c b/Robust/src/Runtime/multicoretask.c
index 38bf9b58..2a0c5b21 100644
--- a/Robust/src/Runtime/multicoretask.c
+++ b/Robust/src/Runtime/multicoretask.c
@@ -90,7 +90,7 @@ inline void run(void * arg) {
   isMsgHanging = false;
   isMsgSending = false;
 
-  smemflag = false;
+  smemflag = true;
   bamboo_cur_msp = NULL;
   bamboo_smem_size = 0;
 
@@ -179,6 +179,11 @@ inline void run(void * arg) {
 #endif
 
 	  while(true) {
+#ifdef MULTICORE_GC
+			// check if need to do GC
+			gc(NULL);
+#endif
+
 		  // check if there are new active tasks can be executed
 		  executetasks();
 
@@ -516,29 +521,22 @@ void createstartupobject(int argc, char ** argv) {
   int i;
 
   /* Allocate startup object     */
-#if 0
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
   struct ___StartupObject___ *startupobject=(struct ___StartupObject___*) allocate_new(NULL, STARTUPTYPE);
   struct ArrayObject * stringarray=allocate_newarray(NULL, STRINGARRAYTYPE, argc-1);
 #else
   struct ___StartupObject___ *startupobject=(struct ___StartupObject___*) allocate_new(STARTUPTYPE);
   struct ArrayObject * stringarray=allocate_newarray(STRINGARRAYTYPE, argc-1);
 #endif
-#endif // #if 0: for garbage collection
-  struct ___StartupObject___ *startupobject=(struct ___StartupObject___*) allocate_new(STARTUPTYPE);
-  struct ArrayObject * stringarray=allocate_newarray(STRINGARRAYTYPE, argc-1);
   /* Build array of strings */
   startupobject->___parameters___=stringarray;
   for(i=1; i<argc; i++) {
     int length=strlen(argv[i]);
-#if 0
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
     struct ___String___ *newstring=NewString(NULL, argv[i],length);
 #else
     struct ___String___ *newstring=NewString(argv[i],length);
 #endif
-#endif // #if 0: for garbage collection
-	struct ___String___ *newstring=NewString(argv[i],length);
     ((void **)(((char *)&stringarray->___length___)+sizeof(int)))[i-1]=newstring;
   }
 
@@ -573,14 +571,11 @@ int comparetpd(struct taskparamdescriptor *ftd1, struct taskparamdescriptor *ftd
 }
 
 /* This function sets a tag. */
-#if 0
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
 void tagset(void *ptr, struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
 #else
 void tagset(struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
 #endif
-#endif // #if 0: for garbage collection
-void tagset(struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
   struct ArrayObject * ao=NULL;
   struct ___Object___ * tagptr=obj->___tags___;
   if (tagptr==NULL) {
@@ -592,8 +587,7 @@ void tagset(struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
       if (td==tagd) {
 	return;
       }
-#if 0
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
       int ptrarray[]={2, (int) ptr, (int) obj, (int)tagd};
       struct ArrayObject * ao=allocate_newarray(&ptrarray,TAGARRAYTYPE,TAGARRAYINTERVAL);
       obj=(struct ___Object___ *)ptrarray[2];
@@ -602,8 +596,6 @@ void tagset(struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
 #else
       ao=allocate_newarray(TAGARRAYTYPE,TAGARRAYINTERVAL);
 #endif
-#endif // #if 0: for garbage collection
-	  ao=allocate_newarray(TAGARRAYTYPE,TAGARRAYINTERVAL);
 
       ARRAYSET(ao, struct ___TagDescriptor___ *, 0, td);
       ARRAYSET(ao, struct ___TagDescriptor___ *, 1, tagd);
@@ -623,8 +615,7 @@ void tagset(struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
 	ARRAYSET(ao, struct ___TagDescriptor___ *, ao->___cachedCode___, tagd);
 	ao->___cachedCode___++;
       } else {
-#if 0
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
 	int ptrarray[]={2,(int) ptr, (int) obj, (int) tagd};
 	struct ArrayObject * aonew=allocate_newarray(&ptrarray,TAGARRAYTYPE,TAGARRAYINTERVAL+ao->___length___);
 	obj=(struct ___Object___ *)ptrarray[2];
@@ -633,8 +624,6 @@ void tagset(struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
 #else
 	struct ArrayObject * aonew=allocate_newarray(TAGARRAYTYPE,TAGARRAYINTERVAL+ao->___length___);
 #endif
-#endif // #if 0: for garbage collection
-	struct ArrayObject * aonew=allocate_newarray(TAGARRAYTYPE,TAGARRAYINTERVAL+ao->___length___);
 
 	aonew->___cachedCode___=ao->___length___+1;
 	for(i=0; i<ao->___length___; i++) {
@@ -650,8 +639,7 @@ void tagset(struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
     if(tagset==NULL) {
       tagd->flagptr=obj;
     } else if (tagset->type!=OBJECTARRAYTYPE) {
-#if 0
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
       int ptrarray[]={2, (int) ptr, (int) obj, (int)tagd};
       struct ArrayObject * ao=allocate_newarray(&ptrarray,OBJECTARRAYTYPE,OBJECTARRAYINTERVAL);
       obj=(struct ___Object___ *)ptrarray[2];
@@ -659,8 +647,6 @@ void tagset(struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
 #else
       struct ArrayObject * ao=allocate_newarray(OBJECTARRAYTYPE,OBJECTARRAYINTERVAL);
 #endif
-#endif // #if 0: for garbage collection
-	  struct ArrayObject * ao=allocate_newarray(OBJECTARRAYTYPE,OBJECTARRAYINTERVAL);
       ARRAYSET(ao, struct ___Object___ *, 0, tagd->flagptr);
       ARRAYSET(ao, struct ___Object___ *, 1, obj);
       ao->___cachedCode___=2;
@@ -671,8 +657,7 @@ void tagset(struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
 	ARRAYSET(ao, struct ___Object___*, ao->___cachedCode___++, obj);
       } else {
 	int i;
-#if 0
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
 	int ptrarray[]={2, (int) ptr, (int) obj, (int)tagd};
 	struct ArrayObject * aonew=allocate_newarray(&ptrarray,OBJECTARRAYTYPE,OBJECTARRAYINTERVAL+ao->___length___);
 	obj=(struct ___Object___ *)ptrarray[2];
@@ -681,8 +666,6 @@ void tagset(struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
 #else
 	struct ArrayObject * aonew=allocate_newarray(OBJECTARRAYTYPE,OBJECTARRAYINTERVAL);
 #endif
-#endif // #if 0: for garbage collection
-	struct ArrayObject * aonew=allocate_newarray(OBJECTARRAYTYPE,OBJECTARRAYINTERVAL);
 	aonew->___cachedCode___=ao->___cachedCode___+1;
 	for(i=0; i<ao->___length___; i++) {
 	  ARRAYSET(aonew, struct ___Object___*, i, ARRAYGET(ao, struct ___Object___*, i));
@@ -695,14 +678,11 @@ void tagset(struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
 }
 
 /* This function clears a tag. */
-#if 0
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
 void tagclear(void *ptr, struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
 #else
 void tagclear(struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
 #endif
-#endif // #if 0: for garbage collection
-void tagclear(struct ___Object___ * obj, struct ___TagDescriptor___ * tagd) {
   /* We'll assume that tag is alway there.
      Need to statically check for this of course. */
   struct ___Object___ * tagptr=obj->___tags___;
@@ -771,17 +751,14 @@ ENDCLEAR:
   return;
 }
 
-#if 0
 /* This function allocates a new tag. */
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
 struct ___TagDescriptor___ * allocate_tag(void *ptr, int index) {
-  struct ___TagDescriptor___ * v=(struct ___TagDescriptor___ *) mygcmalloc((struct garbagelist *) ptr, classsize[TAGTYPE]);
+  struct ___TagDescriptor___ * v=(struct ___TagDescriptor___ *) FREEMALLOC((struct garbagelist *) ptr, classsize[TAGTYPE]);
 #else
 struct ___TagDescriptor___ * allocate_tag(int index) {
   struct ___TagDescriptor___ * v=FREEMALLOC(classsize[TAGTYPE]);
 #endif
-#endif // #if 0: for garbage collection
-struct ___TagDescriptor___ * allocate_tag(int index) {
   struct ___TagDescriptor___ * v=FREEMALLOC(classsize[TAGTYPE]);
   v->type=TAGTYPE;
   v->flag=index;
@@ -1191,6 +1168,8 @@ msg:
       break;
     }
 
+// GC version has no lock msgs
+#ifndef MULTICORE_GC
     case LOCKREQUEST: {
       // receive lock request msg, handle it right now
       // check to see if there is a lock exist for the required obj
@@ -1278,6 +1257,7 @@ msg:
 			processlockrelease(data1, msgdata[2], 0, false);
       break;
     }
+#endif
 
 #ifdef PROFILE
     case PROFILEOUTPUT: {
@@ -1321,6 +1301,8 @@ msg:
     }
 #endif
 
+// GC version has no lock msgs
+#ifndef MULTICORE_GC
 	case REDIRECTLOCK: {
 	  // receive a redirect lock request msg, handle it right now
 		// check to see if there is a lock exist for the required obj
@@ -1411,6 +1393,7 @@ msg:
 		processlockrelease(data1, msgdata[2], msgdata[3], true);
 		break;
 	}
+#endif
 	
 	case STATUSCONFIRM: {
       // receive a status confirm info
@@ -1426,11 +1409,13 @@ msg:
 #endif
 #endif
 		  if(isMsgSending) {
-			  cache_msg_3(STARTUPCORE, STATUSREPORT, 
-						        busystatus?1:0, BAMBOO_NUM_OF_CORE);
+			  cache_msg_5(STARTUPCORE, STATUSREPORT, 
+						        busystatus?1:0, BAMBOO_NUM_OF_CORE,
+										self_numsendobjs, self_numreceiveobjs);
 		  } else {
-			  send_msg_3(STARTUPCORE, STATUSREPORT, 
-						       busystatus?1:0, BAMBOO_NUM_OF_CORE);
+			  send_msg_5(STARTUPCORE, STATUSREPORT, 
+						       busystatus?1:0, BAMBOO_NUM_OF_CORE,
+									 self_numsendobjs, self_numreceiveobjs);
 		  }
 		}
 	  break;
@@ -1454,6 +1439,8 @@ msg:
 			  numconfirm--;
 		  }
 		  corestatus[msgdata[2]] = msgdata[1];
+			numsendobjs[data1] = msgdata[2];
+			numreceiveobjs[data1] = msgdata[3];
 		}
 	  break;
 	}
@@ -1483,11 +1470,22 @@ msg:
 		  BAMBOO_DEBUGPRINT(0xe88a);
 #endif
 #endif
-		  // TODO change for GC
+#ifdef MULTICORE_GC
+			if(gcprocess) {
+				// gc is currently in progress, drop this msg
+				break;
+			}
+#endif
 		  void * mem = mspace_calloc(bamboo_free_msp, 1, msgdata[1]);
 		  if(mem == NULL) {
+				// not enough shared global memory
+#ifdef MULTICORE_GC
+				gcflag = true;
+				break;
+#else
 			  BAMBOO_DEBUGPRINT(0xa016);
 			  BAMBOO_EXIT(0xa016);
+#endif
 		  }
 		  // send the start_va to request core
 			if(isMsgSending) {
@@ -1505,6 +1503,12 @@ msg:
 #ifndef TILERA
 	  BAMBOO_DEBUGPRINT(0xe88b);
 #endif
+#endif
+#ifdef MULTICORE_GC
+			if(gcprocess) {
+				// gc is currently in progress, drop this msg
+				break;
+			}
 #endif
 	  if(msgdata[2] == 0) {
 		  bamboo_smem_size = 0;
@@ -1528,6 +1532,14 @@ msg:
 #endif
 	  // set the GC flag
 		gcflag = true;
+		gcphase = MARKPHASE;
+		if(!smemflag) {
+			// is waiting for response of mem request
+			// let it return NULL and start gc
+			bamboo_smem_size = 0;
+			bamboo_cur_msp = NULL;
+			smemflag = true;
+		}
 	  break;
 	}
 
@@ -1705,8 +1717,8 @@ msg:
 #endif
 		  BAMBOO_EXIT(0xb014);
 		} else {
-		  if(gcwaitconfirm) {
-			  gcnumconfirm--;
+		  if(waitconfirm) {
+			  numconfirm--;
 		  }
 		  gccorestatus[data1] = gcmsgdata[2];
 		  gcnumsendobjs[data1] = gcmsgdata[3];
@@ -1717,7 +1729,7 @@ msg:
 
 	case GCMARKEDOBJ: {
 		// received a markedObj msg
-		addNewItem(gctomark, data1);
+		gc_enqueue(data1);
 		gcself_numreceiveobjs++;
 		gcbusystatus = true;
 		break;
@@ -2009,7 +2021,7 @@ void removereadfd(int fd) {
 }
 #endif
 
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
 #define OFFSET 2
 #else
 #define OFFSET 0
@@ -2067,6 +2079,9 @@ void executetasks() {
 newtask:
 #ifdef MULTICORE
   while(hashsize(activetasks)>0) {
+#ifdef MULTICORE_GC
+		gc(NULL);
+#endif
 #else
   while((hashsize(activetasks)>0)||(maxreadfd>0)) {
 #endif
@@ -2393,12 +2408,10 @@ parameterpresent:
 	     }
 #endif  // #if 0: for recovery
 	  /* Actually call task */
-#if 0
-#ifdef PRECISE_GC
+#ifdef MULTICORE_GC
 	  ((int *)taskpointerarray)[0]=currtpd->numParameters;
 	  taskpointerarray[1]=NULL;
 #endif
-#endif  // #if 0: for garbage collection
 execute:
 #ifdef PROFILE
 #ifdef ACCURATEPROFILE
-- 
2.34.1