Fix some bugs in the multicore GC
author jzhou <jzhou>
Thu, 25 Feb 2010 19:24:26 +0000 (19:24 +0000)
committer jzhou <jzhou>
Thu, 25 Feb 2010 19:24:26 +0000 (19:24 +0000)
Robust/src/Runtime/MGCHash.c
Robust/src/Runtime/MGCHash.h
Robust/src/Runtime/multicoregarbage.c
Robust/src/Runtime/multicoregarbage.h
Robust/src/Runtime/multicoreruntime.h
Robust/src/Runtime/multicoretask.c

index 97d8c890cfdb5ff6c3b77eebb5c430db8ef184c9..24dfe051364e67a291aeb663d4d4e98c306b3211 100755 (executable)
@@ -22,7 +22,7 @@
 /* MGCHASH ********************************************************/
 mgchashlistnode_t *mgc_table;
 unsigned int mgc_size;
-//unsigned INTPTR mgc_mask;
+unsigned INTPTR mgc_mask;
 unsigned int mgc_numelements;
 unsigned int mgc_threshold;
 double mgc_loadfactor;
@@ -38,13 +38,13 @@ void mgchashCreate(unsigned int size, double loadfactor) {
   mgc_loadfactor = loadfactor;
   mgc_size = size;
   mgc_threshold=size*loadfactor;
-       /*
+       
 #ifdef BIT64
-  mgc_mask = ((size << 4)-1)&~(15UL);
+  mgc_mask = ((size << 6)-1)&~(15UL);
 #else
-  mgc_mask = ((size << 4)-1)&~15;
+  mgc_mask = ((size << 6)-1)&~15;
 #endif
-*/
+
   mgc_structs=RUNMALLOC(1*sizeof(mgcliststruct_t));
   mgc_numelements = 0; // Initial number of elements in the hash
 }
@@ -53,7 +53,7 @@ void mgchashreset() {
   mgchashlistnode_t *ptr = mgc_table;
   int i;
 
-  /*if (mgc_numelements<(mgc_size>>4)) {
+  /*if (mgc_numelements<(mgc_size>>6)) {
     mgchashlistnode_t *top=&ptr[mgc_size];
     mgchashlistnode_t *tmpptr=mgc_list;
     while(tmpptr!=NULL) {
@@ -87,11 +87,12 @@ void mgchashInsert(void * key, void *val) {
     mgchashResize(newsize);
   }
 
-       int hashkey = (unsigned int)key % mgc_size; 
-  ptr=&mgc_table[hashkey];//&mgc_table[(((unsigned INTPTR)key)&mgc_mask)>>4];
+       //int hashkey = (unsigned int)key % mgc_size; 
+  ptr=&mgc_table[(((unsigned INTPTR)key)&mgc_mask)>>6];//&mgc_table[hashkey];
   mgc_numelements++;
 
   if(ptr->key==0) {
+               // the first time insert a value for the key
     ptr->key=key;
     ptr->val=val;
   } else { // Insert in the beginning of linked list
@@ -124,14 +125,15 @@ void mgchashInsert_I(void * key, void *val) {
     mgchashResize_I(newsize);
   }
 
-       int hashkey = (unsigned int)key % mgc_size; 
-  ptr=&mgc_table[hashkey];
-  //ptr = &mgc_table[(((unsigned INTPTR)key)&mgc_mask)>>4];
+       //int hashkey = (unsigned int)key % mgc_size; 
+  //ptr=&mgc_table[hashkey];
+  ptr = &mgc_table[(((unsigned INTPTR)key)&mgc_mask)>>6];
   mgc_numelements++;
 
   if(ptr->key==0) {
     ptr->key=key;
     ptr->val=val;
+               return; 
   } else { // Insert in the beginning of linked list
     mgchashlistnode_t * node;
     if (mgc_structs->num<NUMMGCLIST) {
@@ -156,9 +158,9 @@ void mgchashInsert_I(void * key, void *val) {
 // Search for an address for a given oid
 INLINE void * mgchashSearch(void * key) {
   //REMOVE HASH FUNCTION CALL TO MAKE SURE IT IS INLINED HERE]
-       int hashkey = (unsigned int)key % mgc_size;
-  mgchashlistnode_t *node = &mgc_table[hashkey];
-               //&mgc_table[(((unsigned INTPTR)key)&mgc_mask)>>4];
+       //int hashkey = (unsigned int)key % mgc_size;
+  mgchashlistnode_t *node = &mgc_table[(((unsigned INTPTR)key)&mgc_mask)>>6];
+               //&mgc_table[hashkey];
 
   do {
     if(node->key == key) {
@@ -175,7 +177,7 @@ unsigned int mgchashResize(unsigned int newsize) {
   unsigned int oldsize;
   int isfirst;    // Keeps track of the first element in the chashlistnode_t for each bin in hashtable
   unsigned int i,index;
-  //unsigned int mask;
+  unsigned int mask;
 
   ptr = mgc_table;
   oldsize = mgc_size;
@@ -188,7 +190,7 @@ unsigned int mgchashResize(unsigned int newsize) {
   mgc_table = node;          //Update the global hashtable upon resize()
   mgc_size = newsize;
   mgc_threshold = newsize * mgc_loadfactor;
-  //mask=mgc_mask = (newsize << 4)-1;
+  mask=mgc_mask = (newsize << 6)-1;
 
   for(i = 0; i < oldsize; i++) {                        //Outer loop for each bin in hash table
     curr = &ptr[i];
@@ -200,8 +202,8 @@ unsigned int mgchashResize(unsigned int newsize) {
       if ((key=curr->key) == 0) {             //Exit inner loop if there the first element is 0
        break;                  //key = val =0 for element if not present within the hash table
       }
-                       index = (unsigned int)key % mgc_size; 
-      //index = (((unsigned INTPTR)key) & mask) >>4;
+                       //index = (unsigned int)key % mgc_size; 
+      index = (((unsigned INTPTR)key) & mask) >>6;
       tmp=&node[index];
       next = curr->next;
       // Insert into the new table
@@ -238,7 +240,7 @@ unsigned int mgchashResize_I(unsigned int newsize) {
   unsigned int oldsize;
   int isfirst;    // Keeps track of the first element in the chashlistnode_t for each bin in hashtable
   unsigned int i,index;
-  //unsigned int mask;
+  unsigned int mask;
 
   ptr = mgc_table;
   oldsize = mgc_size;
@@ -252,7 +254,7 @@ unsigned int mgchashResize_I(unsigned int newsize) {
   mgc_table = node;          //Update the global hashtable upon resize()
   mgc_size = newsize;
   mgc_threshold = newsize * mgc_loadfactor;
-  //mask=mgc_mask = (newsize << 4)-1;
+  mask=mgc_mask = (newsize << 6)-1;
 
   for(i = 0; i < oldsize; i++) {                        //Outer loop for each bin in hash table
     curr = &ptr[i];
@@ -266,8 +268,8 @@ unsigned int mgchashResize_I(unsigned int newsize) {
              break;                  
                                //key = val =0 for element if not present within the hash table
       }
-                       index = (unsigned int)key % mgc_size; 
-      //index = (((unsigned INTPTR)key) & mask) >>4;
+                       //index = (unsigned int)key % mgc_size; 
+      index = (((unsigned INTPTR)key) & mask) >>6;
       tmp=&node[index];
       next = curr->next;
       // Insert into the new table
index b3b3f2a7879c3fb9fff102d32da40b78b23ff218..727382df41572fdda9966470d7873e8d24708aa5 100755 (executable)
 typedef struct mgchashlistnode {
   void * key;
   void * val; //this can be cast to another type or used to point to a 
-                   //larger structure
+                   //larger structure 
   struct mgchashlistnode *next;
 } mgchashlistnode_t;
 
 typedef struct mgchashtable {
   mgchashlistnode_t *table;       // points to beginning of hash table
   unsigned int size;
-  //unsigned int mask;
+  unsigned int mask;
   unsigned int numelements;
   unsigned int threshold;
   double loadfactor;
index 20a86eb5c80925b3f02f49341829bb7d2af17fa6..3ffe8e5484a96588271564369ea8c2a224f13917 100644 (file)
@@ -7,6 +7,13 @@
 #include "GenericHashtable.h"
 #include "ObjectHash.h"
 
+// TODO for profiling the flush phase
+#ifdef GC_PROFILE
+int num_mapinforequest;
+int num_markrequest;
+unsigned long long marktime;
+#endif
+
 extern int corenum;
 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
 extern int numqueues[][NUMCLASSES];
@@ -145,7 +152,7 @@ inline void gc_enqueue_I(void *ptr) {
 } // void gc_enqueue_I(void *ptr)
 
 // dequeue and destroy the queue
-inline void * gc_dequeue() {
+inline void * gc_dequeue_I() {
   if (gctailindex==NUMPTRS) {
     struct pointerblock *tmp=gctail;
     gctail=gctail->next;
@@ -160,7 +167,7 @@ inline void * gc_dequeue() {
 } // void * gc_dequeue()
 
 // dequeue and do not destroy the queue
-inline void * gc_dequeue2() {
+inline void * gc_dequeue2_I() {
        if (gctailindex2==NUMPTRS) {
     struct pointerblock *tmp=gctail2;
     gctail2=gctail2->next;
@@ -169,13 +176,13 @@ inline void * gc_dequeue2() {
   return gctail2->ptrs[gctailindex2++];
 } // void * gc_dequeue2() 
 
-inline int gc_moreItems() {
+inline int gc_moreItems_I() {
   if ((gchead==gctail)&&(gctailindex==gcheadindex))
     return 0;
   return 1;
 } // int gc_moreItems() 
 
-inline int gc_moreItems2() {
+inline int gc_moreItems2_I() {
   if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
     return 0;
   return 1;
@@ -213,7 +220,7 @@ inline void gc_lobjenqueue_I(void *ptr,
 } // void gc_lobjenqueue_I(void *ptr...)
 
 // dequeue and destroy the queue
-inline void * gc_lobjdequeue(int * length,
+inline void * gc_lobjdequeue_I(int * length,
                                         int * host) {
   if (gclobjtailindex==NUMLOBJPTRS) {
     struct lobjpointerblock *tmp=gclobjtail;
@@ -237,14 +244,14 @@ inline void * gc_lobjdequeue(int * length,
   return gclobjtail->lobjs[gclobjtailindex++];
 } // void * gc_lobjdequeue()
 
-inline int gc_lobjmoreItems() {
+inline int gc_lobjmoreItems_I() {
   if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
     return 0;
   return 1;
 } // int gc_lobjmoreItems()
 
 // dequeue and don't destroy the queue
-inline void gc_lobjdequeue2() {
+inline void gc_lobjdequeue2_I() {
   if (gclobjtailindex2==NUMLOBJPTRS) {
     gclobjtail2=gclobjtail2->next;
     gclobjtailindex2=1;
@@ -253,14 +260,14 @@ inline void gc_lobjdequeue2() {
        }// if (gclobjtailindex2==NUMLOBJPTRS)
 } // void * gc_lobjdequeue2()
 
-inline int gc_lobjmoreItems2() {
+inline int gc_lobjmoreItems2_I() {
   if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
     return 0;
   return 1;
 } // int gc_lobjmoreItems2()
 
 // 'reversly' dequeue and don't destroy the queue
-inline void gc_lobjdequeue3() {
+inline void gc_lobjdequeue3_I() {
   if (gclobjtailindex2==0) {
     gclobjtail2=gclobjtail2->prev;
     gclobjtailindex2=NUMLOBJPTRS-1;
@@ -269,18 +276,18 @@ inline void gc_lobjdequeue3() {
        }// if (gclobjtailindex2==NUMLOBJPTRS)
 } // void * gc_lobjdequeue3()
 
-inline int gc_lobjmoreItems3() {
+inline int gc_lobjmoreItems3_I() {
   if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
     return 0;
   return 1;
 } // int gc_lobjmoreItems3()
 
-inline void gc_lobjqueueinit4() {
+inline void gc_lobjqueueinit4_I() {
        gclobjtail2 = gclobjtail;
        gclobjtailindex2 = gclobjtailindex;
 } // void gc_lobjqueueinit2()
 
-inline void * gc_lobjdequeue4(int * length,
+inline void * gc_lobjdequeue4_I(int * length,
                                          int * host) {
   if (gclobjtailindex2==NUMLOBJPTRS) {
     gclobjtail2=gclobjtail2->next;
@@ -295,7 +302,7 @@ inline void * gc_lobjdequeue4(int * length,
   return gclobjtail2->lobjs[gclobjtailindex2++];
 } // void * gc_lobjdequeue()
 
-inline int gc_lobjmoreItems4() {
+inline int gc_lobjmoreItems4_I() {
   if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
     return 0;
   return 1;
@@ -377,7 +384,7 @@ inline bool isLocal(void * ptr) {
        return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
 } // bool isLocal(void * ptr)
 
-inline bool gc_checkCoreStatus() {
+inline bool gc_checkCoreStatus_I() {
        bool allStall = true;
        for(int i = 0; i < NUMCORES4GC; ++i) {
                if(gccorestatus[i] != 0) {
@@ -388,7 +395,7 @@ inline bool gc_checkCoreStatus() {
        return allStall;
 }
 
-inline bool gc_checkAllCoreStatus() {
+inline bool gc_checkAllCoreStatus_I() {
        bool allStall = true;
        for(int i = 0; i < NUMCORESACTIVE; ++i) {
                if(gccorestatus[i] != 0) {
@@ -414,7 +421,7 @@ inline void checkMarkStatue() {
                gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
                gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
                // check the status of all cores
-               bool allStall = gc_checkAllCoreStatus();
+               bool allStall = gc_checkAllCoreStatus_I();
 #ifdef DEBUG
                BAMBOO_DEBUGPRINT(0xee03);
 #endif
@@ -436,7 +443,7 @@ inline void checkMarkStatue() {
                                for(i = 1; i < NUMCORESACTIVE; ++i) {   
                                        gccorestatus[i] = 1;
                                        // send mark phase finish confirm request msg to core i
-                                       send_msg_1(i, GCMARKCONFIRM, false);
+                                       send_msg_1(i, GCMARKCONFIRM, true);
                                } // for(i = 1; i < NUMCORESACTIVE; ++i) 
                        } else {
                                // check if the sum of send objs and receive obj are the same
@@ -467,7 +474,13 @@ inline void checkMarkStatue() {
                                        for(i = 0; i < NUMCORESACTIVE; ++i) {
                                                gccorestatus[i] = 1;
                                        } // for(i = 0; i < NUMCORESACTIVE; ++i)
-                               } // if(0 == sumsendobj)
+                               } else {
+                                       // wait for a while and ask for confirm again
+                                       int h = 100;
+                                       while(h--) {
+                                       }
+                                       waitconfirm = false;
+                               }// if(0 == sumsendobj) else ...
                        } // if(!gcwaitconfirm) else()
                } // if(allStall)
                BAMBOO_CLOSE_CRITICAL_SECTION_STATUS();
@@ -610,12 +623,21 @@ inline void initGC() {
 
        freeRuntimeHash(gcpointertbl);
        gcpointertbl = allocateRuntimeHash(20);
-       //freeMGCHash(gcpointertbl);
        //gcpointertbl = allocateMGCHash(20);
        //mgchashreset();
        
        freeMGCHash(gcforwardobjtbl);
        gcforwardobjtbl = allocateMGCHash(20, 3);
+
+#ifdef GC_PROFILE
+       // TODO
+       num_mapinforequest = 0;
+       num_mapinforequest_i = 0;
+       flushstalltime = 0;
+       flushstalltime_i = 0;
+       num_markrequest = 0;
+       marktime = 0;
+#endif
 } // void initGC()
 
 // compute load balance for all cores
@@ -662,8 +684,8 @@ inline bool cacheLObjs() {
        int tmp_len = 0;
        int tmp_host = 0;
        // compute total mem size required and sort the lobjs in ascending order
-       while(gc_lobjmoreItems2()){
-               gc_lobjdequeue2();
+       while(gc_lobjmoreItems2_I()){
+               gc_lobjdequeue2_I();
                tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
                tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
                tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
@@ -731,8 +753,8 @@ inline bool cacheLObjs() {
        //gclobjtail2 = gclobjtail;
        //gclobjtailindex2 = gclobjtailindex;
        dst = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE);
-       while(gc_lobjmoreItems3()) {
-               gc_lobjdequeue3();
+       while(gc_lobjmoreItems3_I()) {
+               gc_lobjdequeue3_I();
                size = gclobjtail2->lengths[gclobjtailindex2];
                // set the mark field to , indicating that this obj has been moved 
                // and need to be flushed
@@ -893,9 +915,9 @@ inline void moveLObjs() {
                int cpysize = 0;
                remain -= BAMBOO_CACHE_LINE_SIZE;
                tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
-               gc_lobjqueueinit4();
-               while(gc_lobjmoreItems4()) {
-                       ptr = (int)(gc_lobjdequeue4(&size, &host));
+               gc_lobjqueueinit4_I();
+               while(gc_lobjmoreItems4_I()) {
+                       ptr = (int)(gc_lobjdequeue4_I(&size, &host));
                        ALIGNSIZE(size, &isize);
                        if(remain < isize) {
                                // this object acrosses blocks
@@ -936,20 +958,20 @@ inline void moveLObjs() {
                                BAMBOO_DEBUGPRINT_REG(base);
 #endif
                                gcheaptop += size;
-                               if(host == BAMBOO_NUM_OF_CORE) {
-                                       //if(ptr != tmpheaptop) {
-                                       BAMBOO_START_CRITICAL_SECTION();
-                                       //mgchashInsert_I(ptr, tmpheaptop);
-                                       RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
-                                       //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
-                                       BAMBOO_CLOSE_CRITICAL_SECTION();
-                                       //}
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xcdca);
-                                       BAMBOO_DEBUGPRINT_REG(ptr);
-                                       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+                               // cache the mapping info anyway
+                               //if(ptr != tmpheaptop) {
+                               BAMBOO_START_CRITICAL_SECTION();
+                               //mgchashInsert_I(ptr, tmpheaptop);
+                               RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
+                               //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
+                               BAMBOO_CLOSE_CRITICAL_SECTION();
+                               //}
+#ifdef DEBUG
+                               BAMBOO_DEBUGPRINT(0xcdca);
+                               BAMBOO_DEBUGPRINT_REG(ptr);
+                               BAMBOO_DEBUGPRINT_REG(tmpheaptop);
 #endif
-                               } else {
+                               if(host != BAMBOO_NUM_OF_CORE) {
                                        // send the original host core with the mapping info
                                        send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
 #ifdef DEBUG
@@ -957,7 +979,7 @@ inline void moveLObjs() {
                                        BAMBOO_DEBUGPRINT_REG(ptr);
                                        BAMBOO_DEBUGPRINT_REG(tmpheaptop);
 #endif
-                               } // if(host == BAMBOO_NUM_OF_CORE) else ...
+                               } // if(host != BAMBOO_NUM_OF_CORE)
                                tmpheaptop += isize;
 
                                // set the gcsbstarttbl and bamboo_smemtbl
@@ -1020,21 +1042,21 @@ inline void moveLObjs() {
 
                                gcheaptop += size;
                                cpysize += isize;
-                               if(host == BAMBOO_NUM_OF_CORE) {
-                                       //if(ptr != tmpheaptop) {
-                                       BAMBOO_START_CRITICAL_SECTION();
-                                       //mgchashInsert_I(ptr, tmpheaptop);
-                                       RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
-                                       //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
-                                       BAMBOO_CLOSE_CRITICAL_SECTION();
-                                       //}
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xcdcc);
-                                       BAMBOO_DEBUGPRINT_REG(ptr);
-                                       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-                                       BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
+                               // cache the mapping info anyway
+                               //if(ptr != tmpheaptop) {
+                               BAMBOO_START_CRITICAL_SECTION();
+                               //mgchashInsert_I(ptr, tmpheaptop);
+                               RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
+                               //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
+                               BAMBOO_CLOSE_CRITICAL_SECTION();
+                               //}
+#ifdef DEBUG
+                               BAMBOO_DEBUGPRINT(0xcdcc);
+                               BAMBOO_DEBUGPRINT_REG(ptr);
+                               BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+                               BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
 #endif
-                               } else {
+                               if(host != BAMBOO_NUM_OF_CORE) {
                                        // send the original host core with the mapping info
                                        send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
 #ifdef DEBUG
@@ -1042,7 +1064,7 @@ inline void moveLObjs() {
                                        BAMBOO_DEBUGPRINT_REG(ptr);
                                        BAMBOO_DEBUGPRINT_REG(tmpheaptop);
 #endif
-                               } // if(host == BAMBOO_NUM_OF_CORE) else ...
+                               } // if(host != BAMBOO_NUM_OF_CORE)
                                tmpheaptop += isize;
 
                                // update bamboo_smemtbl
@@ -1092,14 +1114,14 @@ inline void markObj(void * objptr) {
                int host = hostcore(objptr);
                if(BAMBOO_NUM_OF_CORE == host) {
                        // on this core
+                       BAMBOO_START_CRITICAL_SECTION();
                        if(((int *)objptr)[6] == INIT) {
                                // this is the first time that this object is discovered,
                                // set the flag as DISCOVERED
                                ((int *)objptr)[6] = DISCOVERED;
-                               BAMBOO_START_CRITICAL_SECTION();
                                gc_enqueue_I(objptr);  
-                               BAMBOO_CLOSE_CRITICAL_SECTION();
                        }
+                       BAMBOO_CLOSE_CRITICAL_SECTION();
                } else {
 #ifdef DEBUG
                        BAMBOO_DEBUGPRINT(0xbbbb);
@@ -1108,8 +1130,15 @@ inline void markObj(void * objptr) {
 #endif
                        // check if this obj has been forwarded
                        if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
+#ifdef GC_PROFILE
+                               unsigned long long ttime = BAMBOO_GET_EXE_TIME();
+#endif
                                // send a msg to host informing that objptr is active
                                send_msg_2(host, GCMARKEDOBJ, objptr, false);
+#ifdef GC_PROFILE
+                               marktime += BAMBOO_GET_EXE_TIME() - ttime;
+                               num_markrequest++;
+#endif
                                gcself_numsendobjs++;
                                MGCHashadd(gcforwardobjtbl, (int)objptr);
                        }
@@ -1236,11 +1265,11 @@ inline void tomark(struct garbagelist * stackptr) {
 inline void mark(bool isfirst, 
                             struct garbagelist * stackptr) {
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xed01);
+       if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed01);
 #endif
        if(isfirst) {
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xed02);
+               if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed02);
 #endif
                // enqueue root objs
                tomark(stackptr);
@@ -1249,7 +1278,7 @@ inline void mark(bool isfirst,
                gcmarkedptrbound = 0;
        }
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xed03);
+       if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed03); 
 #endif
        int isize = 0;
        bool checkfield = true;
@@ -1257,16 +1286,22 @@ inline void mark(bool isfirst,
        // mark phase
        while(MARKPHASE == gcphase) {
 #ifdef DEBUG 
-               BAMBOO_DEBUGPRINT(0xed04);
+               if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed04); 
 #endif
-               while(gc_moreItems2()) {
+               while(true) {
+                       BAMBOO_START_CRITICAL_SECTION();
+                       bool hasItems = gc_moreItems2_I();
+                       BAMBOO_CLOSE_CRITICAL_SECTION();
 #ifdef DEBUG 
-                       BAMBOO_DEBUGPRINT(0xed05);
+                       BAMBOO_DEBUGPRINT(0xed05); 
 #endif
+                       if(!hasItems) {
+                               break;
+                       }
                        sendStall = false;
                        gcbusystatus = true;
                        checkfield = true;
-                       void * ptr = gc_dequeue2();
+                       void * ptr = gc_dequeue2_I();
 
 #ifdef DEBUG 
                        BAMBOO_DEBUGPRINT_REG(ptr);
@@ -1314,11 +1349,14 @@ inline void mark(bool isfirst,
                                                // ptr is not an active obj or has been marked
                                                checkfield = false;
                                        }// if(isLarge(ptr, &type, &size)) else ...
-                               } else {
+                               } /* can never reach here 
+                                                else {
 #ifdef DEBUG
+                                       if(BAMBOO_NUM_OF_CORE == 0) {
                                        BAMBOO_DEBUGPRINT(0xbbbb);
                                        BAMBOO_DEBUGPRINT_REG(host);
                                        BAMBOO_DEBUGPRINT_REG(ptr);
+                                       }
 #endif
                                        // check if this obj has been forwarded
                                        if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
@@ -1328,10 +1366,10 @@ inline void mark(bool isfirst,
                                                MGCHashadd(gcforwardobjtbl, (int)ptr);
                                        }
                                        checkfield = false;
-                               }// if(isLocal(ptr)) else ...
+                               }// if(isLocal(ptr)) else ...*/
                        } // if(ISSHAREDOBJ(ptr))
 #ifdef DEBUG 
-                       BAMBOO_DEBUGPRINT(0xed06);
+                       BAMBOO_DEBUGPRINT(0xed06); 
 #endif
 
                        if(checkfield) {
@@ -1369,7 +1407,7 @@ inline void mark(bool isfirst,
                // send mark finish msg to core coordinator
                if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xed08);
+                       BAMBOO_DEBUGPRINT(0xed08); 
 #endif
                        gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
                        gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
@@ -1386,19 +1424,19 @@ inline void mark(bool isfirst,
                        }
                } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
 #ifdef DEBUG 
-               BAMBOO_DEBUGPRINT(0xed0a);
+               BAMBOO_DEBUGPRINT(0xed0a); 
 #endif
 
                if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xed0b);
+                       BAMBOO_DEBUGPRINT(0xed0b); 
 #endif
                        return;
                }
        } // while(MARKPHASE == gcphase)
 } // mark()
 
-inline void compact2Heaptophelper(int coren,
+inline void compact2Heaptophelper_I(int coren,
                                              int* p,
                                                                                                                                        int* numblocks,
                                                                                                                                        int* remain) {
@@ -1410,7 +1448,7 @@ inline void compact2Heaptophelper(int coren,
                gcdstcore = gctopcore;
                gcblock2fill = *numblocks + 1;
        } else {
-               send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
+               send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, true); 
        }
 #ifdef DEBUG
        BAMBOO_DEBUGPRINT_REG(coren);
@@ -1455,7 +1493,7 @@ inline void compact2Heaptophelper(int coren,
 #endif
        } // if(memneed < remain)
        gcmovepending--;
-} // void compact2Heaptophelper(int, int*, int*, int*)
+} // void compact2Heaptophelper_I(int, int*, int*, int*)
 
 inline void compact2Heaptop() {
        // no cores with spare mem and some cores are blocked with pending move
@@ -1470,15 +1508,18 @@ inline void compact2Heaptop() {
                ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
         :((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
        // check if the top core finishes
+       BAMBOO_START_CRITICAL_SECTION();
        if(gccorestatus[gctopcore] != 0) {
 #ifdef DEBUG
                BAMBOO_DEBUGPRINT(0xd101);
                BAMBOO_DEBUGPRINT_REG(gctopcore);
 #endif
                // let the top core finishes its own work first
-               compact2Heaptophelper(gctopcore, &p, &numblocks, &remain);
+               compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
+               BAMBOO_CLOSE_CRITICAL_SECTION();
                return;
        }
+       BAMBOO_CLOSE_CRITICAL_SECTION();
 
 #ifdef DEBUG
        BAMBOO_DEBUGPRINT(0xd102);
@@ -1493,12 +1534,13 @@ inline void compact2Heaptop() {
 #ifdef DEBUG
                        BAMBOO_DEBUGPRINT(0xd103);
 #endif
-                       compact2Heaptophelper(i, &p, &numblocks, &remain);
+                       compact2Heaptophelper_I(i, &p, &numblocks, &remain);
                        if(gccorestatus[gctopcore] != 0) {
 #ifdef DEBUG
                                BAMBOO_DEBUGPRINT(0xd101);
                                BAMBOO_DEBUGPRINT_REG(gctopcore);
 #endif
+                         BAMBOO_CLOSE_CRITICAL_SECTION();
                                // the top core is not free now
                                return;
                        }
@@ -2137,6 +2179,10 @@ inline void compact() {
        RUNFREE(to);
 } // compact()
 
+// if return NULL, means 
+//   1. objptr is NULL
+//   2. objptr is not a shared obj
+// in these cases, remain the original value is OK
 inline void * flushObj(void * objptr) {
 #ifdef DEBUG
        BAMBOO_DEBUGPRINT(0xe401);
@@ -2152,14 +2198,21 @@ inline void * flushObj(void * objptr) {
 #endif
                // a shared obj ptr, change to new address
                BAMBOO_START_CRITICAL_SECTION();
+#ifdef GC_PROFILE
+               unsigned long long ttime = BAMBOO_GET_EXE_TIME();
+#endif
                //dstptr = mgchashSearch(objptr);
                RuntimeHashget(gcpointertbl, objptr, &dstptr);
+#ifdef GC_PROFILE
+               flushstalltime += BAMBOO_GET_EXE_TIME()-ttime;
+#endif
                //MGCHashget(gcpointertbl, objptr, &dstptr);
                BAMBOO_CLOSE_CRITICAL_SECTION();
 #ifdef DEBUG
                BAMBOO_DEBUGPRINT_REG(dstptr);
 #endif
                if(NULL == dstptr) {
+                       // no mapping info
 #ifdef DEBUG
                        BAMBOO_DEBUGPRINT(0xe403);
                        BAMBOO_DEBUGPRINT_REG(objptr);
@@ -2172,36 +2225,51 @@ inline void * flushObj(void * objptr) {
                                // assume that the obj has not been moved, use the original address
                                //dstptr = objptr;
                        } else {
-                       // send msg to host core for the mapping info
-                       gcobj2map = (int)objptr;
-                       gcismapped = false;
-                       gcmappedobj = NULL;
-                       send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr, 
-                                                                BAMBOO_NUM_OF_CORE, false);
-                       while(true) {
-                               if(gcismapped) {
-                                       break;
+                               // send msg to host core for the mapping info
+                               gcobj2map = (int)objptr;
+                               gcismapped = false;
+                               gcmappedobj = NULL;
+#ifdef GC_PROFILE
+                               // TODO
+                               num_mapinforequest++;
+                               //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
+#endif
+#ifdef GC_PROFILE
+                               unsigned long long ttimet = BAMBOO_GET_EXE_TIME();
+#endif
+                               // the first time require the mapping, send msg to the hostcore 
+                               // for the mapping info
+                               send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr, 
+                                                                        BAMBOO_NUM_OF_CORE, false);
+                               while(true) {
+                                       if(gcismapped) {
+                                               break;
+                                       }
                                }
-                       }
-                       BAMBOO_START_CRITICAL_SECTION();
-                       //dstptr = mgchashSearch(objptr);
-                       RuntimeHashget(gcpointertbl, objptr, &dstptr);
-                       //MGCHashget(gcpointertbl, objptr, &dstptr);
-                       BAMBOO_CLOSE_CRITICAL_SECTION();
-                       }
+#ifdef GC_PROFILE
+                               flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
+#endif
+#ifdef GC_PROFILE
+                               // TODO
+                               //flushstalltime += BAMBOO_GET_EXE_TIME() - ttime;
+#endif
+                               BAMBOO_START_CRITICAL_SECTION();
+                               //dstptr = mgchashSearch(objptr);
+                               RuntimeHashget(gcpointertbl, objptr, &dstptr);
+                               //MGCHashget(gcpointertbl, objptr, &dstptr);
+                               BAMBOO_CLOSE_CRITICAL_SECTION();
+                       } // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ...
 #ifdef DEBUG
                        BAMBOO_DEBUGPRINT_REG(dstptr);
 #endif
-               }
-       } else {
-               // not a shared obj, use the old address
-               dstptr = objptr;
+               } // if(NULL == dstptr) 
        }// if(ISSHAREDOBJ(objptr))
+       // if not a shared obj, return NULL to indicate no need to flush
 #ifdef DEBUG
        BAMBOO_DEBUGPRINT(0xe404);
 #endif
        return dstptr;
-} // void flushObj(void * objptr, void ** tochange)
+} // void flushObj(void * objptr)
 
 inline void flushRuntimeObj(struct garbagelist * stackptr) {
        int i,j;
@@ -2209,7 +2277,10 @@ inline void flushRuntimeObj(struct garbagelist * stackptr) {
        while(stackptr!=NULL) {
                for(i=0; i<stackptr->size; i++) {
                        if(stackptr->array[i] != NULL) {
-                               stackptr->array[i] = flushObj(stackptr->array[i]);
+                               void * dst = flushObj(stackptr->array[i]);
+                               if(dst != NULL) {
+                                       stackptr->array[i] = dst;
+                               }
                        }
                }
                stackptr=stackptr->next;
@@ -2226,7 +2297,10 @@ inline void flushRuntimeObj(struct garbagelist * stackptr) {
                                struct ObjectHash * set=parameter->objectset;
                                struct ObjectNode * ptr=set->listhead;
                                while(ptr!=NULL) {
-                                       ptr->key = flushObj((void *)ptr->key);
+                                       void * dst = flushObj((void *)ptr->key);
+                                       if(dst != NULL) {
+                                               ptr->key = dst;
+                                       }
                                        ptr=ptr->lnext;
                                }
                                ObjectHashrehash(set);
@@ -2237,7 +2311,10 @@ inline void flushRuntimeObj(struct garbagelist * stackptr) {
        // flush current task descriptor
        if(currtpd != NULL) {
                for(i=0; i<currtpd->numParameters; i++) {
-                       currtpd->parameterArray[i] = flushObj(currtpd->parameterArray[i]);
+                       void * dst = flushObj(currtpd->parameterArray[i]);
+                       if(dst != NULL) {
+                               currtpd->parameterArray[i] = dst;
+                       }
                }
        }
 
@@ -2248,7 +2325,10 @@ inline void flushRuntimeObj(struct garbagelist * stackptr) {
                        struct taskparamdescriptor *tpd=ptr->src;
                        int i;
                        for(i=0; i<tpd->numParameters; i++) {
-                               tpd->parameterArray[i] = flushObj(tpd->parameterArray[i]);
+                               void * dst = flushObj(tpd->parameterArray[i]);
+                               if(dst != NULL) {
+                                       tpd->parameterArray[i] = dst;
+                               }
                        }
                        ptr=ptr->inext;
                }
@@ -2260,7 +2340,10 @@ inline void flushRuntimeObj(struct garbagelist * stackptr) {
        while(tmpobjptr != NULL) {
                struct transObjInfo * objInfo = 
                        (struct transObjInfo *)(tmpobjptr->objectptr); 
-               objInfo->objptr = flushObj(objInfo->objptr);
+               void * dst = flushObj(objInfo->objptr);
+               if(dst != NULL) {
+                       objInfo->objptr = dst;
+               }
                tmpobjptr = getNextQueueItem(tmpobjptr);
        }
 
@@ -2269,41 +2352,68 @@ inline void flushRuntimeObj(struct garbagelist * stackptr) {
        while(item != NULL) {
                struct transObjInfo * totransobj = 
                        (struct transObjInfo *)(item->objectptr);
-               totransobj->objptr = flushObj(totransobj->objptr);
+               void * dst = flushObj(totransobj->objptr);
+               if(dst != NULL) {
+                       totransobj->objptr = dst;
+               }
                item = getNextQueueItem(item);
        } // while(item != NULL)
 
        // enqueue lock related info
        for(i = 0; i < runtime_locklen; ++i) {
-         runtime_locks[i].redirectlock = 
-                       (int)flushObj(runtime_locks[i].redirectlock);
+         void * dst = flushObj(runtime_locks[i].redirectlock);
+               if(dst != NULL) {
+                       runtime_locks[i].redirectlock = (int)dst;
+               }
                if(runtime_locks[i].value != NULL) {
-                 runtime_locks[i].value = (int)flushObj(runtime_locks[i].value);
+                       void * dst=flushObj(runtime_locks[i].value);
+                       if(dst != NULL) {
+                               runtime_locks[i].value = (int)dst;
+                       }
          }
        }
 
 } // void flushRuntimeObj(struct garbagelist * stackptr)
 
 inline void flush(struct garbagelist * stackptr) {
+#ifdef GC_PROFILE
+       if(BAMBOO_NUM_OF_CORE == 0) {
+       BAMBOO_DEBUGPRINT(0xcccc);
+       BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
+       }
+#endif
        flushRuntimeObj(stackptr);
-       
-       while(gc_moreItems()) {
+#ifdef GC_PROFILE
+       if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
+#endif
+
+       while(true) {
+               BAMBOO_START_CRITICAL_SECTION();
+               bool hasItems = gc_moreItems_I();
+               BAMBOO_CLOSE_CRITICAL_SECTION();
+               if(!hasItems) {
+                       break;
+               }
+               
 #ifdef DEBUG
                BAMBOO_DEBUGPRINT(0xe301);
 #endif
-               void * ptr = gc_dequeue();
+               BAMBOO_START_CRITICAL_SECTION();
+               void * ptr = gc_dequeue_I();
+               BAMBOO_CLOSE_CRITICAL_SECTION();
                if(ISSHAREDOBJ(ptr)) {
-               void * tptr = flushObj(ptr);
+                       // should be a local shared obj and should have mapping info
+                       ptr = flushObj(ptr);
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe302);
-               BAMBOO_DEBUGPRINT_REG(ptr);
-               BAMBOO_DEBUGPRINT_REG(tptr);
-               BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
+                       BAMBOO_DEBUGPRINT(0xe302);
+                       BAMBOO_DEBUGPRINT_REG(ptr);
+                       BAMBOO_DEBUGPRINT_REG(tptr);
+                       BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
 #endif
-               if(tptr != NULL) {
-                       ptr = tptr;
-               }
-               }
+                       if(ptr == NULL) {
+                               BAMBOO_EXIT(0xb105);
+                       }
+               } // if(ISSHAREDOBJ(ptr))
                if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) {
                        int type = ((int *)(ptr))[0];
                        // scan all pointers in ptr
@@ -2334,8 +2444,10 @@ inline void flush(struct garbagelist * stackptr) {
                                        BAMBOO_DEBUGPRINT_REG(objptr);
 #endif
                                        if(objptr != NULL) {
-                                               ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = 
-                                                       flushObj(objptr);
+                                               void * dst = flushObj(objptr);
+                                               if(dst != NULL) {
+                                                       ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
+                                               }
                                        }
                                }
                        } else {
@@ -2354,7 +2466,10 @@ inline void flush(struct garbagelist * stackptr) {
                                        BAMBOO_DEBUGPRINT_REG(objptr);
 #endif
                                        if(objptr != NULL) {
-                                               *((void **)(((char *)ptr)+offset)) = flushObj(objptr);
+                                               void * dst = flushObj(objptr);
+                                               if(dst != NULL) {
+                                                       *((void **)(((char *)ptr)+offset)) = dst;
+                                               }
                                        }
                                } // for(i=1; i<=size; i++) 
                        } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
@@ -2362,29 +2477,33 @@ inline void flush(struct garbagelist * stackptr) {
                        if(ISSHAREDOBJ(ptr)) {
                                ((int *)(ptr))[6] = INIT;
                        }
-               } // if(((int *)(ptr))[6] == COMPACTED)
+               } // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED))
        } // while(gc_moreItems())
+       BAMBOO_CLOSE_CRITICAL_SECTION();
 #ifdef DEBUG
        BAMBOO_DEBUGPRINT(0xe308);
 #endif
+#ifdef GC_PROFILE
+       if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
+#endif
 
-       // TODO bug here: the startup core contains all lobjs' info, it will 
-       // redundantly flush the lobjs found in the other cores.
+       // TODO bug here: the startup core contains all lobjs' info, thus all the 
+       // lobjs are flushed in sequence.
        // flush lobjs
-       while(gc_lobjmoreItems()) {
+       while(gc_lobjmoreItems_I()) {
 #ifdef DEBUG
                BAMBOO_DEBUGPRINT(0xe309);
 #endif
-               void * ptr = gc_lobjdequeue(NULL, NULL);
-               void * tptr = flushObj(ptr);
+               void * ptr = gc_lobjdequeue_I(NULL, NULL);
+               ptr = flushObj(ptr);
 #ifdef DEBUG
                BAMBOO_DEBUGPRINT(0xe30a);
                BAMBOO_DEBUGPRINT_REG(ptr);
                BAMBOO_DEBUGPRINT_REG(tptr);
                BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
 #endif
-               if(tptr != NULL) {
-                       ptr = tptr;
+               if(ptr == NULL) {
+                       BAMBOO_EXIT(0x106);
                }
                if(((int *)(ptr))[6] == COMPACTED) {
                        int type = ((int *)(ptr))[0];
@@ -2416,8 +2535,10 @@ inline void flush(struct garbagelist * stackptr) {
                                        BAMBOO_DEBUGPRINT_REG(objptr);
 #endif
                                        if(objptr != NULL) {
-                                               ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = 
-                                                       flushObj(objptr);
+                                               void * dst = flushObj(objptr);
+                                               if(dst != NULL) {
+                                                       ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
+                                               }       
                                        }
                                }
                        } else {
@@ -2437,7 +2558,10 @@ inline void flush(struct garbagelist * stackptr) {
                                        BAMBOO_DEBUGPRINT_REG(objptr);
 #endif
                                        if(objptr != NULL) {
-                                               *((void **)(((char *)ptr)+offset)) = flushObj(objptr);
+                                               void * dst = flushObj(objptr);
+                                               if(dst != NULL) {
+                                                       *((void **)(((char *)ptr)+offset)) = dst;
+                                               }
                                        }
                                } // for(i=1; i<=size; i++) 
                        } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
@@ -2448,6 +2572,9 @@ inline void flush(struct garbagelist * stackptr) {
 #ifdef DEBUG
        BAMBOO_DEBUGPRINT(0xe310);
 #endif
+#ifdef GC_PROFILE
+       if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
+#endif
 
        // send flush finish message to core coordinator
        if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
@@ -2455,6 +2582,16 @@ inline void flush(struct garbagelist * stackptr) {
        } else {
                send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
        }
+#ifdef GC_PROFILE
+       if(BAMBOO_NUM_OF_CORE == 0) {
+       BAMBOO_DEBUGPRINT(0xffff);
+       //BAMBOO_DEBUGPRINT_REG(num_mapinforequest);
+       BAMBOO_DEBUGPRINT_REG(flushstalltime);
+       //BAMBOO_DEBUGPRINT_REG(num_mapinforequest_i);
+       BAMBOO_DEBUGPRINT_REG(flushstalltime_i);
+       }
+       //BAMBOO_DEBUGPRINT_REG(flushstalltime);
+#endif
 #ifdef DEBUG
        BAMBOO_DEBUGPRINT(0xe311);
 #endif
@@ -2587,10 +2724,12 @@ inline void gc(struct garbagelist * stackptr) {
                //dumpSMem();
 #endif
                gcprocessing = true;
+               gcphase = INITPHASE;
                int i = 0;
                waitconfirm = false;
-               waitconfirm = 0;
-               gcphase = INITPHASE;
+               numconfirm = 0;
+               initGC();
+
                // Note: all cores need to init gc including non-gc cores
                for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; i++) {
                        // send GC init messages to all cores
@@ -2599,16 +2738,18 @@ inline void gc(struct garbagelist * stackptr) {
                bool isfirst = true;
                bool allStall = false;
 
-               initGC();
 #ifdef RAWPATH // TODO GC_DEBUG
                tprintf("Check core status \n");
 #endif
 
                gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
                while(true) {
-                       if(gc_checkAllCoreStatus()) {
+                       BAMBOO_START_CRITICAL_SECTION();
+                       if(gc_checkAllCoreStatus_I()) {
+                               BAMBOO_CLOSE_CRITICAL_SECTION();
                                break;
                        }
+                       BAMBOO_CLOSE_CRITICAL_SECTION();
                }
 #ifdef GC_PROFILE
                gc_profileItem();
@@ -2655,6 +2796,12 @@ inline void gc(struct garbagelist * stackptr) {
                }
 #ifdef GC_PROFILE
                gc_profileItem();
+               // TODO
+               if(BAMBOO_NUM_OF_CORE == 0) {
+                       BAMBOO_DEBUGPRINT(0xeeee);
+                       BAMBOO_DEBUGPRINT_REG(num_markrequest);
+                       BAMBOO_DEBUGPRINT_REG(marktime);
+               }
 #endif
 #ifdef RAWPATH // TODO GC_DEBUG
                tprintf("prepare to cache large objs \n");
@@ -2663,7 +2810,7 @@ inline void gc(struct garbagelist * stackptr) {
                // cache all large objs
                if(!cacheLObjs()) {
                        // no enough space to cache large objs
-                       BAMBOO_EXIT(0xb104);
+                       BAMBOO_EXIT(0xb107);
                }
                // predict number of blocks to fill for each core
                int tmpheaptop = 0;
@@ -2750,14 +2897,17 @@ inline void gc(struct garbagelist * stackptr) {
 #endif
                        }
 
-                       if(gc_checkCoreStatus()) {
+                       BAMBOO_START_CRITICAL_SECTION();
+                       if(gc_checkCoreStatus_I()) {
                                // all cores have finished compacting
                                // restore the gcstatus of all cores
                                for(i = 0; i < NUMCORES4GC; ++i) {
                                        gccorestatus[i] = 1;
                                }
+                               BAMBOO_CLOSE_CRITICAL_SECTION();
                                break;
                        } else {
+                               BAMBOO_CLOSE_CRITICAL_SECTION();
                                // check if there are spare mem for pending move requires
                                if(COMPACTPHASE == gcphase) {
 #ifdef DEBUG
@@ -2773,7 +2923,7 @@ inline void gc(struct garbagelist * stackptr) {
 #endif
                                        compact2Heaptop();
                                }
-                       } // if(gc_checkCoreStatus()) else ...
+                       } // if(gc_checkCoreStatus_I()) else ...
 
                        if(gctomove) {
 #ifdef DEBUG
@@ -2845,9 +2995,12 @@ inline void gc(struct garbagelist * stackptr) {
                gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
                while(FLUSHPHASE == gcphase) {
                        // check the status of all cores
-                       if(gc_checkAllCoreStatus()) {
+                       BAMBOO_START_CRITICAL_SECTION();
+                       if(gc_checkAllCoreStatus_I()) {
+                               BAMBOO_CLOSE_CRITICAL_SECTION();
                                break;
                        }
+                       BAMBOO_CLOSE_CRITICAL_SECTION();
                } // while(FLUSHPHASE == gcphase)
                gcphase = FINISHPHASE;
 
index 08ba12bca2c585439b939c31f60a9bfbc480daba..6ae933c20c3ca909c2627c5de3c4d70c5178eac4 100644 (file)
@@ -33,6 +33,11 @@ typedef struct gc_info {
 GCInfo * gc_infoArray[GCINFOLENGTH];
 int gc_infoIndex;
 bool gc_infoOverflow;
+
+// TODO
+unsigned long long flushstalltime;
+unsigned long long flushstalltime_i;
+int num_mapinforequest_i;
 #endif
 
 typedef enum {
@@ -60,12 +65,13 @@ volatile GCPHASETYPE gcphase; // indicating GC phase
 int gccurr_heaptop;
 struct MGCHash * gcforwardobjtbl; // cache forwarded objs in mark phase
 // for mark phase termination
-int gccorestatus[NUMCORESACTIVE]; // records status of each core
-                                  // 1: running gc
-                                  // 0: stall
-int gcnumsendobjs[NUMCORESACTIVE]; // records how many objects sent out
-int gcnumreceiveobjs[NUMCORESACTIVE]; // records how many objects received
-bool gcbusystatus;
+volatile int gccorestatus[NUMCORESACTIVE]; // records status of each core
+                                           // 1: running gc
+                                           // 0: stall
+volatile int gcnumsendobjs[NUMCORESACTIVE];//records how many objects sent out
+volatile int gcnumreceiveobjs[NUMCORESACTIVE];//records how many objects 
+                                              //received
+volatile bool gcbusystatus;
 int gcself_numsendobjs;
 int gcself_numreceiveobjs;
 
@@ -89,12 +95,25 @@ volatile bool gctomove;
 int gcrequiredmems[NUMCORES4GC]; //record pending mem requests
 volatile int gcmovepending;
 
+// One entry of a flush list: pairs a key with the list of references 
+// (struct flushnode) to be flushed for that key, and links to the next entry.
+// NOTE(review): the only visible user (gcflushlist) is commented out in this
+//               header -- confirm whether these types are still needed.
+struct flushlist {
+       void * key;               // key of this entry
+       struct flushnode * val;   // head of the list of references for key
+       struct flushlist * next;  // next entry in the flush list
+};
+
+// One node of a per-key reference list: holds the address of a pointer slot.
+struct flushnode {
+       void ** ptr;              // address of a reference (pointer to a pointer)
+       struct flushnode * next;  // next node in the same list
+};
 // mapping of old address to new address
-struct RuntimeHash * gcpointertbl;
+volatile struct RuntimeHash * gcpointertbl;
 //struct MGCHash * gcpointertbl;
 int gcobj2map;
 int gcmappedobj;
 volatile bool gcismapped;
+//volatile struct flushlist * gcflushlist; // list of (key, list of reference 
+                                         // to be flushed)
+//volatile int gcnumflush;
 
 // table recording the starting address of each small block
 // (size is BAMBOO_SMEM_SIZE)
index bdf4ab858e11cca2b8679de7f525b41bb3725ee0..ccfd0624562a77ae594134cdfeaffc6ecd315063 100644 (file)
 ///////////////////////////////////////////////////////////////
 
 // data structures for msgs
-#define BAMBOO_OUT_BUF_LENGTH 300
-#define BAMBOO_MSG_BUF_LENGTH 30
+#define BAMBOO_OUT_BUF_LENGTH 3000
+#define BAMBOO_MSG_BUF_LENGTH 3000
 int msgdata[BAMBOO_MSG_BUF_LENGTH];
 int msgdataindex;
+int msgdatalast;
 int msglength;
 int outmsgdata[BAMBOO_OUT_BUF_LENGTH];
 int outmsgindex;
@@ -22,13 +23,33 @@ int outmsgleft;
 bool isMsgHanging;
 volatile bool isMsgSending;
 
+#define MSG_INDEXINC_I() \
+       msgdataindex = (msgdataindex + 1) % (BAMBOO_MSG_BUF_LENGTH)
+
+#define MSG_LASTINDEXINC_I() \
+       msgdatalast = (msgdatalast + 1) % (BAMBOO_MSG_BUF_LENGTH)
+
+#define MSG_CACHE_I(n) \
+       msgdata[msgdatalast] = (n); \
+  MSG_LASTINDEXINC_I() 
+
+// NOTE: if msgdataindex == msgdatalast, it always means that the buffer if 
+//       full. In the case that the buffer is empty, should never call this
+//       MACRO
+#define MSG_REMAINSIZE_I(s) \
+       if(msgdataindex < msgdatalast) { \
+               (*(int*)s) = msgdatalast - msgdataindex; \
+       } else { \
+               (*(int*)s) = (BAMBOO_MSG_BUF_LENGTH) - msgdataindex + msgdatalast; \
+       } 
+
 #define OUTMSG_INDEXINC() \
        outmsgindex = (outmsgindex + 1) % (BAMBOO_OUT_BUF_LENGTH)
 
 #define OUTMSG_LASTINDEXINC() \
        outmsglast = (outmsglast + 1) % (BAMBOO_OUT_BUF_LENGTH); \
        if(outmsglast == outmsgindex) { \
-               BAMBOO_EXIT(0xd001); \
+               BAMBOO_EXIT(0xdd01); \
        } 
 
 #define OUTMSG_CACHE(n) \
@@ -194,11 +215,13 @@ typedef enum {
 ////////////////////////////////////////////////////////////////////////////////
 // data structures of status for termination
 // only check working cores
-int corestatus[NUMCORESACTIVE]; // records status of each core
-                                // 1: running tasks
-                                // 0: stall
-int numsendobjs[NUMCORESACTIVE]; // records how many objects a core has sent out
-int numreceiveobjs[NUMCORESACTIVE]; // records how many objects a core has received
+volatile int corestatus[NUMCORESACTIVE]; // records status of each core
+                                         // 1: running tasks
+                                         // 0: stall
+volatile int numsendobjs[NUMCORESACTIVE]; // records how many objects a core 
+                                          // has sent out
+volatile int numreceiveobjs[NUMCORESACTIVE]; // records how many objects a 
+                                             // core has received
 volatile int numconfirm;
 volatile bool waitconfirm;
 bool busystatus;
@@ -275,8 +298,8 @@ struct freeMemList {
 // table recording the number of allocated bytes on each block
 // Note: this table resides on the bottom of the shared heap for all cores
 //       to access
-int * bamboo_smemtbl;
-int bamboo_free_block;
+volatile int * bamboo_smemtbl;
+volatile int bamboo_free_block;
 //bool bamboo_smem_flushed;
 //struct freeMemList * bamboo_free_mem_list;
 int bamboo_reserved_smem; // reserved blocks on the top of the shared heap
@@ -321,9 +344,9 @@ bool taskInfoOverflow;
 /*InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH];
    int interruptInfoIndex;
    bool interruptInfoOverflow;*/
-int profilestatus[NUMCORESACTIVE]; // records status of each core
-                             // 1: running tasks
-                             // 0: stall
+volatile int profilestatus[NUMCORESACTIVE]; // records status of each core
+                                            // 1: running tasks
+                                            // 0: stall
 #endif // #ifdef PROFILE
 
 #ifndef INTERRUPT
index 6d1ee7639e657ebac33024f12ca61f0625592b92..c43766b24e25898ddb6107d5a23d42a8b91dc22c 100644 (file)
@@ -4,6 +4,10 @@
 #include "runtime_arch.h"
 #include "GenericHashtable.h"
 
+#ifndef INLINE
+#define INLINE    inline __attribute__((always_inline))
+#endif // #ifndef INLINE
+
 //  data structures for task invocation
 struct genhashtable * activetasks;
 struct taskparamdescriptor * currtpd;
@@ -86,6 +90,7 @@ void initruntimedata() {
     msgdata[i] = -1;
   }
   msgdataindex = 0;
+       msgdatalast = 0;
   msglength = BAMBOO_MSG_BUF_LENGTH;
   for(i = 0; i < BAMBOO_OUT_BUF_LENGTH; ++i) {
     outmsgdata[i] = -1;
@@ -1425,723 +1430,862 @@ void * smemalloc_I(int coren,
        return mem;
 }  // void * smemalloc_I(int, int, int)
 
-// receive object transferred from other cores
-// or the terminate message from other cores
-// Should be invoked in critical sections!!
-// NOTICE: following format is for threadsimulate version only
-//         RAW version please see previous description
-// format: type + object
-// type: -1--stall msg
-//      !-1--object
-// return value: 0--received an object
-//               1--received nothing
-//               2--received a Stall Msg
-//               3--received a lock Msg
-//               RAW version: -1 -- received nothing
-//                            otherwise -- received msg type
-int receiveObject() {
-  int deny = 0;
-  
-msg:
-  if(receiveMsg() == -1) {
-         return -1;
-  }
+// Determine the length (in words) of the msg that starts at 
+// msgdata[msgdataindex] and store it in the global msglength.
+//   size: number of buffered words available for this msg
+//         (presumably obtained via MSG_REMAINSIZE_I -- confirm at call site)
+// return value: the msg length, or
+//               -1 if the msg is of nonfixed size (TRANSOBJ, GCLOBJINFO) 
+//                  and its length word has not been received yet (size <= 1)
+// An unknown msg type is dumped and terminates via BAMBOO_EXIT(0xd005).
+// NOTE: msgdata is a circular buffer, so every access relative to 
+//       msgdataindex has to wrap around at BAMBOO_MSG_BUF_LENGTH.
+INLINE int checkMsgLength_I(int size) {
+#ifdef DEBUG
+#ifndef TILERA
+  BAMBOO_DEBUGPRINT(0xcccc);
+#endif
+#endif
+       int type = msgdata[msgdataindex];
+       switch(type) {
+               case STATUSCONFIRM:
+               case TERMINATE:
+#ifdef MULTICORE_GC
+               case GCSTARTINIT: 
+               case GCSTART: 
+               case GCSTARTFLUSH: 
+               case GCFINISH: 
+               case GCMARKCONFIRM: 
+               case GCLOBJREQUEST: 
+#endif 
+               {
+                       msglength = 1;
+                       break;
+               }
+               case PROFILEOUTPUT:
+               case PROFILEFINISH:
+#ifdef MULTICORE_GC
+               case GCSTARTCOMPACT:
+               case GCFINISHINIT: 
+               case GCFINISHFLUSH: 
+               case GCMARKEDOBJ: 
+#endif
+               {
+                       msglength = 2;
+                       break;
+               }
+               case MEMREQUEST: 
+               case MEMRESPONSE:
+#ifdef MULTICORE_GC
+               case GCMAPREQUEST: 
+               case GCMAPINFO: 
+               case GCLOBJMAPPING: 
+#endif 
+               {
+                       msglength = 3;
+                       break;
+               }
+               case TRANSTALL:
+               case LOCKGROUNT:
+               case LOCKDENY:
+               case LOCKRELEASE:
+               case REDIRECTGROUNT:
+               case REDIRECTDENY:
+               case REDIRECTRELEASE:
+#ifdef MULTICORE_GC
+               case GCFINISHMARK:
+               case GCMOVESTART:
+#endif
+               { 
+                       msglength = 4;
+                       break;
+               }
+               case LOCKREQUEST:
+               case STATUSREPORT:
+#ifdef MULTICORE_GC
+               case GCFINISHCOMPACT:
+               case GCMARKREPORT: 
+#endif 
+               {
+                       msglength = 5;
+                       break;
+               }
+               case REDIRECTLOCK: 
+               {
+                       msglength = 6;
+                       break;
+               }
+               case TRANSOBJ:  // nonfixed size
+#ifdef MULTICORE_GC
+               case GCLOBJINFO: 
+#endif
+               { // nonfixed size 
+                       if(size > 1) {
+                               // the length is the second word of the msg; wrap the index 
+                               // since the msg may straddle the end of the circular buffer
+                               msglength = msgdata[(msgdataindex+1)%(BAMBOO_MSG_BUF_LENGTH)];
+                       } else {
+                               // the length word has not arrived yet
+                               return -1;
+                       }
+                       break;
+               }
+               default: 
+               {
+                       // unknown msg type: dump the type and the 6 words starting at 
+                       // msgdataindex (highest offset first), then abort
+                       BAMBOO_DEBUGPRINT_REG(type);
+                       int i = 6;
+                       while(i-- > 0) {
+                               BAMBOO_DEBUGPRINT(msgdata[(msgdataindex+i)%(BAMBOO_MSG_BUF_LENGTH)]);
+                       }
+                       BAMBOO_EXIT(0xd005);
+                       break;
+               }
+       }
+#ifdef DEBUG
+#ifndef TILERA
+       BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]);
+  BAMBOO_DEBUGPRINT(0xffff);
+#endif
+#endif
+       return msglength;
+}
 
-  if(msgdataindex == msglength) {
-    // received a whole msg
-    MSGTYPE type; 
-    type = msgdata[0];
-    switch(type) {
-    case TRANSOBJ: {
-      // receive a object transfer msg
-      struct transObjInfo * transObj = 
-                               RUNMALLOC_I(sizeof(struct transObjInfo));
-      int k = 0;
+// Handler for an object-transfer (TRANSOBJ) message.
+// Builds a transObjInfo from the words at msgdata[msgdataindex...], drops any
+// entry already in objqueue that refers to the same object, appends the new
+// entry and bumps self_numreceiveobjs.
+// NOTE(review): MSG_INDEXINC_I() presumably advances msgdataindex by one
+// word; its definition is not visible in this chunk -- confirm.
+INLINE void processmsg_transobj_I() {
+       MSG_INDEXINC_I();
+       struct transObjInfo * transObj = RUNMALLOC_I(sizeof(struct transObjInfo));
+       int k = 0;
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-                       BAMBOO_DEBUGPRINT(0xe880);
+       BAMBOO_DEBUGPRINT(0xe880);
 #endif
 #endif
-      if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+       if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
 #ifndef CLOSE_PRINT
-                               BAMBOO_DEBUGPRINT_REG(msgdata[2]);
-#endif
-                               BAMBOO_EXIT(0xa002);
-                       } 
-      // store the object and its corresponding queue info, enqueue it later
-      transObj->objptr = (void *)msgdata[2]; 
-      transObj->length = (msglength - 3) / 2;
-      transObj->queues = RUNMALLOC_I(sizeof(int)*(msglength - 3));
-      for(k = 0; k < transObj->length; ++k) {
-                               transObj->queues[2*k] = msgdata[3+2*k];
+               BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]/*[2]*/);
+#endif
+               BAMBOO_EXIT(0xa002);
+       } 
+       // store the object and its corresponding queue info, enqueue it later
+       transObj->objptr = (void *)msgdata[msgdataindex]; //[2]
+       MSG_INDEXINC_I();
+       transObj->length = (msglength - 3) / 2;
+       transObj->queues = RUNMALLOC_I(sizeof(int)*(msglength - 3));
+       for(k = 0; k < transObj->length; ++k) {
+               transObj->queues[2*k] = msgdata[msgdataindex]; //[3+2*k];
+               MSG_INDEXINC_I();
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-                               BAMBOO_DEBUGPRINT_REG(transObj->queues[2*k]);
+               //BAMBOO_DEBUGPRINT_REG(transObj->queues[2*k]);
 #endif
 #endif
-                               transObj->queues[2*k+1] = msgdata[3+2*k+1];
+               transObj->queues[2*k+1] = msgdata[msgdataindex]; //[3+2*k+1];
+               MSG_INDEXINC_I();
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-                               BAMBOO_DEBUGPRINT_REG(transObj->queues[2*k+1]);
+               //BAMBOO_DEBUGPRINT_REG(transObj->queues[2*k+1]);
 #endif
 #endif
+       }
+       // check if there is an existing duplicate item
+       {
+               struct QueueItem * qitem = getHead(&objqueue);
+               struct QueueItem * prev = NULL;
+               while(qitem != NULL) {
+                       struct transObjInfo * tmpinfo = 
+                               (struct transObjInfo *)(qitem->objectptr);
+                       if(tmpinfo->objptr == transObj->objptr) {
+                               // the same object, remove outdate one
+                               RUNFREE(tmpinfo->queues);
+                               RUNFREE(tmpinfo);
+                               removeItem(&objqueue, qitem);
+                               //break;
+                       } else {
+                               prev = qitem;
                        }
-      // check if there is an existing duplicate item
-      {
-                               struct QueueItem * qitem = getHead(&objqueue);
-                               struct QueueItem * prev = NULL;
-                               while(qitem != NULL) {
-                                       struct transObjInfo * tmpinfo = 
-                                               (struct transObjInfo *)(qitem->objectptr);
-                                       if(tmpinfo->objptr == transObj->objptr) {
-                                               // the same object, remove outdate one
-                                               RUNFREE(tmpinfo->queues);
-                                               RUNFREE(tmpinfo);
-                                               removeItem(&objqueue, qitem);
-                                               //break;
-                                       } else {
-                                               prev = qitem;
-                                       }
-                                       if(prev == NULL) {
-                                               qitem = getHead(&objqueue);
-                                       } else {
-                                               qitem = getNextQueueItem(prev);
-                                       }
-                               }
-                               addNewItem_I(&objqueue, (void *)transObj);
+                       // after a removal qitem is no longer in the queue, so the
+                       // scan resumes from prev (or from the head if nothing was kept)
+                       if(prev == NULL) {
+                               qitem = getHead(&objqueue);
+                       } else {
+                               qitem = getNextQueueItem(prev);
                        }
-      ++(self_numreceiveobjs);
-      break;
-    }
+               }
+               addNewItem_I(&objqueue, (void *)transObj);
+       }
+       ++(self_numreceiveobjs);
+}
 
-    case TRANSTALL: {
-      // receive a stall msg
-      if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-                 // non startup core can not receive stall msg
+// Handler for a stall (TRANSTALL) message; only the startup core may receive
+// it (any other core exits with 0xa003).
+// Payload words: reporting core number, its send count, its receive count.
+// All three words are consumed unconditionally so that msgdataindex is left
+// at the end of this message even when the core number is out of range; the
+// status tables are only updated for core numbers below NUMCORESACTIVE.
+INLINE void processmsg_transtall_I() {
+       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+       // non startup core can not receive stall msg
 #ifndef CLOSE_PRINT
-                               BAMBOO_DEBUGPRINT_REG(msgdata[1]);
+               BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]/*[1]*/);
 #endif
-                               BAMBOO_EXIT(0xa003);
-      } 
-      if(msgdata[1] < NUMCORESACTIVE) {
+               BAMBOO_EXIT(0xa003);
+       } 
+       int num_core = msgdata[msgdataindex]; //[1]
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex]; //[2]
+       MSG_INDEXINC_I();
+       int data3 = msgdata[msgdataindex]; //[3]
+       MSG_INDEXINC_I();
+       if(num_core < NUMCORESACTIVE) {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-                               BAMBOO_DEBUGPRINT(0xe881);
+               BAMBOO_DEBUGPRINT(0xe881);
 #endif
 #endif
-                               corestatus[msgdata[1]] = 0;
-                               numsendobjs[msgdata[1]] = msgdata[2];
-                               numreceiveobjs[msgdata[1]] = msgdata[3];
-      }
-      break;
-    }
+               corestatus[num_core] = 0;
+               numsendobjs[num_core] = data2;
+               numreceiveobjs[num_core] = data3;
+       }
+}
 
-// GC version have no lock msgs
 #ifndef MULTICORE_GC
-    case LOCKREQUEST: {
-      // receive lock request msg, handle it right now
-      // check to see if there is a lock exist for the required obj
-                       // msgdata[1] -> lock type
-                       int data2 = msgdata[2]; // obj pointer
-      int data3 = msgdata[3]; // lock
-                       int data4 = msgdata[4]; // request core
-                       // -1: redirected, 0: approved, 1: denied
-      deny = processlockrequest(msgdata[1], data3, data2, 
-                                                             data4, data4, true);  
-                       if(deny == -1) {
-                               // this lock request is redirected
-                               break;
-                       } else {
-                               // send response msg
-                               // for 32 bit machine, the size is always 4 words
-                               int tmp = deny==1?LOCKDENY:LOCKGROUNT;
-                               if(isMsgSending) {
-                                       cache_msg_4(data4, tmp, msgdata[1], data2, data3);
-                               } else {
-                                       send_msg_4(data4, tmp, msgdata[1], data2, data3, true);
-                               }
-                       }
-      break;
-    }
+// Handler for a lock request (LOCKREQUEST) message.
+// Reads lock type, object pointer, lock and requesting core from the message
+// buffer, passes them to processlockrequest() and, unless the request was
+// redirected (-1), answers the requesting core with LOCKGROUNT or LOCKDENY.
+// The reply is cached instead of sent while another send is in progress
+// (isMsgSending).
+INLINE void processmsg_lockrequest_I() {
+       // check to see if there is a lock exist for the required obj
+       // msgdata[1] -> lock type
+       int locktype = msgdata[msgdataindex]; //[1];
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex]; // obj pointer
+       MSG_INDEXINC_I();
+       int data3 = msgdata[msgdataindex]; // lock
+       MSG_INDEXINC_I();
+       int data4 = msgdata[msgdataindex]; // request core
+       MSG_INDEXINC_I();
+       // -1: redirected, 0: approved, 1: denied
+       // deny is local: this handler is now a function of its own and the
+       // result is only needed to pick the reply below
+       int deny = processlockrequest(locktype, data3, data2, data4, data4, true);
+       if(deny == -1) {
+               // this lock request is redirected, nothing to answer from here
+               // (was 'break' when this code lived inside the message switch)
+               return;
+       } else {
+               // send response msg
+               // for 32 bit machine, the size is always 4 words
+               int tmp = deny==1?LOCKDENY:LOCKGROUNT;
+               if(isMsgSending) {
+                       cache_msg_4(data4, tmp, locktype, data2, data3);
+               } else {
+                       send_msg_4(data4, tmp, locktype, data2, data3, true);
+               }
+       }
+}
 
-    case LOCKGROUNT: {
-      // receive lock grount msg
-      if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+// Handler for a lock-grant (LOCKGROUNT) message.
+// Skips one word of the message, then reads the object and lock words; if
+// they match the pending request (lockobj / lock2require) the request is
+// marked successful (lockresult = 1, lockflag = true), otherwise the core
+// exits with 0xa005. Cores outside the active range exit with 0xa004.
+INLINE void processmsg_lockgrount_I() {
+       MSG_INDEXINC_I();
+       if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
 #ifndef CLOSE_PRINT
-                               BAMBOO_DEBUGPRINT_REG(msgdata[2]);
+               BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]/*[2]*/);
 #endif
-                               BAMBOO_EXIT(0xa004);
-      } 
-      if((lockobj == msgdata[2]) && (lock2require == msgdata[3])) {
+               BAMBOO_EXIT(0xa004);
+       } 
+       int data2 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       int data3 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       if((lockobj == data2) && (lock2require == data3)) {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-                               BAMBOO_DEBUGPRINT(0xe882);
+               BAMBOO_DEBUGPRINT(0xe882);
 #endif
 #endif
-                               lockresult = 1;
-                               lockflag = true;
+               lockresult = 1;
+               lockflag = true;
 #ifndef INTERRUPT
-                               reside = false;
+               reside = false;
 #endif
-                       } else {
-                               // conflicts on lockresults
+       } else {
+               // conflicts on lockresults
 #ifndef CLOSE_PRINT
-                               BAMBOO_DEBUGPRINT_REG(msgdata[2]);
+               BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-                               BAMBOO_EXIT(0xa005);
-      }
-      break;
-    }
+               BAMBOO_EXIT(0xa005);
+       }
+}
 
-    case LOCKDENY: {
-      // receive lock deny msg
-      if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+// Handler for a lock-deny (LOCKDENY) message.
+// Skips one word of the message, then reads the object and lock words; if
+// they match the pending request (lockobj / lock2require) the request is
+// marked failed (lockresult = 0, lockflag = true), otherwise the core exits
+// with 0xa007. Cores outside the active range exit with 0xa006.
+INLINE void processmsg_lockdeny_I() {
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       int data3 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
 #ifndef CLOSE_PRINT
-                               BAMBOO_DEBUGPRINT_REG(msgdata[2]);
+               BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-                               BAMBOO_EXIT(0xa006);
-      } 
-      if((lockobj == msgdata[2]) && (lock2require == msgdata[3])) {
+               BAMBOO_EXIT(0xa006);
+       }
+       if((lockobj == data2) && (lock2require == data3)) {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-                               BAMBOO_DEBUGPRINT(0xe883);
+               BAMBOO_DEBUGPRINT(0xe883);
 #endif
 #endif
-                               lockresult = 0;
-                               lockflag = true;
+               lockresult = 0;
+               lockflag = true;
 #ifndef INTERRUPT
-                               reside = false;
+               reside = false;
 #endif
-                               } else {
-                               // conflicts on lockresults
+       } else {
+               // conflicts on lockresults
 #ifndef CLOSE_PRINT
-                               BAMBOO_DEBUGPRINT_REG(msgdata[2]);
+               BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-                               BAMBOO_EXIT(0xa007);
-      }
-      break;
-    }
+               BAMBOO_EXIT(0xa007);
+       }
+}
 
-    case LOCKRELEASE: {
-      // receive lock release msg
-                       processlockrelease(msgdata[1], msgdata[2], 0, false);
-      break;
-    }
-#endif
+// Handler for a lock-release (LOCKRELEASE) message: reads two words from the
+// message buffer and forwards them to processlockrelease() with a zero
+// redirect argument and the redirect flag set to false.
+INLINE void processmsg_lockrelease_I() {
+       int data1 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       // receive lock release msg
+       processlockrelease(data1, data2, 0, false);
+}
 
-#ifdef PROFILE
-    case PROFILEOUTPUT: {
-      // receive an output profile data request msg
-      if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
-                               // startup core can not receive profile output finish msg
-                               BAMBOO_EXIT(0xa008);
-      }
-#ifdef DEBUG
-#ifndef CLOSE_PRINT
-                       BAMBOO_DEBUGPRINT(0xe885);
-#endif
-#endif
-                       stall = true;
-                       totalexetime = msgdata[1];
-                       outputProfileData();
-                       if(isMsgSending) {
-                               cache_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE);
-                       } else {
-                               send_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE, true);
-                       }
-      break;
-    }
+// Handler for a redirected lock request (REDIRECTLOCK) message.
+// Reads lock type, object pointer, redirect lock, root requesting core and
+// requesting core, passes them to processlockrequest() and, unless the
+// request was redirected again (-1), answers the root requesting core with
+// REDIRECTGROUNT or REDIRECTDENY. The reply is cached instead of sent while
+// another send is in progress (isMsgSending).
+INLINE void processmsg_redirectlock_I() {
+       // check to see if there is a lock exist for the required obj
+       int data1 = msgdata[msgdataindex];
+       MSG_INDEXINC_I(); //msgdata[1]; // lock type
+       int data2 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();//msgdata[2]; // obj pointer
+       int data3 = msgdata[msgdataindex];
+       MSG_INDEXINC_I(); //msgdata[3]; // redirect lock
+       int data4 = msgdata[msgdataindex];
+       MSG_INDEXINC_I(); //msgdata[4]; // root request core
+       int data5 = msgdata[msgdataindex];
+       MSG_INDEXINC_I(); //msgdata[5]; // request core
+       // -1: redirected, 0: approved, 1: denied
+       // deny is local: this handler is now a function of its own and the
+       // result is only needed to pick the reply below
+       int deny = processlockrequest(data1, data3, data2, data5, data4, true);
+       if(deny == -1) {
+               // this lock request is redirected, nothing to answer from here
+               // (was 'break' when this code lived inside the message switch)
+               return;
+       } else {
+               // send response msg
+               // for 32 bit machine, the size is always 4 words
+               if(isMsgSending) {
+                       cache_msg_4(data4, deny==1?REDIRECTDENY:REDIRECTGROUNT, 
+                                                                       data1, data2, data3);
+               } else {
+                       send_msg_4(data4, deny==1?REDIRECTDENY:REDIRECTGROUNT, 
+                                                                data1, data2, data3, true);
+               }
+       }
+}
 
-    case PROFILEFINISH: {
-      // receive a profile output finish msg
-      if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-                               // non startup core can not receive profile output finish msg
+// Handler for a lock-grant message carrying redirect info (REDIRECTGROUNT).
+// Skips one word, reads the object word and, if it matches the pending
+// lockobj, reads the next word, records it in objRedirectLockTbl for lockobj
+// and marks the request successful (lockresult = 1, lockflag = true).
+// A mismatch exits with 0xa00b; cores outside the active range exit with
+// 0xa00a.
+INLINE void processmsg_redirectgrount_I() {
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
 #ifndef CLOSE_PRINT
-                               BAMBOO_DEBUGPRINT_REG(msgdata[1]);
+               BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-                               BAMBOO_EXIT(0xa009);
-      }
+               BAMBOO_EXIT(0xa00a);
+       }
+       if(lockobj == data2) {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-                       BAMBOO_DEBUGPRINT(0xe886);
+               BAMBOO_DEBUGPRINT(0xe891);
 #endif
 #endif
-                       profilestatus[msgdata[1]] = 0;
-      break;
-    }
+               int data3 = msgdata[msgdataindex];
+               MSG_INDEXINC_I();
+               lockresult = 1;
+               lockflag = true;
+               RuntimeHashadd_I(objRedirectLockTbl, lockobj, data3);
+#ifndef INTERRUPT
+               reside = false;
 #endif
-
-// GC version has no lock msgs
-#ifndef MULTICORE_GC
-       case REDIRECTLOCK: {
-         // receive a redirect lock request msg, handle it right now
-               // check to see if there is a lock exist for the required obj
-         int data1 = msgdata[1]; // lock type
-         int data2 = msgdata[2]; // obj pointer
-               int data3 = msgdata[3]; // redirect lock
-         int data4 = msgdata[4]; // root request core
-         int data5 = msgdata[5]; // request core
-         deny = processlockrequest(msgdata[1], data3, data2, data5, data4, true);
-         if(deny == -1) {
-                 // this lock request is redirected
-                 break;
-         } else {
-                 // send response msg
-                 // for 32 bit machine, the size is always 4 words
-                 if(isMsgSending) {
-                         cache_msg_4(data4, deny==1?REDIRECTDENY:REDIRECTGROUNT, 
-                                                       data1, data2, data3);
-                 } else {
-                               send_msg_4(data4, deny==1?REDIRECTDENY:REDIRECTGROUNT, 
-                                                      data1, data2, data3, true);
-                 }
-         }
-         break;
+       } else {
+               // conflicts on lockresults
+#ifndef CLOSE_PRINT
+               BAMBOO_DEBUGPRINT_REG(data2);
+#endif
+               BAMBOO_EXIT(0xa00b);
        }
+}
 
-       case REDIRECTGROUNT: {
-               // receive a lock grant msg with redirect info
-               if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+// Handler for a lock-deny message carrying redirect info (REDIRECTDENY).
+// Skips one word, reads the object word and, if it matches the pending
+// lockobj, marks the request failed (lockresult = 0, lockflag = true).
+// A mismatch exits with 0xa00d; cores outside the active range exit with
+// 0xa00c.
+INLINE void processmsg_redirectdeny_I() {
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
 #ifndef CLOSE_PRINT
-                       BAMBOO_DEBUGPRINT_REG(msgdata[2]);
+               BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-                       BAMBOO_EXIT(0xa00a);
-               }
-               if(lockobj == msgdata[2]) {
+               BAMBOO_EXIT(0xa00c);
+       }
+       if(lockobj == data2) {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT(0xe891);
+               BAMBOO_DEBUGPRINT(0xe892);
 #endif
 #endif
-                 lockresult = 1;
-                 lockflag = true;
-                 RuntimeHashadd_I(objRedirectLockTbl, lockobj, msgdata[3]);
+               lockresult = 0;
+               lockflag = true;
 #ifndef INTERRUPT
-                 reside = false;
+               reside = false;
 #endif
-               } else {
-                 // conflicts on lockresults
+       } else {
+               // conflicts on lockresults
 #ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT_REG(msgdata[2]);
+               BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-                 BAMBOO_EXIT(0xa00b);
-               }
-               break;
+               BAMBOO_EXIT(0xa00d);
+       }
+}
+
+// Handler for a lock-release message carrying redirect info
+// (REDIRECTRELEASE): reads three words from the message buffer and forwards
+// them to processlockrelease() with the redirect flag set to true.
+INLINE void processmsg_redirectrelease_I() {
+       int data1 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       int data3 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       processlockrelease(data1, data2, data3, true);
+}
+#endif // #ifndef MULTICORE_GC
+
+#ifdef PROFILE
+// Handler for a profile-output request (PROFILEOUTPUT) message.
+// The startup core must not receive it (exit 0xa008). Other cores set stall,
+// take totalexetime from the message, call outputProfileData() and report
+// PROFILEFINISH with their core number back to STARTUPCORE (cached instead of
+// sent while another send is in progress).
+INLINE void processmsg_profileoutput_I() {
+       if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
+               // startup core can not receive profile output finish msg
+               BAMBOO_EXIT(0xa008);
        }
-       
-       case REDIRECTDENY: {
-         // receive a lock deny msg with redirect info
-         if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
-#ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT_REG(msgdata[2]);
-#endif
-                 BAMBOO_EXIT(0xa00c);
-         }
-               if(lockobj == msgdata[2]) {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT(0xe892);
+       BAMBOO_DEBUGPRINT(0xe885);
 #endif
 #endif
-                 lockresult = 0;
-                 lockflag = true;
-#ifndef INTERRUPT
-                 reside = false;
-#endif
-               } else {
-                 // conflicts on lockresults
-#ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT_REG(msgdata[2]);
-#endif
-                 BAMBOO_EXIT(0xa00d);
-               }
-               break;
+       stall = true;
+       totalexetime = msgdata[msgdataindex]; //[1]
+       MSG_INDEXINC_I();
+       outputProfileData();
+       if(isMsgSending) {
+               cache_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE);
+       } else {
+               send_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE, true);
        }
+}
 
-       case REDIRECTRELEASE: {
-         // receive a lock release msg with redirect info
-               processlockrelease(msgdata[1], msgdata[2], msgdata[3], true);
-               break;
+// Handler for a profile-output-finished (PROFILEFINISH) message.
+// Only the startup core may receive it (others exit with 0xa009); it clears
+// the profilestatus entry of the core number carried in the message.
+INLINE void processmsg_profilefinish_I() {
+       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+               // non startup core can not receive profile output finish msg
+#ifndef CLOSE_PRINT
+               BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex/*1*/]);
+#endif
+               BAMBOO_EXIT(0xa009);
        }
+#ifdef DEBUG
+#ifndef CLOSE_PRINT
+       BAMBOO_DEBUGPRINT(0xe886);
 #endif
-       
-       case STATUSCONFIRM: {
-      // receive a status confirm info
-         if((BAMBOO_NUM_OF_CORE == STARTUPCORE) 
-                               || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) {
-                 // wrong core to receive such msg
-                 BAMBOO_EXIT(0xa00e);
-               } else {
-                 // send response msg
+#endif
+       int data1 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       profilestatus[data1] = 0;
+}
+#endif // #ifdef PROFILE
+
+// Handler for a status-confirm (STATUSCONFIRM) request.
+// The startup core and cores outside the active range exit with 0xa00e.
+// Any other core answers STARTUPCORE with a STATUSREPORT carrying its busy
+// flag, its core number and its send/receive object counters (cached instead
+// of sent while another send is in progress). No payload word is read.
+INLINE void processmsg_statusconfirm_I() {
+       if((BAMBOO_NUM_OF_CORE == STARTUPCORE) 
+                       || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) {
+               // wrong core to receive such msg
+               BAMBOO_EXIT(0xa00e);
+       } else {
+               // send response msg
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT(0xe887);
+               BAMBOO_DEBUGPRINT(0xe887);
 #endif
 #endif
-                 if(isMsgSending) {
-                         cache_msg_5(STARTUPCORE, STATUSREPORT, 
-                                                       busystatus?1:0, BAMBOO_NUM_OF_CORE,
-                                                                               self_numsendobjs, self_numreceiveobjs);
-                 } else {
-                               send_msg_5(STARTUPCORE, STATUSREPORT, busystatus?1:0, 
-                                                      BAMBOO_NUM_OF_CORE, self_numsendobjs, 
-                                                                        self_numreceiveobjs, true);
-                 }
+               if(isMsgSending) {
+                       cache_msg_5(STARTUPCORE, STATUSREPORT, 
+                                                                       busystatus?1:0, BAMBOO_NUM_OF_CORE,
+                                                                       self_numsendobjs, self_numreceiveobjs);
+               } else {
+                       send_msg_5(STARTUPCORE, STATUSREPORT, busystatus?1:0, 
+                                                                BAMBOO_NUM_OF_CORE, self_numsendobjs, 
+                                                                self_numreceiveobjs, true);
                }
-         break;
        }
+}
 
-       case STATUSREPORT: {
-         // receive a status confirm info
-         if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-                 // wrong core to receive such msg
+// Handler for a status-report (STATUSREPORT) message.
+// Payload words in order: status, reporting core number, send count, receive
+// count. Only the startup core may receive it (others exit with 0xa00f); it
+// decrements numconfirm while waitconfirm is set and stores the reported
+// values in corestatus / numsendobjs / numreceiveobjs.
+// NOTE(review): the core number is used as an array index without a range
+// check -- confirm that senders are trusted.
+INLINE void processmsg_statusreport_I() {
+       int data1 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       int data3 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       int data4 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       // receive a status confirm info
+       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+               // wrong core to receive such msg
 #ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT_REG(msgdata[2]);
+               BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-                 BAMBOO_EXIT(0xa00f);
-               } else {
+               BAMBOO_EXIT(0xa00f);
+       } else {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT(0xe888);
+               BAMBOO_DEBUGPRINT(0xe888);
 #endif
 #endif
-                 if(waitconfirm) {
-                         numconfirm--;
-                 }
-                 corestatus[msgdata[2]] = msgdata[1];
-                       numsendobjs[msgdata[2]] = msgdata[3];
-                       numreceiveobjs[msgdata[2]] = msgdata[4];
+               if(waitconfirm) {
+                       numconfirm--;
                }
-         break;
+               corestatus[data2] = data1;
+               numsendobjs[data2] = data3;
+               numreceiveobjs[data2] = data4;
        }
+}
 
-       case TERMINATE: {
-         // receive a terminate msg
+// Handler for a terminate (TERMINATE) message: calls disruntimedata() and
+// then exits with code 0. No payload word is read.
+INLINE void processmsg_terminate_I() {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT(0xe889);
+       BAMBOO_DEBUGPRINT(0xe889);
 #endif
 #endif
-               disruntimedata();
-               BAMBOO_EXIT(0);
-         break;
-       }
+       disruntimedata();
+       BAMBOO_EXIT(0);
+}
 
-       case MEMREQUEST: {
-         // receive a shared memory request msg
-         if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-                 // wrong core to receive such msg
+// Handler for a shared-memory request (MEMREQUEST) message.
+// Payload words in order: requested size (second argument of smemalloc_I)
+// and requesting core. Only the startup core may receive it (others exit
+// with 0xa010).
+// With MULTICORE_GC, a request arriving while gcprocessing is set is not
+// served; during INITPHASE the requester is sent GCSTARTINIT again instead.
+// Otherwise memory is taken from smemalloc_I() and, when that succeeds, the
+// address and allocated size go back to the requester as MEMRESPONSE.
+INLINE void processmsg_memrequest_I() {
+       int data1 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       // receive a shared memory request msg
+       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+               // wrong core to receive such msg
 #ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT_REG(msgdata[2]);
+               BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-                 BAMBOO_EXIT(0xa010);
-               } else {
+               BAMBOO_EXIT(0xa010);
+       } else {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT(0xe88a);
+               BAMBOO_DEBUGPRINT(0xe88a);
 #endif
 #endif
-                       int allocsize = 0;
-                 void * mem = NULL;
+               int allocsize = 0;
+               void * mem = NULL;
 #ifdef MULTICORE_GC
-                       if(gcprocessing) {
-                               // is currently doing gc, dump this msg
-                               if(INITPHASE == gcphase) {
-                                       // if still in the initphase of gc, send a startinit msg again
-                                       if(isMsgSending) {
-                                               cache_msg_1(msgdata[2], GCSTARTINIT);
-                                       } else {
-                                               send_msg_1(msgdata[2], GCSTARTINIT, true);
-                                       }
+               if(gcprocessing) {
+                       // is currently doing gc, dump this msg
+                       if(INITPHASE == gcphase) {
+                               // if still in the initphase of gc, send a startinit msg again
+                               if(isMsgSending) {
+                                       cache_msg_1(data2, GCSTARTINIT);
+                               } else {
+                                       send_msg_1(data2, GCSTARTINIT, true);
                                }
-                               break;
-                       } 
-#endif
-                       mem = smemalloc_I(msgdata[2], msgdata[1], &allocsize);
-                       if(mem == NULL) {
-                               // in this case, the gcflag of the startup core has been set
-                               // and the gc should be started later, then a GCSTARTINIT msg
-                               // will be sent to the requesting core to notice it to start gc
-                               // and try malloc again
-                               break;
                        }
+               } else { 
+#endif
+               mem = smemalloc_I(data2, data1, &allocsize);
+               if(mem != NULL) {
                        // send the start_va to request core
                        if(isMsgSending) {
-                               cache_msg_3(msgdata[2], MEMRESPONSE, mem, allocsize);
+                               cache_msg_3(data2, MEMRESPONSE, mem, allocsize);
                        } else {
-                               send_msg_3( msgdata[2], MEMRESPONSE, mem, allocsize, true);
+                               send_msg_3(data2, MEMRESPONSE, mem, allocsize, true);
                        } 
+               } // if mem == NULL, the gcflag of the startup core has been set
+                       // and the gc should be started later, then a GCSTARTINIT msg
+                       // will be sent to the requesting core to notice it to start gc
+                       // and try malloc again
+#ifdef MULTICORE_GC
                }
-         break;
+#endif
        }
+}
 
-       case MEMRESPONSE: {
-               // receive a shared memory response msg
+// Handler for a shared-memory response (MEMRESPONSE) message.
+// Payload words in order: start address of the granted block and its size.
+// A size of 0 clears bamboo_smem_size / bamboo_cur_msp. Otherwise, with
+// MULTICORE_GC, the first BAMBOO_CACHE_LINE_SIZE bytes of the block are
+// zeroed and used as a header holding the size, and the usable region starts
+// after that header; without MULTICORE_GC the block is used as is.
+// smemflag is set at the end. With MULTICORE_GC nothing but consuming the two
+// words happens while gcprocessing is set.
+// NOTE(review): data1 is an int that is passed to memset() and assigned to
+// bamboo_cur_msp without a pointer cast; presumably this relies on int and
+// pointers having the same width on the target -- confirm.
+INLINE void processmsg_memresponse_I() {
+       int data1 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       // receive a shared memory response msg
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-         BAMBOO_DEBUGPRINT(0xe88b);
+       BAMBOO_DEBUGPRINT(0xe88b);
 #endif
 #endif
 #ifdef MULTICORE_GC
-               if(gcprocessing) {
-                       // is currently doing gc, dump this msg
-                       break;
-               }
+       // if is currently doing gc, dump this msg
+       if(!gcprocessing) {
 #endif
-         if(msgdata[2] == 0) {
-                 bamboo_smem_size = 0;
-                 bamboo_cur_msp = 0;
-         } else {
+       if(data2 == 0) {
+               bamboo_smem_size = 0;
+               bamboo_cur_msp = 0;
+       } else {
 #ifdef MULTICORE_GC
-                       // fill header to store the size of this mem block
-                       memset(msgdata[1], 0, BAMBOO_CACHE_LINE_SIZE);
-                       (*((int*)msgdata[1])) = msgdata[2];
-                 bamboo_smem_size = msgdata[2] - BAMBOO_CACHE_LINE_SIZE;
-                       bamboo_cur_msp = msgdata[1] + BAMBOO_CACHE_LINE_SIZE;
+               // fill header to store the size of this mem block
+               memset(data1, 0, BAMBOO_CACHE_LINE_SIZE);
+               (*((int*)data1)) = data2;
+               bamboo_smem_size = data2 - BAMBOO_CACHE_LINE_SIZE;
+               bamboo_cur_msp = data1 + BAMBOO_CACHE_LINE_SIZE;
 #else
-                 bamboo_smem_size = msgdata[2];
-                 bamboo_cur_msp =(void*)(msgdata[1]);
+               bamboo_smem_size = data2;
+               bamboo_cur_msp =(void*)(data1);
 #endif
-         }
-         smemflag = true;
-         break;
        }
+       smemflag = true;
+#ifdef MULTICORE_GC
+       }
+#endif
+}
 
 #ifdef MULTICORE_GC
-       // GC msgs
-       case GCSTARTINIT: {
-               gcflag = true;
-               gcphase = INITPHASE;
-               if(!smemflag) {
-                       // is waiting for response of mem request
-                       // let it return NULL and start gc
-                       bamboo_smem_size = 0;
-                       bamboo_cur_msp = NULL;
-                       smemflag = true;
-               }
-         break;
+// Handler for a GC init-phase start (GCSTARTINIT) message: sets gcflag and
+// switches gcphase to INITPHASE. If a shared-memory request is still
+// outstanding (smemflag is false) it is completed with an empty result so
+// the waiting allocation returns NULL and GC can start.
+INLINE void processmsg_gcstartinit_I() {
+       gcflag = true;
+       gcphase = INITPHASE;
+       if(!smemflag) {
+               // is waiting for response of mem request
+               // let it return NULL and start gc
+               bamboo_smem_size = 0;
+               bamboo_cur_msp = NULL;
+               smemflag = true;
        }
+}
 
-       case GCSTART: {
-               // receive a start GC msg
+// Handler for a GC start (GCSTART) message: switches gcphase to MARKPHASE.
+// No payload word is read.
+INLINE void processmsg_gcstart_I() {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-         BAMBOO_DEBUGPRINT(0xe88c);
+       BAMBOO_DEBUGPRINT(0xe88c);
 #endif
 #endif
-         // set the GC flag
-               gcphase = MARKPHASE;
-         break;
-       }
+       // set the GC flag
+       gcphase = MARKPHASE;
+}
 
-       case GCSTARTCOMPACT: {
-               // a compact phase start msg
-               gcblock2fill = msgdata[1];
-               gcphase = COMPACTPHASE;
-               break;
-       }
+// Handler for a compact-phase start (GCSTARTCOMPACT) message: stores the
+// single payload word in gcblock2fill and switches gcphase to COMPACTPHASE.
+INLINE void processmsg_gcstartcompact_I() {
+       gcblock2fill = msgdata[msgdataindex];
+       MSG_INDEXINC_I(); //msgdata[1];
+       gcphase = COMPACTPHASE;
+}
 
-       case GCSTARTFLUSH: {
-               // received a flush phase start msg
-               gcphase = FLUSHPHASE;
-               break;
-       }
-       
-       case GCFINISHINIT: {
-               // received a init phase finish msg
-               if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-                 // non startup core can not receive this msg
+// Handler for a flush-phase start (GCSTARTFLUSH) message: switches gcphase
+// to FLUSHPHASE. No payload word is read.
+INLINE void processmsg_gcstartflush_I() {
+       gcphase = FLUSHPHASE;
+}
+
+// Handler for an init-phase finished (GCFINISHINIT) message.
+// The single payload word is the reporting core number. Only the startup
+// core may receive it (others exit with 0xb001); it clears the gccorestatus
+// entry of the reporting core when that number is below NUMCORESACTIVE.
+INLINE void processmsg_gcfinishinit_I() {
+       int data1 = msgdata[msgdataindex];
+       MSG_INDEXINC_I();
+       // received a init phase finish msg
+       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+               // non startup core can not receive this msg
 #ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT_REG(msgdata[1]);
+               BAMBOO_DEBUGPRINT_REG(data1);
 #endif
-                 BAMBOO_EXIT(0xb001);
-               }
+               BAMBOO_EXIT(0xb001);
+       }
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe88c);
-               BAMBOO_DEBUGPRINT_REG(msgdata[1]);
+       BAMBOO_DEBUGPRINT(0xe88c);
+       BAMBOO_DEBUGPRINT_REG(data1);
 #endif
-               // All cores should do init GC
-               if(msgdata[1] < NUMCORESACTIVE) {
-                       gccorestatus[msgdata[1]] = 0;
-               }
+       // All cores should do init GC
+       if(data1 < NUMCORESACTIVE) {
+               gccorestatus[data1] = 0;
        }
+}
 
-       case GCFINISHMARK: {
-               // received a mark phase finish msg
-               if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-                 // non startup core can not receive this msg
+INLINE void processmsg_gcfinishmark_I() { // GCFINISHMARK handler: stores one core's status and send/receive counters
+       int data1 = msgdata[msgdataindex]; // was msgdata[1]; index into the per-core arrays below
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex]; // was msgdata[2]; stored into gcnumsendobjs
+       MSG_INDEXINC_I();
+       int data3 = msgdata[msgdataindex]; // was msgdata[3]; stored into gcnumreceiveobjs
+       MSG_INDEXINC_I();
+       // received a mark phase finish msg
+       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+               // only the startup core may receive this msg; any other core exits with 0xb002
 #ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT_REG(msgdata[1]);
+               BAMBOO_DEBUGPRINT_REG(data1);
 #endif
-                 BAMBOO_EXIT(0xb002);
-               }
-               // all cores should do mark
-               if(msgdata[1] < NUMCORESACTIVE) {
-                       gccorestatus[msgdata[1]] = 0;
-                       gcnumsendobjs[msgdata[1]] = msgdata[2];
-                       gcnumreceiveobjs[msgdata[1]] = msgdata[3];
-               }
-         break;
+               BAMBOO_EXIT(0xb002);
        }
-       
-       case GCFINISHCOMPACT: {
-               // received a compact phase finish msg
-               if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-                 // non startup core can not receive this msg
-                 // return -1
+       // all cores should do mark; an index >= NUMCORESACTIVE is silently ignored
+       if(data1 < NUMCORESACTIVE) {
+               gccorestatus[data1] = 0;
+               gcnumsendobjs[data1] = data2;
+               gcnumreceiveobjs[data1] = data3;
+       }
+}
+
+INLINE void processmsg_gcfinishcompact_I() { // GCFINISHCOMPACT handler: records a core's compact result and answers its request for more space
+       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+               // only the startup core may receive this msg;
+               // any other core prints the first payload word (not yet consumed) and exits with 0xb003
 #ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT_REG(msgdata[1]);
+               BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]/*[1]*/);
 #endif
-                 BAMBOO_EXIT(0xb003);
+               BAMBOO_EXIT(0xb003);
+       }
+       int cnum = msgdata[msgdataindex];
+       MSG_INDEXINC_I(); // consumed the word formerly read as msgdata[1]
+       int filledblocks = msgdata[msgdataindex];
+       MSG_INDEXINC_I(); // consumed the word formerly read as msgdata[2]
+       int heaptop = msgdata[msgdataindex];
+       MSG_INDEXINC_I(); // consumed the word formerly read as msgdata[3]
+       int data4 = msgdata[msgdataindex];
+       MSG_INDEXINC_I(); // consumed the word formerly read as msgdata[4]
+       // only gc cores need to do compact; a cnum >= NUMCORES4GC is ignored
+       if(cnum < NUMCORES4GC) {
+               if(COMPACTPHASE == gcphase) { // results are recorded only while gcphase is still COMPACTPHASE
+                       gcfilledblocks[cnum] = filledblocks;
+                       gcloads[cnum] = heaptop;
                }
-               int cnum = msgdata[1];
-               int filledblocks = msgdata[2];
-               int heaptop = msgdata[3];
-               int data4 = msgdata[4];
-               // only gc cores need to do compact
-               if(cnum < NUMCORES4GC) {
-                       if(COMPACTPHASE == gcphase) {
-                               gcfilledblocks[cnum] = filledblocks;
-                               gcloads[cnum] = heaptop;
-                       }
-                       if(data4 > 0) {
-                               // ask for more mem
-                               int startaddr = 0;
-                               int tomove = 0;
-                               int dstcore = 0;
-                               if(gcfindSpareMem_I(&startaddr, &tomove, &dstcore, data4, cnum)) {
-                                       if(isMsgSending) {
-                                               cache_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove);
-                                       } else {
-                                               send_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove, true);
-                                       }
+               if(data4 > 0) {
+                       // data4 > 0: the core asks for more mem; data4 is forwarded to gcfindSpareMem_I
+                       int startaddr = 0;
+                       int tomove = 0;
+                       int dstcore = 0;
+                       if(gcfindSpareMem_I(&startaddr, &tomove, &dstcore, data4, cnum)) { // reply with GCMOVESTART only when the search reports success
+                               if(isMsgSending) {
+                                       cache_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove);
+                         } else {
+                                       send_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove, true);
                                }
-                       } else {
-                               gccorestatus[cnum] = 0;
-                       } // if(data4>0)
-               } // if(cnum < NUMCORES4GC)
-         break;
-       }
+                       }
+               } else {
+                       gccorestatus[cnum] = 0; // no further request: clear this core's status entry
+               } // if(data4>0)
+       } // if(cnum < NUMCORES4GC)
+}
 
-       case GCFINISHFLUSH: {
-               // received a flush phase finish msg
-               if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-                 // non startup core can not receive this msg
-                 // return -1
+INLINE void processmsg_gcfinishflush_I() { // GCFINISHFLUSH handler: clears gccorestatus[data1] on the startup core
+       int data1 = msgdata[msgdataindex]; // was msgdata[1]; used below as an index into gccorestatus
+       MSG_INDEXINC_I();
+       // received a flush phase finish msg
+       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+               // only the startup core may receive this msg;
+               // any other core exits with 0xb004 (nothing is returned)
 #ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT_REG(msgdata[1]);
+               BAMBOO_DEBUGPRINT_REG(data1);
 #endif
-                 BAMBOO_EXIT(0xb004);
-               } 
-               // all cores should do flush
-               if(msgdata[1] < NUMCORESACTIVE) {
-                 gccorestatus[msgdata[1]] = 0;
-               }
-         break;
-       }
-
-       case GCFINISH: {
-               // received a GC finish msg
-               gcphase = FINISHPHASE;
-               break;
+               BAMBOO_EXIT(0xb004);
+       } 
+       // all cores should do flush; an index >= NUMCORESACTIVE is silently ignored
+       if(data1 < NUMCORESACTIVE) {
+               gccorestatus[data1] = 0;
        }
+}
 
-       case GCMARKCONFIRM: {
-               // received a marked phase finish confirm request msg
-               // all cores should do mark
-               if((BAMBOO_NUM_OF_CORE == STARTUPCORE) 
-                               || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) {
-                 // wrong core to receive such msg
-                 BAMBOO_EXIT(0xb005);
+INLINE void processmsg_gcmarkconfirm_I() { // GCMARKCONFIRM handler: reads no payload, replies to STARTUPCORE with GCMARKREPORT
+       if((BAMBOO_NUM_OF_CORE == STARTUPCORE) 
+                       || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) {
+               // the startup core and cores numbered >= NUMCORESACTIVE must not receive this msg
+               BAMBOO_EXIT(0xb005);
+       } else {
+               // send response msg: this core's number, busy status and send/receive counters
+               if(isMsgSending) {
+                       cache_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, 
+                                                                       gcbusystatus, gcself_numsendobjs, 
+                                                                       gcself_numreceiveobjs);
                } else {
-                 // send response msg
-                 if(isMsgSending) {
-                         cache_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, 
-                                                       gcbusystatus, gcself_numsendobjs, 
-                                                                               gcself_numreceiveobjs);
-                 } else {
-                               send_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, 
-                                                      gcbusystatus, gcself_numsendobjs, 
-                                                                        gcself_numreceiveobjs, true);
-                 }
+                       send_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, 
+                                                                gcbusystatus, gcself_numsendobjs, 
+                                                                gcself_numreceiveobjs, true);
                }
-         break;
        }
+}
 
-       case GCMARKREPORT: {
-               // received a marked phase finish confirm response msg
-               if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-                 // wrong core to receive such msg
+INLINE void processmsg_gcmarkreport_I() { // GCMARKREPORT handler: stores the four reported words for one core
+       int data1 = msgdata[msgdataindex]; // was msgdata[1]; index into the per-core arrays below
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex]; // was msgdata[2]; stored into gccorestatus
+       MSG_INDEXINC_I();
+       int data3 = msgdata[msgdataindex]; // was msgdata[3]; stored into gcnumsendobjs
+       MSG_INDEXINC_I();
+       int data4 = msgdata[msgdataindex]; // was msgdata[4]; stored into gcnumreceiveobjs
+       MSG_INDEXINC_I();
+       // received a marked phase finish confirm response msg
+       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+               // only the startup core may receive this msg; any other core exits with 0xb006
 #ifndef CLOSE_PRINT
-                 BAMBOO_DEBUGPRINT_REG(msgdata[2]);
+               BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-                 BAMBOO_EXIT(0xb006);
-               } else {
-                 if(waitconfirm) {
-                         numconfirm--;
-                 }
-                 gccorestatus[msgdata[1]] = msgdata[2];
-                 gcnumsendobjs[msgdata[1]] = msgdata[3];
-                 gcnumreceiveobjs[msgdata[1]] = msgdata[4];
+               BAMBOO_EXIT(0xb006);
+       } else {
+               if(waitconfirm) {
+                       numconfirm--; // one fewer outstanding confirmation while waitconfirm is set
                }
-         break;
+               gccorestatus[data1] = data2; // NOTE(review): data1 is not range-checked here, unlike processmsg_gcfinishmark_I - confirm senders guarantee a valid index
+               gcnumsendobjs[data1] = data3;
+               gcnumreceiveobjs[data1] = data4;
        }
+}
 
-       case GCMARKEDOBJ: {
-               // received a markedObj msg
-               if(((int *)msgdata[1])[6] == INIT) {
-                               // this is the first time that this object is discovered,
-                               // set the flag as DISCOVERED
-                               ((int *)msgdata[1])[6] = DISCOVERED;
-                               gc_enqueue_I(msgdata[1]);
-               }
-               gcself_numreceiveobjs++;
-               gcbusystatus = true;
-               break;
+INLINE void processmsg_gcmarkedobj_I() { // GCMARKEDOBJ handler: enqueues a not-yet-discovered object and marks this core busy
+       int data1 = msgdata[msgdataindex]; // was msgdata[1]; reinterpreted below as an int* to the object
+       MSG_INDEXINC_I();
+       // received a markedObj msg; int slot 6 of the object holds its INIT/DISCOVERED state
+       if(((int *)data1)[6] == INIT) {
+                       // this is the first time that this object is discovered,
+                       // set the flag as DISCOVERED and queue it via gc_enqueue_I
+                       ((int *)data1)[6] = DISCOVERED;
+                       gc_enqueue_I(data1);
        }
+       gcself_numreceiveobjs++; // counted for every msg, including objects already discovered
+       gcbusystatus = true;
+}
 
-       case GCMOVESTART: {
-               // received a start moving objs msg
-               gctomove = true;
-               gcdstcore = msgdata[1];
-               gcmovestartaddr = msgdata[2];
-               gcblock2fill = msgdata[3];
-               break;
-       }
-       
-       case GCMAPREQUEST: {
-               // received a mapping info request msg
-               void * dstptr = NULL;
-               //dstptr = mgchashSearch(msgdata[1]);
-               RuntimeHashget(gcpointertbl, msgdata[1], &dstptr);
-               //MGCHashget(gcpointertbl, msgdata[1], &dstptr);
-               if(NULL == dstptr) {
-                       // no such pointer in this core, something is wrong
+INLINE void processmsg_gcmovestart_I() { // GCMOVESTART handler: raises gctomove and latches the three payload words
+       gctomove = true;
+       gcdstcore = msgdata[msgdataindex];
+       MSG_INDEXINC_I(); // consumed the word formerly read as msgdata[1]
+       gcmovestartaddr = msgdata[msgdataindex];
+       MSG_INDEXINC_I(); // consumed the word formerly read as msgdata[2]
+       gcblock2fill = msgdata[msgdataindex];
+       MSG_INDEXINC_I(); // consumed the word formerly read as msgdata[3]
+}
+
+INLINE void processmsg_gcmaprequest_I() { // GCMAPREQUEST handler: looks data1 up in gcpointertbl and replies to data2 with GCMAPINFO
+#ifdef GC_PROFILE
+       //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
+#endif
+       void * dstptr = NULL; // still NULL after the lookup is treated as "not found" below
+       int data1 = msgdata[msgdataindex]; // was msgdata[1]; the lookup key
+       MSG_INDEXINC_I();
+       //dstptr = mgchashSearch(msgdata[1]);
+#ifdef GC_PROFILE
+       unsigned long long ttime = BAMBOO_GET_EXE_TIME();
+#endif
+       RuntimeHashget(gcpointertbl, data1, &dstptr);
+#ifdef GC_PROFILE
+       flushstalltime += BAMBOO_GET_EXE_TIME() - ttime; // GC_PROFILE only: time spent in the hash lookup
+#endif
+       int data2 = msgdata[msgdataindex]; // was msgdata[2]; destination of the reply
+       MSG_INDEXINC_I();
+       //MGCHashget(gcpointertbl, msgdata[1], &dstptr);
+#ifdef GC_PROFILE
+       unsigned long long ttimei = BAMBOO_GET_EXE_TIME();
+#endif
+       if(NULL == dstptr) {
+               // no such pointer in this core, something is wrong: exit with 0xb007
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT_REG(msgdata[1]);
-                       BAMBOO_DEBUGPRINT_REG(msgdata[2]);
+               BAMBOO_DEBUGPRINT_REG(data1);
+               BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-                       BAMBOO_EXIT(0xb007);
-                       //assume that the object was not moved, use the original address
-                       /*if(isMsgSending) {
-                               cache_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1]);
-                       } else {
-                               send_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1], true);
-                       }*/
+               BAMBOO_EXIT(0xb007);
+               // disabled fallback kept below: reply with the original address as if the object had not moved
+               /*if(isMsgSending) {
+                       cache_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1]);
                } else {
-                       // send back the mapping info
-                       if(isMsgSending) {
-                               cache_msg_3(msgdata[2], GCMAPINFO, msgdata[1], (int)dstptr);
-                       } else {
-                               send_msg_3(msgdata[2], GCMAPINFO, msgdata[1], (int)dstptr, true);
-                       }
+                       send_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1], true);
+               }*/
+       } else {
+               // send back the mapping info: (key, looked-up value) to the core named by data2
+               if(isMsgSending) {
+                       cache_msg_3(data2, GCMAPINFO, data1, (int)dstptr); // NOTE(review): (int) cast narrows the pointer - confirm int is pointer-sized on every target
+               } else {
+                       send_msg_3(data2, GCMAPINFO, data1, (int)dstptr, true);
                }
-               break;
        }
+#ifdef GC_PROFILE
+       flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimei; // GC_PROFILE only: time spent from the NULL check through the reply
+       //num_mapinforequest_i++;
+#endif
+}
 
-       case GCMAPINFO: {
-               // received a mapping info response msg
-               if(msgdata[1] != gcobj2map) {
+INLINE void processmsg_gcmapinfo_I() { // GCMAPINFO handler: accepts the reply for gcobj2map, caches the mapping and sets gcismapped
+#ifdef GC_PROFILE
+       //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
+#endif
+       int data1 = msgdata[msgdataindex]; // was msgdata[1]; must equal the object currently being mapped
+       MSG_INDEXINC_I();
+       if(data1 != gcobj2map) {
                        // obj not matched, something is wrong
 #ifdef DEBUG
                        BAMBOO_DEBUGPRINT_REG(gcobj2map);
@@ -2149,89 +2293,362 @@ msg:
 #endif
                        BAMBOO_EXIT(0xb008);
                } else {
-                       gcmappedobj = msgdata[2];
+                       gcmappedobj = msgdata[msgdataindex]; // second payload word (msgdata[2] in the old code): the mapped value for gcobj2map
+      MSG_INDEXINC_I();
+                       //mgchashReplace_I(msgdata[1], msgdata[2]);
                        //mgchashInsert_I(gcobj2map, gcmappedobj);
                        RuntimeHashadd_I(gcpointertbl, gcobj2map, gcmappedobj);
                        //MGCHashadd_I(gcpointertbl, gcobj2map, gcmappedobj);
                }
                gcismapped = true;
-               break;
-       }
+#ifdef GC_PROFILE
+                       //flushstalltime += BAMBOO_GET_EXE_TIME() - ttime;
+#endif
+}
 
-       case GCLOBJREQUEST: {
-               // received a large objs info request msg
-               transferMarkResults_I();
-               break;
+INLINE void processmsg_gclobjinfo_I() { // GCLOBJINFO handler: records one core's mark results and enqueues its large objects
+       numconfirm--;
+
+       int data1 = msgdata[msgdataindex]; // was msgdata[1]; upper bound of the pair loop below
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex]; // was msgdata[2]; used as the core index cnum
+       MSG_INDEXINC_I();
+       if(BAMBOO_NUM_OF_CORE > NUMCORES4GC - 1) { // cores numbered >= NUMCORES4GC must not receive this msg
+#ifndef CLOSE_PRINT
+               BAMBOO_DEBUGPRINT_REG(data2);
+#endif
+               BAMBOO_EXIT(0xb009);
+       } 
+       // store the mark result info 
+       int cnum = data2;
+       gcloads[cnum] = msgdata[msgdataindex];
+       MSG_INDEXINC_I(); // consumed the word formerly read as msgdata[3]
+       int data4 = msgdata[msgdataindex]; // was msgdata[4]; gcheaptop keeps the largest value seen
+       MSG_INDEXINC_I();
+       if(gcheaptop < data4) {
+               gcheaptop = data4;
        }
+       // large obj info here: (lobj, length) pairs occupy message words 5 .. data1-1
+       for(int k = 5; k < data1; k += 2) { // k must track the two words consumed per pair (the old code used msgdata[k++] twice); without it the loop never ends once data1 > 5
+               int lobj = msgdata[msgdataindex];
+               MSG_INDEXINC_I(); // first word of the pair
+               int length = msgdata[msgdataindex];
+               MSG_INDEXINC_I(); // second word of the pair
+               gc_lobjenqueue_I(lobj, length, cnum);
+               gcnumlobjs++;
+       } // for(int k = 5; k < data1; k += 2)
+}
 
-       case GCLOBJINFO: {
-               // received a large objs info response msg
-               numconfirm--;
+INLINE void processmsg_gclobjmapping_I() { // GCLOBJMAPPING handler: adds the pair (data1, data2) to gcpointertbl
+       int data1 = msgdata[msgdataindex]; // was msgdata[1]; key passed to RuntimeHashadd_I
+       MSG_INDEXINC_I();
+       int data2 = msgdata[msgdataindex]; // was msgdata[2]; value passed to RuntimeHashadd_I
+       MSG_INDEXINC_I();
+       //mgchashInsert_I(msgdata[1], msgdata[2]);
+       RuntimeHashadd_I(gcpointertbl, data1, data2);
+       //MGCHashadd_I(gcpointertbl, msgdata[1], msgdata[2]);
+}
+#endif // #ifdef MULTICORE_GC
 
-               if(BAMBOO_NUM_OF_CORE > NUMCORES4GC - 1) {
-#ifndef CLOSE_PRINT
-                       BAMBOO_DEBUGPRINT_REG(msgdata[2]);
-#endif
-                       BAMBOO_EXIT(0xb009);
-               } 
-               // store the mark result info 
-               int cnum = msgdata[2];
-               gcloads[cnum] = msgdata[3];
-               if(gcheaptop < msgdata[4]) {
-                       gcheaptop = msgdata[4];
+// receive object transferred from other cores
+// or the terminate message from other cores
+// Should be invoked in critical sections!!
+// NOTICE: following format is for threadsimulate version only
+//         RAW version please see previous description
+// format: type + object
+// type: -1--stall msg
+//      !-1--object
+// return value: 0--received an object
+//               1--received nothing
+//               2--received a Stall Msg
+//               3--received a lock Msg
+//               RAW version: -1 -- received nothing
+//                            otherwise -- received msg type
+int receiveObject() {
+  int deny = 0;
+  
+msg:
+       // get the incoming msgs
+  if(receiveMsg() == -1) {
+         return -1;
+  }
+processmsg:
+       // processing received msgs
+       int size = 0;
+       MSG_REMAINSIZE_I(&size);
+  if(checkMsgLength_I(size) == -1) {
+               // not a whole msg
+               // have new coming msg
+               if(BAMBOO_MSG_AVAIL() != 0) {
+                       goto msg;
+               } else {
+                       return -1;
                }
-               // large obj info here
-         for(int k = 5; k < msgdata[1];) {
-                       int lobj = msgdata[k++];
-                       int length = msgdata[k++];
-                       gc_lobjenqueue_I(lobj, length, cnum);
-                       gcnumlobjs++;
-               } // for(int k = 5; k < msgdata[1];)
-               break;
        }
+
+       if(msglength <= size) {
+               // have some whole msg
+  //if(msgdataindex == msglength) {
+    // received a whole msg
+    MSGTYPE type;
+    type = msgdata[msgdataindex]; //[0]
+               MSG_INDEXINC_I();
+               // TODO
+               //tprintf("msg type: %x\n", type);
+    switch(type) {
+                       case TRANSOBJ: {
+                               // receive a object transfer msg
+                               processmsg_transobj_I();
+                               break;
+                       } // case TRANSOBJ
+
+                       case TRANSTALL: {
+                               // receive a stall msg
+                               processmsg_transtall_I();
+                               break;
+                       } // case TRANSTALL
+
+// GC version have no lock msgs
+#ifndef MULTICORE_GC
+                       case LOCKREQUEST: {
+                               // receive lock request msg, handle it right now
+                               processmsg_lockrequest_I();
+                               break;
+                       } // case LOCKREQUEST
+
+                       case LOCKGROUNT: {
+                               // receive lock grount msg
+                               processmsg_lockgrount_I();
+                               break;
+                       } // case LOCKGROUNT
+
+                       case LOCKDENY: {
+                               // receive lock deny msg
+                               processmsg_lockdeny_I();
+                               break;
+                       } // case LOCKDENY
+
+                       case LOCKRELEASE: {
+                               processmsg_lockrelease_I();
+                               break;
+                       } // case LOCKRELEASE
+#endif // #ifndef MULTICORE_GC
+
+#ifdef PROFILE
+                       case PROFILEOUTPUT: {
+                               // receive an output profile data request msg
+                               processmsg_profileoutput_I();
+                               break;
+                       } // case PROFILEOUTPUT
+
+                       case PROFILEFINISH: {
+                               // receive a profile output finish msg
+                               processmsg_profilefinish_I();
+                               break;
+                       } // case PROFILEFINISH
+#endif // #ifdef PROFILE
+
+// GC version has no lock msgs
+#ifndef MULTICORE_GC
+                       case REDIRECTLOCK: {
+                               // receive a redirect lock request msg, handle it right now
+                               processmsg_redirectlock_I();
+                               break;
+                       } // case REDIRECTLOCK
+
+                       case REDIRECTGROUNT: {
+                               // receive a lock grant msg with redirect info
+                               processmsg_redirectgrount_I();
+                               break;
+                       } // case REDIRECTGROUNT
+                       
+                       case REDIRECTDENY: {
+                               // receive a lock deny msg with redirect info
+                               processmsg_redirectdeny_I();
+                               break;
+                       } // case REDIRECTDENY
+
+                       case REDIRECTRELEASE: {
+                               // receive a lock release msg with redirect info
+                               processmsg_redirectrelease_I();
+                               break;
+                       } // case REDIRECTRELEASE
+#endif // #ifndef MULTICORE_GC
        
-       case GCLOBJMAPPING: {
-               // received a large obj mapping info msg
-               //mgchashInsert_I(msgdata[1], msgdata[2]);
-               RuntimeHashadd_I(gcpointertbl, msgdata[1], msgdata[2]);
-               //MGCHashadd_I(gcpointertbl, msgdata[1], msgdata[2]);
-               break;
-       }
+                       case STATUSCONFIRM: {
+                               // receive a status confirm info
+                               processmsg_statusconfirm_I();
+                               break;
+                       } // case STATUSCONFIRM
 
-#endif
+                       case STATUSREPORT: {
+                               processmsg_statusreport_I();
+                               break;
+                       } // case STATUSREPORT
 
-       default:
-               break;
-       }
-  memset(msgdata, '\0', sizeof(int) * msgdataindex);
-       msgdataindex = 0;
-       msglength = BAMBOO_MSG_BUF_LENGTH;
+                       case TERMINATE: {
+                               // receive a terminate msg
+                               processmsg_terminate_I();
+                               break;
+                       } // case TERMINATE
+
+                       case MEMREQUEST: {
+                               processmsg_memrequest_I();
+                               break;
+                       } // case MEMREQUEST
+
+                       case MEMRESPONSE: {
+                               processmsg_memresponse_I();
+                               break;
+                       } // case MEMRESPONSE
+
+#ifdef MULTICORE_GC
+                       // GC msgs
+                       case GCSTARTINIT: {
+                               processmsg_gcstartinit_I();
+                               break;
+                       } // case GCSTARTINIT
+
+                       case GCSTART: {
+                               // receive a start GC msg
+                               processmsg_gcstart_I();
+                               break;
+                       } // case GCSTART
+
+                       case GCSTARTCOMPACT: {
+                               // a compact phase start msg
+                               processmsg_gcstartcompact_I();
+                               break;
+                       } // case GCSTARTCOMPACT
+
+                       case GCSTARTFLUSH: {
+                               // received a flush phase start msg
+                               processmsg_gcstartflush_I();
+                               break;
+                       } // case GCSTARTFLUSH
+                       
+                       case GCFINISHINIT: {
+                               processmsg_gcfinishinit_I();
+                               break;
+                       } // case GCFINISHINIT
+
+                       case GCFINISHMARK: {
+                               processmsg_gcfinishmark_I();
+                               break;
+                       } // case GCFINISHMARK
+                       
+                       case GCFINISHCOMPACT: {
+                               // received a compact phase finish msg
+                               processmsg_gcfinishcompact_I();
+                               break;
+                       } // case GCFINISHCOMPACT
+
+                       case GCFINISHFLUSH: {
+                               processmsg_gcfinishflush_I();
+                               break;
+                       } // case GCFINISHFLUSH
+
+                       case GCFINISH: {
+                               // received a GC finish msg
+                               gcphase = FINISHPHASE;
+                               break;
+                       } // case GCFINISH
+
+                       case GCMARKCONFIRM: {
+                               // received a marked phase finish confirm request msg
+                               // all cores should do mark
+                               processmsg_gcmarkconfirm_I();
+                               break;
+                       } // case GCMARKCONFIRM
+
+                       case GCMARKREPORT: {
+                               processmsg_gcmarkreport_I();
+                               break;
+                       } // case GCMARKREPORT
+
+                       case GCMARKEDOBJ: {
+                               processmsg_gcmarkedobj_I();
+                               break;
+                       } // case GCMARKEDOBJ
+
+                       case GCMOVESTART: {
+                               // received a start moving objs msg
+                               processmsg_gcmovestart_I();
+                               break;
+                       } // case GCMOVESTART
+                       
+                       case GCMAPREQUEST: {
+                               // received a mapping info request msg
+                               processmsg_gcmaprequest_I();
+                               break;
+                       } // case GCMAPREQUEST
+
+                       case GCMAPINFO: {
+                               // received a mapping info response msg
+                               processmsg_gcmapinfo_I();
+                               break;
+                       } // case GCMAPINFO
+
+                       case GCLOBJREQUEST: {
+                               // received a large objs info request msg
+                               transferMarkResults_I();
+                               break;
+                       } // case GCLOBJREQUEST
+
+                       case GCLOBJINFO: {
+                               // received a large objs info response msg
+                               processmsg_gclobjinfo_I();
+                               break;
+                       } // case GCLOBJINFO
+                       
+                       case GCLOBJMAPPING: {
+                               // received a large obj mapping info msg
+                               processmsg_gclobjmapping_I();
+                               break;
+                       } // case GCLOBJMAPPING
+
+#endif // #ifdef MULTICORE_GC
+
+                       default:
+                               break;
+               } // switch(type)
+               //memset(msgdata, '\0', sizeof(int) * msgdataindex);
+               //msgdataindex = 0;
+               msglength = BAMBOO_MSG_BUF_LENGTH;
+               // TODO
+               //printf("++ msg: %x \n", type);
+               if(msgdataindex != msgdatalast) {
+                       // still have available msg
+                       goto processmsg;
+               }
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-       BAMBOO_DEBUGPRINT(0xe88d);
+               BAMBOO_DEBUGPRINT(0xe88d);
 #endif
 #endif
 
-       if(BAMBOO_MSG_AVAIL() != 0) {
-               goto msg;
-       }
+               // have new coming msg
+               if(BAMBOO_MSG_AVAIL() != 0) {
+                       goto msg;
+               }
+
 #ifdef PROFILE
 /*if(isInterrupt) {
                profileTaskEnd();
        }*/
 #endif
-       return (int)type;
-} else {
-       // not a whole msg
+               return (int)type;
+       } else {
+               // not a whole msg
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-       BAMBOO_DEBUGPRINT(0xe88e);
+               BAMBOO_DEBUGPRINT(0xe88e);
 #endif
 #endif
 #ifdef PROFILE
-/*  if(isInterrupt) {
-                 profileTaskEnd();
-               }*/
+       /*  if(isInterrupt) {
+                               profileTaskEnd();
+                       }*/
 #endif
     return -2;
   }