bug fix in multicore gc
[IRC.git] / Robust / src / Runtime / multicoregarbage.c
1 #ifdef MULTICORE_GC
2 #include "runtime.h"
3 #include "multicoregarbage.h"
4 #include "multicoreruntime.h"
5 #include "runtime_arch.h"
6 #include "SimpleHash.h"
7 #include "GenericHashtable.h"
8 #include "ObjectHash.h"
9
10 extern int corenum;
11 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
12 extern int numqueues[][NUMCLASSES];
13
14 extern struct genhashtable * activetasks;
15 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
16 extern struct taskparamdescriptor *currtpd;
17
18 extern struct LockValue runtime_locks[MAXTASKPARAMS];
19 extern int runtime_locklen;
20
21 struct pointerblock {
22   void * ptrs[NUMPTRS];
23   struct pointerblock *next;
24 };
25
26 struct pointerblock *gchead=NULL;
27 int gcheadindex=0;
28 struct pointerblock *gctail=NULL;
29 int gctailindex=0;
30 struct pointerblock *gctail2=NULL;
31 int gctailindex2=0;
32 struct pointerblock *gcspare=NULL;
33
34 #define NUMLOBJPTRS 20
35
36 struct lobjpointerblock {
37   void * lobjs[NUMLOBJPTRS];
38         //void * dsts[NUMLOBJPTRS];
39         int lengths[NUMLOBJPTRS];
40         //void * origs[NUMLOBJPTRS];
41         int hosts[NUMLOBJPTRS];
42   struct lobjpointerblock *next;
43 };
44
45 struct lobjpointerblock *gclobjhead=NULL;
46 int gclobjheadindex=0;
47 struct lobjpointerblock *gclobjtail=NULL;
48 int gclobjtailindex=0;
49 struct lobjpointerblock *gclobjtail2=NULL;
50 int gclobjtailindex2=0;
51 struct lobjpointerblock *gclobjspare=NULL;
52
53 #ifdef GC_DEBUG
54 // dump whole mem in blocks
55 inline void dumpSMem() {
56         int block = 0;
57         int sblock = 0;
58         int j = 0;
59         int i = 0;
60         int coren = 0;
61         int x = 0;
62         int y = 0;
63         tprintf("Dump shared mem: \n");
64         // reserved blocks for sblocktbl
65         tprintf("++++ reserved sblocks ++++ \n");
66         for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
67                 tprintf("0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
68             *((int *)(i)), *((int *)(i + 4)), 
69                                                 *((int *)(i + 4*2)), *((int *)(i + 4*3)), 
70                                                 *((int *)(i + 4*4)), *((int *)(i + 4*5)), 
71                                                 *((int *)(i + 4*6)), *((int *)(i + 4*7)), 
72                                                 *((int *)(i + 4*8)), *((int *)(i + 4*9)), 
73                                                 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
74                                                 *((int *)(i + 4*12)), *((int *)(i + 4*13)), 
75                                                 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
76         }
77         sblock = gcreservedsb;
78         bool advanceblock = false;
79         // remaining memory
80         for(i=gcbaseva;i<BAMBOO_BASE_VA+BAMBOO_SHARED_MEM_SIZE;i+=4*16){
81                 advanceblock = false;
82                 // computing sblock # and block #, core coordinate (x,y) also
83                 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
84                         // finished a sblock
85                         if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
86                                 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
87                                         // finished a block
88                                         block++;
89                                         advanceblock = true;
90                                 }
91                         } else {
92                                 // finished a block
93                                 block++;
94                                 advanceblock = true;
95                         }
96                         // compute core #
97                         if(advanceblock) {
98                                 coren = gc_block2core[block%(NUMCORES*2)];
99                         }
100                         // compute core coordinate
101                         int tmpcore = coren;
102                         if((NUMCORES==62) && (tmpcore > 5)) {
103                                 tmpcore+=2;
104                         }
105                         x = tmpcore/bamboo_width;
106                         y = tmpcore%bamboo_width;
107                         tprintf("==== %d, %d : core (%d,%d), saddr %x====\n", 
108                                             block, sblock++, x, y, 
109                                                         (sblock-1)*(BAMBOO_SMEM_SIZE)+BAMBOO_BASE_VA);
110                 }
111                 j++;
112     tprintf("0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
113             *((int *)(i)), *((int *)(i + 4)), 
114                                                 *((int *)(i + 4*2)), *((int *)(i + 4*3)), 
115                                                 *((int *)(i + 4*4)), *((int *)(i + 4*5)), 
116                                                 *((int *)(i + 4*6)), *((int *)(i + 4*7)), 
117                                                 *((int *)(i + 4*8)), *((int *)(i + 4*9)), 
118                                                 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
119                                                 *((int *)(i + 4*12)), *((int *)(i + 4*13)), 
120                                                 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
121         }
122         tprintf("\n");
123 }
124 #endif
125
126 // should be invoked with interruption closed
127 inline void gc_enqueue_I(void *ptr) {
128 #ifdef DEBUG
129         BAMBOO_DEBUGPRINT(0xe601);
130         BAMBOO_DEBUGPRINT_REG(ptr);
131 #endif
132   if (gcheadindex==NUMPTRS) {
133     struct pointerblock * tmp;
134     if (gcspare!=NULL) {
135       tmp=gcspare;
136       gcspare=NULL;
137     } else {
138       tmp=RUNMALLOC_I(sizeof(struct pointerblock));
139                 } // if (gcspare!=NULL)
140     gchead->next=tmp;
141     gchead=tmp;
142     gcheadindex=0;
143   } // if (gcheadindex==NUMPTRS)
144   gchead->ptrs[gcheadindex++]=ptr;
145 #ifdef DEBUG
146         BAMBOO_DEBUGPRINT(0xe602);
147 #endif
148 } // void gc_enqueue_I(void *ptr)
149
150 // dequeue and destroy the queue
151 inline void * gc_dequeue() {
152   if (gctailindex==NUMPTRS) {
153     struct pointerblock *tmp=gctail;
154     gctail=gctail->next;
155     gctailindex=0;
156     if (gcspare!=NULL) {
157       RUNFREE(tmp);
158                 } else {
159       gcspare=tmp;
160                 } // if (gcspare!=NULL)
161   } // if (gctailindex==NUMPTRS)
162   return gctail->ptrs[gctailindex++];
163 } // void * gc_dequeue()
164
165 // dequeue and do not destroy the queue
166 inline void * gc_dequeue2() {
167         if (gctailindex2==NUMPTRS) {
168     struct pointerblock *tmp=gctail2;
169     gctail2=gctail2->next;
170     gctailindex2=0;
171   } // if (gctailindex2==NUMPTRS)
172   return gctail2->ptrs[gctailindex2++];
173 } // void * gc_dequeue2() 
174
175 inline int gc_moreItems() {
176   if ((gchead==gctail)&&(gctailindex==gcheadindex))
177     return 0;
178   return 1;
179 } // int gc_moreItems() 
180
181 inline int gc_moreItems2() {
182   if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
183     return 0;
184   return 1;
185 } // int gc_moreItems2()
186
187 // should be invoked with interruption closed
188 // enqueue a large obj: start addr & length
189 inline void gc_lobjenqueue_I(void *ptr, 
190                                          int length, 
191                                                                                          int host) {
192 #ifdef DEBUG
193         BAMBOO_DEBUGPRINT(0xe901);
194 #endif
195   if (gclobjheadindex==NUMLOBJPTRS) {
196     struct lobjpointerblock * tmp;
197     if (gclobjspare!=NULL) {
198       tmp=gclobjspare;
199       gclobjspare=NULL;
200     } else {
201       tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
202                 } // if (gclobjspare!=NULL)
203     gclobjhead->next=tmp;
204     gclobjhead=tmp;
205     gclobjheadindex=0;
206   } // if (gclobjheadindex==NUMLOBJPTRS)
207   gclobjhead->lobjs[gclobjheadindex]=ptr;
208         gclobjhead->lengths[gclobjheadindex]=length;
209         gclobjhead->hosts[gclobjheadindex++]=host;
210 #ifdef DEBUG
211         BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
212         BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
213         BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
214 #endif
215 } // void gc_lobjenqueue_I(void *ptr...)
216
217 // dequeue and destroy the queue
218 inline void * gc_lobjdequeue(int * length,
219                                          int * host) {
220   if (gclobjtailindex==NUMLOBJPTRS) {
221     struct lobjpointerblock *tmp=gclobjtail;
222     gclobjtail=gclobjtail->next;
223     gclobjtailindex=0;
224     if (gclobjspare!=NULL) {
225       RUNFREE(tmp);
226                 } else {
227       gclobjspare=tmp;
228                 } // if (gclobjspare!=NULL)
229   } // if (gclobjtailindex==NUMLOBJPTRS)
230         if(length != NULL) {
231                 *length = gclobjtail->lengths[gclobjtailindex];
232         }
233         if(host != NULL) {
234                 *host = (int)(gclobjtail->hosts[gclobjtailindex]);
235         }
236   return gclobjtail->lobjs[gclobjtailindex++];
237 } // void * gc_lobjdequeue()
238
239 inline int gc_lobjmoreItems() {
240   if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
241     return 0;
242   return 1;
243 } // int gc_lobjmoreItems()
244
245 // dequeue and don't destroy the queue
246 inline void gc_lobjdequeue2() {
247   if (gclobjtailindex2==NUMLOBJPTRS) {
248     gclobjtail2=gclobjtail2->next;
249     gclobjtailindex2=1;
250   } else {
251                 gclobjtailindex2++;
252         }// if (gclobjtailindex2==NUMLOBJPTRS)
253 } // void * gc_lobjdequeue2()
254
255 inline int gc_lobjmoreItems2() {
256   if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
257     return 0;
258   return 1;
259 } // int gc_lobjmoreItems2()
260
261 INTPTR gccurr_heapbound = 0;
262
263 inline void gettype_size(void * ptr, 
264                                      int * ttype, 
265                                                                          int * tsize) {
266         int type = ((int *)ptr)[0];
267         int size = 0;
268         if(type < NUMCLASSES) {
269                 // a normal object
270                 size = classsize[type];
271         } else {        
272                 // an array 
273                 struct ArrayObject *ao=(struct ArrayObject *)ptr;
274                 int elementsize=classsize[type];
275                 int length=ao->___length___; 
276                 size=sizeof(struct ArrayObject)+length*elementsize;
277         } // if(type < NUMCLASSES)
278         *ttype = type;
279         *tsize = size;
280 }
281
282 inline bool isLarge(void * ptr, 
283                                 int * ttype, 
284                                                                                 int * tsize) {
285 #ifdef DEBUG
286                 BAMBOO_DEBUGPRINT(0xe701);
287                 BAMBOO_DEBUGPRINT_REG(ptr);
288 #endif
289         // check if a pointer is referring to a large object
290         gettype_size(ptr, ttype, tsize);
291 #ifdef DEBUG
292         BAMBOO_DEBUGPRINT(*tsize);
293 #endif
294         int bound = (BAMBOO_SMEM_SIZE);
295         if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
296                 bound = (BAMBOO_SMEM_SIZE_L);
297         }
298         if((((int)ptr-gcbaseva)%(bound))==0) {
299                 // ptr is a start of a block
300 #ifdef DEBUG
301                 BAMBOO_DEBUGPRINT(0xe702);
302                 BAMBOO_DEBUGPRINT(1);
303 #endif
304                 return true;
305         }
306         if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
307                 // it acrosses the boundary of current block
308 #ifdef DEBUG
309                 BAMBOO_DEBUGPRINT(0xe703);
310                 BAMBOO_DEBUGPRINT(1);
311 #endif
312                 return true;
313         }
314 #ifdef DEBUG
315                 BAMBOO_DEBUGPRINT(0);
316 #endif
317         return false;
318 } // bool isLarge(void * ptr, int * ttype, int * tsize)
319
320 inline int hostcore(void * ptr) {
321         // check the host core of ptr
322         int host = 0;
323         RESIDECORE(ptr, &host);
324 #ifdef DEBUG
325         BAMBOO_DEBUGPRINT(0xedd0);
326         BAMBOO_DEBUGPRINT_REG(ptr);
327         BAMBOO_DEBUGPRINT_REG(host);
328 #endif
329         return host;
330 } // int hostcore(void * ptr)
331
332 inline bool isLocal(void * ptr) {
333         // check if a pointer is in shared heap on this core
334         return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
335 } // bool isLocal(void * ptr)
336
337 inline bool gc_checkCoreStatus() {
338         bool allStall = true;
339         for(int i = 0; i < NUMCORES; ++i) {
340                 if(gccorestatus[i] != 0) {
341                         allStall = false;
342                         break;
343                 } // if(gccorestatus[i] != 0)
344         } // for(i = 0; i < NUMCORES; ++i)
345         return allStall;
346 }
347
348 inline void checkMarkStatue() {
349 #ifdef DEBUG
350         BAMBOO_DEBUGPRINT(0xee01);
351 #endif
352         int i;
353         if((!waitconfirm) || 
354                         (waitconfirm && (numconfirm == 0))) {
355 #ifdef DEBUG
356                 BAMBOO_DEBUGPRINT(0xee02);
357 #endif
358                 BAMBOO_START_CRITICAL_SECTION_STATUS();  
359                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
360                 gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
361                 gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
362                 // check the status of all cores
363                 bool allStall = gc_checkCoreStatus();
364 #ifdef DEBUG
365                 BAMBOO_DEBUGPRINT(0xee03);
366 #endif
367                 if(allStall) {
368 #ifdef DEBUG
369                         BAMBOO_DEBUGPRINT(0xee04);
370 #endif
371                         // check if the sum of send objs and receive obj are the same
372                         // yes->check if the info is the latest; no->go on executing
373                         int sumsendobj = 0;
374                         for(i = 0; i < NUMCORES; ++i) {
375                                 sumsendobj += gcnumsendobjs[i];
376                         } // for(i = 0; i < NUMCORES; ++i) 
377 #ifdef DEBUG
378                         BAMBOO_DEBUGPRINT(0xee05);
379                         BAMBOO_DEBUGPRINT_REG(sumsendobj);
380 #endif
381                         for(i = 0; i < NUMCORES; ++i) {
382                                 sumsendobj -= gcnumreceiveobjs[i];
383                         } // for(i = 0; i < NUMCORES; ++i) 
384 #ifdef DEBUG
385                         BAMBOO_DEBUGPRINT(0xee06);
386                         BAMBOO_DEBUGPRINT_REG(sumsendobj);
387 #endif
388                         if(0 == sumsendobj) {
389 #ifdef DEBUG
390                                 BAMBOO_DEBUGPRINT(0xee07);
391 #endif
392                                 if(!waitconfirm) {
393 #ifdef DEBUG
394                                         BAMBOO_DEBUGPRINT(0xee08);
395 #endif
396                                         // the first time found all cores stall
397                                         // send out status confirm msg to all other cores
398                                         // reset the corestatus array too
399                                         gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
400                                         waitconfirm = true;
401                                         numconfirm = NUMCORES - 1;
402                                         for(i = 1; i < NUMCORES; ++i) { 
403                                                 gccorestatus[i] = 1;
404                                                 // send mark phase finish confirm request msg to core i
405                                                 send_msg_1(i, GCMARKCONFIRM);
406                                         } // for(i = 1; i < NUMCORES; ++i) 
407                                 } else {
408 #ifdef DEBUG
409                                         BAMBOO_DEBUGPRINT(0xee09);
410 #endif
411                                         // all the core status info are the latest
412                                         // stop mark phase
413                                         gcphase = COMPACTPHASE;
414                                         // restore the gcstatus for all cores
415                                         for(i = 0; i < NUMCORES; ++i) {
416                                                 gccorestatus[i] = 1;
417                                         } // for(i = 0; i < NUMCORES; ++i)
418                                 } // if(!gcwautconfirm) else()
419                         } // if(0 == sumsendobj)
420                 } // if(allStall)
421                 BAMBOO_CLOSE_CRITICAL_SECTION_STATUS();
422         } // if((!waitconfirm)...
423 #ifdef DEBUG
424         BAMBOO_DEBUGPRINT(0xee0a);
425 #endif
426 } // void checkMarkStatue()
427
428 inline bool preGC() {
429         // preparation for gc
430         // make sure to clear all incoming msgs espacially transfer obj msgs
431 #ifdef DEBUG
432         BAMBOO_DEBUGPRINT(0xec01);
433 #endif
434         int i;
435         if((!waitconfirm) || 
436                                                   (waitconfirm && (numconfirm == 0))) {
437                 // send out status confirm msgs to all cores to check if there are
438                 // transfer obj msgs on-the-fly
439                 waitconfirm = true;
440                 numconfirm = NUMCORES - 1;
441                 for(i = 1; i < NUMCORES; ++i) { 
442                         corestatus[i] = 1;
443                         // send status confirm msg to core i
444                         send_msg_1(i, STATUSCONFIRM);
445                 } // for(i = 1; i < NUMCORES; ++i)
446
447 #ifdef DEBUG
448                 BAMBOO_DEBUGPRINT(0xec02);
449 #endif
450                 while(true) {
451                         if(numconfirm == 0) {
452                                 break;
453                         }
454                 } // wait for confirmations
455                 waitconfirm = false;
456                 numconfirm = 0;
457 #ifdef DEBUG
458                 BAMBOO_DEBUGPRINT(0xec03);
459 #endif
460                 numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
461                 numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
462                 int sumsendobj = 0;
463 #ifdef DEBUG
464                 BAMBOO_DEBUGPRINT(0xec04);
465 #endif
466                 for(i = 0; i < NUMCORES; ++i) {
467                         sumsendobj += numsendobjs[i];
468 #ifdef DEBUG
469                         BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]);
470 #endif
471                 } // for(i = 1; i < NUMCORES; ++i)
472 #ifdef DEBUG
473         BAMBOO_DEBUGPRINT(0xec05);
474         BAMBOO_DEBUGPRINT_REG(sumsendobj);
475 #endif
476                 for(i = 0; i < NUMCORES; ++i) {
477                         sumsendobj -= numreceiveobjs[i];
478 #ifdef DEBUG
479                         BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]);
480 #endif
481                 } // for(i = 1; i < NUMCORES; ++i)
482 #ifdef DEBUG
483                 BAMBOO_DEBUGPRINT(0xec06);
484                 BAMBOO_DEBUGPRINT_REG(sumsendobj);
485 #endif
486                 if(0 == sumsendobj) {
487                         return true;
488                 } else {
489                         // still have some transfer obj msgs on-the-fly, can not start gc
490                         return false;
491                 } // if(0 == sumsendobj) 
492         } else {
493 #ifdef DEBUG
494                 BAMBOO_DEBUGPRINT(0xec07);
495 #endif
496                 // previously asked for status confirmation and do not have all the 
497                 // confirmations yet, can not start gc
498                 return false;
499         } // if((!waitconfirm) || 
500 } // bool preGC()
501
502 inline void initGC() {
503         int i;
504         if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
505                 for(i = 0; i < NUMCORES; ++i) {
506                         gccorestatus[i] = 1;
507                         gcnumsendobjs[i] = 0; 
508                         gcnumreceiveobjs[i] = 0;
509                         gcloads[i] = 0;
510                         gcrequiredmems[i] = 0;
511                         gcfilledblocks[i] = 0;
512                         gcstopblock[i] = 0;
513                 } // for(i = 0; i < NUMCORES; ++i)
514                 gcheaptop = 0;
515                 gctopcore = 0;
516                 gctopblock = 0;
517         }
518         gcself_numsendobjs = 0;
519         gcself_numreceiveobjs = 0;
520         gcmarkedptrbound = 0;
521         gcobj2map = 0;
522         gcmappedobj = 0;
523         gcismapped = false;
524         gcnumlobjs = 0;
525         gcmovestartaddr = 0;
526         gctomove = false;
527         gcblock2fill = 0;
528         gcmovepending = 0;
529         gccurr_heaptop = 0;
530         gcdstcore = 0;
531
532         // initialize queue
533         if (gchead==NULL) {
534                 gcheadindex=gctailindex=gctailindex2 = 0;
535                 gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
536         } else {
537                 gctailindex = gctailindex2 = gcheadindex;
538                 gctail = gctail2 = gchead;
539         }
540
541         // initialize the large obj queues
542         if (gclobjhead==NULL) {
543                 gclobjheadindex=0;
544                 gclobjtailindex=0;
545                 gclobjtailindex2 = 0;
546                 gclobjhead=gclobjtail=gclobjtail2=
547                         RUNMALLOC(sizeof(struct lobjpointerblock));
548         } else {
549                 gclobjtailindex = gclobjtailindex2 = gclobjheadindex;
550                 gclobjtail = gclobjtail2 = gclobjhead;
551         }
552
553         freeRuntimeHash(gcpointertbl);
554         gcpointertbl = allocateRuntimeHash(20);
555
556         memset(gcsmemtbl, '\0', sizeof(int)*gcnumblock);
557 } // void initGC()
558
559 // compute load balance for all cores
560 inline int loadbalance() {
561         // compute load balance
562         int i;
563
564         // get the total loads
565         int tloads = gcloads[STARTUPCORE];
566         for(i = 1; i < NUMCORES; i++) {
567                 tloads += gcloads[i];
568         }
569         int heaptop = gcbaseva + tloads;
570 #ifdef DEBUG
571         BAMBOO_DEBUGPRINT(0xdddd);
572         BAMBOO_DEBUGPRINT_REG(tloads);
573         BAMBOO_DEBUGPRINT_REG(heaptop);
574 #endif
575         int b = 0;
576         BLOCKINDEX(heaptop, &b);
577         int numbpc = b / NUMCORES; // num of blocks per core
578 #ifdef DEBUG
579         BAMBOO_DEBUGPRINT_REG(b);
580         BAMBOO_DEBUGPRINT_REG(numbpc);
581 #endif
582         gctopblock = b;
583         RESIDECORE(heaptop, &gctopcore);
584 #ifdef DEBUG
585         BAMBOO_DEBUGPRINT_REG(gctopcore);
586 #endif
587         return numbpc;
588 } // void loadbalance()
589
590 inline bool cacheLObjs() {
591         // check the total mem size need for large objs
592         int sumsize = 0;
593         int size = 0;
594 #ifdef DEBUG
595         BAMBOO_DEBUGPRINT(0xe801);
596 #endif
597         gclobjtail2 = gclobjtail;
598         gclobjtailindex2 = gclobjtailindex;
599         while(gc_lobjmoreItems2()){
600                 gc_lobjdequeue2();
601                 size = gclobjtail2->lengths[gclobjtailindex2 - 1];
602                 sumsize += size;
603 #ifdef DEBUG
604                 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
605                 BAMBOO_DEBUGPRINT_REG(size);
606                 BAMBOO_DEBUGPRINT_REG(sumsize);
607 #endif
608         } // while(gc_lobjmoreItems2())
609
610         // check if there are enough space to cache these large objs
611         INTPTR dst = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE) - sumsize;
612         if(gcheaptop > dst) {
613                 // do not have enough room to cache large objs
614 #ifdef DEBUG
615         BAMBOO_DEBUGPRINT(0xe802);
616         BAMBOO_DEBUGPRINT_REG(dst);
617         BAMBOO_DEBUGPRINT_REG(gcheaptop);
618 #endif
619                 return false;
620         }
621 #ifdef DEBUG
622         BAMBOO_DEBUGPRINT(0xe803);
623         BAMBOO_DEBUGPRINT_REG(dst);
624         BAMBOO_DEBUGPRINT_REG(gcheaptop);
625 #endif
626
627         gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
628         // cache the largeObjs to the top of the shared heap
629         gclobjtail2 = gclobjtail;
630         gclobjtailindex2 = gclobjtailindex;
631         while(gc_lobjmoreItems2()) {
632                 gc_lobjdequeue2();
633                 size = gclobjtail2->lengths[gclobjtailindex2 - 1];
634                 // set the mark field to 2, indicating that this obj has been moved and 
635                 // need to be flushed
636                 ((int *)(gclobjtail2->lobjs[gclobjtailindex2-1]))[6] = 2;
637                 memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2 - 1], size);
638                 dst += size;
639 #ifdef DEBUG 
640                 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
641                 BAMBOO_DEBUGPRINT(dst-size);
642                 BAMBOO_DEBUGPRINT_REG(size);
643 #endif
644         }
645         return true;
646 } // void cacheLObjs()
647
648 // NOTE: the free mem chunks should be maintained in an ordered linklist
649 // the listtop param always specify current list tail
650
651 // update the gcsmemtbl to record current shared mem usage
652 void updateSmemTbl(int coren,
653                                int localtop) {
654         int ltopcore = 0;
655         int bound = BAMBOO_SMEM_SIZE_L;
656         BLOCKINDEX(localtop, &ltopcore);
657         if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
658                 bound = BAMBOO_SMEM_SIZE;
659         }
660         int load = (localtop-gcbaseva)%bound;
661         int i = 0;
662         int j = 0;
663         int toset = 0;
664         do{
665                 toset = gc_core2block[2*coren+i]+(NUMCORES*2)*j;
666                 if(toset < ltopcore) {
667                         gcsmemtbl[toset] = (toset<NUMCORES)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
668                 } else if(toset == ltopcore) {
669                         gcsmemtbl[toset] = load;
670                         break;
671                 } else {
672                         break;
673                 }
674                 i++;
675                 if(i == 2) {
676                         i = 0;
677                         j++;
678                 }
679         }while(true);
680 } // void updateSmemTbl(int, int)
681
682 inline struct freeMemItem * addFreeMemItem(int ptr,
683                                                        int size,
684                                                                                                                                                                          struct freeMemItem * listtail,
685                                                                                                                                                                          bool* sethead) {
686         struct freeMemItem * tochange = listtail;
687         if(*sethead) {
688                 if(tochange->next == NULL) {
689                         tochange->next = 
690                                 (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
691                 } // if(tochange->next == NULL)
692                 tochange = tochange->next;
693         } else {
694                 *sethead = true;
695         } // if(sethead)
696         tochange->ptr = ptr;
697         tochange->size = size;
698         BLOCKINDEX(ptr, &(tochange->startblock));
699         BLOCKINDEX(ptr+size-1, &(tochange->endblock));
700         // zero out all these spare memory
701         // note that, leave the mem starting from heaptop, as it caches large objs
702         // zero out these cache later when moving large obj
703         memset(tochange->ptr, '\0', tochange->size);
704         return tochange;
705 } // struct freeMemItem * addFreeMemItem(int,int,struct freeMemItem*,bool*, int)
706
707 inline void moveLObjs() {
708 #ifdef DEBUG
709         BAMBOO_DEBUGPRINT(0xea01);
710 #endif
711         // find current heap top
712         // flush all gcloads to indicate the real heap top on one core
713         // previous it represents the next available ptr on a core
714         if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L))) 
715                         && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
716                 // edge of a block, check if this is exactly the heaptop
717                 BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
718                 gcloads[0]+=(gcfilledblocks[0]>1?
719                                 (BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
720         } 
721         updateSmemTbl(0, gcloads[0]);
722 #ifdef DEBUG
723   BAMBOO_DEBUGPRINT(0xea02);
724         BAMBOO_DEBUGPRINT_REG(gcloads[0]);
725         BAMBOO_DEBUGPRINT_REG(gcsmemtbl[0]);
726 #endif
727         for(int i = 1; i < NUMCORES; i++) {
728                 int tmptop = 0;
729 #ifdef DEBUG
730                 BAMBOO_DEBUGPRINT(0xf000+i);
731                 BAMBOO_DEBUGPRINT_REG(gcloads[i]);
732                 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
733 #endif
734                 if((gcfilledblocks[i] > 0) 
735                                 && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
736                         // edge of a block, check if this is exactly the heaptop
737                         BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
738                         gcloads[i]
739                                 +=(gcfilledblocks[i]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
740                         tmptop = gcloads[i];
741                 } 
742                 updateSmemTbl(i, gcloads[i]);
743 #ifdef DEBUG
744                 BAMBOO_DEBUGPRINT_REG(gcloads[i]);
745 #endif
746         } // for(int i = 1; i < NUMCORES; i++) {
747
748         // find current heap top
749         // TODO
750         // a bug here: when using local allocation, directly move large objects
751         // to the highest free chunk might not be memory efficient
752         int tmpheaptop = 0;
753         int size = 0;
754         int bound = 0;
755         int i = 0;
756         for(i = gcnumblock-1; i >= 0; i--) {
757                 if(gcsmemtbl[i] > 0) {
758                         break;
759                 }
760         }
761         if(i == -1) {
762                 tmpheaptop = gcbaseva;
763         } else {
764                 tmpheaptop = gcbaseva+gcsmemtbl[i]+((i<NUMCORES)?(BAMBOO_SMEM_SIZE_L*i):
765                                 (BAMBOO_SMEM_SIZE*(i-NUMCORES)+BAMBOO_LARGE_SMEM_BOUND));
766         }
767         // move large objs from gcheaptop to tmpheaptop
768         // write the header first
769         int tomove = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE) - gcheaptop;
770 #ifdef DEBUG
771         BAMBOO_DEBUGPRINT(0xea03);
772         BAMBOO_DEBUGPRINT_REG(tomove);
773         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
774         BAMBOO_DEBUGPRINT_REG(gcheaptop);
775 #endif
776         // flush the sbstartbl
777         memset(&(gcsbstarttbl[gcreservedsb]), '\0', 
778                            BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE*sizeof(INTPTR));
779         if(tomove == 0) {
780                 gcheaptop = tmpheaptop;
781         } else {
782                 // check how many blocks it acrosses
783                 int remain = tmpheaptop-gcbaseva;
784                 int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb; // number of the sblock
785                 int b = 0; // number of the block
786                 BLOCKINDEX(tmpheaptop, &b);
787                 // check the remaining space in this block
788                 bound = (BAMBOO_SMEM_SIZE);
789                 if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
790                         bound = (BAMBOO_SMEM_SIZE_L);
791                 }
792                 remain = bound - remain%bound;
793
794 #ifdef DEBUG
795                 BAMBOO_DEBUGPRINT(0xea04);
796 #endif
797                 size = 0;
798                 int isize = 0;
799                 int host = 0;
800                 int ptr = 0;
801                 int base = tmpheaptop;
802                 int cpysize = 0;
803                 remain -= BAMBOO_CACHE_LINE_SIZE;
804                 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
805                 while(gc_lobjmoreItems()) {
806                         ptr = (int)(gc_lobjdequeue(&size, &host));
807                         ALIGNSIZE(size, &isize);
808                         if(remain < isize) {
809                                 // this object acrosses blocks
810                                 if(cpysize > 0) {
811                                         // close current block, fill its header
812                                         memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
813                                         *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
814                                         gcsmemtbl[b] = cpysize + BAMBOO_CACHE_LINE_SIZE;
815                                         cpysize = 0;
816                                         base = tmpheaptop;
817                                         if(remain == 0) {
818                                                 remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ? 
819                                                                                  BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
820                                         } 
821                                         remain -= BAMBOO_CACHE_LINE_SIZE;
822                                         tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
823                                         BLOCKINDEX(tmpheaptop, &b);
824                                         sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
825                                 } // if(cpysize > 0)
826
827                                 // move the large obj
828                                 memcpy(tmpheaptop, gcheaptop, size);
829                                 // fill the remaining space with -2 padding
830                                 memset(tmpheaptop+size, -2, isize-size);
831                                 // zero out original mem caching the lobj
832                                 memset(gcheaptop, '\0', size);
833 #ifdef DEBUG
834                                 BAMBOO_DEBUGPRINT(0xea05);
835                                 BAMBOO_DEBUGPRINT_REG(gcheaptop);
836                                 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
837                                 BAMBOO_DEBUGPRINT_REG(size);
838                                 BAMBOO_DEBUGPRINT_REG(isize);
839                                 BAMBOO_DEBUGPRINT_REG(base);
840 #endif
841                                 gcheaptop += size;
842                                 if(host == BAMBOO_NUM_OF_CORE) {
843                                         BAMBOO_START_CRITICAL_SECTION();
844                                         RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
845                                         BAMBOO_CLOSE_CRITICAL_SECTION();
846 #ifdef DEBUG
847                                         BAMBOO_DEBUGPRINT(0xcdca);
848                                         BAMBOO_DEBUGPRINT_REG(ptr);
849                                         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
850 #endif
851                                 } else {
852                                         // send the original host core with the mapping info
853                                         send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
854 #ifdef DEBUG
855                                         BAMBOO_DEBUGPRINT(0xcdcb);
856                                         BAMBOO_DEBUGPRINT_REG(ptr);
857                                         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
858 #endif
859                                 } // if(host == BAMBOO_NUM_OF_CORE) else ...
860                                 tmpheaptop += isize;
861
862                                 // set the gcsbstarttbl and gcsmemtbl
863                                 int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
864                                 for(int k = 1; k < tmpsbs; k++) {
865                                         gcsbstarttbl[sb+k] = (INTPTR)(-1);
866                                 }
867                                 sb += tmpsbs;
868                                 bound = (b<NUMCORES)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
869                                 BLOCKINDEX(tmpheaptop-1, &tmpsbs);
870                                 for(; b < tmpsbs; b++) {
871                                         gcsmemtbl[b] = bound;
872                                         if(b==NUMCORES-1) {
873                                                 bound = BAMBOO_SMEM_SIZE;
874                                         }
875                                 }
876                                 if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
877                                         gcsbstarttbl[sb] = (INTPTR)(-1);
878                                         remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ? 
879                                                                          BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
880                                         gcsmemtbl[b] = bound;
881                                 } else {
882                                         gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
883                                         remain = tmpheaptop-gcbaseva;
884                                         gcsmemtbl[b] = remain%bound;
885                                         remain = bound - gcsmemtbl[b];
886                                 } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
887
888                                 // close current block and fill the header
889                                 memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
890                                 *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
891                                 cpysize = 0;
892                                 base = tmpheaptop;
893                                 remain -= BAMBOO_CACHE_LINE_SIZE;
894                                 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
895                         } else {
896                                 remain -= isize;
897                                 // move the large obj
898                                 memcpy(tmpheaptop, gcheaptop, size);
899                                 // fill the remaining space with -2 padding
900                                 memset(tmpheaptop+size, -2, isize-size);
901                                 // zero out original mem caching the lobj
902                                 memset(gcheaptop, '\0', size);
903 #ifdef DEBUG
904                                 BAMBOO_DEBUGPRINT(0xea06);
905                                 BAMBOO_DEBUGPRINT_REG(gcheaptop);
906                                 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
907                                 BAMBOO_DEBUGPRINT_REG(size);
908                                 BAMBOO_DEBUGPRINT_REG(isize);
909 #endif
910                                 gcheaptop += size;
911                                 cpysize += isize;
912                                 if(host == BAMBOO_NUM_OF_CORE) {
913                                         BAMBOO_START_CRITICAL_SECTION();
914                                         RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
915                                         BAMBOO_CLOSE_CRITICAL_SECTION();
916 #ifdef DEBUG
917                                         BAMBOO_DEBUGPRINT(0xcdcc);
918                                         BAMBOO_DEBUGPRINT_REG(ptr);
919                                         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
920 #endif
921                                 } else {
922                                         // send the original host core with the mapping info
923                                         send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
924 #ifdef DEBUG
925                                         BAMBOO_DEBUGPRINT(0xcdcd);
926                                         BAMBOO_DEBUGPRINT_REG(ptr);
927                                         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
928 #endif
929                                 } // if(host == BAMBOO_NUM_OF_CORE) else ...
930                                 tmpheaptop += isize;
931
932                                 // update gcsmemtbl
933                                 if(gcsmemtbl[b] == 0) {
934                                         // add the header's size
935                                         gcsmemtbl[b] = BAMBOO_CACHE_LINE_SIZE;
936                                 }
937                                 gcsmemtbl[b] += isize;
938                         } // if(remain < isize) else ...
939                 } // while(gc_lobjmoreItems())
940                 if(cpysize > 0) {
941                         // close current block, fill the header
942                         memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
943                         *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
944                         gcsmemtbl[b] = cpysize + BAMBOO_CACHE_LINE_SIZE;
945                 } else {
946                         tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
947                 }
948                 gcheaptop = tmpheaptop;
949         } // if(tomove == 0)
950
951 #ifdef DEBUG
952         BAMBOO_DEBUGPRINT(0xea07);
953         BAMBOO_DEBUGPRINT_REG(gcheaptop);
954 #endif
955
956         // update the free mem list
957         // create new free mem list according to gcsmemtbl
958         bool sethead = false;
959         struct freeMemItem * tochange = bamboo_free_mem_list->head;
960         if(tochange == NULL) {
961                 bamboo_free_mem_list->head = tochange = 
962                         (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
963                 tochange->next = NULL;
964         }
965         int startptr = 0;
966         size = 0;
967         bound = BAMBOO_SMEM_SIZE_L;
968         for(i = 0; i < gcnumblock; i++) {
969                 if(gcsmemtbl[i] < bound) {
970                         if(gcsmemtbl[i] == 0) {
971                                 // blank one
972                                 if(startptr == 0) {
973                                         // a start of a new free mem chunk
974                                         startptr = gcbaseva+((i<NUMCORES)?(i*BAMBOO_SMEM_SIZE_L)
975                                                         :(BAMBOO_LARGE_SMEM_BOUND+(i-NUMCORES)*BAMBOO_SMEM_SIZE));
976                                 } // if(startptr == 0) 
977                                 size += bound;
978                         } else {
979                                 if(startptr != 0) {
980                                         // the end of previous free mem chunk
981                                         tochange = addFreeMemItem(startptr,size,tochange,&sethead);
982                                         //startptr = 0;
983                                         //size = 0;
984                                 }
985                                 // start of a new free mem chunk
986                                 startptr = gcbaseva+((i<NUMCORES)?(i*BAMBOO_SMEM_SIZE_L)
987                                                 :(BAMBOO_LARGE_SMEM_BOUND+(i-NUMCORES)*BAMBOO_SMEM_SIZE))+gcsmemtbl[i];
988                                 size = bound-gcsmemtbl[i];
989                         } // if(gcsmemtbl[i] == 0) else
990                 } else {
991                         if(startptr != 0) {
992                                 // the end of previous free mem chunk
993                                 tochange = addFreeMemItem(startptr,size,tochange,&sethead);
994                                 startptr = 0;
995                                 size = 0;
996                         } // if(startptr != 0) {
997                 } // if(gcsmemtbl[i] < bound) else
998                 if(i == NUMCORES-1) {
999                         bound = BAMBOO_SMEM_SIZE;
1000                 }
1001         } // for(i = 0; i < gcnumblock; i++) {
1002         if(startptr != 0) {
1003                 tochange = addFreeMemItem(startptr, size, tochange, &sethead);
1004                 startptr = 0;
1005                 size = 0;
1006         }
1007
1008 #ifdef DEBUG
1009         BAMBOO_DEBUGPRINT(0xea08);
1010         BAMBOO_DEBUGPRINT_REG(gcheaptop);
1011 #endif
1012 } // void moveLObjs()
1013
1014 // enqueue root objs
1015 inline void tomark(struct garbagelist * stackptr) {
1016         if(MARKPHASE != gcphase) {
1017 #ifdef DEBUG
1018                 BAMBOO_DEBUGPRINT_REG(gcphase);
1019 #endif
1020                 BAMBOO_EXIT(0xb101);
1021         }
1022         gcbusystatus = true;
1023         gcnumlobjs = 0;
1024         
1025         int i,j;
1026         // enqueue current stack 
1027         while(stackptr!=NULL) {
1028 #ifdef DEBUG
1029                 BAMBOO_DEBUGPRINT(0xe501);
1030                 BAMBOO_DEBUGPRINT_REG(stackptr->size);
1031                 BAMBOO_DEBUGPRINT_REG(stackptr->next);
1032                 BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
1033 #endif
1034                 for(i=0; i<stackptr->size; i++) {
1035                         if(stackptr->array[i] != NULL) {
1036                                 BAMBOO_START_CRITICAL_SECTION();
1037                                 gc_enqueue_I(stackptr->array[i]);
1038                                 BAMBOO_CLOSE_CRITICAL_SECTION();
1039                         }
1040                 }
1041                 stackptr=stackptr->next;
1042         }
1043
1044 #ifdef DEBUG
1045         BAMBOO_DEBUGPRINT(0xe503);
1046 #endif
1047         // enqueue objectsets
1048         for(i=0; i<NUMCLASSES; i++) {
1049                 struct parameterwrapper ** queues = 
1050                         objectqueues[BAMBOO_NUM_OF_CORE][i];
1051                 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
1052                 for(j = 0; j < length; ++j) {
1053                         struct parameterwrapper * parameter = queues[j];
1054                         struct ObjectHash * set=parameter->objectset;
1055                         struct ObjectNode * ptr=set->listhead;
1056                         while(ptr!=NULL) {
1057                                 BAMBOO_START_CRITICAL_SECTION();
1058                                 gc_enqueue_I((void *)ptr->key);
1059                                 BAMBOO_CLOSE_CRITICAL_SECTION();
1060                                 ptr=ptr->lnext;
1061                         }
1062                 }
1063         }
1064
1065         // euqueue current task descriptor
1066         if(currtpd != NULL) {
1067 #ifdef DEBUG
1068                 BAMBOO_DEBUGPRINT(0xe504);
1069 #endif
1070                 for(i=0; i<currtpd->numParameters; i++) {
1071                         BAMBOO_START_CRITICAL_SECTION();
1072                         gc_enqueue_I(currtpd->parameterArray[i]);
1073                         BAMBOO_CLOSE_CRITICAL_SECTION();
1074                 }
1075         }
1076
1077 #ifdef DEBUG
1078         BAMBOO_DEBUGPRINT(0xe505);
1079 #endif
1080         // euqueue active tasks
1081         struct genpointerlist * ptr=activetasks->list;
1082         while(ptr!=NULL) {
1083                 struct taskparamdescriptor *tpd=ptr->src;
1084                 int i;
1085                 for(i=0; i<tpd->numParameters; i++) {
1086                         BAMBOO_START_CRITICAL_SECTION();
1087                         gc_enqueue_I(tpd->parameterArray[i]);
1088                         BAMBOO_CLOSE_CRITICAL_SECTION();
1089                 }
1090                 ptr=ptr->inext;
1091         }
1092
1093 #ifdef DEBUG
1094         BAMBOO_DEBUGPRINT(0xe506);
1095 #endif
1096         // enqueue cached transferred obj
1097         struct QueueItem * tmpobjptr =  getHead(&objqueue);
1098         while(tmpobjptr != NULL) {
1099                 struct transObjInfo * objInfo = 
1100                         (struct transObjInfo *)(tmpobjptr->objectptr); 
1101                 BAMBOO_START_CRITICAL_SECTION();
1102                 gc_enqueue_I(objInfo->objptr);
1103                 BAMBOO_CLOSE_CRITICAL_SECTION();
1104                 tmpobjptr = getNextQueueItem(tmpobjptr);
1105         }
1106
1107 #ifdef DEBUG
1108         BAMBOO_DEBUGPRINT(0xe507);
1109 #endif
1110         // enqueue cached objs to be transferred
1111         struct QueueItem * item = getHead(totransobjqueue);
1112         while(item != NULL) {
1113                 struct transObjInfo * totransobj = 
1114                         (struct transObjInfo *)(item->objectptr);
1115                 BAMBOO_START_CRITICAL_SECTION();
1116                 gc_enqueue_I(totransobj->objptr);
1117                 BAMBOO_CLOSE_CRITICAL_SECTION();
1118                 item = getNextQueueItem(item);
1119         } // while(item != NULL)
1120
1121 #ifdef DEBUG
1122         BAMBOO_DEBUGPRINT(0xe508);
1123 #endif
1124         // enqueue lock related info
1125         for(i = 0; i < runtime_locklen; ++i) {
1126          gc_enqueue_I((void *)(runtime_locks[i].redirectlock));
1127          if(runtime_locks[i].value != NULL) {
1128                  gc_enqueue_I((void *)(runtime_locks[i].value));
1129          }
1130         }
1131
1132 } // void tomark(struct garbagelist * stackptr)
1133
1134 inline void markObj(void * objptr) {
1135         if(objptr == NULL) {
1136                 return;
1137         }
1138         if(ISSHAREDOBJ(objptr)) {
1139                 int host = hostcore(objptr);
1140                 if(BAMBOO_NUM_OF_CORE == host) {
1141                         // on this core
1142                         BAMBOO_START_CRITICAL_SECTION();
1143                         gc_enqueue_I(objptr);  
1144                         BAMBOO_CLOSE_CRITICAL_SECTION();
1145                 } else {
1146 #ifdef DEBUG
1147                         BAMBOO_DEBUGPRINT(0xbbbb);
1148                         BAMBOO_DEBUGPRINT_REG(host);
1149                         BAMBOO_DEBUGPRINT_REG(objptr);
1150 #endif
1151                         // send a msg to host informing that objptr is active
1152                         send_msg_2(host, GCMARKEDOBJ, objptr);
1153                         gcself_numsendobjs++;
1154                 }
1155         } else {
1156                 BAMBOO_START_CRITICAL_SECTION();
1157                 gc_enqueue_I(objptr);
1158                 BAMBOO_CLOSE_CRITICAL_SECTION();
1159         } // if(ISSHAREDOBJ(objptr))
1160 } // void markObj(void * objptr) 
1161
1162 inline void mark(bool isfirst, 
1163                              struct garbagelist * stackptr) {
1164 #ifdef DEBUG
1165         BAMBOO_DEBUGPRINT(0xed01);
1166 #endif
1167         if(isfirst) {
1168 #ifdef DEBUG
1169                 BAMBOO_DEBUGPRINT(0xed02);
1170 #endif
1171                 // enqueue root objs
1172                 tomark(stackptr);
1173                 gccurr_heaptop = 0; // record the size of all active objs in this core
1174                                   // aligned but does not consider block boundaries
1175                 gcmarkedptrbound = 0;
1176         }
1177 #ifdef DEBUG
1178         BAMBOO_DEBUGPRINT(0xed03);
1179 #endif
1180         int isize = 0;
1181         bool checkfield = true;
1182         bool sendStall = false;
1183         // mark phase
1184         while(MARKPHASE == gcphase) {
1185 #ifdef DEBUG 
1186                 BAMBOO_DEBUGPRINT(0xed04);
1187 #endif
1188                 while(gc_moreItems2()) {
1189 #ifdef DEBUG 
1190                         BAMBOO_DEBUGPRINT(0xed05);
1191 #endif
1192                         sendStall = false;
1193                         gcbusystatus = true;
1194                         checkfield = true;
1195                         void * ptr = gc_dequeue2();
1196 #ifdef DEBUG 
1197                         BAMBOO_DEBUGPRINT_REG(ptr);
1198 #endif
1199                         int size = 0;
1200                         int isize = 0;
1201                         int type = 0;
1202                         // check if it is a shared obj
1203                         if(ISSHAREDOBJ(ptr)) {
1204                                 // a shared obj, check if it is a local obj on this core
1205                                 if(isLarge(ptr, &type, &size)) {
1206                                         // ptr is a large object
1207                                         if(((int *)ptr)[6] == 0) {
1208                                                 // not marked and not enqueued
1209 #ifdef DEBUG
1210                                                 BAMBOO_DEBUGPRINT(0xecec);
1211                                                 BAMBOO_DEBUGPRINT_REG(ptr);
1212 #endif
1213                                                 BAMBOO_START_CRITICAL_SECTION();
1214                                                 gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
1215                                                 gcnumlobjs++;
1216                                                 BAMBOO_CLOSE_CRITICAL_SECTION();
1217                                                 // mark this obj
1218                                                 ((int *)ptr)[6] = 1;
1219                                         } // if(((int *)ptr)[6] == 0)
1220                                 } else {
1221                                         bool islocal = isLocal(ptr);
1222                                         if (islocal && (((int *)ptr)[6] == 0)) {
1223                                                 // ptr is an unmarked active object on this core
1224                                                 ALIGNSIZE(size, &isize);
1225                                                 gccurr_heaptop += isize;
1226 #ifdef DEBUG
1227                                                 BAMBOO_DEBUGPRINT(0xaaaa);
1228                                                 BAMBOO_DEBUGPRINT_REG(ptr);
1229                                                 BAMBOO_DEBUGPRINT_REG(isize);
1230                                                 BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
1231 #endif
1232                                                 // mark this obj
1233                                                 ((int *)ptr)[6] = 1;
1234                                                 if(ptr + size > gcmarkedptrbound) {
1235                                                         gcmarkedptrbound = ptr + size;
1236                                                 } // if(ptr + size > gcmarkedptrbound)
1237                                         } else if (!islocal /*&& (((int *)ptr)[6] == 0)*/) {
1238                                                 int host = hostcore(ptr);
1239 #ifdef DEBUG
1240                                                 BAMBOO_DEBUGPRINT(0xbbbb);
1241                                                 BAMBOO_DEBUGPRINT_REG(host);
1242                                                 BAMBOO_DEBUGPRINT_REG(ptr);
1243 #endif
1244                                                 // send a msg to host informing that ptr is active
1245                                                 send_msg_2(host, GCMARKEDOBJ, ptr);
1246                                                 gcself_numsendobjs++;
1247                                                 checkfield = false;
1248                                         }// if(isLocal(ptr)) else ...
1249                                 } // if(isLarge(ptr, &type, &size)) else ...
1250                         } // if(ISSHAREDOBJ(ptr))
1251 #ifdef DEBUG 
1252                         BAMBOO_DEBUGPRINT(0xed06);
1253 #endif
1254
1255                         if(checkfield) {
1256                                 // scan all pointers in ptr
1257                                 unsigned INTPTR * pointer;
1258                                 pointer=pointerarray[type];
1259                                 if (pointer==0) {
1260                                         /* Array of primitives */
1261                                         /* Do nothing */
1262                                 } else if (((INTPTR)pointer)==1) {
1263                                         /* Array of pointers */
1264                                         struct ArrayObject *ao=(struct ArrayObject *) ptr;
1265                                         int length=ao->___length___;
1266                                         int j;
1267                                         for(j=0; j<length; j++) {
1268                                                 void *objptr = 
1269                                                         ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
1270                                                 markObj(objptr);
1271                                         }
1272                                 } else {
1273                                         INTPTR size=pointer[0];
1274                                         int i;
1275                                         for(i=1; i<=size; i++) {
1276                                                 unsigned int offset=pointer[i];
1277                                                 void * objptr=*((void **)(((char *)ptr)+offset));
1278                                                 markObj(objptr);
1279                                         }
1280                                 } // if (pointer==0) else if ... else ...
1281                         } // if(checkfield)
1282                 } // while(gc_moreItems2())
1283 #ifdef DEBUG
1284                 BAMBOO_DEBUGPRINT(0xed07);
1285 #endif
1286                 gcbusystatus = false;
1287                 // send mark finish msg to core coordinator
1288                 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1289 #ifdef DEBUG
1290                         BAMBOO_DEBUGPRINT(0xed08);
1291 #endif
1292                         gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
1293                         gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
1294                         gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
1295                         gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
1296                 } else {
1297                         if(!sendStall) {
1298 #ifdef DEBUG
1299                                 BAMBOO_DEBUGPRINT(0xed09);
1300 #endif
1301                                 send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
1302                                                                          gcself_numsendobjs, gcself_numreceiveobjs);
1303                                 sendStall = true;
1304                         }
1305                 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
1306 #ifdef DEBUG 
1307                 BAMBOO_DEBUGPRINT(0xed0a);
1308 #endif
1309
1310                 if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
1311 #ifdef DEBUG
1312                         BAMBOO_DEBUGPRINT(0xed0b);
1313 #endif
1314                         return;
1315                 }
1316         } // while(MARKPHASE == gcphase)
1317 } // mark()
1318
1319 inline void compact2Heaptophelper(int coren,
1320                                               int* p,
1321                                                                                                                                         int* numblocks,
1322                                                                                                                                         int* remain) {
1323         int b;
1324         int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
1325         if(STARTUPCORE == coren) {
1326                 gctomove = true;
1327                 gcmovestartaddr = *p;
1328                 gcdstcore = gctopcore;
1329                 gcblock2fill = *numblocks + 1;
1330         } else {
1331                 send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1);
1332         }
1333 #ifdef DEBUG
1334         BAMBOO_DEBUGPRINT_REG(coren);
1335         BAMBOO_DEBUGPRINT_REG(gctopcore);
1336         BAMBOO_DEBUGPRINT_REG(*p);
1337         BAMBOO_DEBUGPRINT_REG(*numblocks+1);
1338 #endif
1339         if(memneed < *remain) {
1340 #ifdef DEBUG
1341                 BAMBOO_DEBUGPRINT(0xd104);
1342 #endif
1343                 *p = *p + memneed;
1344                 gcrequiredmems[coren] = 0;
1345                 gcloads[gctopcore] += memneed;
1346                 *remain = *remain - memneed;
1347         } else {
1348 #ifdef DEBUG
1349                 BAMBOO_DEBUGPRINT(0xd105);
1350 #endif
1351                 // next available block
1352                 *p = *p + *remain;
1353                 gcfilledblocks[gctopcore] += 1;
1354                 int newbase = 0;
1355                 BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
1356                 gcloads[gctopcore] = newbase;
1357                 gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
1358                 gcstopblock[gctopcore]++;
1359                 gctopcore = NEXTTOPCORE(gctopblock);
1360                 gctopblock++;
1361                 *numblocks = gcstopblock[gctopcore];
1362                 *p = gcloads[gctopcore];
1363                 BLOCKINDEX(*p, &b);
1364                 *remain=(b<NUMCORES)?((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
1365                                                                                           :((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
1366 #ifdef DEBUG
1367                 BAMBOO_DEBUGPRINT(0xd106);
1368                 BAMBOO_DEBUGPRINT_REG(gctopcore);
1369                 BAMBOO_DEBUGPRINT_REG(*p);
1370                 BAMBOO_DEBUGPRINT_REG(b);
1371                 BAMBOO_DEBUGPRINT_REG(*remain);
1372 #endif
1373         } // if(memneed < remain)
1374         gcmovepending--;
1375 } // void compact2Heaptophelper(int, int*, int*, int*)
1376
1377 inline void compact2Heaptop() {
1378         // no cores with spare mem and some cores are blocked with pending move
1379         // find the current heap top and make them move to the heap top
1380         int p;
1381         int numblocks = gcfilledblocks[gctopcore];
1382         //BASEPTR(gctopcore, numblocks, &p);
1383         p = gcloads[gctopcore];
1384         int b;
1385         BLOCKINDEX(p, &b);
1386         int remain = (b<NUMCORES)?((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
1387                                        :((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
1388         // check if the top core finishes
1389         if(gccorestatus[gctopcore] != 0) {
1390 #ifdef DEBUG
1391                 BAMBOO_DEBUGPRINT(0xd101);
1392                 BAMBOO_DEBUGPRINT_REG(gctopcore);
1393 #endif
1394                 // let the top core finishes its own work first
1395                 compact2Heaptophelper(gctopcore, &p, &numblocks, &remain);
1396                 return;
1397         }
1398
1399 #ifdef DEBUG
1400         BAMBOO_DEBUGPRINT(0xd102);
1401         BAMBOO_DEBUGPRINT_REG(gctopcore);
1402         BAMBOO_DEBUGPRINT_REG(p);
1403         BAMBOO_DEBUGPRINT_REG(b);
1404         BAMBOO_DEBUGPRINT_REG(remain);
1405 #endif
1406         /*if((gctopcore == STARTUPCORE) && (b == 0)) {
1407                 remain -= gcreservedsb*BAMBOO_SMEM_SIZE;
1408                 p += gcreservedsb*BAMBOO_SMEM_SIZE;
1409         }*/
1410         for(int i = 0; i < NUMCORES; i++) {
1411                 BAMBOO_START_CRITICAL_SECTION();
1412                 if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
1413 #ifdef DEBUG
1414                         BAMBOO_DEBUGPRINT(0xd103);
1415 #endif
1416                         compact2Heaptophelper(i, &p, &numblocks, &remain);
1417                         if(gccorestatus[gctopcore] != 0) {
1418 #ifdef DEBUG
1419                                 BAMBOO_DEBUGPRINT(0xd101);
1420                                 BAMBOO_DEBUGPRINT_REG(gctopcore);
1421 #endif
1422                                 // the top core is not free now
1423                                 return;
1424                         }
1425                 } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
1426                 BAMBOO_CLOSE_CRITICAL_SECTION();
1427         } // for(i = 0; i < NUMCORES; i++)
1428 #ifdef DEBUG
1429         BAMBOO_DEBUGPRINT(0xd106);
1430 #endif
1431 } // void compact2Heaptop()
1432
1433 inline void resolvePendingMoveRequest() {
1434 #ifdef DEBUG
1435         BAMBOO_DEBUGPRINT(0xeb01);
1436 #endif
1437 #ifdef DEBUG
1438                 BAMBOO_DEBUGPRINT(0xeeee);
1439                 for(int k = 0; k < NUMCORES; k++) {
1440                         BAMBOO_DEBUGPRINT(0xf000+k);
1441                         BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
1442                         BAMBOO_DEBUGPRINT_REG(gcloads[k]);
1443                         BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
1444                         BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
1445                 }
1446                 BAMBOO_DEBUGPRINT(0xffff);
1447 #endif
1448         int i;
1449         int j;
1450         bool nosparemem = true;
1451         bool haspending = false;
1452         bool hasrunning = false;
1453         bool noblock = false;
1454         int dstcore = 0; // the core who need spare mem
1455         int sourcecore = 0; // the core who has spare mem
1456         for(i = j = 0; (i < NUMCORES) && (j < NUMCORES);) {
1457                 if(nosparemem) {
1458                         // check if there are cores with spare mem
1459                         if(gccorestatus[i] == 0) {
1460                                 // finished working, check if it still have spare mem
1461                                 if(gcfilledblocks[i] < gcstopblock[i]) {
1462                                         // still have spare mem
1463                                         nosparemem = false;
1464                                         sourcecore = i;
1465                                 } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
1466                         }
1467                         i++;
1468                 } // if(nosparemem)
1469                 if(!haspending) {
1470                         if(gccorestatus[j] != 0) {
1471                                 // not finished, check if it has pending move requests
1472                                 if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
1473                                         dstcore = j;
1474                                         haspending = true;
1475                                 } else {
1476                                         hasrunning = true;
1477                                 } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
1478                         } // if(gccorestatus[i] == 0) else ...
1479                         j++;
1480                 } // if(!haspending)
1481                 if(!nosparemem && haspending) {
1482                         // find match
1483                         int tomove = 0;
1484                         int startaddr = 0;
1485                         BAMBOO_START_CRITICAL_SECTION();
1486                         gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore, 
1487                                                                                gcrequiredmems[dstcore], 
1488                                                                                                                                                                                            &tomove, 
1489                                                                                                                                                                                            &startaddr);
1490                         BAMBOO_CLOSE_CRITICAL_SECTION();
1491 #ifdef DEBUG
1492                         BAMBOO_DEBUGPRINT(0xeb02);
1493                         BAMBOO_DEBUGPRINT_REG(sourcecore);
1494                         BAMBOO_DEBUGPRINT_REG(dstcore);
1495                         BAMBOO_DEBUGPRINT_REG(startaddr);
1496                         BAMBOO_DEBUGPRINT_REG(tomove);
1497 #endif
1498                         if(STARTUPCORE == dstcore) {
1499 #ifdef DEBUG
1500                                 BAMBOO_DEBUGPRINT(0xeb03);
1501 #endif
1502                                 gcdstcore = sourcecore;
1503                                 gctomove = true;
1504                                 gcmovestartaddr = startaddr;
1505                                 gcblock2fill = tomove;
1506                         } else {
1507 #ifdef DEBUG
1508                                 BAMBOO_DEBUGPRINT(0xeb04);
1509 #endif
1510                                 send_msg_4(dstcore, GCMOVESTART, sourcecore, startaddr, tomove);
1511                         }
1512                         gcmovepending--;
1513                         nosparemem = true;
1514                         haspending = false;
1515                         noblock = true;
1516                 }
1517         } // for(i = 0; i < NUMCORES; i++)
1518 #ifdef DEBUG
1519         BAMBOO_DEBUGPRINT(0xcccc);
1520         BAMBOO_DEBUGPRINT_REG(hasrunning);
1521         BAMBOO_DEBUGPRINT_REG(haspending);
1522         BAMBOO_DEBUGPRINT_REG(noblock);
1523 #endif
1524
1525         if(!hasrunning && !noblock) {
1526                 gcphase = SUBTLECOMPACTPHASE;
1527                 compact2Heaptop();
1528         }
1529
1530 } // void resovePendingMoveRequest()
1531
1532 struct moveHelper {
1533         int numblocks; // block num for heap
1534         INTPTR base; // base virtual address of current heap block
1535         INTPTR ptr; // virtual address of current heap top
1536         int offset; // offset in current heap block
1537         int blockbase; // virtual address of current small block to check
1538         int blockbound; // bound virtual address of current small blcok
1539         int sblockindex; // index of the small blocks
1540         int top; // real size of current heap block to check
1541         int bound; // bound size of current heap block to check
1542 }; // struct moveHelper
1543
1544 // if out of boundary of valid shared memory, return false, else return true
1545 inline bool nextSBlock(struct moveHelper * orig) {
1546         orig->blockbase = orig->blockbound;
1547 #ifdef DEBUG 
1548         BAMBOO_DEBUGPRINT(0xecc0);
1549         BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1550         BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1551         BAMBOO_DEBUGPRINT_REG(orig->bound);
1552         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1553 #endif
1554 outernextSBlock:
1555         // check if across a big block
1556         if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound) 
1557                         || ((orig->ptr != NULL) && (*((int*)orig->ptr))==0) 
1558                         || ((*((int*)orig->blockbase))==0)) {
1559 innernextSBlock:
1560                 // end of current heap block, jump to next one
1561                 orig->numblocks++;
1562 #ifdef DEBUG 
1563                 BAMBOO_DEBUGPRINT(0xecc1);
1564                 BAMBOO_DEBUGPRINT_REG(orig->numblocks);
1565 #endif
1566                 BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
1567 #ifdef DEBUG 
1568                 BAMBOO_DEBUGPRINT(orig->base);
1569 #endif
1570                 if(orig->base >= BAMBOO_BASE_VA + BAMBOO_SHARED_MEM_SIZE) {
1571                         // out of boundary
1572                         orig->ptr = orig->base; // set current ptr to out of boundary too
1573                         return false;
1574                 }
1575                 orig->bound = orig->base + BAMBOO_SMEM_SIZE;
1576                 orig->blockbase = orig->base;
1577                 orig->sblockindex = (orig->blockbase-BAMBOO_BASE_VA)/BAMBOO_SMEM_SIZE;
1578         } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
1579                 orig->sblockindex += 1;
1580         } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...
1581
1582         // check if this sblock should be omitted or have special start point
1583         if(gcsbstarttbl[orig->sblockindex] == -1) {
1584                 // goto next sblock
1585 #ifdef DEBUG
1586                 BAMBOO_DEBUGPRINT(0xecc2);
1587 #endif
1588                 orig->sblockindex += 1;
1589                 orig->blockbase += BAMBOO_SMEM_SIZE;
1590                 goto outernextSBlock;
1591         } else if(gcsbstarttbl[orig->sblockindex] != 0) {
1592 #ifdef DEBUG
1593                 BAMBOO_DEBUGPRINT(0xecc3);
1594 #endif
1595                 // not start from the very beginning
1596                 orig->blockbase = gcsbstarttbl[orig->sblockindex];
1597         } // if(gcsbstarttbl[orig->sblockindex] == -1) else ...
1598
1599         // setup information for this sblock
1600         orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1601         orig->offset = BAMBOO_CACHE_LINE_SIZE;
1602         orig->ptr = orig->blockbase + orig->offset;
1603 #ifdef DEBUG
1604         BAMBOO_DEBUGPRINT(0xecc4);
1605         BAMBOO_DEBUGPRINT_REG(orig->base);
1606         BAMBOO_DEBUGPRINT_REG(orig->bound);
1607         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1608         BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1609         BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1610         BAMBOO_DEBUGPRINT_REG(orig->offset);
1611 #endif
1612         if(orig->ptr >= orig->bound) {
1613                 // met a lobj, move to next block
1614                 goto innernextSBlock;
1615         }
1616
1617         return true;
1618 } // bool nextSBlock(struct moveHelper * orig) 
1619
1620 // return false if there are no available data to compact
1621 inline bool initOrig_Dst(struct moveHelper * orig, 
1622                                      struct moveHelper * to) {
1623         // init the dst ptr
1624         to->numblocks = 0;
1625         to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
1626         to->bound = BAMBOO_SMEM_SIZE_L;
1627         BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1628
1629 #ifdef DEBUG
1630         BAMBOO_DEBUGPRINT(0xef01);
1631         BAMBOO_DEBUGPRINT_REG(to->base);
1632 #endif
1633         to->ptr = to->base + to->offset;
1634
1635         // init the orig ptr
1636         orig->numblocks = 0;
1637         orig->base = to->base;
1638         orig->bound = to->base + BAMBOO_SMEM_SIZE_L;
1639         orig->blockbase = orig->base;
1640         orig->sblockindex = (orig->base - BAMBOO_BASE_VA) / BAMBOO_SMEM_SIZE;
1641 #ifdef DEBUG 
1642         BAMBOO_DEBUGPRINT(0xef02);
1643         BAMBOO_DEBUGPRINT_REG(orig->base);
1644         BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
1645         BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
1646         BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);
1647 #endif
1648
1649         if(gcsbstarttbl[orig->sblockindex] == -1) {
1650 #ifdef DEBUG 
1651                 BAMBOO_DEBUGPRINT(0xef03);
1652 #endif
1653                 // goto next sblock
1654                 orig->blockbound = 
1655                         BAMBOO_BASE_VA+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
1656                 return nextSBlock(orig);
1657         } else if(gcsbstarttbl[orig->sblockindex] != 0) {
1658 #ifdef DEBUG 
1659                 BAMBOO_DEBUGPRINT(0xef04);
1660 #endif
1661                 orig->blockbase = gcsbstarttbl[orig->sblockindex];
1662         }
1663 #ifdef DEBUG 
1664         BAMBOO_DEBUGPRINT(0xef05);
1665 #endif
1666         orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1667         orig->offset = BAMBOO_CACHE_LINE_SIZE;
1668         orig->ptr = orig->blockbase + orig->offset;
1669 #ifdef DEBUG
1670         BAMBOO_DEBUGPRINT(0xef06);
1671         BAMBOO_DEBUGPRINT_REG(orig->base);
1672 #endif
1673         return true;
1674 } // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to) 
1675
1676 inline void nextBlock(struct moveHelper * to) {
1677         to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
1678         to->bound += BAMBOO_SMEM_SIZE;
1679         to->numblocks++;
1680         BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1681         to->offset = BAMBOO_CACHE_LINE_SIZE;
1682         to->ptr = to->base + to->offset;
1683 } // void nextBlock(struct moveHelper * to)
1684
1685 // endaddr does not contain spaces for headers
1686 inline bool moveobj(struct moveHelper * orig, 
1687                                 struct moveHelper * to, 
1688                                                         int stopblock) {
1689         if(stopblock == 0) {
1690                 return true;
1691         }
1692
1693 #ifdef DEBUG
1694         BAMBOO_DEBUGPRINT(0xe201);
1695         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1696         BAMBOO_DEBUGPRINT_REG(to->ptr);
1697 #endif
1698
1699         int type = 0;
1700         int size = 0;
1701         int mark = 0;
1702         int isize = 0;
1703 innermoveobj:
1704         while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
1705                 orig->ptr = (int*)(orig->ptr) + 1;
1706         }
1707         if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
1708                 if(!nextSBlock(orig)) {
1709                         // finished, no more data
1710                         return true;
1711                 }
1712                 goto innermoveobj;
1713         }
1714 #ifdef DEBUG
1715         BAMBOO_DEBUGPRINT(0xe202);
1716         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1717         BAMBOO_DEBUGPRINT(((int *)(orig->ptr))[0]);
1718 #endif
1719         // check the obj's type, size and mark flag
1720         type = ((int *)(orig->ptr))[0];
1721         size = 0;
1722         if(type == 0) {
1723                 // end of this block, go to next one
1724                 if(!nextSBlock(orig)) {
1725                         // finished, no more data
1726                         return true;
1727                 }
1728                 goto innermoveobj;
1729         } else if(type < NUMCLASSES) {
1730                 // a normal object
1731                 size = classsize[type];
1732         } else {        
1733                 // an array 
1734                 struct ArrayObject *ao=(struct ArrayObject *)(orig->ptr);
1735                 int elementsize=classsize[type];
1736                 int length=ao->___length___; 
1737                 size=sizeof(struct ArrayObject)+length*elementsize;
1738         }
1739         mark = ((int *)(orig->ptr))[6];
1740 #ifdef DEBUG
1741         BAMBOO_DEBUGPRINT(0xe203);
1742         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1743         BAMBOO_DEBUGPRINT_REG(size);
1744 #endif
1745         ALIGNSIZE(size, &isize); // no matter is the obj marked or not
1746                                  // should be able to across it
1747         if(mark == 1) {
1748 #ifdef DEBUG
1749                 BAMBOO_DEBUGPRINT(0xe204);
1750 #endif
1751                 // marked obj, copy it to current heap top
1752                 // check to see if remaining space is enough
1753                 if(to->top + isize > to->bound) {
1754                         // fill -1 indicating the end of this block
1755                         /*if(to->top != to->bound) {
1756                                 *((int*)to->ptr) = -1;
1757                         }*/
1758                         //memset(to->ptr+1,  -2, to->bound - to->top - 1);
1759                         // fill the header of this block and then go to next block
1760         to->offset += to->bound - to->top;
1761                         memset(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
1762                         (*((int*)(to->base))) = to->offset;
1763                         nextBlock(to);
1764                         if(stopblock == to->numblocks) {
1765                                 // already fulfilled the block
1766                                 return true;
1767                         } // if(stopblock == to->numblocks)
1768                 } // if(to->top + isize > to->bound)
1769                 // set the mark field to 2, indicating that this obj has been moved 
1770                 // and need to be flushed
1771                 ((int *)(orig->ptr))[6] = 2;
1772                 if(to->ptr != orig->ptr) {
1773                         memcpy(to->ptr, orig->ptr, size);
1774                         // fill the remaining space with -2
1775                         memset(to->ptr+size, -2, isize-size);
1776                 }
1777                 // store mapping info
1778                 BAMBOO_START_CRITICAL_SECTION();
1779                 RuntimeHashadd_I(gcpointertbl, orig->ptr, to->ptr); 
1780                 BAMBOO_CLOSE_CRITICAL_SECTION();
1781 #ifdef DEBUG
1782                 BAMBOO_DEBUGPRINT(0xcdce);
1783                 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1784                 BAMBOO_DEBUGPRINT_REG(to->ptr);
1785 #endif
1786                 gccurr_heaptop -= isize;
1787                 to->ptr += isize;
1788                 to->offset += isize;
1789                 to->top += isize;
1790                 if(to->top == to->bound) {
1791                         // fill the header of this block and then go to next block
1792                         memset(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
1793                         (*((int*)(to->base))) = to->offset;
1794                         nextBlock(to);
1795                 }
1796         } // if(mark == 1)
1797 #ifdef DEBUG
1798         BAMBOO_DEBUGPRINT(0xe205);
1799 #endif
1800         // move to next obj
1801         orig->ptr += isize;
1802 #ifdef DEBUG
1803         BAMBOO_DEBUGPRINT_REG(isize);
1804         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1805         BAMBOO_DEBUGPRINT_REG(orig->bound);
1806 #endif
1807         if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
1808 #ifdef DEBUG
1809         BAMBOO_DEBUGPRINT(0xe206);
1810 #endif
1811                 if(!nextSBlock(orig)) {
1812                         // finished, no more data
1813                         return true;
1814                 }
1815         }
1816 #ifdef DEBUG
1817         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1818 #endif
1819         return false;
1820 } //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int* endaddr)
1821
1822 // should be invoked with interrupt closed
1823 inline int assignSpareMem_I(int sourcecore,
1824                                         int * requiredmem,
1825                                                                                                           int * tomove,
1826                                                                                                           int * startaddr) {
1827         int b = 0;
1828         BLOCKINDEX(gcloads[sourcecore], &b);
1829         int boundptr = (b<NUMCORES)?((b+1)*BAMBOO_SMEM_SIZE_L)
1830                 :(BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES+1)*BAMBOO_SMEM_SIZE);
1831         int remain = boundptr - gcloads[sourcecore];
1832         int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
1833         *startaddr = gcloads[sourcecore];
1834         *tomove = gcfilledblocks[sourcecore] + 1;
1835         if(memneed < remain) {
1836                 gcloads[sourcecore] += memneed;
1837                 return 0;
1838         } else {
1839                 // next available block
1840                 gcfilledblocks[sourcecore] += 1;
1841                 int newbase = 0;
1842                 BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
1843                 gcloads[sourcecore] = newbase;
1844                 return requiredmem-remain;
1845         }
1846 } // int assignSpareMem_I(int ,int * , int * , int * )
1847
1848 // should be invoked with interrupt closed
1849 inline bool gcfindSpareMem_I(int * startaddr,
1850                                          int * tomove,
1851                                                                                                    int * dstcore,
1852                                                                                                    int requiredmem,
1853                                                                                                    int requiredcore) {
1854         for(int k = 0; k < NUMCORES; k++) {
1855                 if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
1856                         // check if this stopped core has enough mem
1857                         assignSpareMem_I(k, requiredmem, tomove, startaddr);
1858                         *dstcore = k;
1859                         return true;
1860                 }
1861         }
1862         // if can not find spare mem right now, hold the request
1863         gcrequiredmems[requiredcore] = requiredmem;
1864         gcmovepending++;
1865         return false;
1866 } //bool gcfindSpareMem_I(int* startaddr,int* tomove,int mem,int core)
1867
1868 inline bool compacthelper(struct moveHelper * orig,
1869                                       struct moveHelper * to,
1870                                                                                                         int * filledblocks,
1871                                                                                                         int * heaptopptr,
1872                                                                                                         bool * localcompact) {
1873         // scan over all objs in this block, compact the marked objs 
1874         // loop stop when finishing either scanning all active objs or 
1875         // fulfilled the gcstopblock
1876 #ifdef DEBUG
1877         BAMBOO_DEBUGPRINT(0xe101);
1878         BAMBOO_DEBUGPRINT_REG(gcblock2fill);
1879 #endif
1880 innercompact:
1881         do {
1882                 bool stop = moveobj(orig, to, gcblock2fill);
1883                 if(stop) {
1884                         break;
1885                 }
1886         } while(orig->ptr < gcmarkedptrbound);
1887         // if no objs have been compact, do nothing, 
1888         // otherwise, fill the header of this block
1889         if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
1890                 memset(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
1891                 (*((int*)(to->base))) = to->offset;
1892         } else {
1893                 to->offset = 0;
1894                 to->ptr = to->base;
1895                 to->top -= BAMBOO_CACHE_LINE_SIZE;
1896         } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
1897         if(*localcompact) {
1898                 *heaptopptr = to->ptr;
1899                 *filledblocks = to->numblocks;
1900         }
1901 #ifdef DEBUG
1902         BAMBOO_DEBUGPRINT(0xe102);
1903         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1904         BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
1905         BAMBOO_DEBUGPRINT_REG(*heaptopptr);
1906         BAMBOO_DEBUGPRINT_REG(*filledblocks);
1907         BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
1908 #endif
1909
1910         // send msgs to core coordinator indicating that the compact is finishing
1911         // send compact finish message to core coordinator
1912         if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1913                 gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
1914                 gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
1915                 if(orig->ptr < gcmarkedptrbound) {
1916 #ifdef DEBUG
1917                         BAMBOO_DEBUGPRINT(0xe103);
1918 #endif
1919                         // ask for more mem
1920                         gctomove = false;
1921                         BAMBOO_START_CRITICAL_SECTION();
1922                         if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore, 
1923                                                               gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
1924 #ifdef DEBUG
1925                                 BAMBOO_DEBUGPRINT(0xe104);
1926 #endif
1927                                 gctomove = true;
1928                         } else {
1929                                 BAMBOO_CLOSE_CRITICAL_SECTION();
1930 #ifdef DEBUG
1931                                 BAMBOO_DEBUGPRINT(0xe105);
1932 #endif
1933                                 return false; 
1934                         }
1935                         BAMBOO_CLOSE_CRITICAL_SECTION();
1936                 } else {
1937 #ifdef DEBUG
1938                         BAMBOO_DEBUGPRINT(0xe106);
1939 #endif
1940                         gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
1941                         gctomove = false;
1942                         return true;
1943                 }
1944         } else {
1945                 if(orig->ptr < gcmarkedptrbound) {
1946 #ifdef DEBUG
1947                         BAMBOO_DEBUGPRINT(0xe107);
1948 #endif
1949                         // ask for more mem
1950                         gctomove = false;
1951                         send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE, 
1952                                                *filledblocks, *heaptopptr, gccurr_heaptop);
1953                 } else {
1954 #ifdef DEBUG
1955                         BAMBOO_DEBUGPRINT(0xe108);
1956                         BAMBOO_DEBUGPRINT_REG(*heaptopptr);
1957 #endif
1958                         // finish compacting
1959                         send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
1960                                                *filledblocks, *heaptopptr, 0);
1961                 }
1962         } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
1963
1964         if(orig->ptr < gcmarkedptrbound) {
1965 #ifdef DEBUG
1966                 BAMBOO_DEBUGPRINT(0xe109);
1967 #endif
1968                 // still have unpacked obj
1969                 while(true) {
1970                         if(gctomove) {
1971                                 break;
1972                         }
1973                 };
1974                 gctomove = false;
1975 #ifdef DEBUG
1976                 BAMBOO_DEBUGPRINT(0xe10a);
1977 #endif
1978
1979                 to->ptr = gcmovestartaddr;
1980                 to->numblocks = gcblock2fill - 1;
1981                 to->bound = (to->numblocks==0)?
1982                         BAMBOO_SMEM_SIZE_L:
1983                         BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
1984                 BASEPTR(gcdstcore, to->numblocks, &(to->base));
1985                 to->offset = to->ptr - to->base;
1986                 to->top = (to->numblocks==0)?
1987                         (to->offset):(to->bound-BAMBOO_SMEM_SIZE+to->offset);
1988                 to->base = to->ptr;
1989                 to->offset = BAMBOO_CACHE_LINE_SIZE;
1990                 to->ptr += to->offset; // for header
1991                 to->top += to->offset;
1992                 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
1993                         *localcompact = true;
1994                 } else {
1995                         *localcompact = false;
1996                 }
1997                 goto innercompact;
1998         }
1999 #ifdef DEBUG
2000         BAMBOO_DEBUGPRINT(0xe10b);
2001 #endif
2002         return true;
2003 } // void compacthelper()
2004
2005 inline void compact() {
2006         if(COMPACTPHASE != gcphase) {
2007                 BAMBOO_EXIT(0xb102);
2008         }
2009
2010         // initialize pointers for comapcting
2011         struct moveHelper * orig = 
2012                 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2013         struct moveHelper * to = 
2014                 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2015
2016         if(!initOrig_Dst(orig, to)) {
2017                 // no available data to compact
2018                 RUNFREE(orig);
2019                 RUNFREE(to);
2020                 return;
2021         }
2022
2023         int filledblocks = 0;
2024         INTPTR heaptopptr = 0;
2025         bool localcompact = true;
2026         compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
2027         
2028         RUNFREE(orig);
2029         RUNFREE(to);
2030 } // compact()
2031
2032 inline void * flushObj(void * objptr) {
2033 #ifdef DEBUG
2034         BAMBOO_DEBUGPRINT(0xe401);
2035 #endif
2036         void * dstptr = NULL;
2037         if(ISSHAREDOBJ(objptr)) {
2038 #ifdef DEBUG
2039                 BAMBOO_DEBUGPRINT(0xe402);
2040                 BAMBOO_DEBUGPRINT_REG(objptr);
2041 #endif
2042                 // a shared obj ptr, change to new address
2043                 BAMBOO_START_CRITICAL_SECTION();
2044                 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2045                 BAMBOO_CLOSE_CRITICAL_SECTION();
2046 #ifdef DEBUG
2047                 BAMBOO_DEBUGPRINT_REG(dstptr);
2048 #endif
2049                 if(NULL == dstptr) {
2050 #ifdef DEBUG 
2051                         BAMBOO_DEBUGPRINT(0xe403);
2052                         BAMBOO_DEBUGPRINT_REG(objptr);
2053                         BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
2054 #endif
2055                         if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) {
2056                                 // error! the obj is right on this core, but cannot find it
2057                                 BAMBOO_DEBUGPRINT_REG(objptr);
2058                                 BAMBOO_EXIT(0xb103);
2059                         }
2060                         // send msg to host core for the mapping info
2061                         gcobj2map = (int)objptr;
2062                         gcismapped = false;
2063                         gcmappedobj = NULL;
2064                         send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr, 
2065                                                                  BAMBOO_NUM_OF_CORE);
2066                         while(true) {
2067                                 if(gcismapped) {
2068                                         break;
2069                                 }
2070                         }
2071                         BAMBOO_START_CRITICAL_SECTION();
2072                         RuntimeHashget(gcpointertbl, objptr, &dstptr);
2073                         BAMBOO_CLOSE_CRITICAL_SECTION();
2074 #ifdef DEBUG
2075                         BAMBOO_DEBUGPRINT_REG(dstptr);
2076 #endif
2077                 }
2078         } // if(ISSHAREDOBJ(objptr))
2079 #ifdef DEBUG
2080         BAMBOO_DEBUGPRINT(0xe404);
2081 #endif
2082         return dstptr;
2083 } // void flushObj(void * objptr, void ** tochange)
2084
2085 inline void flushRuntimeObj(struct garbagelist * stackptr) {
2086         int i,j;
2087         // flush current stack 
2088         while(stackptr!=NULL) {
2089                 for(i=0; i<stackptr->size; i++) {
2090                         if(stackptr->array[i] != NULL) {
2091                                 stackptr->array[i] = flushObj(stackptr->array[i]);
2092                         }
2093                 }
2094                 stackptr=stackptr->next;
2095         }
2096
2097         // flush objectsets
2098         for(i=0; i<NUMCLASSES; i++) {
2099                 struct parameterwrapper ** queues = 
2100                         objectqueues[BAMBOO_NUM_OF_CORE][i];
2101                 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
2102                 for(j = 0; j < length; ++j) {
2103                         struct parameterwrapper * parameter = queues[j];
2104                         struct ObjectHash * set=parameter->objectset;
2105                         struct ObjectNode * ptr=set->listhead;
2106                         while(ptr!=NULL) {
2107                                 ptr->key = flushObj((void *)ptr->key);
2108                                 ptr=ptr->lnext;
2109                         }
2110                         ObjectHashrehash(set);
2111                 }
2112         }
2113
2114         // flush current task descriptor
2115         if(currtpd != NULL) {
2116                 for(i=0; i<currtpd->numParameters; i++) {
2117                         currtpd->parameterArray[i] = flushObj(currtpd->parameterArray[i]);
2118                 }
2119         }
2120
2121         // flush active tasks
2122         struct genpointerlist * ptr=activetasks->list;
2123         while(ptr!=NULL) {
2124                 struct taskparamdescriptor *tpd=ptr->src;
2125                 int i;
2126                 for(i=0; i<tpd->numParameters; i++) {
2127                         tpd->parameterArray[i] = flushObj(tpd->parameterArray[i]);
2128                 }
2129                 ptr=ptr->inext;
2130         }
2131         genrehash(activetasks);
2132
2133         // flush cached transferred obj
2134         struct QueueItem * tmpobjptr =  getHead(&objqueue);
2135         while(tmpobjptr != NULL) {
2136                 struct transObjInfo * objInfo = 
2137                         (struct transObjInfo *)(tmpobjptr->objectptr); 
2138                 objInfo->objptr = flushObj(objInfo->objptr);
2139                 tmpobjptr = getNextQueueItem(tmpobjptr);
2140         }
2141
2142         // flush cached objs to be transferred
2143         struct QueueItem * item = getHead(totransobjqueue);
2144         while(item != NULL) {
2145                 struct transObjInfo * totransobj = 
2146                         (struct transObjInfo *)(item->objectptr);
2147                 totransobj->objptr = flushObj(totransobj->objptr);
2148                 item = getNextQueueItem(item);
2149         } // while(item != NULL)
2150
2151         // enqueue lock related info
2152         for(i = 0; i < runtime_locklen; ++i) {
2153           runtime_locks[i].redirectlock = (int)flushObj(runtime_locks[i].redirectlock);
2154                 if(runtime_locks[i].value != NULL) {
2155                   runtime_locks[i].value = (int)flushObj(runtime_locks[i].value);
2156           }
2157         }
2158
2159 } // void flushRuntimeObj(struct garbagelist * stackptr)
2160
2161 inline void flush(struct garbagelist * stackptr) {
2162         flushRuntimeObj(stackptr);
2163         
2164         while(gc_moreItems()) {
2165 #ifdef DEBUG
2166                 BAMBOO_DEBUGPRINT(0xe301);
2167 #endif
2168                 void * ptr = gc_dequeue();
2169                 void * tptr = flushObj(ptr);
2170 #ifdef DEBUG
2171                 BAMBOO_DEBUGPRINT(0xe302);
2172                 BAMBOO_DEBUGPRINT_REG(ptr);
2173                 BAMBOO_DEBUGPRINT_REG(tptr);
2174 #endif
2175                 if(tptr != NULL) {
2176                         ptr = tptr;
2177                 }
2178                 if(((int *)(ptr))[6] == 2) {
2179                         int type = ((int *)(ptr))[0];
2180                         // scan all pointers in ptr
2181                         unsigned INTPTR * pointer;
2182                         pointer=pointerarray[type];
2183 #ifdef DEBUG
2184                         BAMBOO_DEBUGPRINT(0xe303);
2185                         BAMBOO_DEBUGPRINT_REG(pointer);
2186 #endif
2187                         if (pointer==0) {
2188                                 /* Array of primitives */
2189                                 /* Do nothing */
2190                         } else if (((INTPTR)pointer)==1) {
2191 #ifdef DEBUG
2192                                 BAMBOO_DEBUGPRINT(0xe304);
2193 #endif
2194                                 /* Array of pointers */
2195                                 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2196                                 int length=ao->___length___;
2197                                 int j;
2198                                 for(j=0; j<length; j++) {
2199 #ifdef DEBUG
2200                                         BAMBOO_DEBUGPRINT(0xe305);
2201 #endif
2202                                         void *objptr=
2203                                                 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2204 #ifdef DEBUG
2205                                         BAMBOO_DEBUGPRINT_REG(objptr);
2206 #endif
2207                                         ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = 
2208                                                 flushObj(objptr);
2209                                 }
2210                         } else {
2211 #ifdef DEBUG
2212                                 BAMBOO_DEBUGPRINT(0xe306);
2213 #endif
2214                                 INTPTR size=pointer[0];
2215                                 int i;
2216                                 for(i=1; i<=size; i++) {
2217 #ifdef DEBUG
2218                                         BAMBOO_DEBUGPRINT(0xe307);
2219 #endif
2220                                         unsigned int offset=pointer[i];
2221                                         void * objptr=*((void **)(((char *)ptr)+offset));
2222 #ifdef DEBUG
2223                                         BAMBOO_DEBUGPRINT_REG(objptr);
2224 #endif
2225                                         *((void **)(((char *)ptr)+offset)) = flushObj(objptr);
2226                                 } // for(i=1; i<=size; i++) 
2227                         } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2228                         // restore the mark field, indicating that this obj has been flushed
2229                         ((int *)(ptr))[6] = 0;
2230                 } // if(((int *)(ptr))[6] == 2)
2231         } // while(gc_moreItems())
2232 #ifdef DEBUG
2233         BAMBOO_DEBUGPRINT(0xe308);
2234 #endif
2235         // send flush finish message to core coordinator
2236         if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2237                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2238         } else {
2239                 send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE);
2240         }
2241 #ifdef DEBUG
2242         BAMBOO_DEBUGPRINT(0xe309);
2243 #endif
2244 } // flush()
2245
2246 inline void gc_collect(struct garbagelist * stackptr) {
2247         // core collector routine
2248         while(true) {
2249                 //BAMBOO_START_CRITICAL_SECTION();
2250                 if(INITPHASE == gcphase) {
2251                         //BAMBOO_CLOSE_CRITICAL_SECTION();
2252                         break;
2253                 }
2254                 //BAMBOO_CLOSE_CRITICAL_SECTION();
2255         }
2256 #ifdef GC_DEBUG
2257         tprintf("Do initGC\n");
2258 #endif
2259         initGC();
2260         //send init finish msg to core coordinator
2261         send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE);
2262         while(true) {
2263                 //BAMBOO_START_CRITICAL_SECTION();
2264                 if(MARKPHASE == gcphase) {
2265                         //BAMBOO_CLOSE_CRITICAL_SECTION();
2266                         break;
2267                 }
2268                 //BAMBOO_CLOSE_CRITICAL_SECTION();
2269         }
2270 #ifdef GC_DEBUG
2271         tprintf("Start mark phase\n");
2272 #endif
2273         mark(true, stackptr);
2274 #ifdef GC_DEBUG
2275         tprintf("Finish mark phase, start compact phase\n");
2276 #endif
2277         compact();
2278 #ifdef GC_DEBUG
2279         tprintf("Finish compact phase\n");
2280 #endif
2281         while(true) {
2282                 //BAMBOO_START_CRITICAL_SECTION();
2283                 if(FLUSHPHASE == gcphase) {
2284                         //BAMBOO_CLOSE_CRITICAL_SECTION();
2285                         break;
2286                 }
2287                 //BAMBOO_CLOSE_CRITICAL_SECTION();
2288         }
2289 #ifdef GC_DEBUG
2290         tprintf("Start flush phase\n");
2291 #endif
2292         flush(stackptr);
2293 #ifdef GC_DEBUG
2294         tprintf("Finish flush phase\n");
2295 #endif
2296
2297         while(true) {
2298                 //BAMBOO_START_CRITICAL_SECTION();
2299                 if(FINISHPHASE == gcphase) {
2300                         //BAMBOO_CLOSE_CRITICAL_SECTION();
2301                         break;
2302                 }
2303                 //BAMBOO_CLOSE_CRITICAL_SECTION();
2304         }
2305 #ifdef GC_DEBUG
2306         tprintf("Finish gc!\n");
2307 #endif
2308 } // void gc_collect(struct garbagelist * stackptr)
2309
2310 inline void gc(struct garbagelist * stackptr) {
2311         // check if do gc
2312         if(!gcflag) {
2313                 gcprocessing = false;
2314                 return;
2315         }
2316
2317         // core coordinator routine
2318         if(0 == BAMBOO_NUM_OF_CORE) {
2319 #ifdef GC_DEBUG
2320         tprintf("Check if can do gc or not\n");
2321 #endif
2322                 if(!preGC()) {
2323                         // not ready to do gc
2324                         gcflag = true;
2325                         return;
2326                 }
2327
2328 #ifdef GC_DEBUG
2329                 tprintf("start gc! \n");
2330                 //dumpSMem();
2331 #endif
2332
2333                 gcprocessing = true;
2334                 int i = 0;
2335                 waitconfirm = false;
2336                 waitconfirm = 0;
2337                 gcphase = INITPHASE;
2338                 for(i = 1; i < NUMCORES; i++) {
2339                         // send GC init messages to all cores
2340                         send_msg_1(i, GCSTARTINIT);
2341                 }
2342                 bool isfirst = true;
2343                 bool allStall = false;
2344
2345                 initGC();
2346 #ifdef GC_DEBUG
2347                 tprintf("Check core status \n");
2348 #endif
2349
2350                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2351                 while(true) {
2352                         BAMBOO_START_CRITICAL_SECTION();
2353                         if(gc_checkCoreStatus()) {
2354                                 BAMBOO_CLOSE_CRITICAL_SECTION();
2355                                 break;
2356                         }
2357                         BAMBOO_CLOSE_CRITICAL_SECTION();
2358                 }
2359 #ifdef GC_DEBUG
2360                 tprintf("Start mark phase \n");
2361 #endif
2362                 // all cores have finished compacting
2363                 // restore the gcstatus of all cores
2364                 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
2365                 for(i = 1; i < NUMCORES; ++i) {
2366                         gccorestatus[i] = 1;
2367                         // send GC start messages to all cores
2368                         send_msg_1(i, GCSTART);
2369                 }
2370
2371                 gcphase = MARKPHASE;
2372     // mark phase
2373                 while(MARKPHASE == gcphase) {
2374                         mark(isfirst, stackptr);
2375                         if(isfirst) {
2376                                 isfirst = false;
2377                         }
2378
2379                         // check gcstatus
2380                         checkMarkStatue(); 
2381                 }  // while(MARKPHASE == gcphase)
2382                 // send msgs to all cores requiring large objs info
2383                 numconfirm = NUMCORES - 1;
2384                 for(i = 1; i < NUMCORES; ++i) {
2385                         send_msg_1(i, GCLOBJREQUEST);
2386                 }
2387                 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
2388                 while(true) {
2389                         if(numconfirm==0) {
2390                                 break;
2391                         }
2392                 } // wait for responses
2393                 // check the heaptop
2394                 if(gcheaptop < gcmarkedptrbound) {
2395                         gcheaptop = gcmarkedptrbound;
2396                 }
2397 #ifdef GC_DEBUG
2398                 tprintf("prepare to cache large objs \n");
2399                 //dumpSMem();
2400 #endif
2401                 // cache all large objs
2402                 if(!cacheLObjs()) {
2403                         // no enough space to cache large objs
2404                         BAMBOO_EXIT(0xb104);
2405                 }
2406                 // predict number of blocks to fill for each core
2407                 int numpbc = loadbalance();
2408                 // TODO
2409                 numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
2410 #ifdef GC_DEBUG
2411                 tprintf("mark phase finished \n");
2412                 //dumpSMem();
2413 #endif
2414                 int tmptopptr = 0;
2415                 BASEPTR(gctopcore, 0, &tmptopptr);
2416                 // TODO
2417                 tmptopptr = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE);
2418 #ifdef DEBUG
2419                 BAMBOO_DEBUGPRINT(0xabab);
2420                 BAMBOO_DEBUGPRINT_REG(tmptopptr);
2421 #endif
2422                 for(i = 0; i < NUMCORES; ++i) {
2423                         int tmpcoreptr = 0;
2424                         BASEPTR(i, 0, &tmpcoreptr);
2425                         //send start compact messages to all cores
2426                         if (tmpcoreptr < tmptopptr) {
2427                                 gcstopblock[i] = numpbc + 1;
2428                                 if(i != STARTUPCORE) {
2429                                         send_msg_2(i, GCSTARTCOMPACT, numpbc+1); 
2430                                 } else {
2431                                         gcblock2fill = numpbc+1;
2432                                 } // if(i != STARTUPCORE)
2433                         } else {
2434                                 gcstopblock[i] = numpbc;
2435                                 if(i != STARTUPCORE) {
2436                                         send_msg_2(i, GCSTARTCOMPACT, numpbc);
2437                                 } else {
2438                                         gcblock2fill = numpbc;
2439                                 } // if(i != STARTUPCORE)
2440                         }
2441 #ifdef DEBUG
2442                         BAMBOO_DEBUGPRINT(0xf000+i);
2443                         BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
2444                         BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
2445 #endif
2446                         // init some data strutures for compact phase
2447                         gcloads[i] = 0;
2448                         gcfilledblocks[i] = 0;
2449                         gcrequiredmems[i] = 0;
2450                 }
2451
2452                 // compact phase
2453                 bool finalcompact = false;
2454                 // initialize pointers for comapcting
2455                 struct moveHelper * orig = 
2456                         (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2457                 struct moveHelper * to = 
2458                         (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2459                 initOrig_Dst(orig, to);
2460                 int filledblocks = 0;
2461                 INTPTR heaptopptr = 0;
2462                 bool finishcompact = false;
2463                 bool iscontinue = true;
2464                 bool localcompact = true;
2465                 while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
2466                         if((!finishcompact) && iscontinue) {
2467 #ifdef DEBUG
2468                                 BAMBOO_DEBUGPRINT(0xe001);
2469                                 BAMBOO_DEBUGPRINT_REG(numpbc);
2470                                 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2471 #endif
2472                                 finishcompact = compacthelper(orig, to, &filledblocks, 
2473                                                                           &heaptopptr, &localcompact);
2474 #ifdef DEBUG
2475                                 BAMBOO_DEBUGPRINT(0xe002);
2476                                 BAMBOO_DEBUGPRINT_REG(finishcompact);
2477                                 BAMBOO_DEBUGPRINT_REG(gctomove);
2478                                 BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
2479                                 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
2480                                 BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
2481 #endif
2482                         }
2483
2484                         if(gc_checkCoreStatus()) {
2485                                 // all cores have finished compacting
2486                                 // restore the gcstatus of all cores
2487                                 for(i = 0; i < NUMCORES; ++i) {
2488                                         gccorestatus[i] = 1;
2489                                 }
2490                                 break;
2491                         } else {
2492                                 // check if there are spare mem for pending move requires
2493                                 if(COMPACTPHASE == gcphase) {
2494 #ifdef DEBUG
2495                                         BAMBOO_DEBUGPRINT(0xe003);
2496 #endif
2497                                         resolvePendingMoveRequest();
2498 #ifdef DEBUG
2499                                         BAMBOO_DEBUGPRINT_REG(gctomove);
2500 #endif
2501                                 } else {
2502 #ifdef DEBUG
2503                                         BAMBOO_DEBUGPRINT(0xe004);
2504 #endif
2505                                         compact2Heaptop();
2506                                 }
2507                         } // if(gc_checkCoreStatus()) else ...
2508
2509                         if(gctomove) {
2510 #ifdef DEBUG
2511                                 BAMBOO_DEBUGPRINT(0xe005);
2512                                 BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
2513                                 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2514                                 BAMBOO_DEBUGPRINT_REG(gctomove);
2515 #endif
2516                                 to->ptr = gcmovestartaddr;
2517                                 to->numblocks = gcblock2fill - 1;
2518                                 to->bound = (to->numblocks==0)?
2519                                         BAMBOO_SMEM_SIZE_L:
2520                                         BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2521                                 BASEPTR(gcdstcore, to->numblocks, &(to->base));
2522                                 to->offset = to->ptr - to->base;
2523                                 to->top = (to->numblocks==0)?
2524                                         (to->offset):(to->bound-BAMBOO_SMEM_SIZE+to->offset);
2525                                 to->base = to->ptr;
2526                                 to->offset = BAMBOO_CACHE_LINE_SIZE;
2527                                 to->ptr += to->offset; // for header
2528                                 to->top += to->offset;
2529                                 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2530                                         localcompact = true;
2531                                 } else {
2532                                         localcompact = false;
2533                                 }
2534                                 gctomove = false;
2535                                 iscontinue = true;
2536                         } else if(!finishcompact) {
2537                                 // still pending
2538                                 iscontinue = false;
2539                         } // if(gctomove)
2540
2541                 } // while(COMPACTPHASE == gcphase) 
2542 #ifdef GC_DEBUG
2543                 tprintf("prepare to move large objs \n");
2544                 //dumpSMem();
2545 #endif
2546                 // move largeObjs
2547                 moveLObjs();
2548 #ifdef GC_DEBUG
2549                 tprintf("compact phase finished \n");
2550                 //dumpSMem();
2551 #endif
2552
2553                 gcphase = FLUSHPHASE;
2554                 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
2555                 for(i = 1; i < NUMCORES; ++i) {
2556                         // send start flush messages to all cores
2557                         gccorestatus[i] = 1;
2558                         send_msg_1(i, GCSTARTFLUSH);
2559                 }
2560
2561 #ifdef GC_DEBUG
2562                 tprintf("Start flush phase \n");
2563 #endif
2564                 // flush phase
2565                 flush(stackptr);
2566                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2567                 while(FLUSHPHASE == gcphase) {
2568                         // check the status of all cores
2569                         if(gc_checkCoreStatus()) {
2570                                 break;
2571                         }
2572                 } // while(FLUSHPHASE == gcphase)
2573                 gcphase = FINISHPHASE;
2574
2575                 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
2576                 for(i = 1; i < NUMCORES; ++i) {
2577                         // send gc finish messages to all cores
2578                         send_msg_1(i, GCFINISH);
2579                         gccorestatus[i] = 1;
2580                 }
2581 #ifdef GC_DEBUG
2582                 tprintf("gc finished \n");
2583                 //dumpSMem();
2584 #endif
2585         } else {
2586                 gcprocessing = true;
2587                 gc_collect(stackptr);
2588         }
2589
2590         // invalidate all shared mem pointers
2591         bamboo_cur_msp = NULL;
2592         bamboo_smem_size = 0;
2593
2594         gcflag = false;
2595         gcprocessing = false;
2596
2597 } // void gc(struct garbagelist * stackptr)
2598
2599 #endif