change
[IRC.git] / Robust / src / Runtime / multicoregarbage.c
1 #ifdef MULTICORE_GC
2 #include "runtime.h"
3 #include "multicoregarbage.h"
4 #include "multicoreruntime.h"
5 #include "runtime_arch.h"
6 #include "SimpleHash.h"
7 #include "GenericHashtable.h"
8 #include "ObjectHash.h"
9
10 extern int corenum;
11 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
12 extern int numqueues[][NUMCLASSES];
13
14 extern struct genhashtable * activetasks;
15 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
16 extern struct taskparamdescriptor *currtpd;
17
18 struct pointerblock {
19   void * ptrs[NUMPTRS];
20   struct pointerblock *next;
21 };
22
23 struct pointerblock *gchead=NULL;
24 int gcheadindex=0;
25 struct pointerblock *gctail=NULL;
26 int gctailindex=0;
27 struct pointerblock *gctail2=NULL;
28 int gctailindex2=0;
29 struct pointerblock *gcspare=NULL;
30
31 #define NUMLOBJPTRS 20
32
33 struct lobjpointerblock {
34   void * lobjs[NUMLOBJPTRS];
35         //void * dsts[NUMLOBJPTRS];
36         int lengths[NUMLOBJPTRS];
37         //void * origs[NUMLOBJPTRS];
38         int hosts[NUMLOBJPTRS];
39   struct lobjpointerblock *next;
40 };
41
42 struct lobjpointerblock *gclobjhead=NULL;
43 int gclobjheadindex=0;
44 struct lobjpointerblock *gclobjtail=NULL;
45 int gclobjtailindex=0;
46 struct lobjpointerblock *gclobjtail2=NULL;
47 int gclobjtailindex2=0;
48 struct lobjpointerblock *gclobjspare=NULL;
49
50 #ifdef GC_DEBUG
51 inline void dumpSMem() {
52         int block = 0;
53         int sblock = 0;
54         int j = 0;
55         int i = 0;
56         bool reverse = false;
57         int coren = 0;
58         int x = 0;
59         int y = 0;
60         tprintf("Dump shared mem: \n");
61         tprintf("++++ reserved sblocks ++++ \n");
62         for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
63                 tprintf("0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
64             *((int *)(i)), *((int *)(i + 4)), *((int *)(i + 4*2)), *((int *)(i + 4*3)), 
65                                                 *((int *)(i + 4*4)), *((int *)(i + 4*5)), *((int *)(i + 4*6)), *((int *)(i + 4*7)), 
66                                                 *((int *)(i + 4*8)), *((int *)(i + 4*9)), *((int *)(i + 4*10)), *((int *)(i + 4*11)),
67                                                 *((int *)(i + 4*12)), *((int *)(i + 4*13)), *((int *)(i + 4*14)), *((int *)(i + 4*15)));
68         }
69         sblock = gcreservedsb;
70         bool advanceblock = false;
71         for(i=gcbaseva;i<BAMBOO_BASE_VA+BAMBOO_SHARED_MEM_SIZE;i+=4*16){
72                 advanceblock = false;
73                 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
74                         if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
75                                 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
76                                         block++;
77                                         advanceblock = true;
78                                 }
79                         } else {
80                                 block++;
81                                 advanceblock = true;
82                         }
83                         if(advanceblock) {
84                                 if(block%NUMCORES == 0) {
85                                         reverse = !reverse;
86                                 } else {
87                                         if(((!reverse) && (y%2)) || (reverse && ((y%2)==0))) {
88                                                 if((x == 0) || ((x==1)&&(y==6))){
89                                                         if(reverse) {
90                                                                 if(coren > 48) {
91                                                                         coren -= bamboo_width - 1;
92                                                                 } else {
93                                                                         coren -= bamboo_width;
94                                                                 }
95                                                         } else {
96                                                                 if(coren > 47) {
97                                                                         coren += bamboo_width - 1;
98                                                                 } else {
99                                                                         coren += bamboo_width;
100                                                                 }
101                                                         }
102                                                 } else {
103                                                         coren--;
104                                                 }
105                                         } else {
106                                                 if(x==7) {
107                                                         if(reverse) {
108                                                                 if(coren > 48) {
109                                                                         coren -= bamboo_width - 1;
110                                                                 } else {
111                                                                         coren -= bamboo_width;
112                                                                 }
113                                                         } else {
114                                                                 if(coren > 47) {
115                                                                         coren += bamboo_width-1;
116                                                                 } else {
117                                                                         coren += bamboo_width;
118                                                                 }
119                                                         }
120                                                 } else {
121                                                         coren++;
122                                                 }
123                                         }
124                                 }
125                         }
126                         int tmpcore = coren;
127                         if(NUMCORES==62) {
128                                 if(tmpcore > 54) {
129                                         tmpcore+=2;
130                                 } else if(tmpcore > 47) {
131                                         tmpcore++;
132                                 }
133                         }
134                         x = tmpcore%bamboo_width;
135                         y = tmpcore/bamboo_width;
136                         tprintf("==== %d, %d : core (%d,%d), saddr %x====\n", block, sblock++, 
137                                             x, y, (sblock-1)*(BAMBOO_SMEM_SIZE)+BAMBOO_BASE_VA);
138                 }
139                 j++;
140     tprintf("0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
141             *((int *)(i)), *((int *)(i + 4)), *((int *)(i + 4*2)), *((int *)(i + 4*3)), 
142                                                 *((int *)(i + 4*4)), *((int *)(i + 4*5)), *((int *)(i + 4*6)), *((int *)(i + 4*7)), 
143                                                 *((int *)(i + 4*8)), *((int *)(i + 4*9)), *((int *)(i + 4*10)), *((int *)(i + 4*11)),
144                                                 *((int *)(i + 4*12)), *((int *)(i + 4*13)), *((int *)(i + 4*14)), *((int *)(i + 4*15)));
145         }
146         tprintf("\n");
147 }
148 #endif
149
150 // should be invoked with interruption closed
151 inline void gc_enqueue_I(void *ptr) {
152 #ifdef DEBUG
153         BAMBOO_DEBUGPRINT(0xe601);
154         BAMBOO_DEBUGPRINT_REG(ptr);
155 #endif
156   if (gcheadindex==NUMPTRS) {
157     struct pointerblock * tmp;
158     if (gcspare!=NULL) {
159       tmp=gcspare;
160       gcspare=NULL;
161     } else {
162       tmp=RUNMALLOC_I(sizeof(struct pointerblock));
163                 } // if (gcspare!=NULL)
164     gchead->next=tmp;
165     gchead=tmp;
166     gcheadindex=0;
167   } // if (gcheadindex==NUMPTRS)
168   gchead->ptrs[gcheadindex++]=ptr;
169 #ifdef DEBUG
170         BAMBOO_DEBUGPRINT(0xe602);
171 #endif
172 } // void gc_enqueue_I(void *ptr)
173
174 // dequeue and destroy the queue
175 inline void * gc_dequeue() {
176   if (gctailindex==NUMPTRS) {
177     struct pointerblock *tmp=gctail;
178     gctail=gctail->next;
179     gctailindex=0;
180     if (gcspare!=NULL) {
181       RUNFREE(tmp);
182                 } else {
183       gcspare=tmp;
184                 } // if (gcspare!=NULL)
185   } // if (gctailindex==NUMPTRS)
186   return gctail->ptrs[gctailindex++];
187 } // void * gc_dequeue()
188
189 // dequeue and do not destroy the queue
190 inline void * gc_dequeue2() {
191         if (gctailindex2==NUMPTRS) {
192     struct pointerblock *tmp=gctail2;
193     gctail2=gctail2->next;
194     gctailindex2=0;
195   } // if (gctailindex2==NUMPTRS)
196   return gctail2->ptrs[gctailindex2++];
197 } // void * gc_dequeue2() 
198
199 inline int gc_moreItems() {
200   if ((gchead==gctail)&&(gctailindex==gcheadindex))
201     return 0;
202   return 1;
203 } // int gc_moreItems() 
204
205 inline int gc_moreItems2() {
206   if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
207     return 0;
208   return 1;
209 } // int gc_moreItems2()
210
211 // should be invoked with interruption closed
212 // enqueue a large obj: start addr & length
213 inline void gc_lobjenqueue_I(void *ptr, 
214                                          int length, 
215                                                                                          int host) {
216 #ifdef DEBUG
217         BAMBOO_DEBUGPRINT(0xe901);
218 #endif
219   if (gclobjheadindex==NUMLOBJPTRS) {
220     struct lobjpointerblock * tmp;
221     if (gclobjspare!=NULL) {
222       tmp=gclobjspare;
223       gclobjspare=NULL;
224     } else {
225       tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
226                 } // if (gclobjspare!=NULL)
227     gclobjhead->next=tmp;
228     gclobjhead=tmp;
229     gclobjheadindex=0;
230   } // if (gclobjheadindex==NUMLOBJPTRS)
231   gclobjhead->lobjs[gclobjheadindex]=ptr;
232         gclobjhead->lengths[gclobjheadindex]=length;
233         gclobjhead->hosts[gclobjheadindex++]=host;
234 #ifdef DEBUG
235         BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
236         BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
237         BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
238 #endif
239 } // void gc_lobjenqueue_I(void *ptr...)
240
241 // dequeue and destroy the queue
242 inline void * gc_lobjdequeue(int * length,
243                                          int * host) {
244   if (gclobjtailindex==NUMLOBJPTRS) {
245     struct lobjpointerblock *tmp=gclobjtail;
246     gclobjtail=gclobjtail->next;
247     gclobjtailindex=0;
248     if (gclobjspare!=NULL) {
249       RUNFREE(tmp);
250                 } else {
251       gclobjspare=tmp;
252                 } // if (gclobjspare!=NULL)
253   } // if (gclobjtailindex==NUMLOBJPTRS)
254         if(length != NULL) {
255                 *length = gclobjtail->lengths[gclobjtailindex];
256         }
257         if(host != NULL) {
258                 *host = (int)(gclobjtail->hosts[gclobjtailindex]);
259         }
260   return gclobjtail->lobjs[gclobjtailindex++];
261 } // void * gc_lobjdequeue()
262
263 inline int gc_lobjmoreItems() {
264   if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
265     return 0;
266   return 1;
267 } // int gc_lobjmoreItems()
268
269 // dequeue and don't destroy the queue
270 inline void gc_lobjdequeue2() {
271   if (gclobjtailindex2==NUMLOBJPTRS) {
272     gclobjtail2=gclobjtail2->next;
273     gclobjtailindex2=1;
274   } else {
275                 gclobjtailindex2++;
276         }// if (gclobjtailindex2==NUMLOBJPTRS)
277 } // void * gc_lobjdequeue2()
278
279 inline int gc_lobjmoreItems2() {
280   if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
281     return 0;
282   return 1;
283 } // int gc_lobjmoreItems2()
284
285 INTPTR gccurr_heapbound = 0;
286
287 inline void gettype_size(void * ptr, 
288                                      int * ttype, 
289                                                                          int * tsize) {
290         int type = ((int *)ptr)[0];
291         int size = 0;
292         if(type < NUMCLASSES) {
293                 // a normal object
294                 size = classsize[type];
295         } else {        
296                 // an array 
297                 struct ArrayObject *ao=(struct ArrayObject *)ptr;
298                 int elementsize=classsize[type];
299                 int length=ao->___length___; 
300                 size=sizeof(struct ArrayObject)+length*elementsize;
301         } // if(type < NUMCLASSES)
302         *ttype = type;
303         *tsize = size;
304 }
305
306 inline bool isLarge(void * ptr, 
307                                 int * ttype, 
308                                                                                 int * tsize) {
309 #ifdef DEBUG
310                 BAMBOO_DEBUGPRINT(0xe701);
311                 BAMBOO_DEBUGPRINT_REG(ptr);
312 #endif
313         // check if a pointer is referring to a large object
314         gettype_size(ptr, ttype, tsize);
315 #ifdef DEBUG
316         BAMBOO_DEBUGPRINT(*tsize);
317 #endif
318         int bound = (BAMBOO_SMEM_SIZE);
319         if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
320                 bound = (BAMBOO_SMEM_SIZE_L);
321         }
322         if((((int)ptr-gcbaseva)%(bound))==0) {
323                 // ptr is a start of a block
324 #ifdef DEBUG
325                 BAMBOO_DEBUGPRINT(0xe702);
326                 BAMBOO_DEBUGPRINT(1);
327 #endif
328                 return true;
329         }
330         if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
331                 // it acrosses the boundary of current block
332 #ifdef DEBUG
333                 BAMBOO_DEBUGPRINT(0xe703);
334                 BAMBOO_DEBUGPRINT(1);
335 #endif
336                 return true;
337         }
338 #ifdef DEBUG
339                 BAMBOO_DEBUGPRINT(0);
340 #endif
341         return false;
342 } // bool isLarge(void * ptr, int * ttype, int * tsize)
343
344 inline int hostcore(void * ptr) {
345         // check the host core of ptr
346         int host = 0;
347         int x = 0;
348         int y = 0;
349         RESIDECORE(ptr, &x, &y);
350         host = (x==0)?(x*bamboo_height+y):(x*bamboo_height+y-2);
351         return host;
352 } // int hostcore(void * ptr)
353
354 inline bool isLocal(void * ptr) {
355         // check if a pointer is in shared heap on this core
356         return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
357 } // bool isLocal(void * ptr)
358
359 inline bool gc_checkCoreStatus() {
360         bool allStall = true;
361         for(int i = 0; i < NUMCORES; ++i) {
362                 if(gccorestatus[i] != 0) {
363                         allStall = false;
364                         break;
365                 } // if(gccorestatus[i] != 0)
366         } // for(i = 0; i < NUMCORES; ++i)
367         return allStall;
368 }
369
370 inline void checkMarkStatue() {
371 #ifdef DEBUG
372         BAMBOO_DEBUGPRINT(0xee01);
373 #endif
374         int i;
375         if((!waitconfirm) || 
376                         (waitconfirm && (numconfirm == 0))) {
377 #ifdef DEBUG
378                 BAMBOO_DEBUGPRINT(0xee02);
379 #endif
380                 BAMBOO_START_CRITICAL_SECTION_STATUS();  
381                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
382                 gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
383                 gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
384                 // check the status of all cores
385                 bool allStall = gc_checkCoreStatus();
386 #ifdef DEBUG
387                 BAMBOO_DEBUGPRINT(0xee03);
388 #endif
389                 if(allStall) {
390 #ifdef DEBUG
391                         BAMBOO_DEBUGPRINT(0xee04);
392 #endif
393                         // check if the sum of send objs and receive obj are the same
394                         // yes->check if the info is the latest; no->go on executing
395                         int sumsendobj = 0;
396                         for(i = 0; i < NUMCORES; ++i) {
397                                 sumsendobj += gcnumsendobjs[i];
398                         } // for(i = 0; i < NUMCORES; ++i) 
399 #ifdef DEBUG
400                         BAMBOO_DEBUGPRINT(0xee05);
401                         BAMBOO_DEBUGPRINT_REG(sumsendobj);
402 #endif
403                         for(i = 0; i < NUMCORES; ++i) {
404                                 sumsendobj -= gcnumreceiveobjs[i];
405                         } // for(i = 0; i < NUMCORES; ++i) 
406 #ifdef DEBUG
407                         BAMBOO_DEBUGPRINT(0xee06);
408                         BAMBOO_DEBUGPRINT_REG(sumsendobj);
409 #endif
410                         if(0 == sumsendobj) {
411 #ifdef DEBUG
412                                 BAMBOO_DEBUGPRINT(0xee07);
413 #endif
414                                 if(!waitconfirm) {
415 #ifdef DEBUG
416                                         BAMBOO_DEBUGPRINT(0xee08);
417 #endif
418                                         // the first time found all cores stall
419                                         // send out status confirm msg to all other cores
420                                         // reset the corestatus array too
421                                         gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
422                                         waitconfirm = true;
423                                         numconfirm = NUMCORES - 1;
424                                         for(i = 1; i < NUMCORES; ++i) { 
425                                                 gccorestatus[i] = 1;
426                                                 // send mark phase finish confirm request msg to core i
427                                                 send_msg_1(i, GCMARKCONFIRM);
428                                         } // for(i = 1; i < NUMCORES; ++i) 
429                                 } else {
430 #ifdef DEBUG
431                                         BAMBOO_DEBUGPRINT(0xee09);
432 #endif
433                                         // all the core status info are the latest
434                                         // stop mark phase
435                                         gcphase = COMPACTPHASE;
436                                         // restore the gcstatus for all cores
437                                         for(i = 0; i < NUMCORES; ++i) {
438                                                 gccorestatus[i] = 1;
439                                         } // for(i = 0; i < NUMCORES; ++i)
440                                 } // if(!gcwautconfirm) else()
441                         } // if(0 == sumsendobj)
442                 } // if(allStall)
443                 BAMBOO_CLOSE_CRITICAL_SECTION_STATUS();
444         } // if((!waitconfirm)...
445 #ifdef DEBUG
446         BAMBOO_DEBUGPRINT(0xee0a);
447 #endif
448 } // void checkMarkStatue()
449
450 inline bool preGC() {
451         // preparation for gc
452         // make sure to clear all incoming msgs espacially transfer obj msgs
453 #ifdef DEBUG
454         BAMBOO_DEBUGPRINT(0xec01);
455 #endif
456         int i;
457         if((!waitconfirm) || 
458                                                   (waitconfirm && (numconfirm == 0))) {
459                 // send out status confirm msgs to all cores to check if there are
460                 // transfer obj msgs on-the-fly
461                 waitconfirm = true;
462                 numconfirm = NUMCORES - 1;
463                 for(i = 1; i < NUMCORES; ++i) { 
464                         corestatus[i] = 1;
465                         // send status confirm msg to core i
466                         send_msg_1(i, STATUSCONFIRM);
467                 } // for(i = 1; i < NUMCORES; ++i)
468
469 #ifdef DEBUG
470                 BAMBOO_DEBUGPRINT(0xec02);
471 #endif
472                 while(true) {
473                         if(numconfirm == 0) {
474                                 break;
475                         }
476                 } // wait for confirmations
477                 waitconfirm = false;
478                 numconfirm = 0;
479 #ifdef DEBUG
480                 BAMBOO_DEBUGPRINT(0xec03);
481 #endif
482                 numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
483                 numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
484                 int sumsendobj = 0;
485 #ifdef DEBUG
486                 BAMBOO_DEBUGPRINT(0xec04);
487 #endif
488                 for(i = 0; i < NUMCORES; ++i) {
489                         sumsendobj += numsendobjs[i];
490 #ifdef DEBUG
491                         BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]);
492 #endif
493                 } // for(i = 1; i < NUMCORES; ++i)
494 #ifdef DEBUG
495         BAMBOO_DEBUGPRINT(0xec05);
496         BAMBOO_DEBUGPRINT_REG(sumsendobj);
497 #endif
498                 for(i = 0; i < NUMCORES; ++i) {
499                         sumsendobj -= numreceiveobjs[i];
500 #ifdef DEBUG
501                         BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]);
502 #endif
503                 } // for(i = 1; i < NUMCORES; ++i)
504 #ifdef DEBUG
505                 BAMBOO_DEBUGPRINT(0xec06);
506                 BAMBOO_DEBUGPRINT_REG(sumsendobj);
507 #endif
508                 if(0 == sumsendobj) {
509                         return true;
510                 } else {
511                         // still have some transfer obj msgs on-the-fly, can not start gc
512                         return false;
513                 } // if(0 == sumsendobj) 
514         } else {
515 #ifdef DEBUG
516                 BAMBOO_DEBUGPRINT(0xec07);
517 #endif
518                 // previously asked for status confirmation and do not have all the 
519                 // confirmations yet, can not start gc
520                 return false;
521         } // if((!waitconfirm) || 
522 } // bool preGC()
523
524 inline void initGC() {
525         int i;
526         if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
527                 for(i = 0; i < NUMCORES; ++i) {
528                         gccorestatus[i] = 1;
529                         gcnumsendobjs[i] = 0; 
530                         gcnumreceiveobjs[i] = 0;
531                         gcloads[i] = 0;
532                         gcrequiredmems[i] = 0;
533                         gcfilledblocks[i] = 0;
534                         gcstopblock[i] = 0;
535                 } // for(i = 0; i < NUMCORES; ++i)
536                 gcheaptop = 0;
537                 gctopcore = 0;
538                 gcheapdirection = 1;
539         }
540         gcself_numsendobjs = 0;
541         gcself_numreceiveobjs = 0;
542         gcmarkedptrbound = 0;
543         gcobj2map = 0;
544         gcmappedobj = 0;
545         gcismapped = false;
546         gcnumlobjs = 0;
547         gcmovestartaddr = 0;
548         gctomove = false;
549         gcblock2fill = 0;
550         gcmovepending = 0;
551         gccurr_heaptop = 0;
552         gcdstcore = 0;
553
554         // initialize queue
555         if (gchead==NULL) {
556                 gcheadindex=gctailindex=gctailindex2 = 0;
557                 gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
558         } else {
559                 gctailindex = gctailindex2 = gcheadindex;
560                 gctail = gctail2 = gchead;
561         }
562
563         // initialize the large obj queues
564         if (gclobjhead==NULL) {
565                 gclobjheadindex=0;
566                 gclobjtailindex=0;
567                 gclobjtailindex2 = 0;
568                 gclobjhead=gclobjtail=gclobjtail2=
569                         RUNMALLOC(sizeof(struct lobjpointerblock));
570         } else {
571                 gclobjtailindex = gclobjtailindex2 = gclobjheadindex;
572                 gclobjtail = gclobjtail2 = gclobjhead;
573         }
574
575         freeRuntimeHash(gcpointertbl);
576         gcpointertbl = allocateRuntimeHash(20);
577 } // void initGC()
578
579 // compute load balance for all cores
580 inline int loadbalance() {
581         // compute load balance
582         int i;
583
584         // get the total loads
585         int tloads = gcloads[STARTUPCORE];
586         for(i = 1; i < NUMCORES; i++) {
587                 tloads += gcloads[i];
588         }
589         int heaptop = gcbaseva + tloads;
590 #ifdef DEBUG
591         BAMBOO_DEBUGPRINT(0xdddd);
592         BAMBOO_DEBUGPRINT_REG(tloads);
593         BAMBOO_DEBUGPRINT_REG(heaptop);
594 #endif
595         int b = 0;
596         BLOCKINDEX(heaptop, &b);
597         int numbpc = b / NUMCORES; // num of blocks per core
598 #ifdef DEBUG
599         BAMBOO_DEBUGPRINT_REG(b);
600         BAMBOO_DEBUGPRINT_REG(numbpc);
601 #endif
602         gcheapdirection = (numbpc%2 == 0);
603         int x = 0;
604         int y = 0;
605         RESIDECORE(heaptop, &x, &y);
606         gctopcore = (x == 0 ? y : x * bamboo_height + y - 2);
607 #ifdef DEBUG
608         BAMBOO_DEBUGPRINT_REG(x);
609         BAMBOO_DEBUGPRINT_REG(y);
610         BAMBOO_DEBUGPRINT_REG(gctopcore);
611 #endif
612         return numbpc;
613 } // void loadbalance()
614
615 inline bool cacheLObjs() {
616         // check the total mem size need for large objs
617         int sumsize = 0;
618         int size = 0;
619 #ifdef GC_DEBUG
620         BAMBOO_DEBUGPRINT(0xe801);
621 #endif
622         gclobjtail2 = gclobjtail;
623         gclobjtailindex2 = gclobjtailindex;
624         while(gc_lobjmoreItems2()){
625                 gc_lobjdequeue2();
626                 size = gclobjtail2->lengths[gclobjtailindex2 - 1];
627                 sumsize += size;
628 #ifdef GC_DEBUG
629                 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
630                 BAMBOO_DEBUGPRINT_REG(size);
631                 BAMBOO_DEBUGPRINT_REG(sumsize);
632 #endif
633         } // while(gc_lobjmoreItems2())
634
635         // check if there are enough space to cache these large objs
636         INTPTR dst = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE) - sumsize;
637         if(gcheaptop > dst) {
638                 // do not have enough room to cache large objs
639                 return false;
640         }
641 #ifdef GC_DEBUG
642         BAMBOO_DEBUGPRINT(0xe802);
643         BAMBOO_DEBUGPRINT_REG(dst);
644 #endif
645
646         gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
647         // cache the largeObjs to the top of the shared heap
648         gclobjtail2 = gclobjtail;
649         gclobjtailindex2 = gclobjtailindex;
650         while(gc_lobjmoreItems2()) {
651                 gc_lobjdequeue2();
652                 size = gclobjtail2->lengths[gclobjtailindex2 - 1];
653                 // set the mark field to 2, indicating that this obj has been moved and need to be flushed
654                 ((int *)(gclobjtail2->lobjs[gclobjtailindex2-1]))[6] = 2;
655                 // TODO
656                 BAMBOO_DEBUGPRINT(0xdcdc);
657                 memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2 - 1], size);
658                 dst += size;
659 #ifdef GC_DEBUG
660                 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
661                 BAMBOO_DEBUGPRINT(dst-size);
662                 BAMBOO_DEBUGPRINT_REG(size);
663 #endif
664         }
665         return true;
666 } // void cacheLObjs()
667
668 inline struct freeMemItem * updateFreeMemList(int localtop,
669                                                           int localsize,
670                                                                                                                                         struct freeMemItem * listtop,
671                                                                                                                                                                                         int * returntop) {
672         struct freeMemItem * tochange = listtop;
673         struct freeMemItem * tmp = bamboo_free_mem_list->head;
674         bool extendflag = false;
675         struct freeMemItem * ex_tmp = NULL;
676         // check if there is a hole in the block below it
677         while(true) {
678                 if(tmp->ptr<localtop) {
679                         if((tmp->ptr+tmp->size) == localtop) {
680                                 // extend the hole up to includ this block
681                                 tmp->size += localsize;
682                                 extendflag = true;
683                                 *returntop = tmp->ptr;
684                                 break;
685                         } // if((tmp->ptr+tmp->size) == localtop)
686                 } else {
687                         break;
688                 } // if(tmp->ptr<gcloads[i]) else ...
689                 if(tmp == tochange) {
690                         break;
691                 } else {
692                         tmp = tmp->next;
693                 } // if(tmp == tochange)
694         } // while(true)
695         if((extendflag) && (tmp != tochange)) {
696                 ex_tmp = tmp;
697                 tmp = tmp->next;
698         } // if(tmp->ptr<gcloads[i])
699         if(tmp != tochange) {
700                 while(true) {
701                         if((localtop+localsize) == tmp->ptr) {
702                                 // extend the hole below to include this block
703                                 extendflag = true;
704                                 if(ex_tmp == NULL) {
705                                         tmp->ptr = localtop;
706                                         tmp->size += localsize;
707                                         *returntop = localtop;
708                                 } else {
709                                         ex_tmp->size += tmp->size;
710                                         tmp->ptr = tmp->next->ptr;
711                                         tmp->size = tmp->next->size;
712                                         if(tmp->next == tochange) {
713                                                 tochange = tmp;
714                                         }
715                                         ex_tmp = tmp->next;
716                                         tmp->next = tmp->next->next;
717                                         RUNFREE(ex_tmp);
718                                         ex_tmp = NULL;
719                                 }
720                                 break;
721                         }
722                         if(tmp == tochange) {
723                                 break;
724                         } else {
725                                 tmp = tmp->next;
726                         } // if(tmp == tochange)
727                 } // while(true)
728         } // if(tmp != tochange)
729         if((!extendflag) && (tmp == tochange)) {
730                 // add a new item for this block hole
731                 if(tochange->next == NULL) {
732                         tochange->next = 
733                                 (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
734                 }
735                 tochange = tochange->next;
736                 tochange->ptr = localtop;
737                 tochange->size = localsize;
738                 *returntop = localtop;
739         } // if((!extendflag) && (tmp == tochange))
740         return tochange;
741 } // void updateFreeMemList(int, int, struct freeMemItem *, int *)
742
743 inline void moveLObjs() {
744 #ifdef DEBUG
745         BAMBOO_DEBUGPRINT(0xea01);
746 #endif
747         int remain = 0;
748         int bound = BAMBOO_SMEM_SIZE_L;
749         struct freeMemItem * tochange = bamboo_free_mem_list->head;
750         if(tochange == NULL) {
751                 bamboo_free_mem_list->head = tochange = 
752                         (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
753         }
754         // find current heap top
755         // flush all gcloads to indicate the real heap top on one core
756         // previous it represents the next available ptr on a core
757         if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L))) 
758                         && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
759                 // edge of a block, check if this is exactly the heaptop
760                 BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
761                 gcloads[0]+=(gcfilledblocks[0]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
762         } else {
763                 // in the middle of a block, flush the remaining space in this block
764                 // and update it into the free mem list
765                 if(gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L))) {
766                         bound = BAMBOO_SMEM_SIZE;
767                 }
768                 remain = bound - gcloads[0]%bound;
769                 tochange->ptr = gcloads[0];
770                 tochange->size = remain;
771                 // zero out all these spare memory
772                 memset(tochange->ptr, '\0', tochange->size);
773         }
774         int tmpheaptop = gcloads[0];
775 #ifdef DEBUG
776         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
777 #endif
778         for(int i = 1; i < NUMCORES; i++) {
779                 int tmptop = 0;
780                 if((gcfilledblocks[i] > 0) 
781                                 && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
782                         // edge of a block, check if this is exactly the heaptop
783                         BASEPTR(0, gcfilledblocks[i]-1, &gcloads[i]);
784                         gcloads[i]
785                                 +=(gcfilledblocks[i]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
786                         tmptop = gcloads[i];
787                 } else {
788                         // in the middle of a block, flush the remaining space in this block
789                         // and update it into the free mem list
790                         if(gcfilledblocks[i] > 0) {
791                                 bound = BAMBOO_SMEM_SIZE;
792                         } else {
793                                 bound = BAMBOO_SMEM_SIZE_L;
794                         }
795                         remain = bound - gcloads[i]%bound;
796                         // zero out all these spare memory
797                         memset(gcloads[i], '\0', remain);
798                         // update free mem list
799                         tochange = updateFreeMemList(gcloads[i], remain, tochange, &tmptop);
800                 } // if((gcfilledblocks[i] > 0)
801
802                 if(tmpheaptop < tmptop) {
803                         tmpheaptop = tmptop;
804                 }
805 #ifdef DEBUG
806                 BAMBOO_DEBUGPRINT_REG(gcloads[i]);
807                 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
808 #endif
809         }
810         tochange->ptr = tmpheaptop;
811         tochange->size = gcheaptop - tmpheaptop;
812         // zero out all these spare memory
813         memset(tochange->ptr, '\0', tochange->size);
814         if(bamboo_free_mem_list->tail != tochange) {
815                 bamboo_free_mem_list->tail = tochange;
816         }
817         while(tochange->next != NULL) {
818                 struct freeMemItem * toremove = tochange->next;
819                 tochange->next = toremove->next;
820                 RUNFREE(toremove);
821         }
822         // move large objs from gcheaptop to tmpheaptop
823         // write the header first
824         int tomove = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE) - gcheaptop;
825 #ifdef DEBUG
826         BAMBOO_DEBUGPRINT(0xea02);
827         BAMBOO_DEBUGPRINT_REG(tomove);
828         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
829         BAMBOO_DEBUGPRINT_REG(gcheaptop);
830 #endif
831         // flush the sbstartbl
832         memset(&(gcsbstarttbl[gcreservedsb]), '\0', 
833                            BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE*sizeof(INTPTR));
834         if(tomove == 0) {
835                 gcheaptop = tmpheaptop;
836                 return;
837         }
838         // check how many blocks it acrosses
839         remain = tmpheaptop-gcbaseva;
840         int b = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb;
841         // check the remaining space in this block
842         bound = (BAMBOO_SMEM_SIZE);
843         if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
844                 bound = (BAMBOO_SMEM_SIZE_L);
845         }
846         remain = bound - remain%bound;
847
848 #ifdef DEBUG
849         BAMBOO_DEBUGPRINT(0xea03);
850 #endif
851         int size = 0;
852         int isize = 0;
853         int host = 0;
854         int ptr = 0;
855         int base = tmpheaptop;
856         int cpysize = 0;
857         remain -= BAMBOO_CACHE_LINE_SIZE;
858         tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
859         while(gc_lobjmoreItems()) {
860                 ptr = (int)(gc_lobjdequeue(&size, &host));
861                 ALIGNSIZE(size, &isize);
862                 if(remain < isize) {
863                         // this object acrosses blocks
864                         if(cpysize > 0) {
865                                 // close current block, fill its header
866                                 memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
867                                 *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
868                                 cpysize = 0;
869                                 base = tmpheaptop;
870                                 if(remain == 0) {
871                                         remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ? 
872                                                        BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
873                                 } 
874                                 remain -= BAMBOO_CACHE_LINE_SIZE;
875                                 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
876                         } // if(cpysize > 0)
877
878                         // move the large obj
879                         memcpy(tmpheaptop, gcheaptop, size);
880                         // fill the remaining space with -2 padding
881                         memset(tmpheaptop+size, -2, isize-size);
882                         // zero out original mem caching the lobj
883                         memset(gcheaptop, '\0', size);
884 #ifdef DEBUG
885                         BAMBOO_DEBUGPRINT(0xea04);
886                         BAMBOO_DEBUGPRINT_REG(gcheaptop);
887                         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
888                         BAMBOO_DEBUGPRINT_REG(size);
889                         BAMBOO_DEBUGPRINT_REG(isize);
890 #endif
891                         gcheaptop += size;
892                         if(host == BAMBOO_NUM_OF_CORE) {
893                                 BAMBOO_START_CRITICAL_SECTION();
894                                 RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
895                                 BAMBOO_CLOSE_CRITICAL_SECTION();
896 #ifdef DEBUG
897                                 BAMBOO_DEBUGPRINT(0xcdca);
898                                 BAMBOO_DEBUGPRINT_REG(ptr);
899                                 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
900 #endif
901                         } else {
902                                 // send the original host core with the mapping info
903                                 send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
904 #ifdef DEBUG
905                                 BAMBOO_DEBUGPRINT(0xcdcb);
906                                 BAMBOO_DEBUGPRINT_REG(ptr);
907                                 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
908 #endif
909                         } // if(host == BAMBOO_NUM_OF_CORE) else ...
910                         tmpheaptop += isize;
911
912                         // set the gcsbstarttbl
913                         int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
914                         for(int k = 1; k < tmpsbs; k++) {
915                                 gcsbstarttbl[b+k] = (INTPTR)(-1);
916                         }
917                         b += tmpsbs;
918                         if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
919                                 gcsbstarttbl[b] = (INTPTR)(-1);
920                                 remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ? 
921                                                      BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
922                         } else {
923                                 gcsbstarttbl[b] = (INTPTR)(tmpheaptop);
924                                 remain = tmpheaptop-gcbaseva;
925                                 int bound = remain<(BAMBOO_LARGE_SMEM_BOUND)?(BAMBOO_SMEM_SIZE_L):(BAMBOO_SMEM_SIZE);
926                                 remain = bound - remain%bound;
927                         } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
928
929                         // close current block and fill the header
930                         memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
931                         *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
932                         cpysize = 0;
933                         base = tmpheaptop;
934                         remain -= BAMBOO_CACHE_LINE_SIZE;
935                         tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
936                 } else {
937                         remain -= isize;
938                         // move the large obj
939                         memcpy(tmpheaptop, gcheaptop, size);
940                         // fill the remaining space with -2 padding
941                         memset(tmpheaptop+size, -2, isize-size);
942 #ifdef DEBUG
943                         BAMBOO_DEBUGPRINT(0xea05);
944                         BAMBOO_DEBUGPRINT_REG(gcheaptop);
945                         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
946                         BAMBOO_DEBUGPRINT_REG(size);
947                         BAMBOO_DEBUGPRINT_REG(isize);
948 #endif
949                         gcheaptop += size;
950                         cpysize += isize;
951                         if(host == BAMBOO_NUM_OF_CORE) {
952                                 BAMBOO_START_CRITICAL_SECTION();
953                                 RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
954                                 BAMBOO_CLOSE_CRITICAL_SECTION();
955 #ifdef DEBUG
956                                 BAMBOO_DEBUGPRINT(0xcdcc);
957                                 BAMBOO_DEBUGPRINT_REG(ptr);
958                                 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
959 #endif
960                         } else {
961                                 // send the original host core with the mapping info
962                                 send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
963 #ifdef DEBUG
964                                 BAMBOO_DEBUGPRINT(0xcdcd);
965                                 BAMBOO_DEBUGPRINT_REG(ptr);
966                                 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
967 #endif
968                         } // if(host == BAMBOO_NUM_OF_CORE) else ...
969                         tmpheaptop += isize;
970                 } // if(remain < isize) else ...
971         } // while(gc_lobjmoreItems())
972         if(cpysize > 0) {
973                 // close current block, fill the header
974                 memset(base, '\0', BAMBOO_CACHE_LINE_SIZE);
975                 *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
976         } else {
977                 tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
978         }
979         gcheaptop = tmpheaptop;
980         // update the free mem list
981         tochange->size = (BAMBOO_BASE_VA)+(BAMBOO_SHARED_MEM_SIZE)-gcheaptop;
982         tochange->ptr = gcheaptop;
983 #ifdef DEBUG
984         BAMBOO_DEBUGPRINT(0xea06);
985         BAMBOO_DEBUGPRINT_REG(gcheaptop);
986 #endif
987 } // void moveLObjs()
988
989 /*inline void updateFreeMemList() {
990         struct freeMemItem * tochange = bamboo_free_mem_list->head;
991         if(tochange == NULL) {
992                 bamboo_free_mem_list->head = tochange = 
993                         (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
994         }
995         // handle the top of the heap
996         tochange->ptr = gcheaptop;
997         tochange->size = BAMBOO_SHARED_MEM_SIZE + BAMBOO_BASE_VA - gcheaptop;
998         // zero out all these spare memory
999         memset(tochange->ptr, '\0', tochange->size);
1000         if(bamboo_free_mem_list->tail != tochange) {
1001                 bamboo_free_mem_list->tail = tochange;
1002                 if(bamboo_free_mem_list->tail != NULL) {
1003                         RUNFREE(bamboo_free_mem_list->tail);
1004                 }
1005         }
1006 } // void updateFreeMemList()
1007 */
1008
1009 // enqueue root objs
1010 inline void tomark(struct garbagelist * stackptr) {
1011         if(MARKPHASE != gcphase) {
1012 #ifdef DEBUG
1013                 BAMBOO_DEBUGPRINT_REG(gcphase);
1014 #endif
1015                 BAMBOO_EXIT(0xb101);
1016         }
1017         gcbusystatus = true;
1018         gcnumlobjs = 0;
1019         
1020         int i,j;
1021         // enqueue current stack 
1022         while(stackptr!=NULL) {
1023 #ifdef DEBUG
1024                 BAMBOO_DEBUGPRINT(0xe501);
1025                 BAMBOO_DEBUGPRINT_REG(stackptr->size);
1026                 BAMBOO_DEBUGPRINT_REG(stackptr->next);
1027                 BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
1028 #endif
1029                 for(i=0; i<stackptr->size; i++) {
1030                         if(stackptr->array[i] != NULL) {
1031                                 BAMBOO_START_CRITICAL_SECTION();
1032                                 gc_enqueue_I(stackptr->array[i]);
1033                                 BAMBOO_CLOSE_CRITICAL_SECTION();
1034                         }
1035                 }
1036                 stackptr=stackptr->next;
1037         }
1038
1039 #ifdef DEBUG
1040         BAMBOO_DEBUGPRINT(0xe503);
1041 #endif
1042         // enqueue objectsets
1043         for(i=0; i<NUMCLASSES; i++) {
1044                 struct parameterwrapper ** queues = 
1045                         objectqueues[BAMBOO_NUM_OF_CORE][i];
1046                 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
1047                 for(j = 0; j < length; ++j) {
1048                         struct parameterwrapper * parameter = queues[j];
1049                         struct ObjectHash * set=parameter->objectset;
1050                         struct ObjectNode * ptr=set->listhead;
1051                         while(ptr!=NULL) {
1052                                 BAMBOO_START_CRITICAL_SECTION();
1053                                 gc_enqueue_I((void *)ptr->key);
1054                                 BAMBOO_CLOSE_CRITICAL_SECTION();
1055                                 ptr=ptr->lnext;
1056                         }
1057                 }
1058         }
1059
1060         // euqueue current task descriptor
1061         if(currtpd != NULL) {
1062 #ifdef DEBUG
1063                 BAMBOO_DEBUGPRINT(0xe504);
1064 #endif
1065                 for(i=0; i<currtpd->numParameters; i++) {
1066                         BAMBOO_START_CRITICAL_SECTION();
1067                         gc_enqueue_I(currtpd->parameterArray[i]);
1068                         BAMBOO_CLOSE_CRITICAL_SECTION();
1069                 }
1070         }
1071
1072 #ifdef DEBUG
1073         BAMBOO_DEBUGPRINT(0xe505);
1074 #endif
1075         // euqueue active tasks
1076         struct genpointerlist * ptr=activetasks->list;
1077         while(ptr!=NULL) {
1078                 struct taskparamdescriptor *tpd=ptr->src;
1079                 int i;
1080                 for(i=0; i<tpd->numParameters; i++) {
1081                         BAMBOO_START_CRITICAL_SECTION();
1082                         gc_enqueue_I(tpd->parameterArray[i]);
1083                         BAMBOO_CLOSE_CRITICAL_SECTION();
1084                 }
1085                 ptr=ptr->inext;
1086         }
1087
1088 #ifdef DEBUG
1089         BAMBOO_DEBUGPRINT(0xe506);
1090 #endif
1091         // enqueue cached transferred obj
1092         struct QueueItem * tmpobjptr =  getHead(&objqueue);
1093         while(tmpobjptr != NULL) {
1094                 struct transObjInfo * objInfo = 
1095                         (struct transObjInfo *)(tmpobjptr->objectptr); 
1096                 BAMBOO_START_CRITICAL_SECTION();
1097                 gc_enqueue_I(objInfo->objptr);
1098                 BAMBOO_CLOSE_CRITICAL_SECTION();
1099                 tmpobjptr = getNextQueueItem(tmpobjptr);
1100         }
1101
1102 #ifdef DEBUG
1103         BAMBOO_DEBUGPRINT(0xe507);
1104 #endif
1105         // enqueue cached objs to be transferred
1106         struct QueueItem * item = getHead(totransobjqueue);
1107         while(item != NULL) {
1108                 struct transObjInfo * totransobj = 
1109                         (struct transObjInfo *)(item->objectptr);
1110                 BAMBOO_START_CRITICAL_SECTION();
1111                 gc_enqueue_I(totransobj->objptr);
1112                 BAMBOO_CLOSE_CRITICAL_SECTION();
1113                 item = getNextQueueItem(item);
1114         } // while(item != NULL)
1115 } // void tomark(struct garbagelist * stackptr)
1116
1117 inline void markObj(void * objptr) {
1118         if(objptr == NULL) {
1119                 return;
1120         }
1121         if(ISSHAREDOBJ(objptr)) {
1122                 int host = hostcore(objptr);
1123                 if(BAMBOO_NUM_OF_CORE == host) {
1124                         // on this core
1125                         BAMBOO_START_CRITICAL_SECTION();
1126                         gc_enqueue_I(objptr);  
1127                         BAMBOO_CLOSE_CRITICAL_SECTION();
1128                 } else {
1129 #ifdef DEBUG
1130                         BAMBOO_DEBUGPRINT(0xbbbb);
1131                         BAMBOO_DEBUGPRINT_REG(host);
1132                         BAMBOO_DEBUGPRINT_REG(objptr);
1133 #endif
1134                         // send a msg to host informing that objptr is active
1135                         send_msg_2(host, GCMARKEDOBJ, objptr);
1136                         gcself_numsendobjs++;
1137                 }
1138         } else {
1139                 BAMBOO_START_CRITICAL_SECTION();
1140                 gc_enqueue_I(objptr);
1141                 BAMBOO_CLOSE_CRITICAL_SECTION();
1142         } // if(ISSHAREDOBJ(objptr))
1143 } // void markObj(void * objptr) 
1144
1145 inline void mark(bool isfirst, 
1146                              struct garbagelist * stackptr) {
1147 #ifdef DEBUG
1148         BAMBOO_DEBUGPRINT(0xed01);
1149 #endif
1150         if(isfirst) {
1151 #ifdef DEBUG
1152                 BAMBOO_DEBUGPRINT(0xed02);
1153 #endif
1154                 // enqueue root objs
1155                 tomark(stackptr);
1156                 gccurr_heaptop = 0; // record the size of all active objs in this core
1157                                   // aligned but does not consider block boundaries
1158                 gcmarkedptrbound = 0;
1159         }
1160 #ifdef DEBUG
1161         BAMBOO_DEBUGPRINT(0xed03);
1162 #endif
1163         int isize = 0;
1164         bool checkfield = true;
1165         bool sendStall = false;
1166         // mark phase
1167         while(MARKPHASE == gcphase) {
1168 #ifdef DEBUG
1169                 BAMBOO_DEBUGPRINT(0xed04);
1170 #endif
1171                 while(gc_moreItems2()) {
1172 #ifdef DEBUG
1173                         BAMBOO_DEBUGPRINT(0xed05);
1174 #endif
1175                         sendStall = false;
1176                         gcbusystatus = true;
1177                         checkfield = true;
1178                         void * ptr = gc_dequeue2();
1179 #ifdef DEBUG
1180                         BAMBOO_DEBUGPRINT_REG(ptr);
1181 #endif
1182                         int size = 0;
1183                         int isize = 0;
1184                         int type = 0;
1185                         // check if it is a shared obj
1186                         if(ISSHAREDOBJ(ptr)) {
1187                                 // a shared obj, check if it is a local obj on this core
1188                                 if(isLarge(ptr, &type, &size)) {
1189                                         // ptr is a large object
1190                                         if(((int *)ptr)[6] == 0) {
1191                                                 // not marked and not enqueued
1192 #ifdef GC_DEBUG
1193                                                 BAMBOO_DEBUGPRINT(0xecec);
1194                                                 BAMBOO_DEBUGPRINT_REG(ptr);
1195 #endif
1196                                                 BAMBOO_START_CRITICAL_SECTION();
1197                                                 gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
1198                                                 gcnumlobjs++;
1199                                                 BAMBOO_CLOSE_CRITICAL_SECTION();
1200                                                 // mark this obj
1201                                                 ((int *)ptr)[6] = 1;
1202                                         }
1203                                 } else {
1204                                         bool islocal = isLocal(ptr);
1205                                         if (islocal && (((int *)ptr)[6] == 0)) {
1206                                                 // ptr is an unmarked active object on this core
1207                                                 ALIGNSIZE(size, &isize);
1208                                                 gccurr_heaptop += isize;
1209 #ifdef DEBUG
1210                                                 BAMBOO_DEBUGPRINT(0xaaaa);
1211                                                 BAMBOO_DEBUGPRINT_REG(ptr);
1212                                                 BAMBOO_DEBUGPRINT_REG(isize);
1213 #endif
1214                                                 // mark this obj
1215                                                 ((int *)ptr)[6] = 1;
1216                                                 if(ptr + size > gcmarkedptrbound) {
1217                                                         gcmarkedptrbound = ptr + size;
1218                                                 } // if(ptr + size > gcmarkedptrbound)
1219                                         } else if ((!islocal) && (((int *)ptr)[6] == 0)) {
1220                                                 int host = hostcore(ptr);
1221 #ifdef DEBUG
1222                                                 BAMBOO_DEBUGPRINT(0xbbbb);
1223                                                 BAMBOO_DEBUGPRINT_REG(host);
1224                                                 BAMBOO_DEBUGPRINT_REG(ptr);
1225 #endif
1226                                                 // send a msg to host informing that ptr is active
1227                                                 send_msg_2(host, GCMARKEDOBJ, ptr);
1228                                                 gcself_numsendobjs++;
1229                                                 checkfield = false;
1230                                         }// if(isLocal(ptr)) else ...
1231                                 } // if(isLarge(ptr, &type, &size)) else ...
1232                         } // if(ISSHAREDOBJ(ptr))
1233 #ifdef DEBUG
1234                         BAMBOO_DEBUGPRINT(0xed06);
1235 #endif
1236
1237                         if(checkfield) {
1238                                 // scan all pointers in ptr
1239                                 unsigned INTPTR * pointer;
1240                                 pointer=pointerarray[type];
1241                                 if (pointer==0) {
1242                                         /* Array of primitives */
1243                                         /* Do nothing */
1244                                 } else if (((INTPTR)pointer)==1) {
1245                                         /* Array of pointers */
1246                                         struct ArrayObject *ao=(struct ArrayObject *) ptr;
1247                                         int length=ao->___length___;
1248                                         int j;
1249                                         for(j=0; j<length; j++) {
1250                                                 void *objptr = 
1251                                                         ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
1252                                                 markObj(objptr);
1253                                         }
1254                                 } else {
1255                                         INTPTR size=pointer[0];
1256                                         int i;
1257                                         for(i=1; i<=size; i++) {
1258                                                 unsigned int offset=pointer[i];
1259                                                 void * objptr=*((void **)(((char *)ptr)+offset));
1260                                                 markObj(objptr);
1261                                         }
1262                                 } // if (pointer==0) else if ... else ...
1263                         } // if(checkfield)
1264                 } // while(!isEmpty(gctomark))
1265 #ifdef DEBUG
1266                 BAMBOO_DEBUGPRINT(0xed07);
1267 #endif
1268                 gcbusystatus = false;
1269                 // send mark finish msg to core coordinator
1270                 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1271 #ifdef DEBUG
1272                         BAMBOO_DEBUGPRINT(0xed08);
1273 #endif
1274                         gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
1275                         gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
1276                         gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
1277                         gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
1278                 } else {
1279                         if(!sendStall) {
1280 #ifdef DEBUG
1281                                 BAMBOO_DEBUGPRINT(0xed09);
1282 #endif
1283                                 send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
1284                                                                          gcself_numsendobjs, gcself_numreceiveobjs);
1285                                 sendStall = true;
1286                         }
1287                 }
1288 #ifdef DEBUG
1289                 BAMBOO_DEBUGPRINT(0xed0a);
1290 #endif
1291
1292                 if(BAMBOO_NUM_OF_CORE == 0) {
1293 #ifdef DEBUG
1294                         BAMBOO_DEBUGPRINT(0xed0b);
1295 #endif
1296                         return;
1297                 }
1298         } // while(MARKPHASE == gcphase)
1299 } // mark()
1300
1301 inline void compact2Heaptophelper(int coren,
1302                                               int* p,
1303                                                                                                                                         int* numblocks,
1304                                                                                                                                         int* remain) {
1305         int b;
1306         int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
1307         if(STARTUPCORE == coren) {
1308                 gctomove = true;
1309                 gcmovestartaddr = *p;
1310                 gcdstcore = gctopcore;
1311                 gcblock2fill = *numblocks + 1;
1312         } else {
1313                 send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1);
1314         }
1315 #ifdef DEBUG
1316         BAMBOO_DEBUGPRINT_REG(coren);
1317         BAMBOO_DEBUGPRINT_REG(gctopcore);
1318         BAMBOO_DEBUGPRINT_REG(*p);
1319         BAMBOO_DEBUGPRINT_REG(*numblocks+1);
1320 #endif
1321         if(memneed < *remain) {
1322 #ifdef DEBUG
1323                 BAMBOO_DEBUGPRINT(0xd104);
1324 #endif
1325                 *p = *p + memneed;
1326                 gcrequiredmems[coren] = 0;
1327                 gcloads[gctopcore] += memneed;
1328                 *remain = *remain - memneed;
1329         } else {
1330 #ifdef DEBUG
1331                 BAMBOO_DEBUGPRINT(0xd105);
1332 #endif
1333                 // next available block
1334                 *p = *p + *remain;
1335                 gcfilledblocks[gctopcore] += 1;
1336                 int newbase = 0;
1337                 BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
1338                 gcloads[gctopcore] = newbase;
1339                 gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
1340                 gcstopblock[gctopcore]++;
1341                 gctopcore = nextTopcore(gctopcore, gcheapdirection);
1342                 *numblocks = gcstopblock[gctopcore];
1343                 *p = gcloads[gctopcore];
1344                 BLOCKINDEX(*p, &b);
1345                 *remain = (b<NUMCORES)?((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
1346                                                                                                   :((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
1347 #ifdef DEBUG
1348                 BAMBOO_DEBUGPRINT(0xd106);
1349                 BAMBOO_DEBUGPRINT_REG(gctopcore);
1350                 BAMBOO_DEBUGPRINT_REG(*p);
1351                 BAMBOO_DEBUGPRINT_REG(b);
1352                 BAMBOO_DEBUGPRINT_REG(*remain);
1353 #endif
1354         } // if(memneed < remain)
1355         gcmovepending--;
1356 } // void compact2Heaptophelper(int, int*, int*, int*)
1357
1358 inline void compact2Heaptop() {
1359         // no cores with spare mem and some cores are blocked with pending move
1360         // find the current heap top and make them move to the heap top
1361         int p;
1362         int numblocks = gcfilledblocks[gctopcore];
1363         //BASEPTR(gctopcore, numblocks, &p);
1364         p = gcloads[gctopcore];
1365         int b;
1366         BLOCKINDEX(p, &b);
1367         int remain = (b<NUMCORES)?((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
1368                                        :((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
1369         // check if the top core finishes
1370         if(gccorestatus[gctopcore] != 0) {
1371 #ifdef DEBUG
1372                 BAMBOO_DEBUGPRINT(0xd101);
1373                 BAMBOO_DEBUGPRINT_REG(gctopcore);
1374 #endif
1375                 // let the top core finishes its own work first
1376                 compact2Heaptophelper(gctopcore, &p, &numblocks, &remain);
1377                 return;
1378         }
1379
1380 #ifdef DEBUG
1381         BAMBOO_DEBUGPRINT(0xd102);
1382         BAMBOO_DEBUGPRINT_REG(gctopcore);
1383         BAMBOO_DEBUGPRINT_REG(p);
1384         BAMBOO_DEBUGPRINT_REG(b);
1385         BAMBOO_DEBUGPRINT_REG(remain);
1386 #endif
1387         /*if((gctopcore == STARTUPCORE) && (b == 0)) {
1388                 remain -= gcreservedsb*BAMBOO_SMEM_SIZE;
1389                 p += gcreservedsb*BAMBOO_SMEM_SIZE;
1390         }*/
1391         for(int i = 0; i < NUMCORES; i++) {
1392                 BAMBOO_START_CRITICAL_SECTION();
1393                 if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
1394 #ifdef DEBUG
1395                         BAMBOO_DEBUGPRINT(0xd103);
1396 #endif
1397                         compact2Heaptophelper(i, &p, &numblocks, &remain);
1398                         if(gccorestatus[gctopcore] != 0) {
1399 #ifdef DEBUG
1400                                 BAMBOO_DEBUGPRINT(0xd101);
1401                                 BAMBOO_DEBUGPRINT_REG(gctopcore);
1402 #endif
1403                                 // the top core is not free now
1404                                 return;
1405                         }
1406                 } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
1407                 BAMBOO_CLOSE_CRITICAL_SECTION();
1408         } // for(i = 0; i < NUMCORES; i++)
1409 #ifdef DEBUG
1410         BAMBOO_DEBUGPRINT(0xd106);
1411 #endif
1412 } // void compact2Heaptop()
1413
1414 inline int nextTopcore(int topcore, bool direction) {
1415         int nextopcore = topcore;
1416         if((NUMCORES == 62) && (nextopcore>5)) {
1417                 nextopcore += 2;
1418         }
1419         int x = nextopcore / bamboo_height;
1420         int y = nextopcore % bamboo_height;
1421         if((direction && (y%2 == 0)) || ((!direction) && (y%2))) {
1422                 // increase
1423                 if(x == 7) {
1424                         if(direction) {
1425                                 y++;
1426                         } else {
1427                                 y--;
1428                         }
1429                 } else {
1430                         x++;
1431                 }
1432         } else {
1433                 // decrease
1434                 if((x == 0) || ((x==1) &&(y==6))) {
1435                         if(direction) {
1436                                 y++;
1437                                 if(y==6) {
1438                                         x = 1;
1439                                 }
1440                         } else {
1441                                 y--;
1442                                 if(y==5) {
1443                                         x = 0;
1444                                 }
1445                         }
1446                 } else {
1447                         x--;
1448                 }
1449         }
1450         nextopcore = x*bamboo_height+y;
1451         if(NUMCORES==62) {
1452                 if(x>0) {
1453                         nextopcore -= 2;
1454                 }
1455         }
1456         return nextopcore;
1457 } // int nextTopcore(int topcore, bool direction)
1458
1459 inline void resolvePendingMoveRequest() {
1460 #ifdef DEBUG
1461         BAMBOO_DEBUGPRINT(0xeb01);
1462 #endif
1463 #ifdef DEBUG
1464                 BAMBOO_DEBUGPRINT(0xeeee);
1465                 for(int k = 0; k < NUMCORES; k++) {
1466                         BAMBOO_DEBUGPRINT(0xf000+k);
1467                         BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
1468                         BAMBOO_DEBUGPRINT_REG(gcloads[k]);
1469                         BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
1470                         BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
1471                 }
1472                 BAMBOO_DEBUGPRINT(0xffff);
1473 #endif
1474         int i;
1475         int j;
1476         bool nosparemem = true;
1477         bool haspending = false;
1478         bool hasrunning = false;
1479         bool noblock = false;
1480         int dstcore = 0; // the core who need spare mem
1481         int sourcecore = 0; // the core who has spare mem
1482         for(i = j = 0; (i < NUMCORES) && (j < NUMCORES);) {
1483                 if(nosparemem) {
1484                         // check if there are cores with spare mem
1485                         if(gccorestatus[i] == 0) {
1486                                 // finished working, check if it still have spare mem
1487                                 if(gcfilledblocks[i] < gcstopblock[i]) {
1488                                         // still have spare mem
1489                                         nosparemem = false;
1490                                         sourcecore = i;
1491                                 } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
1492                         }
1493                         i++;
1494                 } // if(nosparemem)
1495                 if(!haspending) {
1496                         if(gccorestatus[j] != 0) {
1497                                 // not finished, check if it has pending move requests
1498                                 if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
1499                                         dstcore = j;
1500                                         haspending = true;
1501                                 } else {
1502                                         hasrunning = true;
1503                                 } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
1504                         } // if(gccorestatus[i] == 0) else ...
1505                         j++;
1506                 } // if(!haspending)
1507                 if(!nosparemem && haspending) {
1508                         // find match
1509                         int tomove = 0;
1510                         int startaddr = 0;
1511                         BAMBOO_START_CRITICAL_SECTION();
1512                         gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore, 
1513                                                                                gcrequiredmems[dstcore], 
1514                                                                                                                                                                                            &tomove, 
1515                                                                                                                                                                                            &startaddr);
1516                         BAMBOO_CLOSE_CRITICAL_SECTION();
1517 #ifdef DEBUG
1518                         BAMBOO_DEBUGPRINT(0xeb02);
1519                         BAMBOO_DEBUGPRINT_REG(sourcecore);
1520                         BAMBOO_DEBUGPRINT_REG(dstcore);
1521                         BAMBOO_DEBUGPRINT_REG(startaddr);
1522                         BAMBOO_DEBUGPRINT_REG(tomove);
1523 #endif
1524                         if(STARTUPCORE == dstcore) {
1525 #ifdef DEBUG
1526                                 BAMBOO_DEBUGPRINT(0xeb03);
1527 #endif
1528                                 gcdstcore = sourcecore;
1529                                 gctomove = true;
1530                                 gcmovestartaddr = startaddr;
1531                                 gcblock2fill = tomove;
1532                         } else {
1533 #ifdef DEBUG
1534                                 BAMBOO_DEBUGPRINT(0xeb04);
1535 #endif
1536                                 send_msg_4(dstcore, GCMOVESTART, sourcecore, startaddr, tomove);
1537                         }
1538                         gcmovepending--;
1539                         nosparemem = true;
1540                         haspending = false;
1541                         noblock = true;
1542                 }
1543         } // for(i = 0; i < NUMCORES; i++)
1544 #ifdef DEBUG
1545         BAMBOO_DEBUGPRINT(0xcccc);
1546         BAMBOO_DEBUGPRINT_REG(hasrunning);
1547         BAMBOO_DEBUGPRINT_REG(haspending);
1548         BAMBOO_DEBUGPRINT_REG(noblock);
1549 #endif
1550
1551         if(!hasrunning && !noblock) {
1552                 gcphase = SUBTLECOMPACTPHASE;
1553                 compact2Heaptop();
1554         }
1555
1556 } // void resovePendingMoveRequest()
1557
1558 struct moveHelper {
1559         int numblocks; // block num for heap
1560         INTPTR base; // base virtual address of current heap block
1561         INTPTR ptr; // virtual address of current heap top
1562         int offset; // offset in current heap block
1563         int blockbase; // virtual address of current small block to check
1564         int blockbound; // bound virtual address of current small blcok
1565         int sblockindex; // index of the small blocks
1566         int top; // real size of current heap block to check
1567         int bound; // bound size of current heap block to check
1568 }; // struct moveHelper
1569
1570 inline void nextSBlock(struct moveHelper * orig) {
1571         orig->blockbase = orig->blockbound;
1572 #ifdef GC_DEBUG
1573         BAMBOO_DEBUGPRINT(0xecc0);
1574         BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1575         BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1576         BAMBOO_DEBUGPRINT_REG(orig->bound);
1577         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1578 #endif
1579         if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound) 
1580                         || ((*((int*)orig->ptr))==0) || ((*((int*)orig->blockbase))==0)) {
1581 innernextSBlock:
1582                 // end of current heap block, jump to next one
1583                 orig->numblocks++;
1584 #ifdef GC_DEBUG
1585                 BAMBOO_DEBUGPRINT(0xecc1);
1586                 BAMBOO_DEBUGPRINT_REG(orig->numblocks);
1587 #endif
1588                 BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
1589 #ifdef DEBUG
1590                 BAMBOO_DEBUGPRINT(orig->base);
1591 #endif
1592                 orig->bound = orig->base + BAMBOO_SMEM_SIZE;
1593                 orig->blockbase = orig->base;
1594                 orig->sblockindex = (orig->blockbase-BAMBOO_BASE_VA)/BAMBOO_SMEM_SIZE;
1595                 if(gcsbstarttbl[orig->sblockindex] == -1) {
1596                         // goto next sblock
1597 #ifdef DEBUG
1598                         BAMBOO_DEBUGPRINT(0xecc2);
1599 #endif
1600                         orig->sblockindex += 1;
1601                         orig->blockbase += BAMBOO_SMEM_SIZE;
1602                         goto innernextSBlock;
1603                 } else if(gcsbstarttbl[orig->sblockindex] != 0) {
1604                         // not start from the very beginning
1605                         orig->blockbase = gcsbstarttbl[orig->sblockindex];
1606                 }
1607         }
1608         orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1609         orig->offset = BAMBOO_CACHE_LINE_SIZE;
1610         orig->ptr = orig->blockbase + orig->offset;
1611 #ifdef GC_DEBUG
1612         BAMBOO_DEBUGPRINT(0xecc3);
1613         BAMBOO_DEBUGPRINT_REG(orig->base);
1614         BAMBOO_DEBUGPRINT_REG(orig->bound);
1615         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1616 #endif
1617         if(orig->ptr >= orig->bound) {
1618                 // met a lobj, move to next block
1619                 goto innernextSBlock;
1620         }
1621 } // void nextSBlock(struct moveHelper * orig) 
1622
1623 inline void initOrig_Dst(struct moveHelper * orig, 
1624                                      struct moveHelper * to) {
1625         // init the dst ptr
1626         to->numblocks = 0;
1627         to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
1628         to->bound = BAMBOO_SMEM_SIZE_L;
1629         BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1630
1631 #ifdef DEBUG
1632         BAMBOO_DEBUGPRINT(0xef01);
1633         BAMBOO_DEBUGPRINT_REG(to->base);
1634 #endif
1635         /*if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1636                 to->base += gcreservedsb * BAMBOO_SMEM_SIZE;
1637                 to->top += gcreservedsb * BAMBOO_SMEM_SIZE;
1638         }*/
1639         to->ptr = to->base + to->offset;
1640
1641         // init the orig ptr
1642         orig->numblocks = 0;
1643         orig->base = to->base;
1644         orig->bound = to->base + BAMBOO_SMEM_SIZE_L;
1645         orig->blockbase = orig->base;
1646         /*if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1647                 orig->sblockindex = gcreservedsb;
1648         } else {*/
1649                 orig->sblockindex = (orig->base - BAMBOO_BASE_VA) / BAMBOO_SMEM_SIZE;
1650         //}
1651 #ifdef DEBUG
1652         BAMBOO_DEBUGPRINT(0xef02);
1653         BAMBOO_DEBUGPRINT_REG(orig->base);
1654         BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
1655         BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
1656         BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);
1657 #endif
1658
1659         if(gcsbstarttbl[orig->sblockindex] == -1) {
1660 #ifdef DEBUG
1661                 BAMBOO_DEBUGPRINT(0xef03);
1662 #endif
1663                 // goto next sblock
1664                 orig->blockbound = 
1665                         BAMBOO_BASE_VA+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
1666                 nextSBlock(orig);
1667                 return;
1668         } else if(gcsbstarttbl[orig->sblockindex] != 0) {
1669 #ifdef DEBUG
1670                 BAMBOO_DEBUGPRINT(0xef04);
1671 #endif
1672                 orig->blockbase = gcsbstarttbl[orig->sblockindex];
1673         }
1674 #ifdef DEBUG
1675         BAMBOO_DEBUGPRINT(0xef05);
1676 #endif
1677         orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1678         orig->offset = BAMBOO_CACHE_LINE_SIZE;
1679         orig->ptr = orig->blockbase + orig->offset;
1680 #ifdef DEBUG
1681         BAMBOO_DEBUGPRINT(0xef06);
1682         BAMBOO_DEBUGPRINT_REG(orig->base);
1683 #endif
1684 } // void initOrig_Dst(struct moveHelper * orig, struct moveHelper * to) 
1685
1686 inline void nextBlock(struct moveHelper * to) {
1687         to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
1688         to->bound += BAMBOO_SMEM_SIZE;
1689         to->numblocks++;
1690         BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1691         to->offset = BAMBOO_CACHE_LINE_SIZE;
1692         to->ptr = to->base + to->offset;
1693 } // void nextBlock(struct moveHelper * to)
1694
1695 // endaddr does not contain spaces for headers
1696 inline bool moveobj(struct moveHelper * orig, 
1697                                 struct moveHelper * to, 
1698                                                         int stopblock) {
1699         if(stopblock == 0) {
1700                 return true;
1701         }
1702
1703 #ifdef DEBUG
1704         BAMBOO_DEBUGPRINT(0xe201);
1705         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1706         BAMBOO_DEBUGPRINT_REG(to->ptr);
1707 #endif
1708
1709         int type = 0;
1710         int size = 0;
1711         int mark = 0;
1712         int isize = 0;
1713 innermoveobj:
1714         while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
1715                 orig->ptr = (int*)(orig->ptr) + 1;
1716         }
1717         if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
1718                 nextSBlock(orig);
1719                 goto innermoveobj;
1720         }
1721 #ifdef DEBUG
1722         BAMBOO_DEBUGPRINT(0xe202);
1723 #endif
1724         // check the obj's type, size and mark flag
1725         type = ((int *)(orig->ptr))[0];
1726         size = 0;
1727         if(type == 0) {
1728                 // end of this block, go to next one
1729                 nextSBlock(orig);
1730                 goto innermoveobj;
1731         } else if(type < NUMCLASSES) {
1732                 // a normal object
1733                 size = classsize[type];
1734         } else {        
1735                 // an array 
1736                 struct ArrayObject *ao=(struct ArrayObject *)(orig->ptr);
1737                 int elementsize=classsize[type];
1738                 int length=ao->___length___; 
1739                 size=sizeof(struct ArrayObject)+length*elementsize;
1740         }
1741         mark = ((int *)(orig->ptr))[6];
1742 #ifdef DEBUG
1743         BAMBOO_DEBUGPRINT(0xe203);
1744 #endif
1745         if(mark == 1) {
1746 #ifdef DEBUG
1747                 BAMBOO_DEBUGPRINT(0xe204);
1748 #endif
1749                 // marked obj, copy it to current heap top
1750                 // check to see if remaining space is enough
1751                 ALIGNSIZE(size, &isize);
1752                 if(to->top + isize > to->bound) {
1753                         // fill -1 indicating the end of this block
1754                         /*if(to->top != to->bound) {
1755                                 *((int*)to->ptr) = -1;
1756                         }*/
1757                         //memset(to->ptr+1,  -2, to->bound - to->top - 1);
1758                         // fill the header of this block and then go to next block
1759         to->offset += to->bound - to->top;
1760                         memset(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
1761                         (*((int*)(to->base))) = to->offset;
1762                         nextBlock(to);
1763                         if(stopblock == to->numblocks) {
1764                                 // already fulfilled the block
1765                                 to->offset = 0;
1766                                 to->ptr = to->base;
1767                                 return true;
1768                         } // if(stopblock == to->numblocks)
1769                 } // if(to->top + isize > to->bound)
1770                 // set the mark field to 2, indicating that this obj has been moved and need to be flushed
1771                 ((int *)(orig->ptr))[6] = 2;
1772                 if(to->ptr != orig->ptr) {
1773                         memcpy(to->ptr, orig->ptr, size);
1774                         // fill the remaining space with -2
1775                         memset(to->ptr+size, -2, isize-size);
1776                 }
1777                 // store mapping info
1778                 BAMBOO_START_CRITICAL_SECTION();
1779                 RuntimeHashadd_I(gcpointertbl, orig->ptr, to->ptr); 
1780                 BAMBOO_CLOSE_CRITICAL_SECTION();
1781 #ifdef DEBUG
1782                 BAMBOO_DEBUGPRINT(0xcdce);
1783                 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1784                 BAMBOO_DEBUGPRINT_REG(to->ptr);
1785 #endif
1786                 gccurr_heaptop -= isize;
1787                 to->ptr += isize;
1788                 to->offset += isize;
1789                 to->top += isize;
1790                 if(to->top == to->bound) {
1791                         // fill the header of this block and then go to next block
1792                         memset(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
1793                         (*((int*)(to->base))) = to->offset;
1794                         nextBlock(to);
1795                 }
1796         } // if(mark == 1)
1797 #ifdef DEBUG
1798         BAMBOO_DEBUGPRINT(0xe205);
1799 #endif
1800         // move to next obj
1801         orig->ptr += isize;
1802 #ifdef DEBUG
1803         BAMBOO_DEBUGPRINT_REG(isize);
1804         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1805         BAMBOO_DEBUGPRINT_REG(orig->bound);
1806 #endif
1807         if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
1808 #ifdef DEBUG
1809         BAMBOO_DEBUGPRINT(0xe206);
1810 #endif
1811                 nextSBlock(orig);
1812         }
1813 #ifdef DEBUG
1814         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1815 #endif
1816         return false;
1817 } //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int* endaddr)
1818
1819 // should be invoked with interrupt closed
1820 inline int assignSpareMem_I(int sourcecore,
1821                                         int * requiredmem,
1822                                                                                                           int * tomove,
1823                                                                                                           int * startaddr) {
1824         int b = 0;
1825         BLOCKINDEX(gcloads[sourcecore], &b);
1826         int boundptr = b<NUMCORES?(b+1)*BAMBOO_SMEM_SIZE_L
1827                 :BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES+1)*BAMBOO_SMEM_SIZE;
1828         int remain = boundptr - gcloads[sourcecore];
1829         int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
1830         *startaddr = gcloads[sourcecore];
1831         *tomove = gcfilledblocks[sourcecore] + 1;
1832         if(memneed < remain) {
1833                 gcloads[sourcecore] += memneed;
1834                 return 0;
1835         } else {
1836                 // next available block
1837                 gcfilledblocks[sourcecore] += 1;
1838                 int newbase = 0;
1839                 BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
1840                 gcloads[sourcecore] = newbase;
1841                 return requiredmem-remain;
1842         }
1843 } // int assignSpareMem_I(int ,int * , int * , int * )
1844
1845 // should be invoked with interrupt closed
1846 inline bool gcfindSpareMem_I(int * startaddr,
1847                                          int * tomove,
1848                                                                                                    int * dstcore,
1849                                                                                                    int requiredmem,
1850                                                                                                    int requiredcore) {
1851         for(int k = 0; k < NUMCORES; k++) {
1852                 if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
1853                         // check if this stopped core has enough mem
1854                         assignSpareMem_I(k, requiredmem, tomove, startaddr);
1855                         *dstcore = k;
1856                         return true;
1857                 }
1858         }
1859         // if can not find spare mem right now, hold the request
1860         gcrequiredmems[requiredcore] = requiredmem;
1861         gcmovepending++;
1862         return false;
1863 } //bool gcfindSpareMem_I(int* startaddr,int* tomove,int mem,int core)
1864
1865 inline bool compacthelper(struct moveHelper * orig,
1866                                       struct moveHelper * to,
1867                                                                                                         int * filledblocks,
1868                                                                                                         int * heaptopptr,
1869                                                                                                         bool * localcompact) {
1870         // scan over all objs in this block, compact the marked objs 
1871         // loop stop when finishing either scanning all active objs or 
1872         // fulfilled the gcstopblock
1873 #ifdef DEBUG
1874         BAMBOO_DEBUGPRINT(0xe101);
1875         BAMBOO_DEBUGPRINT_REG(gcblock2fill);
1876 #endif
1877 innercompact:
1878         do {
1879                 bool stop = moveobj(orig, to, gcblock2fill);
1880                 if(stop) {
1881                         break;
1882                 }
1883         } while(orig->ptr < gcmarkedptrbound);
1884         // if no objs have been compact, do nothing, 
1885         // otherwise, fill the header of this block
1886         if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
1887                 memset(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
1888                 (*((int*)(to->base))) = to->offset;
1889         } else {
1890                 to->offset = 0;
1891                 to->ptr = to->base;
1892                 to->top -= BAMBOO_CACHE_LINE_SIZE;
1893         } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
1894         if(*localcompact) {
1895                 *heaptopptr = to->ptr;
1896                 *filledblocks = to->numblocks;
1897         }
1898 #ifdef DEBUG
1899         BAMBOO_DEBUGPRINT(0xe102);
1900         BAMBOO_DEBUGPRINT_REG(orig->ptr);
1901         BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
1902         BAMBOO_DEBUGPRINT_REG(*heaptopptr);
1903         BAMBOO_DEBUGPRINT_REG(*filledblocks);
1904         BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
1905 #endif
1906
1907         // send msgs to core coordinator indicating that the compact is finishing
1908         // send compact finish message to core coordinator
1909         if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1910                 gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
1911                 gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
1912                 if(orig->ptr < gcmarkedptrbound) {
1913 #ifdef DEBUG
1914                         BAMBOO_DEBUGPRINT(0xe103);
1915 #endif
1916                         // ask for more mem
1917                         gctomove = false;
1918                         BAMBOO_START_CRITICAL_SECTION();
1919                         if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore, 
1920                                                               gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
1921 #ifdef DEBUG
1922                                 BAMBOO_DEBUGPRINT(0xe104);
1923 #endif
1924                                 gctomove = true;
1925                         } else {
1926                                 BAMBOO_CLOSE_CRITICAL_SECTION();
1927 #ifdef DEBUG
1928                                 BAMBOO_DEBUGPRINT(0xe105);
1929 #endif
1930                                 return false; 
1931                         }
1932                         BAMBOO_CLOSE_CRITICAL_SECTION();
1933                 } else {
1934 #ifdef DEBUG
1935                         BAMBOO_DEBUGPRINT(0xe106);
1936 #endif
1937                         gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
1938                         gctomove = false;
1939                         return true;
1940                 }
1941         } else {
1942                 if(orig->ptr < gcmarkedptrbound) {
1943 #ifdef DEBUG
1944                         BAMBOO_DEBUGPRINT(0xe107);
1945 #endif
1946                         // ask for more mem
1947                         gctomove = false;
1948                         send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE, 
1949                                                *filledblocks, *heaptopptr, gccurr_heaptop);
1950                 } else {
1951 #ifdef DEBUG
1952                         BAMBOO_DEBUGPRINT(0xe108);
1953                         BAMBOO_DEBUGPRINT_REG(*heaptopptr);
1954 #endif
1955                         // finish compacting
1956                         send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
1957                                                *filledblocks, *heaptopptr, 0);
1958                 }
1959         } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
1960
1961         if(orig->ptr < gcmarkedptrbound) {
1962 #ifdef DEBUG
1963                 BAMBOO_DEBUGPRINT(0xe109);
1964 #endif
1965                 // still have unpacked obj
1966                 while(true) {
1967                         if(gctomove) {
1968                                 break;
1969                         }
1970                 };
1971                 gctomove = false;
1972 #ifdef DEBUG
1973                 BAMBOO_DEBUGPRINT(0xe10a);
1974 #endif
1975
1976                 to->ptr = gcmovestartaddr;
1977                 to->numblocks = gcblock2fill - 1;
1978                 to->bound = (to->numblocks==0)?
1979                         BAMBOO_SMEM_SIZE_L:
1980                         BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
1981                 BASEPTR(gcdstcore, to->numblocks, &(to->base));
1982                 to->offset = to->ptr - to->base;
1983                 to->top = (to->numblocks==0)?
1984                         (to->offset):(to->bound-BAMBOO_SMEM_SIZE+to->offset);
1985                 to->base = to->ptr;
1986                 to->offset = BAMBOO_CACHE_LINE_SIZE;
1987                 to->ptr += to->offset; // for header
1988                 to->top += to->offset;
1989                 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
1990                         *localcompact = true;
1991                 } else {
1992                         *localcompact = false;
1993                 }
1994                 goto innercompact;
1995         }
1996 #ifdef DEBUG
1997         BAMBOO_DEBUGPRINT(0xe10b);
1998 #endif
1999         return true;
2000 } // void compacthelper()
2001
2002 inline void compact() {
2003         if(COMPACTPHASE != gcphase) {
2004                 BAMBOO_EXIT(0xb102);
2005         }
2006
2007         // initialize pointers for comapcting
2008         struct moveHelper * orig = 
2009                 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2010         struct moveHelper * to = 
2011                 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2012
2013         initOrig_Dst(orig, to);
2014         
2015         int filledblocks = 0;
2016         INTPTR heaptopptr = 0;
2017         bool localcompact = true;
2018         compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
2019         
2020         RUNFREE(orig);
2021         RUNFREE(to);
2022 } // compact()
2023
2024 inline void * flushObj(void * objptr) {
2025 #ifdef DEBUG
2026         BAMBOO_DEBUGPRINT(0xe401);
2027 #endif
2028         void * dstptr = NULL;
2029         if(ISSHAREDOBJ(objptr)) {
2030 #ifdef DEBUG
2031                 BAMBOO_DEBUGPRINT(0xe402);
2032                 BAMBOO_DEBUGPRINT_REG(objptr);
2033 #endif
2034                 // a shared obj ptr, change to new address
2035                 BAMBOO_START_CRITICAL_SECTION();
2036                 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2037                 BAMBOO_CLOSE_CRITICAL_SECTION();
2038 #ifdef DEBUG
2039                 BAMBOO_DEBUGPRINT_REG(dstptr);
2040 #endif
2041                 if(NULL == dstptr) {
2042 #ifdef DEBUG
2043                         BAMBOO_DEBUGPRINT(0xe403);
2044                         BAMBOO_DEBUGPRINT_REG(objptr);
2045                         BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
2046 #endif
2047                         // send msg to host core for the mapping info
2048                         gcobj2map = (int)objptr;
2049                         gcismapped = false;
2050                         gcmappedobj = NULL;
2051                         send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr, 
2052                                                                  BAMBOO_NUM_OF_CORE);
2053                         while(true) {
2054                                 if(gcismapped) {
2055                                         break;
2056                                 }
2057                         }
2058                         BAMBOO_START_CRITICAL_SECTION();
2059                         RuntimeHashget(gcpointertbl, objptr, &dstptr);
2060                         BAMBOO_CLOSE_CRITICAL_SECTION();
2061 #ifdef DEBUG
2062                         BAMBOO_DEBUGPRINT_REG(dstptr);
2063 #endif
2064                 }
2065         } // if(ISSHAREDOBJ(objptr))
2066 #ifdef DEBUG
2067         BAMBOO_DEBUGPRINT(0xe404);
2068 #endif
2069         return dstptr;
2070 } // void flushObj(void * objptr, void ** tochange)
2071
2072 inline void flushRuntimeObj(struct garbagelist * stackptr) {
2073         int i,j;
2074         // flush current stack 
2075         while(stackptr!=NULL) {
2076                 for(i=0; i<stackptr->size; i++) {
2077                         if(stackptr->array[i] != NULL) {
2078                                 stackptr->array[i] = flushObj(stackptr->array[i]);
2079                         }
2080                 }
2081                 stackptr=stackptr->next;
2082         }
2083
2084         // flush objectsets
2085         for(i=0; i<NUMCLASSES; i++) {
2086                 struct parameterwrapper ** queues = 
2087                         objectqueues[BAMBOO_NUM_OF_CORE][i];
2088                 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
2089                 for(j = 0; j < length; ++j) {
2090                         struct parameterwrapper * parameter = queues[j];
2091                         struct ObjectHash * set=parameter->objectset;
2092                         struct ObjectNode * ptr=set->listhead;
2093                         while(ptr!=NULL) {
2094                                 ptr->key = flushObj((void *)ptr->key);
2095                                 ptr=ptr->lnext;
2096                         }
2097                 }
2098         }
2099
2100         // flush current task descriptor
2101         if(currtpd != NULL) {
2102                 for(i=0; i<currtpd->numParameters; i++) {
2103                         currtpd->parameterArray[i] = flushObj(currtpd->parameterArray[i]);
2104                 }
2105         }
2106
2107         // flush active tasks
2108         struct genpointerlist * ptr=activetasks->list;
2109         while(ptr!=NULL) {
2110                 struct taskparamdescriptor *tpd=ptr->src;
2111                 int i;
2112                 for(i=0; i<tpd->numParameters; i++) {
2113                         tpd->parameterArray[i] = flushObj(tpd->parameterArray[i]);
2114                 }
2115                 ptr=ptr->inext;
2116         }
2117
2118         // flush cached transferred obj
2119         struct QueueItem * tmpobjptr =  getHead(&objqueue);
2120         while(tmpobjptr != NULL) {
2121                 struct transObjInfo * objInfo = 
2122                         (struct transObjInfo *)(tmpobjptr->objectptr); 
2123                 objInfo->objptr = flushObj(objInfo->objptr);
2124                 tmpobjptr = getNextQueueItem(tmpobjptr);
2125         }
2126
2127         // flush cached objs to be transferred
2128         struct QueueItem * item = getHead(totransobjqueue);
2129         while(item != NULL) {
2130                 struct transObjInfo * totransobj = 
2131                         (struct transObjInfo *)(item->objectptr);
2132                 totransobj->objptr = flushObj(totransobj->objptr);
2133                 item = getNextQueueItem(item);
2134         } // while(item != NULL)
2135 } // void flushRuntimeObj(struct garbagelist * stackptr)
2136
2137 inline void flush(struct garbagelist * stackptr) {
2138         flushRuntimeObj(stackptr);
2139         
2140         while(gc_moreItems()) {
2141 #ifdef DEBUG
2142                 BAMBOO_DEBUGPRINT(0xe301);
2143 #endif
2144                 void * ptr = gc_dequeue();
2145                 void * tptr = flushObj(ptr);
2146 #ifdef DEBUG
2147                 BAMBOO_DEBUGPRINT(0xe302);
2148                 BAMBOO_DEBUGPRINT_REG(ptr);
2149                 BAMBOO_DEBUGPRINT_REG(tptr);
2150 #endif
2151                 if(tptr != NULL) {
2152                         ptr = tptr;
2153                 }
2154                 if(((int *)(ptr))[6] == 2) {
2155                         int type = ((int *)(ptr))[0];
2156                         // scan all pointers in ptr
2157                         unsigned INTPTR * pointer;
2158                         pointer=pointerarray[type];
2159 #ifdef DEBUG
2160                         BAMBOO_DEBUGPRINT(0xe303);
2161                         BAMBOO_DEBUGPRINT_REG(pointer);
2162 #endif
2163                         if (pointer==0) {
2164                                 /* Array of primitives */
2165                                 /* Do nothing */
2166                         } else if (((INTPTR)pointer)==1) {
2167 #ifdef DEBUG
2168                                 BAMBOO_DEBUGPRINT(0xe304);
2169 #endif
2170                                 /* Array of pointers */
2171                                 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2172                                 int length=ao->___length___;
2173                                 int j;
2174                                 for(j=0; j<length; j++) {
2175 #ifdef DEBUG
2176                                         BAMBOO_DEBUGPRINT(0xe305);
2177 #endif
2178                                         void *objptr=
2179                                                 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2180 #ifdef DEBUG
2181                                         BAMBOO_DEBUGPRINT_REG(objptr);
2182 #endif
2183                                         ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = 
2184                                                 flushObj(objptr);
2185                                 }
2186                         } else {
2187 #ifdef DEBUG
2188                                 BAMBOO_DEBUGPRINT(0xe306);
2189 #endif
2190                                 INTPTR size=pointer[0];
2191                                 int i;
2192                                 for(i=1; i<=size; i++) {
2193 #ifdef DEBUG
2194                                         BAMBOO_DEBUGPRINT(0xe307);
2195 #endif
2196                                         unsigned int offset=pointer[i];
2197                                         void * objptr=*((void **)(((char *)ptr)+offset));
2198 #ifdef DEBUG
2199                                         BAMBOO_DEBUGPRINT_REG(objptr);
2200 #endif
2201                                         *((void **)(((char *)ptr)+offset)) = flushObj(objptr);
2202                                 } // for(i=1; i<=size; i++) 
2203                         } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2204                         // restore the mark field, indicating that this obj has been flushed
2205                         ((int *)(ptr))[6] = 0;
2206                 } // if(((int *)(ptr))[6] == 2)
2207         } // while(moi != NULL)
2208 #ifdef DEBUG
2209         BAMBOO_DEBUGPRINT(0xe308);
2210 #endif
2211         // send flush finish message to core coordinator
2212         if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2213                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2214         } else {
2215                 send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE);
2216         }
2217 #ifdef DEBUG
2218         BAMBOO_DEBUGPRINT(0xe309);
2219 #endif
2220 } // flush()
2221
2222 inline void gc_collect(struct garbagelist * stackptr) {
2223         // core collector routine
2224         while(true) {
2225                 //BAMBOO_START_CRITICAL_SECTION();
2226                 if(INITPHASE == gcphase) {
2227                         //BAMBOO_CLOSE_CRITICAL_SECTION();
2228                         break;
2229                 }
2230                 //BAMBOO_CLOSE_CRITICAL_SECTION();
2231         }
2232 #ifdef GC_DEBUG
2233         tprintf("Do initGC\n");
2234 #endif
2235         initGC();
2236         //send init finish msg to core coordinator
2237         send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE);
2238         while(true) {
2239                 //BAMBOO_START_CRITICAL_SECTION();
2240                 if(MARKPHASE == gcphase) {
2241                         //BAMBOO_CLOSE_CRITICAL_SECTION();
2242                         break;
2243                 }
2244                 //BAMBOO_CLOSE_CRITICAL_SECTION();
2245         }
2246 #ifdef GC_DEBUG
2247         tprintf("Start mark phase\n");
2248 #endif
2249         mark(true, stackptr);
2250 #ifdef GC_DEBUG
2251         tprintf("Finish mark phase, start compact phase\n");
2252 #endif
2253         compact();
2254 #ifdef GC_DEBUG
2255         tprintf("Finish compact phase\n");
2256 #endif
2257         while(true) {
2258                 //BAMBOO_START_CRITICAL_SECTION();
2259                 if(FLUSHPHASE == gcphase) {
2260                         //BAMBOO_CLOSE_CRITICAL_SECTION();
2261                         break;
2262                 }
2263                 //BAMBOO_CLOSE_CRITICAL_SECTION();
2264         }
2265 #ifdef GC_DEBUG
2266         tprintf("Start flush phase\n");
2267 #endif
2268         flush(stackptr);
2269 #ifdef GC_DEBUG
2270         tprintf("Finish flush phase\n");
2271 #endif
2272
2273         while(true) {
2274                 //BAMBOO_START_CRITICAL_SECTION();
2275                 if(FINISHPHASE == gcphase) {
2276                         //BAMBOO_CLOSE_CRITICAL_SECTION();
2277                         break;
2278                 }
2279                 //BAMBOO_CLOSE_CRITICAL_SECTION();
2280         }
2281 #ifdef GC_DEBUG
2282         tprintf("Finish gc!\n");
2283 #endif
2284 } // void gc_collect(struct garbagelist * stackptr)
2285
2286 inline void gc(struct garbagelist * stackptr) {
2287         // check if do gc
2288         if(!gcflag) {
2289                 gcprocessing = false;
2290                 return;
2291         }
2292
2293         // core coordinator routine
2294         if(0 == BAMBOO_NUM_OF_CORE) {
2295 #ifdef GC_DEBUG
2296         tprintf("Check if can do gc or not\n");
2297 #endif
2298                 if(!preGC()) {
2299                         // not ready to do gc
2300                         gcflag = true;
2301                         return;
2302                 }
2303
2304 #ifdef GC_DEBUG
2305                 tprintf("start gc! \n");
2306                 dumpSMem();
2307 #endif
2308
2309                 gcprocessing = true;
2310                 int i = 0;
2311                 waitconfirm = false;
2312                 waitconfirm = 0;
2313                 gcphase = INITPHASE;
2314                 for(i = 1; i < NUMCORES; i++) {
2315                         // send GC init messages to all cores
2316                         send_msg_1(i, GCSTARTINIT);
2317                 }
2318                 bool isfirst = true;
2319                 bool allStall = false;
2320
2321                 initGC();
2322 #ifdef GC_DEBUG
2323                 tprintf("Check core status \n");
2324 #endif
2325
2326                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2327                 while(true) {
2328                         BAMBOO_START_CRITICAL_SECTION();
2329                         if(gc_checkCoreStatus()) {
2330                                 BAMBOO_CLOSE_CRITICAL_SECTION();
2331                                 break;
2332                         }
2333                         BAMBOO_CLOSE_CRITICAL_SECTION();
2334                 }
2335 #ifdef GC_DEBUG
2336                 tprintf("Start mark phase \n");
2337 #endif
2338                 // all cores have finished compacting
2339                 // restore the gcstatus of all cores
2340                 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
2341                 for(i = 1; i < NUMCORES; ++i) {
2342                         gccorestatus[i] = 1;
2343                         // send GC start messages to all cores
2344                         send_msg_1(i, GCSTART);
2345                 }
2346
2347                 gcphase = MARKPHASE;
2348                 // mark phase
2349                 while(MARKPHASE == gcphase) {
2350                         mark(isfirst, stackptr);
2351                         if(isfirst) {
2352                                 isfirst = false;
2353                         }
2354
2355                         // check gcstatus
2356                         checkMarkStatue(); 
2357                 }  // while(MARKPHASE == gcphase)
2358                 // send msgs to all cores requiring large objs info
2359                 numconfirm = NUMCORES - 1;
2360                 for(i = 1; i < NUMCORES; ++i) {
2361                         send_msg_1(i, GCLOBJREQUEST);
2362                 }
2363                 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
2364                 while(true) {
2365                         if(numconfirm==0) {
2366                                 break;
2367                         }
2368                 } // wait for responses
2369 #ifdef GC_DEBUG
2370                 tprintf("prepare to cache large objs \n");
2371                 //dumpSMem();
2372 #endif
2373                 // cache all large objs
2374                 if(!cacheLObjs()) {
2375                         // no enough space to cache large objs
2376                         BAMBOO_EXIT(0xb103);
2377                 }
2378                 // predict number of blocks to fill for each core
2379                 int numpbc = loadbalance();
2380                 // TODO
2381                 numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
2382 #ifdef GC_DEBUG
2383                 tprintf("mark phase finished \n");
2384                 //dumpSMem();
2385 #endif
2386                 int tmpn = 0;
2387                 int tmptopptr = 0;
2388                 if(!gcheapdirection) {
2389                         tmpn = 1;
2390                 }
2391                 BASEPTR(gctopcore, tmpn, &tmptopptr);
2392                 // TODO
2393                 tmptopptr = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE);
2394 #ifdef DEBUG
2395                 BAMBOO_DEBUGPRINT(0xabab);
2396                 BAMBOO_DEBUGPRINT_REG(tmptopptr);
2397 #endif
2398                 for(i = 0; i < NUMCORES; ++i) {
2399                         int tmpcoreptr = 0;
2400                         BASEPTR(i, tmpn, &tmpcoreptr);
2401                         //send start compact messages to all cores
2402                         if (tmpcoreptr < tmptopptr) {
2403                                 gcstopblock[i] =numpbc + 1;
2404                                 if(i != STARTUPCORE) {
2405                                         send_msg_2(i, GCSTARTCOMPACT, numpbc+1); 
2406                                 } else {
2407                                         gcblock2fill = numpbc+1;
2408                                 } // if(i != STARTUPCORE)
2409                         } else {
2410                                 gcstopblock[i] = numpbc;
2411                                 if(i != STARTUPCORE) {
2412                                         send_msg_2(i, GCSTARTCOMPACT, numpbc);
2413                                 } else {
2414                                         gcblock2fill = numpbc;
2415                                 } // if(i != STARTUPCORE)
2416                         }
2417 #ifdef DEBUG
2418                         BAMBOO_DEBUGPRINT(0xf000+i);
2419                         BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
2420                         BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
2421 #endif
2422                         // init some data strutures for compact phase
2423                         gcloads[i] = 0;
2424                         gcfilledblocks[i] = 0;
2425                         gcrequiredmems[i] = 0;
2426                 }
2427
2428                 // compact phase
2429                 bool finalcompact = false;
2430                 // initialize pointers for comapcting
2431                 struct moveHelper * orig = 
2432                         (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2433                 struct moveHelper * to = 
2434                         (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2435                 initOrig_Dst(orig, to);
2436                 int filledblocks = 0;
2437                 INTPTR heaptopptr = 0;
2438                 bool finishcompact = false;
2439                 bool iscontinue = true;
2440                 bool localcompact = true;
2441                 while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
2442                         if((!finishcompact) && iscontinue) {
2443 #ifdef DEBUG
2444                                 BAMBOO_DEBUGPRINT(0xe001);
2445                                 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2446 #endif
2447                                 finishcompact = compacthelper(orig, to, &filledblocks, 
2448                                                                           &heaptopptr, &localcompact);
2449 #ifdef DEBUG
2450                                 BAMBOO_DEBUGPRINT(0xe002);
2451                                 BAMBOO_DEBUGPRINT_REG(finishcompact);
2452                                 BAMBOO_DEBUGPRINT_REG(gctomove);
2453                                 BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
2454                                 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
2455                                 BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
2456 #endif
2457                         }
2458
2459                         if(gc_checkCoreStatus()) {
2460                                 // all cores have finished compacting
2461                                 // restore the gcstatus of all cores
2462                                 for(i = 0; i < NUMCORES; ++i) {
2463                                         gccorestatus[i] = 1;
2464                                 }
2465                                 break;
2466                         } else {
2467                                 // check if there are spare mem for pending move requires
2468                                 if(COMPACTPHASE == gcphase) {
2469 #ifdef DEBUG
2470                                         BAMBOO_DEBUGPRINT(0xe003);
2471 #endif
2472                                         resolvePendingMoveRequest();
2473 #ifdef DEBUG
2474                                         BAMBOO_DEBUGPRINT_REG(gctomove);
2475 #endif
2476                                 } else {
2477 #ifdef DEBUG
2478                                         BAMBOO_DEBUGPRINT(0xe004);
2479 #endif
2480                                         compact2Heaptop();
2481                                 }
2482                         } // if(gc_checkCoreStatus()) else ...
2483
2484                         if(gctomove) {
2485 #ifdef DEBUG
2486                                 BAMBOO_DEBUGPRINT(0xe005);
2487                                 BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
2488                                 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2489                                 BAMBOO_DEBUGPRINT_REG(gctomove);
2490 #endif
2491                                 to->ptr = gcmovestartaddr;
2492                                 to->numblocks = gcblock2fill - 1;
2493                                 to->bound = (to->numblocks==0)?
2494                                         BAMBOO_SMEM_SIZE_L:
2495                                         BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2496                                 BASEPTR(gcdstcore, to->numblocks, &(to->base));
2497                                 to->offset = to->ptr - to->base;
2498                                 to->top = (to->numblocks==0)?
2499                                         (to->offset):(to->bound-BAMBOO_SMEM_SIZE+to->offset);
2500                                 to->base = to->ptr;
2501                                 to->offset = BAMBOO_CACHE_LINE_SIZE;
2502                                 to->ptr += to->offset; // for header
2503                                 to->top += to->offset;
2504                                 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2505                                         localcompact = true;
2506                                 } else {
2507                                         localcompact = false;
2508                                 }
2509                                 gctomove = false;
2510                                 iscontinue = true;
2511                         } else if(!finishcompact) {
2512                                 // still pending
2513                                 iscontinue = false;
2514                         } // if(gctomove)
2515
2516                 } // while(COMPACTPHASE == gcphase) 
2517 #ifdef GC_DEBUG
2518                 tprintf("prepare to move large objs \n");
2519                 //dumpSMem();
2520 #endif
2521                 // move largeObjs
2522                 moveLObjs();
2523 #ifdef GC_DEBUG
2524                 tprintf("compact phase finished \n");
2525                 //dumpSMem();
2526 #endif
2527
2528                 gcphase = FLUSHPHASE;
2529                 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
2530                 for(i = 1; i < NUMCORES; ++i) {
2531                         // send start flush messages to all cores
2532                         gccorestatus[i] = 1;
2533                         send_msg_1(i, GCSTARTFLUSH);
2534                 }
2535
2536 #ifdef GC_DEBUG
2537                 tprintf("Start flush phase \n");
2538 #endif
2539                 // flush phase
2540                 flush(stackptr);
2541                 // TODO
2542                 BAMBOO_DEBUGPRINT(0xefef);
2543                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2544                 while(FLUSHPHASE == gcphase) {
2545                         // check the status of all cores
2546                         if(gc_checkCoreStatus()) {
2547                                 break;
2548                         }
2549                 } // while(FLUSHPHASE == gcphase)
2550                 gcphase = FINISHPHASE;
2551
2552 /*
2553                 // need to create free memory list  
2554                 updateFreeMemList();
2555 #ifdef GC_DEBUG
2556                 tprintf("flush phase finished \n");
2557                 //dumpSMem();
2558 #endif
2559 */
2560
2561                 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
2562                 for(i = 1; i < NUMCORES; ++i) {
2563                         // send gc finish messages to all cores
2564                         send_msg_1(i, GCFINISH);
2565                         gccorestatus[i] = 1;
2566                 }
2567 #ifdef GC_DEBUG
2568                 tprintf("gc finished \n");
2569                 dumpSMem();
2570 #endif
2571         } else {
2572                 gcprocessing = true;
2573                 gc_collect(stackptr);
2574         }
2575
2576         // invalidate all shared mem pointers
2577         bamboo_cur_msp = NULL;
2578         bamboo_smem_size = 0;
2579
2580         gcflag = false;
2581         gcprocessing = false;
2582
2583 } // void gc(struct garbagelist * stackptr)
2584
2585 #endif