#include "multicoregarbage.h"
#include "multicoreruntime.h"
#include "runtime_arch.h"
#include "SimpleHash.h"
#include "GenericHashtable.h"
#include "ObjectHash.h"
#include "GCSharedHash.h"

extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
extern int numqueues[][NUMCLASSES];

extern struct genhashtable * activetasks;
extern struct taskparamdescriptor *currtpd;

extern struct LockValue runtime_locks[MAXTASKPARAMS];
extern int runtime_locklen;

extern unsigned int gcmem_mixed_threshold;
extern unsigned int gcmem_mixed_usedmem;
struct pointerblock {
  void * ptrs[NUMPTRS];
  struct pointerblock *next;
};

struct pointerblock *gchead=NULL;
int gcheadindex=0;
struct pointerblock *gctail=NULL;
int gctailindex=0;
struct pointerblock *gctail2=NULL;
int gctailindex2=0;
struct pointerblock *gcspare=NULL;
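/* The mark queue is a singly-linked list of pointerblocks with three
 * cursors: gchead/gcheadindex is the enqueue end, gctail/gctailindex the
 * destructive dequeue end, and gctail2/gctailindex2 a second cursor for
 * non-destructive scans. One drained block is parked in gcspare so the
 * steady-state enqueue/dequeue cycle can recycle it instead of calling
 * RUNMALLOC_I/RUNFREE every NUMPTRS pointers. */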
#define NUMLOBJPTRS 20

struct lobjpointerblock {
  void * lobjs[NUMLOBJPTRS];
  int lengths[NUMLOBJPTRS];
  int hosts[NUMLOBJPTRS];
  struct lobjpointerblock *next;
  struct lobjpointerblock *prev;
};

struct lobjpointerblock *gclobjhead=NULL;
int gclobjheadindex=0;
struct lobjpointerblock *gclobjtail=NULL;
int gclobjtailindex=0;
struct lobjpointerblock *gclobjtail2=NULL;
int gclobjtailindex2=0;
struct lobjpointerblock *gclobjspare=NULL;
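/* Large objects get their own queue of doubly-linked lobjpointerblocks:
 * each entry records the object's start address, its length, and the host
 * core. The prev links exist so cacheLObjs() can walk the queue backwards
 * (dequeue3) after sorting it by address, while moveLObjs() replays it
 * forwards again via gc_lobjqueueinit4_I()/dequeue4. */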
#ifdef GC_CACHE_ADAPT
typedef struct gc_cache_revise_info {
  int orig_page_start_va;
  int orig_page_end_va;
  int orig_page_index;
  int to_page_start_va;
  int to_page_end_va;
  int to_page_index;
  unsigned int revised_sampling[NUMCORESACTIVE];
} gc_cache_revise_info_t;
gc_cache_revise_info_t gc_cache_revise_infomation;
#endif // GC_CACHE_ADAPT
// dump the whole shared mem in blocks
inline void dumpSMem() {
  int block = 0;
  int sblock = 0;
  int j = 0;
  int i = 0;
  int coren = 0;
  int x = 0;
  int y = 0;
  printf("(%x,%x) Dump shared mem: \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  // reserved blocks for sblocktbl
  printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  }
  sblock = gcreservedsb;
  bool advanceblock = false;
  // remaining shared memory
  for(i=gcbaseva; i<gcbaseva+BAMBOO_SHARED_MEM_SIZE; i+=4*16) {
    advanceblock = false;
    // compute the sblock #, block # and core coordinate (x,y)
    if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
      // finished an sblock
      if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
        if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
          // finished a block
          block++;
          advanceblock = true;
        }
      } else {
        // finished a block
        block++;
        advanceblock = true;
      }
      // compute the core that owns this block
      if(advanceblock) {
        coren = gc_block2core[block%(NUMCORES4GC*2)];
      }
      // compute core coordinate
      BAMBOO_COORDS(coren, &x, &y);
      printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
             udn_tile_coord_x(), udn_tile_coord_y(),
             block, sblock++, x, y,
             (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
    }
    j++;
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  }
  printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
} // void dumpSMem()
// should be invoked with interrupts disabled
inline void gc_enqueue_I(void *ptr) {
  BAMBOO_DEBUGPRINT(0xe601);
  BAMBOO_DEBUGPRINT_REG(ptr);
  if (gcheadindex==NUMPTRS) {
    // the current head block is full, grab a new one
    struct pointerblock * tmp;
    if (gcspare!=NULL) {
      // reuse the cached spare block
      tmp=gcspare;
      gcspare=NULL;
    } else {
      tmp=RUNMALLOC_I(sizeof(struct pointerblock));
    } // if (gcspare!=NULL)
    gchead->next=tmp;
    gchead=tmp;
    gcheadindex=0;
  } // if (gcheadindex==NUMPTRS)
  gchead->ptrs[gcheadindex++]=ptr;
  BAMBOO_DEBUGPRINT(0xe602);
} // void gc_enqueue_I(void *ptr)
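/* Usage sketch: the _I suffix marks routines that assume interrupts are
 * already disabled, so a caller running in client mode brackets them, e.g.
 *   BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 *   gc_enqueue_I(objptr);
 *   BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 * as markObj() below does. */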
// dequeue and destroy the queue
inline void * gc_dequeue_I() {
  if (gctailindex==NUMPTRS) {
    // the current tail block is drained, move to the next one
    struct pointerblock *tmp=gctail;
    gctail=gctail->next;
    gctailindex=0;
    if (gcspare!=NULL) {
      RUNFREE(tmp);
    } else {
      // keep one drained block around for reuse
      gcspare=tmp;
    } // if (gcspare!=NULL)
  } // if (gctailindex==NUMPTRS)
  return gctail->ptrs[gctailindex++];
} // void * gc_dequeue_I()
// dequeue and do not destroy the queue
inline void * gc_dequeue2_I() {
  if (gctailindex2==NUMPTRS) {
    struct pointerblock *tmp=gctail2;
    gctail2=gctail2->next;
    gctailindex2=0;
  } // if (gctailindex2==NUMPTRS)
  return gctail2->ptrs[gctailindex2++];
} // void * gc_dequeue2_I()
inline int gc_moreItems_I() {
  if ((gchead==gctail)&&(gctailindex==gcheadindex))
    return 0;
  return 1;
} // int gc_moreItems_I()

inline int gc_moreItems2_I() {
  if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
    return 0;
  return 1;
} // int gc_moreItems2_I()
// should be invoked with interrupts disabled
// enqueue a large obj: start addr & length
inline void gc_lobjenqueue_I(void *ptr,
                             int length,
                             int host) {
  BAMBOO_DEBUGPRINT(0xe901);
  if (gclobjheadindex==NUMLOBJPTRS) {
    // the current head block is full, grab a new one
    struct lobjpointerblock * tmp;
    if (gclobjspare!=NULL) {
      tmp=gclobjspare;
      gclobjspare=NULL;
    } else {
      tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
    } // if (gclobjspare!=NULL)
    gclobjhead->next=tmp;
    tmp->prev = gclobjhead;
    gclobjhead=tmp;
    gclobjheadindex=0;
  } // if (gclobjheadindex==NUMLOBJPTRS)
  gclobjhead->lobjs[gclobjheadindex]=ptr;
  gclobjhead->lengths[gclobjheadindex]=length;
  gclobjhead->hosts[gclobjheadindex++]=host;
  BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
  BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
  BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
} // void gc_lobjenqueue_I(void *ptr, int length, int host)
// dequeue and destroy the queue
inline void * gc_lobjdequeue_I(int * length,
                               int * host) {
  if (gclobjtailindex==NUMLOBJPTRS) {
    // the current tail block is drained, move to the next one
    struct lobjpointerblock *tmp=gclobjtail;
    gclobjtail=gclobjtail->next;
    gclobjtailindex=0;
    gclobjtail->prev = NULL;
    if (gclobjspare!=NULL) {
      RUNFREE(tmp);
    } else {
      gclobjspare=tmp;
      tmp->next = NULL;
      tmp->prev = NULL;
    } // if (gclobjspare!=NULL)
  } // if (gclobjtailindex==NUMLOBJPTRS)
  if(length != NULL) {
    *length = gclobjtail->lengths[gclobjtailindex];
  }
  if(host != NULL) {
    *host = (int)(gclobjtail->hosts[gclobjtailindex]);
  }
  return gclobjtail->lobjs[gclobjtailindex++];
} // void * gc_lobjdequeue_I(int * length, int * host)
inline int gc_lobjmoreItems_I() {
  if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems_I()
// dequeue and don't destroy the queue
inline void gc_lobjdequeue2_I() {
  if (gclobjtailindex2==NUMLOBJPTRS) {
    gclobjtail2=gclobjtail2->next;
    gclobjtailindex2=1;
  } else {
    gclobjtailindex2++;
  } // if (gclobjtailindex2==NUMLOBJPTRS)
} // void gc_lobjdequeue2_I()
inline int gc_lobjmoreItems2_I() {
  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems2_I()
// 'reversely' dequeue and don't destroy the queue
inline void gc_lobjdequeue3_I() {
  if (gclobjtailindex2==0) {
    gclobjtail2=gclobjtail2->prev;
    gclobjtailindex2=NUMLOBJPTRS-1;
  } else {
    gclobjtailindex2--;
  } // if (gclobjtailindex2==0)
} // void gc_lobjdequeue3_I()
inline int gc_lobjmoreItems3_I() {
  if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems3_I()
inline void gc_lobjqueueinit4_I() {
  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
} // void gc_lobjqueueinit4_I()
inline void * gc_lobjdequeue4_I(int * length,
                                int * host) {
  if (gclobjtailindex2==NUMLOBJPTRS) {
    gclobjtail2=gclobjtail2->next;
    gclobjtailindex2=0;
  } // if (gclobjtailindex2==NUMLOBJPTRS)
  if(length != NULL) {
    *length = gclobjtail2->lengths[gclobjtailindex2];
  }
  if(host != NULL) {
    *host = (int)(gclobjtail2->hosts[gclobjtailindex2]);
  }
  return gclobjtail2->lobjs[gclobjtailindex2++];
} // void * gc_lobjdequeue4_I(int * length, int * host)
inline int gc_lobjmoreItems4_I() {
  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems4_I()
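/* Summary of the four large-object traversal flavors:
 *   gc_lobjdequeue_I/gc_lobjmoreItems_I   - destructive FIFO drain;
 *   gc_lobjdequeue2_I/gc_lobjmoreItems2_I - forward scan, queue intact;
 *   gc_lobjdequeue3_I/gc_lobjmoreItems3_I - reverse scan (used by
 *     cacheLObjs() to copy from the highest address down);
 *   gc_lobjqueueinit4_I + gc_lobjdequeue4_I/gc_lobjmoreItems4_I -
 *     resettable forward scan (used by moveLObjs()). */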
INTPTR gccurr_heapbound = 0;

inline void gettype_size(void * ptr,
                         int * ttype,
                         int * tsize) {
  int type = ((int *)ptr)[0];
  int size = 0;
  if(type < NUMCLASSES) {
    // a normal object
    size = classsize[type];
  } else {
    // an array
    struct ArrayObject *ao=(struct ArrayObject *)ptr;
    int elementsize=classsize[type];
    int length=ao->___length___;
    size=sizeof(struct ArrayObject)+length*elementsize;
  } // if(type < NUMCLASSES)
  *ttype = type;
  *tsize = size;
} // void gettype_size(void * ptr, int * ttype, int * tsize)
inline bool isLarge(void * ptr,
                    int * ttype,
                    int * tsize) {
  BAMBOO_DEBUGPRINT(0xe701);
  BAMBOO_DEBUGPRINT_REG(ptr);
  // check if a pointer refers to a large object
  gettype_size(ptr, ttype, tsize);
  BAMBOO_DEBUGPRINT(*tsize);
  int bound = (BAMBOO_SMEM_SIZE);
  if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
    bound = (BAMBOO_SMEM_SIZE_L);
  }
  if((((int)ptr-gcbaseva)%(bound))==0) {
    // ptr is a start of a block
    BAMBOO_DEBUGPRINT(0xe702);
    BAMBOO_DEBUGPRINT(1);
    return true;
  }
  if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
    // it crosses the boundary of the current block
    BAMBOO_DEBUGPRINT(0xe703);
    BAMBOO_DEBUGPRINT(1);
    return true;
  }
  BAMBOO_DEBUGPRINT(0);
  return false;
} // bool isLarge(void * ptr, int * ttype, int * tsize)
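/* Example: an object is treated as large when it either starts exactly on
 * a block boundary or would spill past the current block's bound. With a
 * 16KB block (an illustrative figure, not the configured BAMBOO_SMEM_SIZE),
 * a 20KB object always satisfies one of the two tests, so it is routed to
 * the separate large-object path. */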
inline int hostcore(void * ptr) {
  // check the host core of ptr
  int host = 0;
  RESIDECORE(ptr, &host);
  BAMBOO_DEBUGPRINT(0xedd0);
  BAMBOO_DEBUGPRINT_REG(ptr);
  BAMBOO_DEBUGPRINT_REG(host);
  return host;
} // int hostcore(void * ptr)
inline void cpu2coords(int coren,
                       int * x,
                       int * y) {
  *x = bamboo_cpu2coords[2*coren];
  *y = bamboo_cpu2coords[2*coren+1];
} // void cpu2coords(...)

inline bool isLocal(void * ptr) {
  // check if a pointer is in the shared heap of this core
  return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
} // bool isLocal(void * ptr)
inline bool gc_checkCoreStatus_I() {
  bool allStall = true;
  for(int i = 0; i < NUMCORES4GC; ++i) {
    if(gccorestatus[i] != 0) {
      allStall = false;
      break;
    } // if(gccorestatus[i] != 0)
  } // for(i = 0; i < NUMCORES4GC; ++i)
  return allStall;
} // bool gc_checkCoreStatus_I()

inline bool gc_checkAllCoreStatus_I() {
  bool allStall = true;
  for(int i = 0; i < NUMCORESACTIVE; ++i) {
    if(gccorestatus[i] != 0) {
      allStall = false;
      break;
    } // if(gccorestatus[i] != 0)
  } // for(i = 0; i < NUMCORESACTIVE; ++i)
  return allStall;
} // bool gc_checkAllCoreStatus_I()
inline void checkMarkStatue() {
  BAMBOO_DEBUGPRINT(0xee01);
  int i;
  if((!waitconfirm) ||
     (waitconfirm && (numconfirm == 0))) {
    BAMBOO_DEBUGPRINT(0xee02);
    int entry_index = 0;
    if(waitconfirm) {
      // phase 2: fill the other entry of the counter arrays
      entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
    } else {
      // phase 1
      entry_index = gcnumsrobjs_index;
    }
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
    gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
    // check the status of all cores
    bool allStall = gc_checkAllCoreStatus_I();
    BAMBOO_DEBUGPRINT(0xee03);
    BAMBOO_DEBUGPRINT_REG(allStall);
    if(allStall) {
      BAMBOO_DEBUGPRINT(0xee04);
      if(!waitconfirm) {
        BAMBOO_DEBUGPRINT(0xee05);
        // the first time found all cores stall
        // send out status confirm msg to all other cores
        // reset the corestatus array too
        gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
        waitconfirm = true;
        numconfirm = NUMCORESACTIVE - 1;
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        for(i = 1; i < NUMCORESACTIVE; ++i) {
          gccorestatus[i] = 1;
          // send mark phase finish confirm request msg to core i
          send_msg_1(i, GCMARKCONFIRM, false);
        } // for(i = 1; i < NUMCORESACTIVE; ++i)
      } else {
        // check if the sums of sent objs and received objs are the same
        // yes -> check if the info is the latest; no -> go on executing
        int sumsendobj = 0;
        for(i = 0; i < NUMCORESACTIVE; ++i) {
          sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
        } // for(i = 0; i < NUMCORESACTIVE; ++i)
        BAMBOO_DEBUGPRINT(0xee06);
        BAMBOO_DEBUGPRINT_REG(sumsendobj);
        for(i = 0; i < NUMCORESACTIVE; ++i) {
          sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
        } // for(i = 0; i < NUMCORESACTIVE; ++i)
        BAMBOO_DEBUGPRINT(0xee07);
        BAMBOO_DEBUGPRINT_REG(sumsendobj);
        if(0 == sumsendobj) {
          // Check if there are changes of the numsendobjs or numreceiveobjs
          // on each core between the two phases
          bool ischanged = false;
          for(i = 0; i < NUMCORESACTIVE; ++i) {
            if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
               (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
              ischanged = true;
              break;
            }
          } // for(i = 0; i < NUMCORESACTIVE; ++i)
          BAMBOO_DEBUGPRINT(0xee08);
          BAMBOO_DEBUGPRINT_REG(ischanged);
          if(!ischanged) {
            BAMBOO_DEBUGPRINT(0xee09);
            // all the core status info is up to date
            // stop the mark phase
            gcphase = COMPACTPHASE;
            // restore the gcstatus for all cores
            for(i = 0; i < NUMCORESACTIVE; ++i) {
              gccorestatus[i] = 1;
            } // for(i = 0; i < NUMCORESACTIVE; ++i)
          } else {
            // failed in phase 2, flip the entries and sample again
            waitconfirm = false;
            gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
          }
        } else {
          // There were changes between phase 1 and phase 2, can not decide
          // whether the mark phase has been finished
          waitconfirm = false;
          // As it fails in phase 2, flip the entries
          gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
        } // if(0 == sumsendobj) else ...
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      } // if(!waitconfirm) else ...
    } else {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    } // if(allStall) else ...
  } // if((!waitconfirm)...
  BAMBOO_DEBUGPRINT(0xee0a);
} // void checkMarkStatue()
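/* Termination of marking is a two-round snapshot protocol: round one
 * records each core's send/receive counters under gcnumsrobjs_index,
 * round two (after GCMARKCONFIRM) fills the other entry. The phase only
 * advances to COMPACTPHASE when all cores report stalled, sent minus
 * received messages is zero, and both snapshots agree, i.e. no mark
 * message was still in flight between the rounds. */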
inline void initGC() {
  int i;
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    for(i = 0; i < NUMCORES4GC; ++i) {
      gccorestatus[i] = 1;
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
      gcloads[i] = 0;
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
      gcstopblock[i] = 0;
    } // for(i = 0; i < NUMCORES4GC; ++i)
    for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
      gccorestatus[i] = 1;
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    }
  } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;

  // initialize the mark queue
  if (gchead==NULL) {
    gcheadindex=gctailindex=gctailindex2 = 0;
    gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
  } else {
    gctailindex = gctailindex2 = gcheadindex;
    gctail = gctail2 = gchead;
  }

  // initialize the large obj queues
  if (gclobjhead==NULL) {
    gclobjheadindex = 0;
    gclobjtailindex = 0;
    gclobjtailindex2 = 0;
    gclobjhead=gclobjtail=gclobjtail2=
      RUNMALLOC(sizeof(struct lobjpointerblock));
  } else {
    gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
    gclobjtail = gclobjtail2 = gclobjhead;
  }
  gclobjhead->next = gclobjhead->prev = NULL;

#ifdef LOCALHASHTBL_TEST
  freeRuntimeHash(gcpointertbl);
  gcpointertbl = allocateRuntimeHash(20);
#else
  mgchashreset(gcpointertbl);
#endif

  freeMGCHash(gcforwardobjtbl);
  gcforwardobjtbl = allocateMGCHash(20, 3);

  // initialize the mapping info related structures
  if((BAMBOO_NUM_OF_CORE < NUMCORES4GC) && (gcsharedptbl != NULL)) {
    // Never free the shared hash table, just reset it
    mgcsharedhashReset(gcsharedptbl);
  }

  gc_num_livespace = 0;
  gc_num_freespace = 0;
  gc_num_lobjspace = 0;
  gc_num_forwardobj = 0;
  gc_num_profiles = NUMCORESACTIVE - 1;
} // void initGC()
// compute load balance for all cores
inline int loadbalance(int * heaptop) {
  // compute load balance
  int i;

  // get the total loads
  int tloads = gcloads[STARTUPCORE];
  for(i = 1; i < NUMCORES4GC; i++) {
    tloads += gcloads[i];
  }
  *heaptop = gcbaseva + tloads;

  BAMBOO_DEBUGPRINT(0xdddd);
  BAMBOO_DEBUGPRINT_REG(tloads);
  BAMBOO_DEBUGPRINT_REG(*heaptop);

  int b = 0;
  BLOCKINDEX(*heaptop, &b);
  int numbpc = b / NUMCORES4GC; // num of blocks per core
  BAMBOO_DEBUGPRINT_REG(b);
  BAMBOO_DEBUGPRINT_REG(numbpc);
  gctopblock = b;
  RESIDECORE(heaptop, &gctopcore);
  BAMBOO_DEBUGPRINT_REG(gctopcore);
  return numbpc;
} // int loadbalance(int * heaptop)
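/* Worked example (illustrative numbers only): with NUMCORES4GC = 4 and
 * gcloads summing to 10 blocks' worth of live data, *heaptop lands in
 * block index b = 10, so numbpc = 10/4 = 2 blocks per core, and gctopcore
 * becomes the core that owns the block containing the new heap top. */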
inline bool cacheLObjs() {
  // check the total mem size needed for the large objs
  unsigned long long sumsize = 0;
  int size = 0;
  BAMBOO_DEBUGPRINT(0xe801);
  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
  int tmp_lobj = 0;
  int tmp_len = 0;
  int tmp_host = 0;
  // compute total mem size required and sort the lobjs in ascending order
  while(gc_lobjmoreItems2_I()) {
    gc_lobjdequeue2_I();
    tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
    tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
    tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
    sumsize += tmp_len;
    BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
    BAMBOO_DEBUGPRINT_REG(tmp_len);
    BAMBOO_DEBUGPRINT_REG(sumsize);
    int i = gclobjtailindex2-1;
    struct lobjpointerblock * tmp_block = gclobjtail2;
    // find the place to insert
    while(true) {
      if(i == 0) {
        if(tmp_block->prev == NULL) {
          break;
        }
        if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
          tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
          tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
          tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
          tmp_block = tmp_block->prev;
          i = NUMLOBJPTRS-1;
        } else {
          break;
        } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj)
      } else {
        if(tmp_block->lobjs[i-1] > tmp_lobj) {
          tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
          tmp_block->lengths[i] = tmp_block->lengths[i-1];
          tmp_block->hosts[i] = tmp_block->hosts[i-1];
          i--;
        } else {
          break;
        } // if(tmp_block->lobjs[i-1] > tmp_lobj)
      } // if(i == 0) else {}
    } // while(true)
    // insert it
    if(i != gclobjtailindex2 - 1) {
      tmp_block->lobjs[i] = tmp_lobj;
      tmp_block->lengths[i] = tmp_len;
      tmp_block->hosts[i] = tmp_host;
    }
  } // while(gc_lobjmoreItems2())

  gc_num_lobjspace = sumsize;

  // check if there is enough space to cache these large objs
  INTPTR dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - sumsize;
  if((unsigned long long)gcheaptop > (unsigned long long)dst) {
    // do not have enough room to cache large objs
    BAMBOO_DEBUGPRINT(0xe802);
    BAMBOO_DEBUGPRINT_REG(dst);
    BAMBOO_DEBUGPRINT_REG(gcheaptop);
    BAMBOO_DEBUGPRINT_REG(sumsize);
    return false;
  }
  BAMBOO_DEBUGPRINT(0xe803);
  BAMBOO_DEBUGPRINT_REG(dst);
  BAMBOO_DEBUGPRINT_REG(gcheaptop);

  gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
  // cache the large objs to the top of the shared heap
  dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  while(gc_lobjmoreItems3_I()) {
    gc_lobjdequeue3_I();
    size = gclobjtail2->lengths[gclobjtailindex2];
    // set the mark field to COMPACTED, indicating that this obj has been
    // moved and needs to be flushed
    ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[6] = COMPACTED;
    dst -= size;
    if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) {
      memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    } else {
      memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    }
    BAMBOO_DEBUGPRINT(0x804);
    BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
    BAMBOO_DEBUGPRINT(dst);
    BAMBOO_DEBUGPRINT_REG(size);
    BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
    BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
  }
  return true;
} // bool cacheLObjs()
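/* cacheLObjs() keeps the pending large objects sorted by ascending start
 * address and then copies them from the highest address downwards into the
 * top of the shared heap. Copying in that order means a source object can
 * only be overwritten by its own copy, which the memmove branch above
 * already tolerates; afterwards gcheaptop marks where the cached large
 * objects begin. */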
// update the bamboo_smemtbl to record current shared mem usage
void updateSmemTbl(int coren,
                   int localtop) {
  int ltopcore = 0;
  int bound = BAMBOO_SMEM_SIZE_L;
  BLOCKINDEX(localtop, &ltopcore);
  if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
    bound = BAMBOO_SMEM_SIZE;
  }
  int load = (localtop-gcbaseva)%bound;
  int i = 0;
  int j = 0;
  int toset = 0;
  do {
    toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
    if(toset < ltopcore) {
      bamboo_smemtbl[toset]=
        (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
    } else if(toset == ltopcore) {
      bamboo_smemtbl[toset] = load;
      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
      break;
    } else {
      break;
    }
    i++;
    if(i == 2) {
      i = 0;
      j++;
    }
  } while(true);
} // void updateSmemTbl(int, int)
inline void moveLObjs() {
  BAMBOO_DEBUGPRINT(0xea01);
  // update the gcmem_mixed_usedmem
  gcmem_mixed_usedmem = 0;
  // zero out the smemtbl
  BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
  // find current heap top
  // flush all gcloads to indicate the real heap top on each core
  // previously it represented the next available ptr on a core
  if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L)))
     && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
    // edge of a block, check if this is exactly the heaptop
    BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
    gcloads[0]+=(gcfilledblocks[0]>1 ?
                 (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
  }
  updateSmemTbl(0, gcloads[0]);
  BAMBOO_DEBUGPRINT(0xea02);
  BAMBOO_DEBUGPRINT_REG(gcloads[0]);
  BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
  for(int i = 1; i < NUMCORES4GC; i++) {
    BAMBOO_DEBUGPRINT(0xf000+i);
    BAMBOO_DEBUGPRINT_REG(gcloads[i]);
    BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
    if((gcfilledblocks[i] > 0)
       && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
      // edge of a block, check if this is exactly the heaptop
      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
      gcloads[i] +=
        (gcfilledblocks[i]>1 ? (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
    }
    updateSmemTbl(i, gcloads[i]);
    BAMBOO_DEBUGPRINT_REG(gcloads[i]);
  } // for(int i = 1; i < NUMCORES4GC; i++)

  // find current heap top
  // TODO: a bug here: when using local allocation, directly moving large
  // objects to the highest free chunk might not be memory-efficient
  int tmpheaptop = 0;
  int size = 0;
  int bound = 0;
  int i = 0;
  for(i = gcnumblock-1; i >= 0; i--) {
    if(bamboo_smemtbl[i] > 0) {
      break;
    }
  }
  if(i == -1) {
    tmpheaptop = gcbaseva;
  } else {
    tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
                 (BAMBOO_SMEM_SIZE_L*i) :
                 (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
  }

  // move large objs from gcheaptop to tmpheaptop
  // write the header first
  unsigned int tomove = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - gcheaptop;
  gcmem_mixed_usedmem += tomove;
  BAMBOO_DEBUGPRINT(0xea03);
  BAMBOO_DEBUGPRINT_REG(tomove);
  BAMBOO_DEBUGPRINT_REG(tmpheaptop);
  BAMBOO_DEBUGPRINT_REG(gcheaptop);
  // flush the sbstarttbl
  BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
      (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-gcreservedsb)*sizeof(INTPTR));
  if(tomove == 0) {
    gcheaptop = tmpheaptop;
  } else {
    // check how many blocks it crosses
    int remain = tmpheaptop-gcbaseva;
    int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb; // number of the sblock
    int b = 0; // number of the block
    BLOCKINDEX(tmpheaptop, &b);
    // check the remaining space in this block
    bound = (BAMBOO_SMEM_SIZE);
    if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
      bound = (BAMBOO_SMEM_SIZE_L);
    }
    remain = bound - remain%bound;
    BAMBOO_DEBUGPRINT(0xea04);
    int isize = 0;
    int host = 0;
    int ptr = 0;
    int cpysize = 0;
    int base = tmpheaptop;
    remain -= BAMBOO_CACHE_LINE_SIZE;
    tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
    gc_lobjqueueinit4_I();
    while(gc_lobjmoreItems4_I()) {
      ptr = (int)(gc_lobjdequeue4_I(&size, &host));
      ALIGNSIZE(size, &isize);
      if(remain < isize) {
        // this object crosses blocks
        if(cpysize > 0) {
          // close current block, fill its header
          BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
          *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
          bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE; // add the size of the header
          cpysize = 0;
          base = tmpheaptop;
          if(remain == 0) {
            remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                     BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
          }
          remain -= BAMBOO_CACHE_LINE_SIZE;
          tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
          BLOCKINDEX(tmpheaptop, &b);
          sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
        } // if(cpysize > 0)

        // move the large obj
        if((int)gcheaptop < (int)(tmpheaptop)+size) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the remaining space with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        BAMBOO_DEBUGPRINT(0xea05);
        BAMBOO_DEBUGPRINT_REG(gcheaptop);
        BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        BAMBOO_DEBUGPRINT_REG(size);
        BAMBOO_DEBUGPRINT_REG(isize);
        BAMBOO_DEBUGPRINT_REG(base);
        gcheaptop += size;
        // cache the mapping info anyway
        BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
        RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
#else
        mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
#endif
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        BAMBOO_DEBUGPRINT(0xcdca);
        BAMBOO_DEBUGPRINT_REG(ptr);
        BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        if(host != BAMBOO_NUM_OF_CORE) {
          // send the original host core the mapping info
          send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
          BAMBOO_DEBUGPRINT(0xcdcb);
          BAMBOO_DEBUGPRINT_REG(ptr);
          BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        } // if(host != BAMBOO_NUM_OF_CORE)

        // set the gcsbstarttbl and bamboo_smemtbl
        int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
        for(int k = 1; k < tmpsbs; k++) {
          gcsbstarttbl[sb+k] = (INTPTR)(-1);
        }
        sb += tmpsbs;
        bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        BLOCKINDEX(tmpheaptop-1, &tmpsbs);
        for(; b < tmpsbs; b++) {
          bamboo_smemtbl[b] = bound;
          if(b==NUMCORES4GC-1) {
            bound = BAMBOO_SMEM_SIZE;
          }
        }
        if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
          gcsbstarttbl[sb] = (INTPTR)(-1);
          remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                   BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
          bamboo_smemtbl[b] = bound;
        } else {
          gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
          remain = tmpheaptop-gcbaseva;
          bamboo_smemtbl[b] = remain%bound;
          remain = bound - bamboo_smemtbl[b];
        } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...

        // close current block and fill the header
        BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
        *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
        tmpheaptop += isize;
        base = tmpheaptop;
        if(remain == BAMBOO_CACHE_LINE_SIZE) {
          // fill with 0 in case
          BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
        }
        remain -= BAMBOO_CACHE_LINE_SIZE;
        tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
      } else {
        remain -= isize;
        // move the large obj
        if((int)gcheaptop < (int)(tmpheaptop)+size) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the remaining space with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        BAMBOO_DEBUGPRINT(0xea06);
        BAMBOO_DEBUGPRINT_REG(gcheaptop);
        BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        BAMBOO_DEBUGPRINT_REG(size);
        BAMBOO_DEBUGPRINT_REG(isize);
        gcheaptop += size;
        cpysize += isize;
        // cache the mapping info anyway
        BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
        RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
#else
        mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
#endif
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        BAMBOO_DEBUGPRINT(0xcdcc);
        BAMBOO_DEBUGPRINT_REG(ptr);
        BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
        if(host != BAMBOO_NUM_OF_CORE) {
          // send the original host core the mapping info
          send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
          BAMBOO_DEBUGPRINT(0xcdcd);
          BAMBOO_DEBUGPRINT_REG(ptr);
          BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        } // if(host != BAMBOO_NUM_OF_CORE)
        tmpheaptop += isize;

        // update bamboo_smemtbl
        bamboo_smemtbl[b] += isize;
      } // if(remain < isize) else ...
    } // while(gc_lobjmoreItems4_I())
    if(cpysize > 0) {
      // close current block, fill the header
      BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
      *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE; // add the size of the header
    } else {
      tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
    }
    gcheaptop = tmpheaptop;
  } // if(tomove == 0) else ...

  BAMBOO_DEBUGPRINT(0xea07);
  BAMBOO_DEBUGPRINT_REG(gcheaptop);

  bamboo_free_block = 0;
  int tbound = 0;
  do {
    tbound = (bamboo_free_block<NUMCORES4GC) ?
             BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
    if(bamboo_smemtbl[bamboo_free_block] == tbound) {
      bamboo_free_block++;
    } else {
      // the first non-full partition
      break;
    }
  } while(true);

  // check how much live space there is
  gc_num_livespace = 0;
  for(int tmpi = 0; tmpi < gcnumblock; tmpi++) {
    gc_num_livespace += bamboo_smemtbl[tmpi];
  }
  gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace;
  BAMBOO_DEBUGPRINT(0xea08);
  BAMBOO_DEBUGPRINT_REG(gcheaptop);
} // void moveLObjs()
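/* Layout invariant maintained here and in the compaction code: each filled
 * region starts with a BAMBOO_CACHE_LINE_SIZE header whose first word holds
 * the bytes used in the region (payload plus header); a zero word marks the
 * end of a block and -2 bytes are alignment padding, which is exactly what
 * nextSBlock() and moveobj() test for while scanning. */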
inline void markObj(void * objptr) {
  if(objptr == NULL) {
    return;
  }
  if(ISSHAREDOBJ(objptr)) {
    int host = hostcore(objptr);
    if(BAMBOO_NUM_OF_CORE == host) {
      // on this core
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      if(((int *)objptr)[6] == INIT) {
        // this is the first time that this object is discovered,
        // set the flag as DISCOVERED
        ((int *)objptr)[6] |= DISCOVERED;
        BAMBOO_CACHE_FLUSH_LINE(objptr);
        gc_enqueue_I(objptr);
      }
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    } else {
      // a remote obj
      BAMBOO_DEBUGPRINT(0xbbbb);
      BAMBOO_DEBUGPRINT_REG(host);
      BAMBOO_DEBUGPRINT_REG(objptr);
      // check if this obj has been forwarded
      if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
        // send a msg to host informing that objptr is active
        send_msg_2(host, GCMARKEDOBJ, objptr, false);
#ifdef GC_PROFILE
        gc_num_forwardobj++;
#endif // GC_PROFILE
        gcself_numsendobjs++;
        MGCHashadd(gcforwardobjtbl, (int)objptr);
      }
    }
  } else {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gc_enqueue_I(objptr);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // if(ISSHAREDOBJ(objptr))
} // void markObj(void * objptr)
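/* Marking is owner-based: only the host core flips INIT->DISCOVERED and
 * enqueues the object, so the flag word is never raced across cores. A
 * remote core instead sends one GCMARKEDOBJ message and records the
 * address in gcforwardobjtbl, so later references to the same object do
 * not flood the host with duplicate messages. */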
// enqueue root objs
inline void tomark(struct garbagelist * stackptr) {
  if(MARKPHASE != gcphase) {
    BAMBOO_DEBUGPRINT_REG(gcphase);
    BAMBOO_EXIT(0xb101);
  }
  gcbusystatus = true;

  int i,j;
  // enqueue current stack
  while(stackptr!=NULL) {
    BAMBOO_DEBUGPRINT(0xe501);
    BAMBOO_DEBUGPRINT_REG(stackptr->size);
    BAMBOO_DEBUGPRINT_REG(stackptr->next);
    BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
    for(i=0; i<stackptr->size; i++) {
      if(stackptr->array[i] != NULL) {
        markObj(stackptr->array[i]);
      }
    }
    stackptr=stackptr->next;
  }

  BAMBOO_DEBUGPRINT(0xe503);
  // enqueue objectsets
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    for(i=0; i<NUMCLASSES; i++) {
      struct parameterwrapper ** queues =
        objectqueues[BAMBOO_NUM_OF_CORE][i];
      int length = numqueues[BAMBOO_NUM_OF_CORE][i];
      for(j = 0; j < length; ++j) {
        struct parameterwrapper * parameter = queues[j];
        struct ObjectHash * set=parameter->objectset;
        struct ObjectNode * ptr=set->listhead;
        while(ptr!=NULL) {
          markObj((void *)ptr->key);
          ptr=ptr->lnext;
        }
      }
    }
  }

  // enqueue current task descriptor
  if(currtpd != NULL) {
    BAMBOO_DEBUGPRINT(0xe504);
    for(i=0; i<currtpd->numParameters; i++) {
      markObj(currtpd->parameterArray[i]);
    }
  }

  BAMBOO_DEBUGPRINT(0xe505);
  // enqueue active tasks
  if(activetasks != NULL) {
    struct genpointerlist * ptr=activetasks->list;
    while(ptr!=NULL) {
      struct taskparamdescriptor *tpd=ptr->src;
      for(i=0; i<tpd->numParameters; i++) {
        markObj(tpd->parameterArray[i]);
      }
      ptr=ptr->inext;
    }
  }

  BAMBOO_DEBUGPRINT(0xe506);
  // enqueue cached transferred objs
  struct QueueItem * tmpobjptr = getHead(&objqueue);
  while(tmpobjptr != NULL) {
    struct transObjInfo * objInfo =
      (struct transObjInfo *)(tmpobjptr->objectptr);
    markObj(objInfo->objptr);
    tmpobjptr = getNextQueueItem(tmpobjptr);
  }

  BAMBOO_DEBUGPRINT(0xe507);
  // enqueue cached objs to be transferred
  struct QueueItem * item = getHead(totransobjqueue);
  while(item != NULL) {
    struct transObjInfo * totransobj =
      (struct transObjInfo *)(item->objectptr);
    markObj(totransobj->objptr);
    item = getNextQueueItem(item);
  } // while(item != NULL)

  BAMBOO_DEBUGPRINT(0xe508);
  // enqueue lock related info
  for(i = 0; i < runtime_locklen; ++i) {
    markObj((void *)(runtime_locks[i].redirectlock));
    if(runtime_locks[i].value != NULL) {
      markObj((void *)(runtime_locks[i].value));
    }
  }
} // void tomark(struct garbagelist * stackptr)
inline void mark(bool isfirst,
                 struct garbagelist * stackptr) {
  if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed01);
  if(isfirst) {
    if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed02);
    // enqueue root objs
    tomark(stackptr);
    gccurr_heaptop = 0; // record the size of all active objs in this core
                        // aligned but does not consider block boundaries
    gcmarkedptrbound = 0;
  }
  if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed03);

  int isize = 0;
  bool checkfield = true;
  bool sendStall = false;
  // mark phase
  while(MARKPHASE == gcphase) {
    if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed04);
    while(true) {
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      bool hasItems = gc_moreItems2_I();
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      if(!hasItems) {
        break;
      }
      BAMBOO_DEBUGPRINT(0xed05);

      gcbusystatus = true;
      checkfield = true;
      void * ptr = gc_dequeue2_I();
      BAMBOO_DEBUGPRINT_REG(ptr);
      int size = 0;
      int type = 0;
      // check if it is a shared obj
      if(ISSHAREDOBJ(ptr)) {
        // a shared obj, check if it is a local obj on this core
        int host = hostcore(ptr);
        bool islocal = (host == BAMBOO_NUM_OF_CORE);
        if(islocal) {
          bool isnotmarked = ((((int *)ptr)[6] & DISCOVERED) != 0);
          if(isLarge(ptr, &type, &size) && isnotmarked) {
            // ptr is a large object and not marked or enqueued
            BAMBOO_DEBUGPRINT(0xecec);
            BAMBOO_DEBUGPRINT_REG(ptr);
            BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
            BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
            gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
            BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
            // mark this obj
            ((int *)ptr)[6] = ((int *)ptr)[6] & (~DISCOVERED) | MARKED;
            BAMBOO_CACHE_FLUSH_LINE(ptr);
          } else if(isnotmarked) {
            // ptr is an unmarked active object on this core
            ALIGNSIZE(size, &isize);
            gccurr_heaptop += isize;
            BAMBOO_DEBUGPRINT(0xaaaa);
            BAMBOO_DEBUGPRINT_REG(ptr);
            BAMBOO_DEBUGPRINT_REG(isize);
            BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
            // mark this obj
            ((int *)ptr)[6] = ((int *)ptr)[6] & (~DISCOVERED) | MARKED;
            BAMBOO_CACHE_FLUSH_LINE(ptr);

            if(ptr + size > gcmarkedptrbound) {
              gcmarkedptrbound = ptr + size;
            } // if(ptr + size > gcmarkedptrbound)
          } else {
            // ptr is not an active obj or has been marked
            checkfield = false;
          } // if(isLarge(ptr, &type, &size)) else ...
        } /* can never reach here
          else {
            // a remote obj
            if(BAMBOO_NUM_OF_CORE == 0) {
              BAMBOO_DEBUGPRINT(0xbbbb);
              BAMBOO_DEBUGPRINT_REG(host);
              BAMBOO_DEBUGPRINT_REG(ptr);
            }
            // check if this obj has been forwarded
            if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
              // send a msg to host informing that ptr is active
              send_msg_2(host, GCMARKEDOBJ, ptr, false);
              gcself_numsendobjs++;
              MGCHashadd(gcforwardobjtbl, (int)ptr);
            }
          }// if(isLocal(ptr)) else ...*/
      } // if(ISSHAREDOBJ(ptr))
      BAMBOO_DEBUGPRINT(0xed06);

      if(checkfield) {
        // scan all pointers in ptr
        unsigned INTPTR * pointer;
        pointer=pointerarray[type];
        if (pointer==0) {
          /* Array of primitives */
          /* Do nothing */
        } else if (((INTPTR)pointer)==1) {
          /* Array of pointers */
          struct ArrayObject *ao=(struct ArrayObject *) ptr;
          int length=ao->___length___;
          int j;
          for(j=0; j<length; j++) {
            void * objptr =
              ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
            markObj(objptr);
          }
        } else {
          INTPTR size=pointer[0];
          int i;
          for(i=1; i<=size; i++) {
            unsigned int offset=pointer[i];
            void * objptr=*((void **)(((char *)ptr)+offset));
            markObj(objptr);
          }
        } // if (pointer==0) else if ... else ...
      } // if(checkfield)
    } // while(gc_moreItems2())
    BAMBOO_DEBUGPRINT(0xed07);

    gcbusystatus = false;
    // send mark finish msg to core coordinator
    if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
      BAMBOO_DEBUGPRINT(0xed08);
      gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
      gcnumsendobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
      gcnumreceiveobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=
        gcself_numreceiveobjs;
      gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
    } else {
      if(!sendStall) {
        BAMBOO_DEBUGPRINT(0xed09);
        send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
                   gcself_numsendobjs, gcself_numreceiveobjs, false);
        sendStall = true;
      }
    } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
    BAMBOO_DEBUGPRINT(0xed0a);

    if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
      BAMBOO_DEBUGPRINT(0xed0b);
      checkMarkStatue();
    } // if(BAMBOO_NUM_OF_CORE == STARTUPCORE)
  } // while(MARKPHASE == gcphase)
} // void mark(bool isfirst, struct garbagelist * stackptr)
inline void compact2Heaptophelper_I(int coren,
                                    int* p,
                                    int* numblocks,
                                    int* remain) {
  int b = 0;
  int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
  if(STARTUPCORE == coren) {
    gctomove = true;
    gcmovestartaddr = *p;
    gcdstcore = gctopcore;
    gcblock2fill = *numblocks + 1;
  } else {
    send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
  }
  BAMBOO_DEBUGPRINT_REG(coren);
  BAMBOO_DEBUGPRINT_REG(gctopcore);
  BAMBOO_DEBUGPRINT_REG(*p);
  BAMBOO_DEBUGPRINT_REG(*numblocks+1);
  if(memneed < *remain) {
    BAMBOO_DEBUGPRINT(0xd104);
    *p = *p + memneed;
    gcrequiredmems[coren] = 0;
    gcloads[gctopcore] += memneed;
    *remain = *remain - memneed;
  } else {
    BAMBOO_DEBUGPRINT(0xd105);
    // next available block
    *p = *p + *remain;
    gcfilledblocks[gctopcore] += 1;
    int newbase = 0;
    BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
    gcloads[gctopcore] = newbase;
    gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
    gcstopblock[gctopcore]++;
    gctopcore = NEXTTOPCORE(gctopblock);
    gctopblock++;
    *numblocks = gcstopblock[gctopcore];
    *p = gcloads[gctopcore];
    BLOCKINDEX(*p, &b);
    *remain=(b<NUMCORES4GC) ?
             ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
           : ((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
    BAMBOO_DEBUGPRINT(0xd106);
    BAMBOO_DEBUGPRINT_REG(gctopcore);
    BAMBOO_DEBUGPRINT_REG(*p);
    BAMBOO_DEBUGPRINT_REG(b);
    BAMBOO_DEBUGPRINT_REG(*remain);
  } // if(memneed < *remain)
  gcmovepending--;
} // void compact2Heaptophelper_I(int, int*, int*, int*)
inline void compact2Heaptop() {
  // no cores with spare mem and some cores are blocked with pending move
  // find the current heap top and make them move to the heap top
  int p;
  int numblocks = gcfilledblocks[gctopcore];
  p = gcloads[gctopcore];
  int b = 0;
  BLOCKINDEX(p, &b);
  int remain = (b<NUMCORES4GC) ?
               ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
             : ((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
  // check if the top core finishes
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  if(gccorestatus[gctopcore] != 0) {
    BAMBOO_DEBUGPRINT(0xd101);
    BAMBOO_DEBUGPRINT_REG(gctopcore);
    // let the top core finish its own work first
    compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    return;
  }
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

  BAMBOO_DEBUGPRINT(0xd102);
  BAMBOO_DEBUGPRINT_REG(gctopcore);
  BAMBOO_DEBUGPRINT_REG(p);
  BAMBOO_DEBUGPRINT_REG(b);
  BAMBOO_DEBUGPRINT_REG(remain);
  for(int i = 0; i < NUMCORES4GC; i++) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
      BAMBOO_DEBUGPRINT(0xd103);
      compact2Heaptophelper_I(i, &p, &numblocks, &remain);
      if(gccorestatus[gctopcore] != 0) {
        BAMBOO_DEBUGPRINT(0xd101);
        BAMBOO_DEBUGPRINT_REG(gctopcore);
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        // the top core is not free now
        return;
      }
    } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // for(i = 0; i < NUMCORES4GC; i++)
  BAMBOO_DEBUGPRINT(0xd106);
} // void compact2Heaptop()
inline void resolvePendingMoveRequest() {
  BAMBOO_DEBUGPRINT(0xeb01);
  BAMBOO_DEBUGPRINT(0xeeee);
  for(int k = 0; k < NUMCORES4GC; k++) {
    BAMBOO_DEBUGPRINT(0xf000+k);
    BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
    BAMBOO_DEBUGPRINT_REG(gcloads[k]);
    BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
    BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
  }
  BAMBOO_DEBUGPRINT(0xffff);

  int i;
  int j;
  bool nosparemem = true;
  bool haspending = false;
  bool hasrunning = false;
  bool noblock = false;
  int dstcore = 0;    // the core that needs spare mem
  int sourcecore = 0; // the core that has spare mem
  for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
    if(nosparemem) {
      // check if there are cores with spare mem
      if(gccorestatus[i] == 0) {
        // finished working, check if it still has spare mem
        if(gcfilledblocks[i] < gcstopblock[i]) {
          // still has spare mem
          nosparemem = false;
          sourcecore = i;
        } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
      }
      i++;
    } // if(nosparemem)
    if(!haspending) {
      if(gccorestatus[j] != 0) {
        // not finished, check if it has pending move requests
        if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
          dstcore = j;
          haspending = true;
        } else {
          hasrunning = true;
        } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
      } // if(gccorestatus[i] == 0) else ...
      j++;
    } // if(!haspending)
    if(!nosparemem && haspending) {
      // found a match: assign the spare mem to the pending core
      int tomove = 0;
      int startaddr = 0;
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
                                                 gcrequiredmems[dstcore],
                                                 &tomove,
                                                 &startaddr);
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      BAMBOO_DEBUGPRINT(0xeb02);
      BAMBOO_DEBUGPRINT_REG(sourcecore);
      BAMBOO_DEBUGPRINT_REG(dstcore);
      BAMBOO_DEBUGPRINT_REG(startaddr);
      BAMBOO_DEBUGPRINT_REG(tomove);
      if(STARTUPCORE == dstcore) {
        BAMBOO_DEBUGPRINT(0xeb03);
        gcdstcore = sourcecore;
        gctomove = true;
        gcmovestartaddr = startaddr;
        gcblock2fill = tomove;
      } else {
        BAMBOO_DEBUGPRINT(0xeb04);
        send_msg_4(dstcore, GCMOVESTART, sourcecore,
                   startaddr, tomove, false);
      }
      gcmovepending--;
      nosparemem = true;
      haspending = false;
      noblock = true;
    }
  } // for(i = 0; i < NUMCORES4GC; i++)
  BAMBOO_DEBUGPRINT(0xcccc);
  BAMBOO_DEBUGPRINT_REG(hasrunning);
  BAMBOO_DEBUGPRINT_REG(haspending);
  BAMBOO_DEBUGPRINT_REG(noblock);

  if(!hasrunning && !noblock) {
    gcphase = SUBTLECOMPACTPHASE;
    compact2Heaptop();
  }
} // void resolvePendingMoveRequest()
struct moveHelper {
  int numblocks;   // block num for heap
  INTPTR base;     // base virtual address of current heap block
  INTPTR ptr;      // virtual address of current heap top
  int offset;      // offset in current heap block
  int blockbase;   // virtual address of current small block to check
  int blockbound;  // bound virtual address of current small block
  int sblockindex; // index of the small blocks
  int top;         // real size of current heap block to check
  int bound;       // bound size of current heap block to check
}; // struct moveHelper
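/* Compaction drives two of these cursors: "orig" walks the marked data
 * sblock by sblock via nextSBlock(), and "to" tracks the destination block
 * being filled via nextBlock(); moveobj() copies one object per call from
 * orig to to and logs the old-to-new address pair in gcpointertbl. */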
// Returns false if out of the valid shared memory boundary, else true
inline bool nextSBlock(struct moveHelper * orig) {
  orig->blockbase = orig->blockbound;
  bool sbchanged = false;
  BAMBOO_DEBUGPRINT(0xecc0);
  BAMBOO_DEBUGPRINT_REG(orig->blockbase);
  BAMBOO_DEBUGPRINT_REG(orig->blockbound);
  BAMBOO_DEBUGPRINT_REG(orig->bound);
  BAMBOO_DEBUGPRINT_REG(orig->ptr);
outernextSBlock:
  // check if we cross a big block
  // TODO now do not zero out the whole memory, maybe the last two conditions
  // are useless now
  if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)
     || ((orig->ptr != NULL) && (*((int*)orig->ptr))==0)
     || ((*((int*)orig->blockbase))==0)) {
innernextSBlock:
    // end of current heap block, jump to next one
    orig->numblocks++;
    BAMBOO_DEBUGPRINT(0xecc1);
    BAMBOO_DEBUGPRINT_REG(orig->numblocks);
    BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
    BAMBOO_DEBUGPRINT(orig->base);
    if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
      // out of boundary
      orig->ptr = orig->base; // set current ptr to out of boundary too
      return false;
    }
    orig->blockbase = orig->base;
    orig->sblockindex = (orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
    sbchanged = true;
    int blocknum = 0;
    BLOCKINDEX(orig->base, &blocknum);
    if(bamboo_smemtbl[blocknum] == 0) {
      // empty block, go to the next one
      goto innernextSBlock;
    }
    // check the bamboo_smemtbl to decide the real bound
    orig->bound = orig->base + bamboo_smemtbl[blocknum];
  } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
    orig->sblockindex += 1;
    sbchanged = true;
  } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...

  // check if this sblock should be skipped or has a special start point
  if(gcsbstarttbl[orig->sblockindex] == -1) {
    // goto next sblock
    BAMBOO_DEBUGPRINT(0xecc2);
    sbchanged = true;
    orig->sblockindex += 1;
    orig->blockbase += BAMBOO_SMEM_SIZE;
    goto outernextSBlock;
  } else if((gcsbstarttbl[orig->sblockindex] != 0)
            && (sbchanged)) {
    // the first time to access this SBlock
    BAMBOO_DEBUGPRINT(0xecc3);
    // do not start from the very beginning
    orig->blockbase = gcsbstarttbl[orig->sblockindex];
  } // if(gcsbstarttbl[orig->sblockindex] == -1) else ...

  // setup information for this sblock
  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
  orig->offset = BAMBOO_CACHE_LINE_SIZE;
  orig->ptr = orig->blockbase + orig->offset;
  BAMBOO_DEBUGPRINT(0xecc4);
  BAMBOO_DEBUGPRINT_REG(orig->base);
  BAMBOO_DEBUGPRINT_REG(orig->bound);
  BAMBOO_DEBUGPRINT_REG(orig->ptr);
  BAMBOO_DEBUGPRINT_REG(orig->blockbound);
  BAMBOO_DEBUGPRINT_REG(orig->blockbase);
  BAMBOO_DEBUGPRINT_REG(orig->offset);
  if(orig->ptr >= orig->bound) {
    // met a lobj, move to next block
    goto innernextSBlock;
  }

  return true;
} // bool nextSBlock(struct moveHelper * orig)
// return false if there is no available data to compact
inline bool initOrig_Dst(struct moveHelper * orig,
                         struct moveHelper * to) {
  // init the dst ptr
  to->numblocks = 0;
  to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
  to->bound = BAMBOO_SMEM_SIZE_L;
  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));

  BAMBOO_DEBUGPRINT(0xef01);
  BAMBOO_DEBUGPRINT_REG(to->base);
  to->ptr = to->base + to->offset;
#ifdef GC_CACHE_ADAPT
  // initialize the gc_cache_revise_information
  gc_cache_revise_infomation.to_page_start_va = to->ptr;
  gc_cache_revise_infomation.to_page_end_va = (BAMBOO_PAGE_SIZE)*
    ((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
  gc_cache_revise_infomation.to_page_index =
    (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
  gc_cache_revise_infomation.orig_page_start_va = -1;
#endif // GC_CACHE_ADAPT

  // init the orig ptr
  orig->numblocks = 0;
  orig->base = to->base;
  int blocknum = 0;
  BLOCKINDEX(orig->base, &blocknum);
  // check the bamboo_smemtbl to decide the real bound
  orig->bound = orig->base + bamboo_smemtbl[blocknum];
  orig->blockbase = orig->base;
  orig->sblockindex = (orig->base - gcbaseva) / BAMBOO_SMEM_SIZE;
  BAMBOO_DEBUGPRINT(0xef02);
  BAMBOO_DEBUGPRINT_REG(orig->base);
  BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
  BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
  BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);

  if(gcsbstarttbl[orig->sblockindex] == -1) {
    BAMBOO_DEBUGPRINT(0xef03);
    // goto next sblock
    orig->blockbound =
      gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
    return nextSBlock(orig);
  } else if(gcsbstarttbl[orig->sblockindex] != 0) {
    BAMBOO_DEBUGPRINT(0xef04);
    orig->blockbase = gcsbstarttbl[orig->sblockindex];
  }
  BAMBOO_DEBUGPRINT(0xef05);
  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
  orig->offset = BAMBOO_CACHE_LINE_SIZE;
  orig->ptr = orig->blockbase + orig->offset;
  BAMBOO_DEBUGPRINT(0xef06);
  BAMBOO_DEBUGPRINT_REG(orig->base);

  return true;
} // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
inline void nextBlock(struct moveHelper * to) {
  to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
  to->bound += BAMBOO_SMEM_SIZE;
  to->numblocks++;
  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
  to->offset = BAMBOO_CACHE_LINE_SIZE;
  to->ptr = to->base + to->offset;
} // void nextBlock(struct moveHelper * to)
// endaddr does not contain spaces for headers
inline bool moveobj(struct moveHelper * orig,
                    struct moveHelper * to,
                    int stopblock) {
  if(stopblock == 0) {
    return true;
  }

  BAMBOO_DEBUGPRINT(0xe201);
  BAMBOO_DEBUGPRINT_REG(orig->ptr);
  BAMBOO_DEBUGPRINT_REG(to->ptr);

  int type = 0;
  int size = 0;
  int mark = 0;
  int isize = 0;
innermoveobj:
  // skip the -2 alignment padding
  while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
    orig->ptr = (int*)(orig->ptr) + 1;
  }
#ifdef GC_CACHE_ADAPT
  if(orig->ptr >= gc_cache_revise_infomation.orig_page_end_va) {
    // end of an orig page
    // compute the impact of this page for the new page
    unsigned int tmp_factor=
      to->ptr-gc_cache_revise_infomation.to_page_start_va;
    int topage=gc_cache_revise_infomation.to_page_index;
    int oldpage = gc_cache_revise_infomation.orig_page_index;
    unsigned int * newtable=&gccachesamplingtbl_r[topage];
    unsigned int * oldtable=&gccachesamplingtbl[oldpage];
    for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
      (*newtable) += (*oldtable)*tmp_factor;
      newtable=(unsigned int*)(
        ((char *)newtable)+size_cachesamplingtbl_local_r);
      oldtable=(unsigned int*)(((char *)oldtable)+size_cachesamplingtbl_local);
    }
    // prepare for a new orig page
    int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
    gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
    gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
      (BAMBOO_PAGE_SIZE)*(tmp_index+1);
    gc_cache_revise_infomation.orig_page_index = tmp_index;
    gc_cache_revise_infomation.to_page_start_va = to->ptr;
  }
#endif // GC_CACHE_ADAPT
  if((orig->ptr >= orig->bound) || (orig->ptr == orig->blockbound)) {
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
    goto innermoveobj;
  }
  BAMBOO_DEBUGPRINT(0xe202);
  BAMBOO_DEBUGPRINT_REG(orig->ptr);
  BAMBOO_DEBUGPRINT(((int *)(orig->ptr))[0]);
  // check the obj's type, size and mark flag
  type = ((int *)(orig->ptr))[0];
  size = 0;
  if(type == 0) {
    // end of this block, go to next one
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
    goto innermoveobj;
  } else if(type < NUMCLASSES) {
    // a normal object
    size = classsize[type];
  } else {
    // an array
    struct ArrayObject *ao=(struct ArrayObject *)(orig->ptr);
    int elementsize=classsize[type];
    int length=ao->___length___;
    size=sizeof(struct ArrayObject)+length*elementsize;
  }
  mark = ((int *)(orig->ptr))[6];
  bool isremote = ((((int *)(orig->ptr))[6] & REMOTEM) != 0);
  BAMBOO_DEBUGPRINT(0xe203);
  BAMBOO_DEBUGPRINT_REG(orig->ptr);
  BAMBOO_DEBUGPRINT_REG(size);
  ALIGNSIZE(size, &isize); // whether or not the obj is marked,
                           // we should be able to step across it
  if((mark & MARKED) != 0) {
    BAMBOO_DEBUGPRINT(0xe204);
    // marked obj, copy it to current heap top
    // check to see if remaining space is enough
    if(to->top + isize > to->bound) {
      // fill 0 indicating the end of this block
      BAMBOO_MEMSET_WH(to->ptr, '\0', to->bound - to->top);
      // fill the header of this block and then go to next block
      to->offset += to->bound - to->top;
      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
      (*((int*)(to->base))) = to->offset;
#ifdef GC_CACHE_ADAPT
      int tmp_ptr = to->ptr;
#endif // GC_CACHE_ADAPT
      nextBlock(to);
#ifdef GC_CACHE_ADAPT
      if((to->ptr) >= gc_cache_revise_infomation.to_page_end_va) {
        // end of a to page, wrap up its information
        unsigned int tmp_factor =
          tmp_ptr-gc_cache_revise_infomation.to_page_start_va;
        int topage=gc_cache_revise_infomation.to_page_index;
        int oldpage = gc_cache_revise_infomation.orig_page_index;
        unsigned int * newtable=&gccachesamplingtbl_r[topage];
        unsigned int * oldtable=&gccachesamplingtbl[oldpage];
        for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
          (*newtable)=((*newtable)+(*oldtable)*tmp_factor);
          newtable=(unsigned int*) (
            ((char *)newtable)+size_cachesamplingtbl_local_r);
          oldtable=(unsigned int*) (
            ((char *)oldtable)+size_cachesamplingtbl_local);
        }
        // prepare for a new to page
        int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
        gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
        gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
          (BAMBOO_PAGE_SIZE)*(tmp_index+1);
        gc_cache_revise_infomation.orig_page_index = tmp_index;
        gc_cache_revise_infomation.to_page_start_va = to->ptr;
        gc_cache_revise_infomation.to_page_end_va = gcbaseva +
          (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
        gc_cache_revise_infomation.to_page_index =
          (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
      }
#endif // GC_CACHE_ADAPT
      if(stopblock == to->numblocks) {
        // already fulfilled the block
        return true;
      } // if(stopblock == to->numblocks)
    } // if(to->top + isize > to->bound)
    // set the mark field to COMPACTED, indicating that this obj has been
    // moved and needs to be flushed
    ((int *)(orig->ptr))[6] = COMPACTED;
    if(to->ptr != orig->ptr) {
      if((int)(orig->ptr) < (int)(to->ptr)+size) {
        memmove(to->ptr, orig->ptr, size);
      } else {
        memcpy(to->ptr, orig->ptr, size);
      }
      // fill the remaining space with -2
      BAMBOO_MEMSET_WH(to->ptr+size, -2, isize-size);
    }
    // store mapping info
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
    RuntimeHashadd_I(gcpointertbl, orig->ptr, to->ptr);
#else
    mgchashInsert_I(gcpointertbl, orig->ptr, to->ptr);
#endif
    if(isremote) {
      // add to the sharedptbl
      if(gcsharedptbl != NULL) {
        mgcsharedhashInsert_I(gcsharedptbl, orig->ptr, to->ptr);
      }
    }
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    BAMBOO_DEBUGPRINT(0xcdce);
    BAMBOO_DEBUGPRINT_REG(orig->ptr);
    BAMBOO_DEBUGPRINT_REG(to->ptr);
    BAMBOO_DEBUGPRINT_REG(isize);
    gccurr_heaptop -= isize;
    to->ptr += isize;
    to->offset += isize;
    to->top += isize;
#ifdef GC_CACHE_ADAPT
    int tmp_ptr = to->ptr;
#endif // GC_CACHE_ADAPT
    if(to->top == to->bound) {
      // fill the header of this block and then go to next block
      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
      (*((int*)(to->base))) = to->offset;
      nextBlock(to);
    }
#ifdef GC_CACHE_ADAPT
    if((to->ptr) >= gc_cache_revise_infomation.to_page_end_va) {
      // end of a to page, wrap up its information
      unsigned int tmp_factor =
        tmp_ptr-gc_cache_revise_infomation.to_page_start_va;
      int topage=gc_cache_revise_infomation.to_page_index;
      int oldpage = gc_cache_revise_infomation.orig_page_index;
      unsigned int * newtable=&gccachesamplingtbl_r[topage];
      unsigned int * oldtable=&gccachesamplingtbl[oldpage];
      for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
        (*newtable)=((*newtable)+(*oldtable)*tmp_factor);
        newtable=(unsigned int*) (
          ((char *)newtable)+size_cachesamplingtbl_local_r);
        oldtable=(unsigned int*) (
          ((char *)oldtable)+size_cachesamplingtbl_local);
      }
      // prepare for a new to page
      gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
      gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
        (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
      gc_cache_revise_infomation.orig_page_index =
        (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
      gc_cache_revise_infomation.to_page_start_va = to->ptr;
      gc_cache_revise_infomation.to_page_end_va = gcbaseva +
        (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
      gc_cache_revise_infomation.to_page_index =
        (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
    }
#endif // GC_CACHE_ADAPT
  } // if((mark & MARKED) != 0)
  BAMBOO_DEBUGPRINT(0xe205);
  // move to next obj
  orig->ptr += size;
  BAMBOO_DEBUGPRINT_REG(isize);
  BAMBOO_DEBUGPRINT_REG(size);
  BAMBOO_DEBUGPRINT_REG(orig->ptr);
  BAMBOO_DEBUGPRINT_REG(orig->bound);
  if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
    BAMBOO_DEBUGPRINT(0xe206);
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
  }
  BAMBOO_DEBUGPRINT(0xe207);
  BAMBOO_DEBUGPRINT_REG(orig->ptr);
  return false;
} // bool moveobj(struct moveHelper * orig, struct moveHelper * to, int stopblock)
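/* Per-object move protocol: skip -2 padding, read the type word to get the
 * size, and for a MARKED object copy it to to->ptr, tag the old copy
 * COMPACTED, and record orig->ptr -> to->ptr in gcpointertbl (plus
 * gcsharedptbl for REMOTEM objects) so the later flush phase can rewrite
 * pointers. Returning true means this compaction slice is done; false
 * means the caller should keep invoking it for the next object. */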
// should be invoked with interrupts disabled
inline int assignSpareMem_I(int sourcecore,
                            int requiredmem,
                            int * tomove,
                            int * startaddr) {
  int b = 0;
  BLOCKINDEX(gcloads[sourcecore], &b);
  int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
      : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
  int remain = boundptr - gcloads[sourcecore];
  int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
  *startaddr = gcloads[sourcecore];
  *tomove = gcfilledblocks[sourcecore] + 1;
  if(memneed < remain) {
    gcloads[sourcecore] += memneed;
    return 0;
  } else {
    // next available block
    gcfilledblocks[sourcecore] += 1;
    int newbase = 0;
    BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
    gcloads[sourcecore] = newbase;
    return requiredmem-remain;
  }
} // int assignSpareMem_I(int, int, int *, int *)
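/* Worked example (illustrative numbers): if the donor core's gcloads sits
 * 3KB below its block bound and the requester needs 2KB, then memneed =
 * 2KB + BAMBOO_CACHE_LINE_SIZE fits in the 3KB remainder, the donor's
 * gcloads advances by memneed, and 0 is returned; otherwise the donor
 * block is marked filled and the shortfall requiredmem-remain is returned
 * so the caller can ask the next donor. */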
// should be invoked with interrupts disabled
inline bool gcfindSpareMem_I(int * startaddr,
                             int * tomove,
                             int * dstcore,
                             int requiredmem,
                             int requiredcore) {
  for(int k = 0; k < NUMCORES4GC; k++) {
    if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
      // check if this stopped core has enough mem
      assignSpareMem_I(k, requiredmem, tomove, startaddr);
      *dstcore = k;
      return true;
    }
  }
  // if we can not find spare mem right now, hold the request
  gcrequiredmems[requiredcore] = requiredmem;
  gcmovepending++;
  return false;
} // bool gcfindSpareMem_I(int*, int*, int*, int, int)
2134 inline bool compacthelper(struct moveHelper * orig,
2135 struct moveHelper * to,
2138 bool * localcompact) {
2139 // scan over all objs in this block, compact the marked objs
2140 // loop stop when finishing either scanning all active objs or
2141 // fulfilled the gcstopblock
2143 BAMBOO_DEBUGPRINT(0xe101);
2144 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2145 BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2148 while(orig->ptr < gcmarkedptrbound) {
2149 bool stop = moveobj(orig, to, gcblock2fill);
2154 #ifdef GC_CACHE_ADAPT
2155 // end of an to page, wrap up its information
2156 unsigned int tmp_factor =
2157 to->ptr-gc_cache_revise_infomation.to_page_start_va;
2158 int topage=gc_cache_revise_infomation.to_page_index;
2159 int oldpage = gc_cache_revise_infomation.orig_page_index;
2160 unsigned int * newtable=&gccachesamplingtbl_r[topage];
2161 unsigned int * oldtable=&gccachesamplingtbl[oldpage];
2163 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2164 (*newtable) = ((*newtable)+(*oldtable)*tmp_factor);
2165 newtable=(unsigned int*)(((char *)newtable)+size_cachesamplingtbl_local_r);
2166 oldtable=(unsigned int*) (((char *)oldtable)+size_cachesamplingtbl_local);
2168 #endif // GC_CACHE_ADAPT
2169 // if no objs have been compact, do nothing,
2170 // otherwise, fill the header of this block
2171 if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
2172 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2173 (*((int*)(to->base))) = to->offset;
2177 to->top -= BAMBOO_CACHE_LINE_SIZE;
2178 } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
2180 *heaptopptr = to->ptr;
2181 *filledblocks = to->numblocks;
2184 BAMBOO_DEBUGPRINT(0xe102);
2185 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2186 BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2187 BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2188 BAMBOO_DEBUGPRINT_REG(*filledblocks);
2189 BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
2192 // send compact finish message to the core coordinator
2194 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2195 gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
2196 gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
2197 if(orig->ptr < gcmarkedptrbound) {
2199 BAMBOO_DEBUGPRINT(0xe103);
2203 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2204 if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
2205 gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
2207 BAMBOO_DEBUGPRINT(0xe104);
2211 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2213 BAMBOO_DEBUGPRINT(0xe105);
2217 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2220 BAMBOO_DEBUGPRINT(0xe106);
2222 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2227 if(orig->ptr < gcmarkedptrbound) {
2229 BAMBOO_DEBUGPRINT(0xe107);
2233 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2234 *filledblocks, *heaptopptr, gccurr_heaptop, false);
2237 BAMBOO_DEBUGPRINT(0xe108);
2238 BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2240 // finish compacting
2241 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2242 *filledblocks, *heaptopptr, 0, false);
2244 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
2246 if(orig->ptr < gcmarkedptrbound) {
2248 BAMBOO_DEBUGPRINT(0xe109);
2250 // still have objs left to compact
2259 BAMBOO_DEBUGPRINT(0xe10a);
2262 to->ptr = gcmovestartaddr;
2263 to->numblocks = gcblock2fill - 1;
2264 to->bound = (to->numblocks==0) ?
2265 BAMBOO_SMEM_SIZE_L :
2266 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2267 BASEPTR(gcdstcore, to->numblocks, &(to->base));
2268 to->offset = to->ptr - to->base;
2269 to->top = (to->numblocks==0) ?
2270 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
2272 to->offset = BAMBOO_CACHE_LINE_SIZE;
2273 to->ptr += to->offset; // for header
2274 to->top += to->offset;
2275 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2276 *localcompact = true;
2278 *localcompact = false;
2280 #ifdef GC_CACHE_ADAPT
2281 // initialize the gc_cache_revise_information
2282 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2283 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2284 (BAMBOO_PAGE_SIZE)*((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2285 gc_cache_revise_infomation.to_page_index =
2286 (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
2287 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2288 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2289 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2290 gc_cache_revise_infomation.orig_page_index =
2291 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2292 #endif // GC_CACHE_ADAPT
2296 BAMBOO_DEBUGPRINT(0xe10b);
2299 } // bool compacthelper(...)
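// Destination-block layout produced by compacthelper (a sketch): every fill
// region that received objects starts with a BAMBOO_CACHE_LINE_SIZE header
// whose first word records to->offset, the number of bytes used in the
// region (header included):
//
//   to->base                          to->base + to->offset
//      | header (BAMBOO_CACHE_LINE_SIZE) | compacted objs ... | free |
//        *(int*)(to->base) == to->offset
//
// Regions that received nothing (to->offset <= BAMBOO_CACHE_LINE_SIZE) get
// no header; the reserved space is reclaimed by pulling to->top back by
// BAMBOO_CACHE_LINE_SIZE.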
2301 inline void compact() {
2302 if(COMPACTPHASE != gcphase) {
2303 BAMBOO_EXIT(0xb102);
2306 // initialize pointers for compacting
2307 struct moveHelper * orig =
2308 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2309 struct moveHelper * to =
2310 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2312 if(!initOrig_Dst(orig, to)) {
2313 // no available data to compact
2314 // send compact finish msg to STARTUP core
2316 BAMBOO_DEBUGPRINT(0xe001);
2317 BAMBOO_DEBUGPRINT_REG(to->base);
2319 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2320 0, to->base, 0, false);
2325 #ifdef GC_CACHE_ADAPT
2326 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2327 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2328 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2329 gc_cache_revise_infomation.orig_page_index =
2330 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2331 #endif // GC_CACHE_ADAPT
2333 int filledblocks = 0;
2334 INTPTR heaptopptr = 0;
2335 bool localcompact = true;
2336 compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
2342 // returns NULL when:
2343 // 1. objptr is NULL, or
2344 // 2. objptr is not a shared obj;
2345 // in these cases, keeping the original value is fine
2346 inline void * flushObj(void * objptr) {
2348 BAMBOO_DEBUGPRINT(0xe401);
2350 if(objptr == NULL) {
2353 void * dstptr = NULL;
2354 if(ISSHAREDOBJ(objptr)) {
2356 BAMBOO_DEBUGPRINT(0xe402);
2357 BAMBOO_DEBUGPRINT_REG(objptr);
2359 // a shared obj ptr, change to new address
2360 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2361 #ifdef LOCALHASHTBL_TEST
2362 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2364 dstptr = mgchashSearch(gcpointertbl, objptr);
2366 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2368 BAMBOO_DEBUGPRINT_REG(dstptr);
2371 if(NULL == dstptr) {
2374 BAMBOO_DEBUGPRINT(0xe403);
2375 BAMBOO_DEBUGPRINT_REG(objptr);
2376 BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
2378 if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) {
2379 // error! the obj is hosted on this core, but its mapping cannot be found
2380 BAMBOO_DEBUGPRINT_REG(objptr);
2381 BAMBOO_EXIT(0xb103);
2383 int hostc = hostcore(objptr);
2384 // check the corresponding sharedptbl
2385 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2386 mgcsharedhashtbl_t * sptbl = gcrpointertbls[hostc];
2388 dstptr = mgcsharedhashSearch(sptbl, (int)objptr);
2389 if(dstptr != NULL) {
2390 #ifdef LOCALHASHTBL_TEST
2391 RuntimeHashadd_I(gcpointertbl, (int)objptr, (int)dstptr);
2393 mgchashInsert_I(gcpointertbl, (int)objptr, (int)dstptr);
2397 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2399 if(dstptr == NULL) {
2400 // still can not get the mapping info,
2401 // send msg to host core for the mapping info
2402 gcobj2map = (int)objptr;
2405 // first request for this mapping: send msg to the host core
2406 // asking for the mapping info
2407 send_msg_3(hostc, GCMAPREQUEST, (int)objptr,
2408 BAMBOO_NUM_OF_CORE, false);
2414 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2415 #ifdef LOCALHASHTBL_TEST
2416 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2418 dstptr = mgchashSearch(gcpointertbl, objptr);
2420 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2421 } // if(dstptr == NULL)
2422 } // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ...
2424 BAMBOO_DEBUGPRINT_REG(dstptr);
2426 } // if(NULL == dstptr)
2427 } // if(ISSHAREDOBJ(objptr))
2428 // if not a shared obj, return NULL to indicate no need to flush
2430 BAMBOO_DEBUGPRINT(0xe404);
2433 } // void * flushObj(void * objptr)
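// Forwarding-pointer lookup order used by flushObj (a sketch of the logic
// above):
//   1. the local mapping table gcpointertbl;
//   2. on a miss, the host core's shared table
//      gcrpointertbls[hostcore(objptr)], caching any hit back into the
//      local table;
//   3. still missing: send a GCMAPREQUEST msg to the host core and wait for
//      the reply to populate the local table before re-reading it.
// A NULL result means objptr was not a shared obj, and the caller keeps the
// original pointer value unchanged.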
2435 inline void flushRuntimeObj(struct garbagelist * stackptr) {
2437 // flush current stack
2438 while(stackptr!=NULL) {
2439 for(i=0; i<stackptr->size; i++) {
2440 if(stackptr->array[i] != NULL) {
2441 void * dst = flushObj(stackptr->array[i]);
2443 stackptr->array[i] = dst;
2447 stackptr=stackptr->next;
2451 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
2452 for(i=0; i<NUMCLASSES; i++) {
2453 struct parameterwrapper ** queues =
2454 objectqueues[BAMBOO_NUM_OF_CORE][i];
2455 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
2456 for(j = 0; j < length; ++j) {
2457 struct parameterwrapper * parameter = queues[j];
2458 struct ObjectHash * set=parameter->objectset;
2459 struct ObjectNode * ptr=set->listhead;
2461 void * dst = flushObj((void *)ptr->key);
2467 ObjectHashrehash(set);
2472 // flush current task descriptor
2473 if(currtpd != NULL) {
2474 for(i=0; i<currtpd->numParameters; i++) {
2475 void * dst = flushObj(currtpd->parameterArray[i]);
2477 currtpd->parameterArray[i] = dst;
2482 // flush active tasks
2483 if(activetasks != NULL) {
2484 struct genpointerlist * ptr=activetasks->list;
2486 struct taskparamdescriptor *tpd=ptr->src;
2488 for(i=0; i<tpd->numParameters; i++) {
2489 void * dst = flushObj(tpd->parameterArray[i]);
2491 tpd->parameterArray[i] = dst;
2496 genrehash(activetasks);
2499 // flush cached transferred obj
2500 struct QueueItem * tmpobjptr = getHead(&objqueue);
2501 while(tmpobjptr != NULL) {
2502 struct transObjInfo * objInfo =
2503 (struct transObjInfo *)(tmpobjptr->objectptr);
2504 void * dst = flushObj(objInfo->objptr);
2506 objInfo->objptr = dst;
2508 tmpobjptr = getNextQueueItem(tmpobjptr);
2511 // flush cached objs to be transferred
2512 struct QueueItem * item = getHead(totransobjqueue);
2513 while(item != NULL) {
2514 struct transObjInfo * totransobj =
2515 (struct transObjInfo *)(item->objectptr);
2516 void * dst = flushObj(totransobj->objptr);
2518 totransobj->objptr = dst;
2520 item = getNextQueueItem(item);
2521 } // while(item != NULL)
2523 // flush lock related info
2524 for(i = 0; i < runtime_locklen; ++i) {
2525 void * dst = flushObj(runtime_locks[i].redirectlock);
2527 runtime_locks[i].redirectlock = (int)dst;
2529 if(runtime_locks[i].value != NULL) {
2530 void * dst=flushObj(runtime_locks[i].value);
2532 runtime_locks[i].value = (int)dst;
2537 } // void flushRuntimeObj(struct garbagelist * stackptr)
2539 inline void transmappinginfo() {
2540 // broadcast the sharedptbl pointer
2541 for(int i = 0; i < NUMCORESACTIVE; i++) {
2542 if(i != BAMBOO_NUM_OF_CORE) {
2543 send_msg_3(i, GCMAPTBL, gcsharedptbl, BAMBOO_NUM_OF_CORE, false);
2547 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
2548 send_msg_2(STARTUPCORE, GCFINISHMAPINFO, BAMBOO_NUM_OF_CORE, false);
2552 inline void flush(struct garbagelist * stackptr) {
2554 flushRuntimeObj(stackptr);
2557 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2558 bool hasItems = gc_moreItems_I();
2559 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2565 BAMBOO_DEBUGPRINT(0xe301);
2567 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2568 void * ptr = gc_dequeue_I();
2569 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2570 if(ISSHAREDOBJ(ptr)) {
2571 // should be a local shared obj and should have mapping info
2572 ptr = flushObj(ptr);
2574 BAMBOO_DEBUGPRINT(0xe302);
2575 BAMBOO_DEBUGPRINT_REG(ptr);
2576 BAMBOO_DEBUGPRINT_REG(tptr);
2577 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2580 BAMBOO_EXIT(0xb105);
2582 } // if(ISSHAREDOBJ(ptr))
2583 if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) {
2584 int type = ((int *)(ptr))[0];
2585 // scan all pointers in ptr
2586 unsigned INTPTR * pointer;
2587 pointer=pointerarray[type];
2589 BAMBOO_DEBUGPRINT(0xe303);
2590 BAMBOO_DEBUGPRINT_REG(pointer);
2593 /* Array of primitives */
2595 } else if (((INTPTR)pointer)==1) {
2597 BAMBOO_DEBUGPRINT(0xe304);
2599 /* Array of pointers */
2600 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2601 int length=ao->___length___;
2603 for(j=0; j<length; j++) {
2605 BAMBOO_DEBUGPRINT(0xe305);
2608 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2610 BAMBOO_DEBUGPRINT_REG(objptr);
2612 if(objptr != NULL) {
2613 void * dst = flushObj(objptr);
2615 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2621 BAMBOO_DEBUGPRINT(0xe306);
2623 INTPTR size=pointer[0];
2625 for(i=1; i<=size; i++) {
2627 BAMBOO_DEBUGPRINT(0xe307);
2629 unsigned int offset=pointer[i];
2630 void * objptr=*((void **)(((char *)ptr)+offset));
2632 BAMBOO_DEBUGPRINT_REG(objptr);
2634 if(objptr != NULL) {
2635 void * dst = flushObj(objptr);
2637 *((void **)(((char *)ptr)+offset)) = dst;
2640 } // for(i=1; i<=size; i++)
2641 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2642 // restore the mark field, indicating that this obj has been flushed
2643 if(ISSHAREDOBJ(ptr)) {
2644 ((int *)(ptr))[6] = INIT;
2646 } // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED))
2647 } // while(gc_moreItems())
2649 BAMBOO_DEBUGPRINT(0xe308);
2652 // TODO bug here: the startup core contains all lobjs' info, thus all the
2653 // lobjs are flushed in sequence.
2655 while(gc_lobjmoreItems_I()) {
2657 BAMBOO_DEBUGPRINT(0xe309);
2659 void * ptr = gc_lobjdequeue_I(NULL, NULL);
2660 ptr = flushObj(ptr);
2662 BAMBOO_DEBUGPRINT(0xe30a);
2663 BAMBOO_DEBUGPRINT_REG(ptr);
2664 BAMBOO_DEBUGPRINT_REG(tptr);
2665 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2668 BAMBOO_EXIT(0xb106);
2670 if(((int *)(ptr))[6] == COMPACTED) {
2671 int type = ((int *)(ptr))[0];
2672 // scan all pointers in ptr
2673 unsigned INTPTR * pointer;
2674 pointer=pointerarray[type];
2676 BAMBOO_DEBUGPRINT(0xe30b);
2677 BAMBOO_DEBUGPRINT_REG(pointer);
2680 /* Array of primitives */
2682 } else if (((INTPTR)pointer)==1) {
2684 BAMBOO_DEBUGPRINT(0xe30c);
2686 /* Array of pointers */
2687 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2688 int length=ao->___length___;
2690 for(j=0; j<length; j++) {
2692 BAMBOO_DEBUGPRINT(0xe30d);
2695 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2697 BAMBOO_DEBUGPRINT_REG(objptr);
2699 if(objptr != NULL) {
2700 void * dst = flushObj(objptr);
2702 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2708 BAMBOO_DEBUGPRINT(0xe30e);
2710 INTPTR size=pointer[0];
2712 for(i=1; i<=size; i++) {
2714 BAMBOO_DEBUGPRINT(0xe30f);
2716 unsigned int offset=pointer[i];
2717 void * objptr=*((void **)(((char *)ptr)+offset));
2720 BAMBOO_DEBUGPRINT_REG(objptr);
2722 if(objptr != NULL) {
2723 void * dst = flushObj(objptr);
2725 *((void **)(((char *)ptr)+offset)) = dst;
2728 } // for(i=1; i<=size; i++)
2729 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2730 // restore the mark field, indicating that this obj has been flushed
2731 ((int *)(ptr))[6] = INIT;
2732 } // if(((int *)(ptr))[6] == COMPACTED)
2733 } // while(gc_lobjmoreItems())
2735 BAMBOO_DEBUGPRINT(0xe310);
2738 // send flush finish message to core coordinator
2739 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2740 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2742 send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
2745 BAMBOO_DEBUGPRINT(0xe311);
2749 #ifdef GC_CACHE_ADAPT
2750 // prepare for cache adaptation:
2751 // -- flush the shared heap
2752 // -- clean dtlb entries
2753 // -- change cache strategy
2754 void cacheAdapt_gc(bool isgccachestage) {
2755 // flush the shared heap
2756 BAMBOO_CACHE_FLUSH_L2();
2758 // clean the dtlb entries
2759 BAMBOO_CLEAN_DTLB();
2761 // change the cache strategy
2762 gccachestage = isgccachestage;
2763 } // cacheAdapt_gc(bool isgccachestage)
2765 // the master core decides how to adapt the cache strategy for the mutator
2766 // according to the collected statistics
2768 // make all pages hash-for-home (h4h)
2769 int cacheAdapt_policy_h4h(){
2770 unsigned int page_index = 0;
2772 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2774 int * tmp_p = gccachepolicytbl+1;
2775 for(page_index = 0; page_index < page_num; page_index++) {
2776 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2777 bamboo_cache_policy_t policy = {0};
2778 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2779 *tmp_p = page_index;
2781 *tmp_p = policy.word;
2787 } // int cacheAdapt_policy_h4h()
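// Record format of gccachepolicytbl, shared by cacheAdapt_policy_h4h above
// and the other policy functions below (a sketch): the table is a flat int
// array where slot 0 holds the number of changed pages (filled in by
// cacheAdapt_master from the policy function's return value) and each
// change is an adjacent (page_index, policy.word) pair:
//
//   gccachepolicytbl: [ numchanged | idx0 | word0 | idx1 | word1 | ... ]
//
// cacheAdapt_mutator walks the same layout when applying the new policies.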
2789 // cache every page locally on its host core, as in the non-cache-adaptive gc local mode
2790 int cacheAdapt_policy_local(){
2791 unsigned int page_index = 0;
2793 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2795 int * tmp_p = gccachepolicytbl+1;
2796 for(page_index = 0; page_index < page_num; page_index++) {
2797 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2798 bamboo_cache_policy_t policy = {0};
2800 BLOCKINDEX(page_sva, &block);
2801 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2802 // locally cache the page on its host core
2803 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2804 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2805 policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
2806 policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
2807 *tmp_p = page_index;
2809 *tmp_p = policy.word;
2815 } // int cacheAdapt_policy_local()
2817 int cacheAdapt_policy_hotest(){
2818 unsigned int page_index = 0;
2820 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2822 int * tmp_p = gccachepolicytbl+1;
2823 for(page_index = 0; page_index < page_num; page_index++) {
2824 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2825 bamboo_cache_policy_t policy = {0};
2827 unsigned int hotfreq = 0;
2829 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2830 for(int i = 0; i < NUMCORESACTIVE; i++) {
2831 unsigned int freq = *local_tbl;
2832 local_tbl=(unsigned int *)(
2833 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2835 // check the frequency and decide if this page is hot for this core
2836 if(hotfreq < freq) {
2842 // Decide the cache strategy for this page.
2843 // If a new cache strategy is adopted, write it into the shared block of
2844 // the gcsharedsamplingtbl; the mem reserved there is large enough
2845 // to hold this information.
2846 // Format: page start va + cache strategy (h4h / (host core [x,y]))
2848 // this page has not been accessed, do not change its cache policy
2851 // locally cache the page on the hottest core
2852 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2853 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2854 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2855 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2856 *tmp_p = page_index;
2858 *tmp_p = policy.word;
2865 } // int cacheAdapt_policy_hotest()
2867 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 50
2868 // cache the page on the core that accesses it the most if that core accesses
2869 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total; otherwise, h4h the page
2871 int cacheAdapt_policy_dominate(){
2872 unsigned int page_index = 0;
2874 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2876 int * tmp_p = gccachepolicytbl+1;
2877 for(page_index = 0; page_index < page_num; page_index++) {
2878 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2879 bamboo_cache_policy_t policy = {0};
2881 unsigned long long totalfreq = 0;
2882 unsigned int hotfreq = 0;
2884 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2885 for(int i = 0; i < NUMCORESACTIVE; i++) {
2886 unsigned int freq = *local_tbl;
2887 local_tbl=(unsigned int *)(
2888 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2890 // check the frequency and decide if this page is hot for this core
2891 if(hotfreq < freq) {
2897 // Decide the cache strategy for this page.
2898 // If a new cache strategy is adopted, write it into the shared block of the gcsharedsamplingtbl.
2900 // Format: page start va + cache policy
2902 // this page has not been accessed, do not change its cache policy
2906 (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100/BAMBOO_PAGE_SIZE;
2907 hotfreq/=BAMBOO_PAGE_SIZE;
2908 if(hotfreq < totalfreq) {
2910 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2912 // locally cache the page on the hottest core
2913 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2914 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2915 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2916 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2918 *tmp_p = page_index;
2920 *tmp_p = policy.word;
2926 } // int cacheAdapt_policy_dominate()
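// A worked example of the dominate test above, with hypothetical numbers
// (GC_CACHE_ADAPT_DOMINATE_THRESHOLD = 50, BAMBOO_PAGE_SIZE assumed 4096):
// for a page with totalfreq = 8192000 and hotfreq = 3276800,
//   totalfreq -> (8192000*50)/100/4096 = 1000   (50% of total, page-scaled)
//   hotfreq   -> 3276800/4096          = 800
// Since hotfreq is below the scaled totalfreq, no single core dominates and
// the page is set to hash-for-home; had hotfreq come out above 1000, the
// page would instead be cached locally on the hottest core.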
2928 #define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
2930 void gc_quicksort(unsigned long long *array,
2936 int rightIdx = right;
2937 if((right-left+1) >= 1) {
2938 pivot = (left+right)/2;
2939 while((leftIdx <= pivot) && (rightIdx >= pivot)) {
2940 unsigned long long pivotValue = array[pivot*3-offset];
2941 while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
2944 while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
2947 // swap the triples at [leftIdx] & [rightIdx]
2948 for(int k = 0; k < 3; k++) {
2949 unsigned long long tmp = array[3*rightIdx-k];
2950 array[3*rightIdx-k] = array[3*leftIdx-k];
2951 array[3*leftIdx-k] = tmp;
2955 if((leftIdx-1) == pivot) {
2956 pivot = rightIdx = rightIdx + 1;
2957 } else if((leftIdx+1) == pivot) {
2958 pivot = leftIdx = leftIdx-1;
2961 gc_quicksort(array, left, pivot-1, offset);
2962 gc_quicksort(array, pivot+1, right, offset);
2965 } // void gc_quicksort(...)
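// Layout assumed by gc_quicksort for the per-core page lists built by the
// overload and crowd policies below (a sketch): core2heavypages[c][0] is
// the entry count, and entry i (0-based) occupies three slots:
//   [3i+1] = pointer to the page's policy word inside gccachepolicytbl
//   [3i+2] = totalfreq    (all sampled accesses to the page)
//   [3i+3] = remoteaccess (totalfreq minus the hottest core's accesses)
// gc_quicksort treats entries as 1-based and keys entry idx on
// array[idx*3-offset], sorting in descending order: offset 0 orders by
// remoteaccess, offset 1 by totalfreq.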
2967 // Every page is cached on the core that accesses it the most.
2968 // Check whether any core's pages total more than
2969 // 1/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD of all accesses. If so, take the
2970 // pages with the most remote accesses and hash-for-home them until the
2971 // core drops below that threshold.
2972 int cacheAdapt_policy_overload(){
2973 unsigned int page_index = 0;
2975 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2977 int * tmp_p = gccachepolicytbl+1;
2978 unsigned long long workload[NUMCORESACTIVE];
2979 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
2980 unsigned long long total_workload = 0;
2981 unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
2982 memset(core2heavypages,0,
2983 sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
2984 for(page_index = 0; page_index < page_num; page_index++) {
2985 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2986 bamboo_cache_policy_t policy = {0};
2988 unsigned long long totalfreq = 0;
2989 unsigned int hotfreq = 0;
2991 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2992 for(int i = 0; i < NUMCORESACTIVE; i++) {
2993 unsigned int freq = *local_tbl;
2994 local_tbl=(unsigned int *)(
2995 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2997 // check the frequency and decide if this page is hot for this core
2998 if(hotfreq < freq) {
3003 // Decide the cache strategy for this page.
3004 // If a new cache strategy is adopted, write it into the shared block of
3005 // the gcsharedsamplingtbl; the mem reserved there is large enough
3006 // to hold this information.
3007 // Format: page start va + cache strategy (h4h / (host core [x,y]))
3009 // this page has not been accessed, do not change its cache policy
3013 totalfreq/=BAMBOO_PAGE_SIZE;
3014 hotfreq/=BAMBOO_PAGE_SIZE;
3015 // locally cache the page on the hottest core
3016 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3017 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3018 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3019 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3020 *tmp_p = page_index;
3022 *tmp_p = policy.word;
3025 workload[hotestcore] += totalfreq;
3026 total_workload += totalfreq;
3027 // append to core2heavypages (sorted later with gc_quicksort)
3028 unsigned long long remoteaccess = totalfreq - hotfreq;
3029 int index = (int)core2heavypages[hotestcore][0];
3030 core2heavypages[hotestcore][3*index+3] = remoteaccess;
3031 core2heavypages[hotestcore][3*index+2] = totalfreq;
3032 core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
3033 core2heavypages[hotestcore][0]++;
3036 unsigned long long workload_threshold =
3037 total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
3038 // Check the workload of each core
3039 for(int i = 0; i < NUMCORESACTIVE; i++) {
3041 int index = (int)core2heavypages[i][0];
3042 if(workload[i] > workload_threshold) {
3043 // sort according to the remoteaccess
3044 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3045 while((workload[i] > workload_threshold) && (j<index*3)) {
3046 // hash-for-home the pages with the most remote accesses
3047 bamboo_cache_policy_t policy = {0};
3048 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3049 *((int*)core2heavypages[i][j]) = policy.word;
3050 workload[i] -= core2heavypages[i][j+1];
3057 } // int cacheAdapt_policy_overload()
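// Example of the overload check above, with hypothetical numbers: with
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD = 10, a core is overloaded when its
// pages draw more than total_workload/10 of all sampled accesses. If
// total_workload = 50000 and workload[i] = 9000 > 5000, the core's entries
// are sorted by remoteaccess (offset 0) and converted to hash-for-home one
// by one, each conversion subtracting that page's totalfreq from
// workload[i], until the core falls back under the 5000 threshold.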
3059 #define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
3060 #define GC_CACHE_ADAPT_CROWD_THRESHOLD 20
3061 // Every page is cached on the core that accesses it the most.
3062 // First, as in the overload policy: if any core's pages total more than
3063 // 1/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD of all accesses, hash-for-home the
3064 // pages with the most remote accesses until the core drops below that
3065 // threshold.
3066 // Then sort the pages based on activity:
3067 // if more than GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses to a
3068 // core's pages go to more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages,
3069 // start hash-for-homing those pages (selecting the ones with the most
3070 // remote accesses or the fewest local accesses first) until we get below
3071 // GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
3072 int cacheAdapt_policy_crowd(){
3073 unsigned int page_index = 0;
3075 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3077 int * tmp_p = gccachepolicytbl+1;
3078 unsigned long long workload[NUMCORESACTIVE];
3079 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
3080 unsigned long long total_workload = 0;
3081 unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
3082 memset(core2heavypages,0,
3083 sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
3084 for(page_index = 0; page_index < page_num; page_index++) {
3085 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3086 bamboo_cache_policy_t policy = {0};
3088 unsigned long long totalfreq = 0;
3089 unsigned int hotfreq = 0;
3091 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
3092 for(int i = 0; i < NUMCORESACTIVE; i++) {
3093 unsigned int freq = *local_tbl;
3094 local_tbl=(unsigned int *)(
3095 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
3097 // check the frequency and decide if this page is hot for this core
3098 if(hotfreq < freq) {
3103 // Decide the cache strategy for this page.
3104 // If a new cache strategy is adopted, write it into the shared block of
3105 // the gcsharedsamplingtbl; the mem reserved there is large enough
3106 // to hold this information.
3107 // Format: page start va + cache strategy (h4h / (host core [x,y]))
3109 // this page has not been accessed, do not change its cache policy
3112 totalfreq/=BAMBOO_PAGE_SIZE;
3113 hotfreq/=BAMBOO_PAGE_SIZE;
3115 // locally cache the page on the hottest core
3115 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3116 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3117 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3118 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3119 *tmp_p = page_index;
3121 *tmp_p = policy.word;
3124 workload[hotestcore] += totalfreq;
3125 total_workload += totalfreq;
3126 // append to core2heavypages (sorted later with gc_quicksort)
3127 unsigned long long remoteaccess = totalfreq - hotfreq;
3128 int index = (int)core2heavypages[hotestcore][0];
3129 core2heavypages[hotestcore][3*index+3] = remoteaccess;
3130 core2heavypages[hotestcore][3*index+2] = totalfreq;
3131 core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
3132 core2heavypages[hotestcore][0]++;
3135 unsigned long long workload_threshold =
3136 total_workload / GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
3137 // Check the workload of each core
3138 for(int i = 0; i < NUMCORESACTIVE; i++) {
3140 int index = (int)core2heavypages[i][0];
3141 if(workload[i] > workload_threshold) {
3142 // sort according to the remoteaccess
3143 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3144 while((workload[i] > workload_threshold) && (j<index*3)) {
3145 // hash-for-home the pages with the most remote accesses
3146 bamboo_cache_policy_t policy = {0};
3147 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3148 *((int*)core2heavypages[i][j]) = policy.word;
3149 workload[i] -= core2heavypages[i][j+1];
3154 // Check if the accesses are crowded onto a few pages;
3155 // sort according to the total access count
3157 gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
3158 unsigned long long threshold =
3159 GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3160 int num_crowded = 0;
3161 unsigned long long t_workload = 0;
3163 t_workload += core2heavypages[i][j+num_crowded*3+1];
3165 } while(t_workload < threshold);
3166 // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD; if there are enough
3167 // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
3168 if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
3169 // need to hash-for-home these pages
3170 // sort the pages according to remote access
3171 gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
3172 // h4h those pages with more remote accesses
3173 bamboo_cache_policy_t policy = {0};
3174 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3175 *((int*)core2heavypages[i][j]) = policy.word;
3176 workload[i] -= core2heavypages[i][j+1];
3177 t_workload -= core2heavypages[i][j+1];
3179 threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3185 } // int cacheAdapt_policy_crowd()
3187 void cacheAdapt_master() {
3188 #ifdef GC_CACHE_ADAPT_SAMPLING_OUTPUT
3189 gc_output_cache_sampling_r();
3190 #endif // GC_CACHE_ADAPT_SAMPLING_OUTPUT
3192 // check the collected statistics and,
3193 // for each page, decide the new cache strategy
3194 #ifdef GC_CACHE_ADAPT_POLICY1
3195 numchanged = cacheAdapt_policy_h4h();
3196 #elif defined GC_CACHE_ADAPT_POLICY2
3197 numchanged = cacheAdapt_policy_local();
3198 #elif defined GC_CACHE_ADAPT_POLICY3
3199 numchanged = cacheAdapt_policy_hotest();
3200 #elif defined GC_CACHE_ADAPT_POLICY4
3201 numchanged = cacheAdapt_policy_dominate();
3202 #elif defined GC_CACHE_ADAPT_POLICY5
3203 numchanged = cacheAdapt_policy_overload();
3204 #elif defined GC_CACHE_ADAPT_POLICY6
3205 numchanged = cacheAdapt_policy_crowd();
3207 *gccachepolicytbl = numchanged;
3210 // adapt the cache strategy for the mutator
3211 void cacheAdapt_mutator() {
3212 int numchanged = *gccachepolicytbl;
3213 // check the changes and adapt them
3214 int * tmp_p = gccachepolicytbl+1;
3215 while(numchanged--) {
3216 // read out the policy
3217 int page_index = *tmp_p;
3218 bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
3220 bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,
3221 policy, BAMBOO_PAGE_SIZE);
3227 void gc_output_cache_sampling() {
3228 unsigned int page_index = 0;
3230 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3231 for(page_index = 0; page_index < page_num; page_index++) {
3232 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3234 BLOCKINDEX(page_sva, &block);
3235 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3236 tprintf("va: %x page_index: %d host: %d\n",
3237 (int)page_sva, page_index, coren);
3238 for(int i = 0; i < NUMCORESACTIVE; i++) {
3239 unsigned int * local_tbl = (unsigned int *)((void *)gccachesamplingtbl
3240 +size_cachesamplingtbl_local*i);
3241 unsigned int freq = local_tbl[page_index];
3242 printf("%8d ",freq);
3246 printf("=================\n");
3247 } // gc_output_cache_sampling
3249 void gc_output_cache_sampling_r() {
3250 unsigned int page_index = 0;
3252 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3253 for(page_index = 0; page_index < page_num; page_index++) {
3254 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3256 BLOCKINDEX(page_sva, &block);
3257 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3258 tprintf("va: %x page_index: %d host: %d\n",
3259 (int)page_sva, page_index, coren);
3260 for(int i = 0; i < NUMCORESACTIVE; i++) {
3261 unsigned int * local_tbl = (unsigned int *)((void *)gccachesamplingtbl_r
3262 +size_cachesamplingtbl_local_r*i);
3263 unsigned int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
3264 printf("%8d ",freq);
3268 printf("=================\n");
3269 } // gc_output_cache_sampling_r
3270 #endif // GC_CACHE_ADAPT
3272 inline void gc_collect(struct garbagelist * stackptr) {
3273 // inform the master that this core is at a gc safe point and is ready to do gc
3275 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3276 self_numreceiveobjs, false);
3278 // core collector routine
3280 if(INITPHASE == gcphase) {
3284 #ifdef RAWPATH // TODO GC_DEBUG
3285 printf("(%X,%X) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3288 #ifdef GC_CACHE_ADAPT
3289 // prepare for cache adaptation:
3290 cacheAdapt_gc(true);
3291 #endif // GC_CACHE_ADAPT
3292 //send init finish msg to core coordinator
3293 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3296 if(MARKPHASE == gcphase) {
3300 #ifdef RAWPATH // TODO GC_DEBUG
3301 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3302 udn_tile_coord_y());
3304 mark(true, stackptr);
3305 #ifdef RAWPATH // TODO GC_DEBUG
3306 printf("(%x,%x) Finish mark phase, start compact phase\n",
3307 udn_tile_coord_x(), udn_tile_coord_y());
3310 #ifdef RAWPATH // TODO GC_DEBUG
3311 printf("(%x,%x) Finish compact phase\n", udn_tile_coord_x(),
3312 udn_tile_coord_y());
3316 if(MAPPHASE == gcphase) {
3320 #ifdef RAWPATH // TODO GC_DEBUG
3321 printf("(%x,%x) Start map phase\n", udn_tile_coord_x(),
3322 udn_tile_coord_y());
3325 #ifdef RAWPATH // TODO GC_DEBUG
3326 printf("(%x,%x) Finish map phase\n", udn_tile_coord_x(),
3327 udn_tile_coord_y());
3331 if(FLUSHPHASE == gcphase) {
3335 #ifdef RAWPATH // TODO GC_DEBUG
3336 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3337 udn_tile_coord_y());
3340 // send the num of obj/liveobj/forwardobj to the startupcore
3341 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3342 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3343 gc_num_liveobj, gc_num_forwardobj, false);
3346 #endif // GC_PROFILE
3348 #ifdef RAWPATH // TODO GC_DEBUG
3349 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3350 udn_tile_coord_y());
3353 #ifdef GC_CACHE_ADAPT
3355 if(PREFINISHPHASE == gcphase) {
3359 #ifdef RAWPATH // TODO GC_DEBUG
3360 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3361 udn_tile_coord_y());
3363 // cache adapt phase
3364 cacheAdapt_mutator();
3365 cacheAdapt_gc(false);
3366 // send prefinish finish msg to core coordinator
3367 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3368 #ifdef RAWPATH // TODO GC_DEBUG
3369 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3370 udn_tile_coord_y());
3372 #endif // GC_CACHE_ADAPT
3375 if(FINISHPHASE == gcphase) {
3379 #ifdef RAWPATH // TODO GC_DEBUG
3380 printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
3382 } // void gc_collect(struct garbagelist * stackptr)
3384 inline void gc_nocollect(struct garbagelist * stackptr) {
3385 // inform the master that this core is at a gc safe point and is ready to do gc
3387 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3388 self_numreceiveobjs, false);
3391 if(INITPHASE == gcphase) {
3395 #ifdef RAWPATH // TODO GC_DEBUG
3396 printf("(%x,%x) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3399 #ifdef GC_CACHE_ADAPT
3400 // prepare for cache adaptation:
3401 cacheAdapt_gc(true);
3402 #endif // GC_CACHE_ADAPT
3403 //send init finish msg to core coordinator
3404 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3407 if(MARKPHASE == gcphase) {
3411 #ifdef RAWPATH // TODO GC_DEBUG
3412 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3413 udn_tile_coord_y());
3415 mark(true, stackptr);
3416 #ifdef RAWPATH // TODO GC_DEBUG
3417 printf("(%x,%x) Finish mark phase, wait for flush\n",
3418 udn_tile_coord_x(), udn_tile_coord_y());
3421 // non-gc core collector routine
3423 if(FLUSHPHASE == gcphase) {
3427 #ifdef RAWPATH // TODO GC_DEBUG
3428 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3429 udn_tile_coord_y());
3432 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3433 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3434 gc_num_liveobj, gc_num_forwardobj, false);
3437 #endif // GC_PROFILE
3439 #ifdef RAWPATH // TODO GC_DEBUG
3440 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3441 udn_tile_coord_y());
3444 #ifdef GC_CACHE_ADAPT
3446 if(PREFINISHPHASE == gcphase) {
3450 #ifdef RAWPATH // TODO GC_DEBUG
3451 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3452 udn_tile_coord_y());
3454 // cache adapt phase
3455 cacheAdapt_mutator();
3456 cacheAdapt_gc(false);
3457 // send prefinish finish msg to core coordinator
3458 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3459 #ifdef RAWPATH // TODO GC_DEBUG
3460 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3461 udn_tile_coord_y());
3463 #endif // GC_CACHE_ADAPT
3466 if(FINISHPHASE == gcphase) {
3470 #ifdef RAWPATH // TODO GC_DEBUG
3471 printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
3473 } // void gc_nocollect(struct garbagelist * stackptr)
3475 inline void gc_master(struct garbagelist * stackptr) {
3477 gcphase = INITPHASE;
3479 waitconfirm = false;
3483 // Note: all cores need to init gc including non-gc cores
3484 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; i++) {
3485 // send GC init messages to all cores
3486 send_msg_1(i, GCSTARTINIT, false);
3488 bool isfirst = true;
3489 bool allStall = false;
3491 #ifdef GC_CACHE_ADAPT
3492 // prepare for cache adaptation:
3493 cacheAdapt_gc(true);
3494 #endif // GC_CACHE_ADAPT
3496 #ifdef RAWPATH // TODO GC_DEBUG
3497 printf("(%x,%x) Check core status \n", udn_tile_coord_x(),
3498 udn_tile_coord_y());
3501 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3503 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3504 if(gc_checkAllCoreStatus_I()) {
3505 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3508 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3513 #ifdef GC_CACHE_ADAPT_POLICY_OUTPUT
3514 gc_output_cache_sampling();
3515 #endif // GC_CACHE_ADAPT
3516 #ifdef RAWPATH // TODO GC_DEBUG
3517 printf("(%x,%x) Start mark phase \n", udn_tile_coord_x(),
3518 udn_tile_coord_y());
3520 // all cores have finished the init phase;
3521 // restore the gcstatus of all cores
3522 // Note: all cores have to do mark including non-gc cores
3523 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3524 for(i = 1; i < NUMCORESACTIVE; ++i) {
3525 gccorestatus[i] = 1;
3526 // send GC start messages to all cores
3527 send_msg_1(i, GCSTART, false);
3530 gcphase = MARKPHASE;
3532 while(MARKPHASE == gcphase) {
3533 mark(isfirst, stackptr);
3540 } // while(MARKPHASE == gcphase)
3541 // send msgs to all gc cores requesting their large objs info
3542 // Note: only need to ask gc cores, non-gc cores do not host any objs
3543 numconfirm = NUMCORES4GC - 1;
3544 for(i = 1; i < NUMCORES4GC; ++i) {
3545 send_msg_1(i, GCLOBJREQUEST, false);
3547 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
3552 } // wait for responses
3553 // check the heaptop
3554 if(gcheaptop < gcmarkedptrbound) {
3555 gcheaptop = gcmarkedptrbound;
3560 #ifdef RAWPATH // TODO GC_DEBUG
3561 printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
3562 udn_tile_coord_y());
3564 // cache all large objs
3566 // not enough space to cache large objs
3567 BAMBOO_EXIT(0xb107);
3569 // predict number of blocks to fill for each core
3571 int numpbc = loadbalance(&tmpheaptop);
3573 numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
3574 #ifdef RAWPATH // TODO GC_DEBUG
3575 printf("(%x,%x) mark phase finished \n", udn_tile_coord_x(),
3576 udn_tile_coord_y());
3578 //int tmptopptr = 0;
3579 //BASEPTR(gctopcore, 0, &tmptopptr);
3581 //tmptopptr = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3582 tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3584 BAMBOO_DEBUGPRINT(0xabab);
3585 BAMBOO_DEBUGPRINT_REG(tmptopptr);
3587 for(i = 0; i < NUMCORES4GC; ++i) {
3589 BASEPTR(i, numpbc, &tmpcoreptr);
3590 //send start compact messages to all cores
3591 // TODO bug here: do not know if the direction is positive or negative
3592 if (tmpcoreptr < tmpheaptop) {
3593 gcstopblock[i] = numpbc + 1;
3594 if(i != STARTUPCORE) {
3595 send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
3597 gcblock2fill = numpbc+1;
3598 } // if(i != STARTUPCORE)
3600 gcstopblock[i] = numpbc;
3601 if(i != STARTUPCORE) {
3602 send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
3604 gcblock2fill = numpbc;
3605 } // if(i != STARTUPCORE)
3608 BAMBOO_DEBUGPRINT(0xf000+i);
3609 BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
3610 BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
3612 // init some data structures for the compact phase
3614 gcfilledblocks[i] = 0;
3615 gcrequiredmems[i] = 0;
3625 bool finalcompact = false;
3626 // initialize pointers for compacting
3627 struct moveHelper * orig =
3628 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3629 struct moveHelper * to =
3630 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3631 initOrig_Dst(orig, to);
3632 int filledblocks = 0;
3633 INTPTR heaptopptr = 0;
3634 bool finishcompact = false;
3635 bool iscontinue = true;
3636 bool localcompact = true;
3637 while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
3638 if((!finishcompact) && iscontinue) {
3640 BAMBOO_DEBUGPRINT(0xe001);
3641 BAMBOO_DEBUGPRINT_REG(numpbc);
3642 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3644 finishcompact = compacthelper(orig, to, &filledblocks,
3645 &heaptopptr, &localcompact);
3647 BAMBOO_DEBUGPRINT(0xe002);
3648 BAMBOO_DEBUGPRINT_REG(finishcompact);
3649 BAMBOO_DEBUGPRINT_REG(gctomove);
3650 BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
3651 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
3652 BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
3656 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3657 if(gc_checkCoreStatus_I()) {
3658 // all cores have finished compacting
3659 // restore the gcstatus of all cores
3660 for(i = 0; i < NUMCORES4GC; ++i) {
3661 gccorestatus[i] = 1;
3663 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3666 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3667 // check if there is spare mem for pending move requests
3668 if(COMPACTPHASE == gcphase) {
3670 BAMBOO_DEBUGPRINT(0xe003);
3672 resolvePendingMoveRequest();
3674 BAMBOO_DEBUGPRINT_REG(gctomove);
3678 BAMBOO_DEBUGPRINT(0xe004);
3682 } // if(gc_checkCoreStatus_I()) else ...
3686 BAMBOO_DEBUGPRINT(0xe005);
3687 BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
3688 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3689 BAMBOO_DEBUGPRINT_REG(gctomove);
3691 to->ptr = gcmovestartaddr;
3692 to->numblocks = gcblock2fill - 1;
3693 to->bound = (to->numblocks==0) ?
3694 BAMBOO_SMEM_SIZE_L :
3695 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
3696 BASEPTR(gcdstcore, to->numblocks, &(to->base));
3697 to->offset = to->ptr - to->base;
3698 to->top = (to->numblocks==0) ?
3699 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
3701 to->offset = BAMBOO_CACHE_LINE_SIZE;
3702 to->ptr += to->offset; // for header
3703 to->top += to->offset;
3704 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
3705 localcompact = true;
3707 localcompact = false;
3711 } else if(!finishcompact) {
3715 } // while(COMPACTPHASE == gcphase)
3719 #ifdef RAWPATH // TODO GC_DEBUG
3720 printf("(%x,%x) prepare to move large objs \n", udn_tile_coord_x(),
3721 udn_tile_coord_y());
3725 #ifdef RAWPATH // TODO GC_DEBUG
3726 printf("(%x,%x) compact phase finished \n", udn_tile_coord_x(),
3727 udn_tile_coord_y());
3734 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3735 // Note: all gc cores need to broadcast their mapping info
3737 for(i = 1; i < NUMCORES4GC; ++i) {
3738 // send start map phase messages to all gc cores
3739 gccorestatus[i] = 1;
3740 send_msg_1(i, GCSTARTMAPINFO, false);
3745 #ifdef RAWPATH // TODO GC_DEBUG
3746 printf("(%x,%x) Start map phase \n", udn_tile_coord_x(),
3747 udn_tile_coord_y());
3751 #ifdef RAWPATH // TODO GC_DEBUG
3752 printf("(%x,%x) Finish map phase \n", udn_tile_coord_x(),
3753 udn_tile_coord_y());
3755 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3756 while(MAPPHASE == gcphase) {
3757 // check the status of all cores
3758 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3759 if(gc_checkCoreStatus_I()) {
3760 // all cores have finished sending mapping info
3761 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3764 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3765 } // while(MAPPHASE == gcphase)
3767 gcphase = FLUSHPHASE;
3768 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3769 // Note: all cores should flush their runtime data, including non-gc cores
3771 for(i = 1; i < NUMCORESACTIVE; ++i) {
3772 // send start flush messages to all cores
3773 gccorestatus[i] = 1;
3774 send_msg_1(i, GCSTARTFLUSH, false);
3779 #ifdef RAWPATH // TODO GC_DEBUG
3780 printf("(%x,%x) Start flush phase \n", udn_tile_coord_x(),
3781 udn_tile_coord_y());
3786 #ifdef GC_CACHE_ADAPT
3787 // now the master core needs to decide the new cache strategy
3788 cacheAdapt_master();
3789 #endif // GC_CACHE_ADAPT
3791 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3792 while(FLUSHPHASE == gcphase) {
3793 // check the status of all cores
3794 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3795 if(gc_checkAllCoreStatus_I()) {
3796 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3799 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3800 } // while(FLUSHPHASE == gcphase)
3801 #ifdef RAWPATH // TODO GC_DEBUG
3802 printf("(%x,%x) Finish flush phase \n", udn_tile_coord_x(),
3803 udn_tile_coord_y());
3806 #ifdef GC_CACHE_ADAPT
3810 gcphase = PREFINISHPHASE;
3811 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3812 // Note: all cores take part in the prefinish phase, including non-gc cores
3814 for(i = 1; i < NUMCORESACTIVE; ++i) {
3815 // send start prefinish messages to all cores
3816 gccorestatus[i] = 1;
3817 send_msg_1(i, GCSTARTPREF, false);
3819 #ifdef RAWPATH // TODO GC_DEBUG
3820 printf("(%x,%x) Start prefinish phase \n", udn_tile_coord_x(),
3821 udn_tile_coord_y());
3823 // cache adapt phase
3824 cacheAdapt_mutator();
3825 #ifdef GC_CACHE_ADAPT_OUTPUT
3826 bamboo_output_cache_policy();
3828 cacheAdapt_gc(false);
3829 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3830 while(PREFINISHPHASE == gcphase) {
3831 // check the status of all cores
3832 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3833 if(gc_checkAllCoreStatus_I()) {
3834 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3837 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3838 } // while(PREFINISHPHASE == gcphase)
3839 #endif // GC_CACHE_ADAPT
3841 gcphase = FINISHPHASE;
3843 // invalidate all shared mem pointers
3844 // done here because it takes time to inform all the other cores to
3845 // finish gc, and it could cause problems if some core resumed the
3846 // mutator earlier than the others
3847 bamboo_cur_msp = NULL;
3848 bamboo_smem_size = 0;
3849 bamboo_smem_zero_top = NULL;
3851 gcprocessing = false;
3856 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3857 for(i = 1; i < NUMCORESACTIVE; ++i) {
3858 // send gc finish messages to all cores
3859 send_msg_1(i, GCFINISH, false);
3860 gccorestatus[i] = 1;
3862 #ifdef RAWPATH // TODO GC_DEBUG
3863 printf("(%x,%x) gc finished \n", udn_tile_coord_x(),
3864 udn_tile_coord_y());
3866 } // void gc_master(struct garbagelist * stackptr)
3868 inline bool gc(struct garbagelist * stackptr) {
3871 gcprocessing = false;
3875 #ifdef GC_CACHE_ADAPT
3876 #ifdef GC_CACHE_SAMPLING
3877 // disable the timer interrupt
3878 bamboo_mask_timer_intr();
3881 // core coordinator routine
3882 if(0 == BAMBOO_NUM_OF_CORE) {
3884 printf("(%x,%X) Check if can do gc or not\n", udn_tile_coord_x(),
3885 udn_tile_coord_y());
3887 bool isallstall = true;
3888 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3889 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3891 for(ti = 0; ti < NUMCORESACTIVE; ++ti) {
3892 if(gccorestatus[ti] != 0) {
3898 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3899 // some cores are still executing the mutator and have not reached
3900 // a gc safe point, so gc cannot start yet;
3901 // in case some pregc information msgs were lost, send a confirm
3902 // msg to the 'busy' core
3903 send_msg_1(ti, GCSTARTPRE, false);
3911 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
3912 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
3915 BAMBOO_DEBUGPRINT(0xec04);
3917 for(int i = 0; i < NUMCORESACTIVE; ++i) {
3918 sumsendobj += gcnumsendobjs[0][i];
3920 BAMBOO_DEBUGPRINT(0xf000 + gcnumsendobjs[0][i]);
3922 } // for(i = 0; i < NUMCORESACTIVE; ++i)
3924 BAMBOO_DEBUGPRINT(0xec05);
3925 BAMBOO_DEBUGPRINT_REG(sumsendobj);
3927 for(int i = 0; i < NUMCORESACTIVE; ++i) {
3928 sumsendobj -= gcnumreceiveobjs[0][i];
3930 BAMBOO_DEBUGPRINT(0xf000 + gcnumreceiveobjs[0][i]);
3932 } // for(i = 0; i < NUMCORESACTIVE; ++i)
3934 BAMBOO_DEBUGPRINT(0xec06);
3935 BAMBOO_DEBUGPRINT_REG(sumsendobj);
3937 if(0 != sumsendobj) {
3938 // there are still some msgs in flight; wait until updated
3939 // pregc information arrives, then check again
3941 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3949 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3952 #ifdef RAWPATH // TODO GC_DEBUG
3953 printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
3955 // Zero out the remaining bamboo_cur_msp
3956 // Only zero out the first 4 bytes of the remaining memory
3957 // Moved here because for the GC_CACHE_ADAPT version we need to make
3958 // sure the shared heap is not touched during the gcinit phase;
3959 // otherwise, there would be problems when adapting the cache strategy
3961 if((bamboo_cur_msp != 0)
3962 && (bamboo_smem_zero_top == bamboo_cur_msp)
3963 && (bamboo_smem_size > 0)) {
3964 *((int *)bamboo_cur_msp) = 0;
3966 #ifdef GC_FLUSH_DTLB
3967 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
3968 BAMBOO_CLEAN_DTLB();
3969 gc_num_flush_dtlb++;
3972 #ifdef GC_CACHE_ADAPT
3973 #ifdef GC_CACHE_SAMPLING
3974 // get the sampling data
3975 bamboo_output_dtlb_sampling();
3976 #endif // GC_CACHE_SAMPLING
3977 #endif // GC_CACHE_ADAPT
3978 gcprocessing = true;
3979 gc_master(stackptr);
3980 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
3981 // Zero out the remaining bamboo_cur_msp
3982 // Only zero out the first 4 bytes of the remaining memory
3983 // Moved here because for the GC_CACHE_ADAPT version we need to make
3984 // sure the shared heap is not touched during the gcinit phase;
3985 // otherwise, there would be problems when adapting the cache strategy
3987 if((bamboo_cur_msp != 0)
3988 && (bamboo_smem_zero_top == bamboo_cur_msp)
3989 && (bamboo_smem_size > 0)) {
3990 *((int *)bamboo_cur_msp) = 0;
3992 #ifdef GC_FLUSH_DTLB
3993 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
3994 BAMBOO_CLEAN_DTLB();
3995 gc_num_flush_dtlb++;
3998 #ifdef GC_CACHE_ADAPT
3999 #ifdef GC_CACHE_SAMPLING
4000 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4001 // get the sampling data
4002 bamboo_output_dtlb_sampling();
4004 #endif // GC_CACHE_SAMPLING
4005 #endif // GC_CACHE_ADAPT
4006 gcprocessing = true;
4007 gc_collect(stackptr);
4009 // invalidate all shared mem pointers
4010 bamboo_cur_msp = NULL;
4011 bamboo_smem_size = 0;
4012 bamboo_smem_zero_top = NULL;
4014 gcprocessing = false;
4016 // Zero out the remaining bamboo_cur_msp
4017 // Only zero out the first 4 bytes of the remaining memory
4018 // Moved here because for the GC_CACHE_ADAPT version we need to make
4019 // sure the shared heap is not touched during the gcinit phase;
4020 // otherwise, there would be problems when adapting the cache strategy
4022 if((bamboo_cur_msp != 0)
4023 && (bamboo_smem_zero_top == bamboo_cur_msp)
4024 && (bamboo_smem_size > 0)) {
4025 *((int *)bamboo_cur_msp) = 0;
4027 #ifdef GC_FLUSH_DTLB
4028 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4029 BAMBOO_CLEAN_DTLB();
4030 gc_num_flush_dtlb++;
4033 #ifdef GC_CACHE_ADAPT
4034 #ifdef GC_CACHE_SAMPLING
4035 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4036 // get the sampling data
4037 bamboo_output_dtlb_sampling();
4039 #endif // GC_CACHE_SAMPLING
4040 #endif // GC_CACHE_ADAPT
4041 // not a gc core, should wait for gcfinish msg
4042 gcprocessing = true;
4043 gc_nocollect(stackptr);
4045 // invalidate all shared mem pointers
4046 bamboo_cur_msp = NULL;
4047 bamboo_smem_size = 0;
4048 bamboo_smem_zero_top = NULL;
4050 gcprocessing = false;
4052 #ifdef GC_CACHE_ADAPT
4053 #ifdef GC_CACHE_SAMPLING
4054 // reset the sampling arrays
4055 bamboo_dtlb_sampling_reset();
4056 #endif // GC_CACHE_SAMPLING
4057 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4058 // zero out the gccachesamplingtbl
4059 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
4060 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
4061 size_cachesamplingtbl_local_r);
4062 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
4063 BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
4066 #ifdef GC_CACHE_SAMPLING
4067 // enable the timer interrupt
4068 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
4069 bamboo_unmask_timer_intr();
4070 #endif // GC_CACHE_SAMPLING
4071 #endif // GC_CACHE_ADAPT
4073 } // bool gc(struct garbagelist * stackptr)
4076 inline void gc_profileStart(void) {
4077 if(!gc_infoOverflow) {
4078 GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
4079 gc_infoArray[gc_infoIndex] = gcInfo;
4081 gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
4085 inline void gc_profileItem(void) {
4086 if(!gc_infoOverflow) {
4087 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4088 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
4092 inline void gc_profileEnd(void) {
4093 if(!gc_infoOverflow) {
4094 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4095 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
4096 gcInfo->time[gcInfo->index++] = gc_num_livespace;
4097 gcInfo->time[gcInfo->index++] = gc_num_freespace;
4098 gcInfo->time[gcInfo->index++] = gc_num_lobj;
4099 gcInfo->time[gcInfo->index++] = gc_num_lobjspace;
4100 gcInfo->time[gcInfo->index++] = gc_num_obj;
4101 gcInfo->time[gcInfo->index++] = gc_num_liveobj;
4102 gcInfo->time[gcInfo->index++] = gc_num_forwardobj;
4104 if(gc_infoIndex == GCINFOLENGTH) {
4105 gc_infoOverflow = true;
4106 //taskInfoIndex = 0;
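// GCInfo record layout assumed by the profiling code (a sketch): time[0]
// is the gc start timestamp written by gc_profileStart, followed by one
// timestamp per gc_profileItem call; gc_profileEnd then appends the end
// timestamp plus seven counters, in order: livespace, freespace, num_lobj,
// lobjspace, num_obj, num_liveobj, num_forwardobj. gc_outputProfileData
// depends on this layout when it reads time[index-8] (the end timestamp)
// through time[index-1] (the last counter).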
4111 // output the profiling data
4112 void gc_outputProfileData() {
4115 unsigned long long totalgc = 0;
4117 #ifndef BAMBOO_MEMPROF
4118 BAMBOO_DEBUGPRINT(0xdddd);
4120 // output gc related info
4121 for(i= 0; i < gc_infoIndex; i++) {
4122 GCInfo * gcInfo = gc_infoArray[i];
4123 #ifdef BAMBOO_MEMPROF
4124 unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; //0;
4126 unsigned long long tmp = 0;
4127 BAMBOO_DEBUGPRINT(0xddda);
4128 for(j = 0; j < gcInfo->index - 7; j++) {
4129 BAMBOO_DEBUGPRINT(gcInfo->time[j]);
4130 BAMBOO_DEBUGPRINT(gcInfo->time[j]-tmp);
4131 BAMBOO_DEBUGPRINT(0xdddb);
4132 tmp = gcInfo->time[j];
4134 tmp = (tmp-gcInfo->time[0]);
4135 BAMBOO_DEBUGPRINT_REG(tmp);
4136 BAMBOO_DEBUGPRINT(0xdddc);
4137 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 7]);
4138 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 6]);
4139 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 5]);
4140 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 4]);
4141 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 3]);
4142 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 2]);
4143 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 1]);
4144 BAMBOO_DEBUGPRINT(0xddde);
4148 #ifndef BAMBOO_MEMPROF
4149 BAMBOO_DEBUGPRINT(0xdddf);
4151 BAMBOO_DEBUGPRINT_REG(totalgc);
4153 if(gc_infoOverflow) {
4154 BAMBOO_DEBUGPRINT(0xefee);
4157 #ifndef BAMBOO_MEMPROF
4158 BAMBOO_DEBUGPRINT(0xeeee);
4161 #endif // #ifdef GC_PROFILE