#ifdef MULTICORE_GC
#include "structdefs.h"
#include "multicoregccompact.h"
#include "runtime_arch.h"
#include "multicoreruntime.h"
#include "multicoregarbage.h"
#include "multicoremem_helper.h"
//counts the cores that have not yet returned their last block
//(returnedmem[i] is set in master_compact and cleared in handleReturnMem_I)
int gc_countRunningCores() {
  int count=0;
  for(int i = 0; i < NUMCORES4GC; i++) {
    if (returnedmem[i]) {
      count++;
    }
  }
  return count;
}
void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
  // init the destination pointer
  to->localblocknum = 0;
  BASEPTR(to->base, BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr = to->base;
  to->bound=to->base+BLOCKSIZE(to->localblocknum);

  // init the source pointer
  orig->localblocknum = 0;
  orig->ptr=orig->base = to->base;
  orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=to->base+BAMBOO_PAGE_SIZE;
  orig->pagebound=orig->base+BAMBOO_PAGE_SIZE;
#endif
}
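
//Illustrative sketch only (not called by the collector): walking a core's
//local blocks with the same macros used above.  It assumes just what those
//uses imply: BASEPTR stores the base virtual address of this core's n-th
//local block into its first argument, and BLOCKSIZE gives that block's length.
static void sketch_printLocalBlocks(unsigned int numlocalblocks) {
  for(unsigned int n=0; n<numlocalblocks; n++) {
    void *base;
    BASEPTR(base, BAMBOO_NUM_OF_CORE, n);
    //a core's blocks need not be contiguous in the shared heap
    printf("local block %u: [%x, %x)\n", n, base, base+BLOCKSIZE(n));
  }
}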

void getSpaceLocally(struct moveHelper *to) {
  //we have space on our core...just advance to our next local block
  to->localblocknum++;
  BASEPTR(to->base,BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr=to->base;
  to->bound = to->base + BLOCKSIZE(to->localblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=to->base+BAMBOO_PAGE_SIZE;
#endif
}

//This function is called on the master core only, typically from the
//message interrupt handler, so interrupts are already disabled.
void handleReturnMem_I(unsigned int cnum, void *heaptop) {
  unsigned int blockindex;
  BLOCKINDEX(blockindex, heaptop);
  unsigned INTPTR localblocknum=GLOBALBLOCK2LOCAL(blockindex);
  //this core is done as far as memory usage is concerned
  returnedmem[cnum]=0;

  struct blockrecord * blockrecord=&allocationinfo.blocktable[blockindex];
  blockrecord->status=BS_FREE;
  blockrecord->usedspace=(unsigned INTPTR)(heaptop-OFFSET2BASEVA(blockindex)-gcbaseva);
  blockrecord->freespace=BLOCKSIZE(localblocknum)-blockrecord->usedspace;
  /* Update the lowest free block */
  if (blockindex < allocationinfo.lowestfreeblock) {
    allocationinfo.lowestfreeblock=blockindex;
  }

  /* This block belongs to the returning core, so every local block above
     it on that core is now completely free */
  if (cnum==blockrecord->corenum) {
    unsigned INTPTR nextlocalblocknum=localblocknum+1;
    for(;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
      unsigned INTPTR blocknum=BLOCKINDEX2(cnum, nextlocalblocknum);
      struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
      nextblockrecord->status=BS_FREE;
      nextblockrecord->usedspace=0;
      //safe because local block 0 is the only block with a different size
      //and nextlocalblocknum is at least 1 here
      nextblockrecord->freespace=BLOCKSIZE(1);
    }
  }

  //this could be the last core to return memory
  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  } else {
    //see if the returned memory blocks let us resolve pending requests
    useReturnedMem(cnum, allocationinfo.lowestfreeblock);
  }
}
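
//Illustrative invariant (not called by the collector): an address must lie
//inside the block BLOCKINDEX maps it to.  A minimal sketch relying only on
//the macro behavior visible in this file (OFFSET2BASEVA gives a block's
//offset from gcbaseva, GLOBALBLOCK2LOCAL its local number on its core).
static void sketch_checkBlockMapping(void *addr) {
  unsigned int gblock;
  BLOCKINDEX(gblock, addr);
  void *base=OFFSET2BASEVA(gblock)+gcbaseva;
  unsigned INTPTR lblock=GLOBALBLOCK2LOCAL(gblock);
  BAMBOO_ASSERT(addr>=base&&addr<(base+BLOCKSIZE(lblock)));
}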

void useReturnedMem(unsigned int retcorenum, block_t localblockindex) {
  for(int i=0;i<NUMCORES4GC;i++) {
    unsigned INTPTR requiredmem=gcrequiredmems[i];
    if (requiredmem) {
      unsigned INTPTR desiredmem=maxusefulmems[i];
      unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
      unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

      for(block_t nextlocalblocknum=localblockindex;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
        unsigned INTPTR blocknum=BLOCKINDEX2(retcorenum, nextlocalblocknum);
        struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
        if (nextblockrecord->status==BS_FREE) {
          unsigned INTPTR freespace=nextblockrecord->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (freespace>=memcheck) {
            nextblockrecord->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(blocknum)+gcbaseva;
            unsigned INTPTR usedspace=((nextblockrecord->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            //taken care of one pending request
            gcmovepending--;
            void *startaddr=blockptr+usedspace;
            gcrequiredmems[i]=0;
            maxusefulmems[i]=0;
            if (i==STARTUPCORE) {
              gctomove=true;
              gcmovestartaddr = startaddr;
            } else if(BAMBOO_CHECK_SEND_MODE()) {
              cache_msg_2_I(i,GCMOVESTART,startaddr);
            } else {
              send_msg_2_I(i,GCMOVESTART,startaddr);
            }
            //this core's request is satisfied...move on to the next core
            break;
          }
        }
      }
    }
  }
}
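
//The sizing rule above reappears in checkNeighbors_I, globalSearch_I and
//handleOneMemoryRequest: grant at least requiredmem, but aim for
//min(desiredmem, MINMEMORYCHUNKSIZE) so a small request still receives a
//usefully large chunk.  Equivalent sketch (illustrative helper only):
static unsigned INTPTR sketch_memcheck(unsigned INTPTR requiredmem,
                                       unsigned INTPTR desiredmem) {
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)?desiredmem:MINMEMORYCHUNKSIZE;
  return (requiredmem>threshold)?requiredmem:threshold;
}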

void handleReturnMem(unsigned int cnum, void *heaptop) {
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  handleReturnMem_I(cnum, heaptop);
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
}

void getSpaceRemotely(struct moveHelper *to, unsigned int minimumbytes) {
  //need to get another block from elsewhere
  //clear the flag that signals the arrival of a start address
  gctomove=false;
  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    void *startaddr=handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    if (startaddr) {
      gcmovestartaddr=startaddr;
    } else {
      //wait until a returning block satisfies our request
      while(!gctomove)
        ;
    }
  } else {
    //send request for memory
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    //wait for flag to be set that we received the reply message
    while(!gctomove)
      ;
  }
  to->ptr = gcmovestartaddr;

  //set localblock number to a high number to indicate this block isn't local
  to->localblocknum = MAXBLOCK;
  unsigned int globalblocknum;
  BLOCKINDEX(globalblocknum, to->ptr);
  to->base = gcbaseva + OFFSET2BASEVA(globalblocknum);
  to->bound = gcbaseva + BOUNDPTR(globalblocknum);
#ifdef GC_CACHE_ADAPT
  to->pagebound=(void *)((int)((int)(to->ptr)&(~(BAMBOO_PAGE_SIZE-1)))+BAMBOO_PAGE_SIZE);
#endif
}
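
//Message flow for a remote block request, as implemented above on the
//worker side and by handlegcfinishcompact_I/handleOneMemoryRequest on the
//master side:
//  worker: send_msg_4(STARTUPCORE, GCFINISHCOMPACT, corenum, minbytes, heaptop)
//  master: hands out a free block immediately, or queues the request in
//          gcrequiredmems/maxusefulmems until memory is returned
//  master: send_msg_2(worker, GCMOVESTART, startaddr)
//  worker: its message handler (not in this file) presumably mirrors the
//          STARTUPCORE path in useReturnedMem: it stores startaddr into
//          gcmovestartaddr and sets gctomove, releasing the spin loop above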

void getSpace(struct moveHelper *to, unsigned int minimumbytes) {
  //need more space to compact into
  if ((to->localblocknum+1) < gcblock2fill) {
    getSpaceLocally(to);
  } else {
    getSpaceRemotely(to, minimumbytes);
  }
}

void compacthelper(struct moveHelper * orig,struct moveHelper * to) {
  bool senttopmessage=false;
  while(true) {
    if ((gccurr_heaptop < ((unsigned INTPTR)(to->bound-to->ptr)))&&!senttopmessage) {
      //This block is the last one this core needs...tell the master where
      //our heap will end so the remainder can be handed out
      if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
        handleReturnMem(BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      } else {
        send_msg_3(STARTUPCORE, GCRETURNMEM, BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      }
      //Only send the message once
      senttopmessage=true;
    }
    unsigned int minimumbytes=COMPACTUNITS(orig, to);
    if (orig->ptr==orig->bound) {
      //need more data to compact...advance to the next local block
      orig->localblocknum++;
      BASEPTR(orig->base,BAMBOO_NUM_OF_CORE, orig->localblocknum);
      orig->ptr=orig->base;
      orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
#ifdef GC_CACHE_ADAPT
      orig->pagebound=orig->base+BAMBOO_PAGE_SIZE;
#endif
      if (orig->base >= gcbaseva+BAMBOO_SHARED_MEM_SIZE)
        break;
    }
    if (minimumbytes!=0) {
      getSpace(to, minimumbytes);
    }
  }
  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, 0, 0);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } else {
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, 0, 0);
  }
}
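
//Contract between compacthelper and the COMPACTUNITS routines below:
//compactpages/compactblocks return 0 once the current source range is
//exhausted, or the byte length of the first live object that no longer
//fits in the destination block; compacthelper then passes that length to
//getSpace as the minimum size the next destination block must provide.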

void * checkNeighbors_I(int ncorenum, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  int minblockindex=allocationinfo.lowestfreeblock/NUMCORES4GC;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t lblock=minblockindex;lblock<numblockspercore;lblock++) {
    for(int i=0;i<NUM_CORES2TEST;i++) {
      int neighborcore=core2test[ncorenum][i];
      if (neighborcore!=-1) {
        block_t globalblockindex=BLOCKINDEX2(neighborcore, lblock);
        struct blockrecord * block=&allocationinfo.blocktable[globalblockindex];
        if (block->status==BS_FREE) {
          unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (memcheck<=freespace) {
            //found a block...mark it used and hand out its free suffix
            block->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(globalblockindex)+gcbaseva;
            unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            return blockptr+usedspace;
          }
        }
      }
    }
  }
  return NULL;
}
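
//core2test (from multicoremem_helper.h) lists up to NUM_CORES2TEST candidate
//cores for each core, padded with -1; presumably physical neighbors come
//first.  Scanning lblock in the outer loop tries the same block level on
//every neighbor before moving deeper, so a displaced destination block stays
//at a similar depth in the heap and close to the requesting core.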

void * globalSearch_I(unsigned int topblock, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  unsigned int firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t i=allocationinfo.lowestfreeblock;i<topblock;i++) {
    struct blockrecord * block=&allocationinfo.blocktable[i];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=i;
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (memcheck<=freespace) {
        //found a block...mark it used and hand out its free suffix
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(i)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        return blockptr+usedspace;
      }
    }
  }
  allocationinfo.lowestfreeblock=firstfree;
  return NULL;
}

void handleOneMemoryRequest(int core, unsigned int lowestblock) {
  unsigned INTPTR requiredmem=gcrequiredmems[core];
  unsigned INTPTR desiredmem=maxusefulmems[core];
  block_t firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t searchblock=lowestblock;searchblock<GCNUMBLOCK;searchblock++) {
    struct blockrecord * block=&allocationinfo.blocktable[searchblock];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=searchblock;
      //don't take a block from another core that hasn't returned its memory yet
      if (block->corenum!=core&&returnedmem[block->corenum])
        continue;
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (freespace>=memcheck) {
        //TODO: should check memory block at same level on our own core...if that works, use it to preserve locality
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(searchblock)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        //taken care of one pending request
        gcmovepending--;
        void *startaddr=blockptr+usedspace;
        if(BAMBOO_CHECK_SEND_MODE()) {
          cache_msg_2_I(core,GCMOVESTART,startaddr);
        } else {
          send_msg_2_I(core,GCMOVESTART,startaddr);
        }
        return;
      }
    }
  }
  //this is bad...we ran out of memory
  printf("Out of memory. Was trying for %u bytes\n", threshold);
  exit(-1);
}
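
//Every hand-out path above carves a block the same way: round the used
//prefix up to the next cache-line boundary and start the new region there,
//so two cores never write to the same cache line.  A minimal sketch of that
//arithmetic (illustrative helper, not called by the collector), assuming
//BAMBOO_CACHE_LINE_MASK==BAMBOO_CACHE_LINE_SIZE-1 and record->usedspace>0:
static void *sketch_carveStart(struct blockrecord *record, void *blockptr) {
  unsigned INTPTR used=((record->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)
    +BAMBOO_CACHE_LINE_SIZE;
  //with 64-byte lines, usedspace values 1..64 all round up to 64
  return blockptr+used;
}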

void handleMemoryRequests_I() {
  unsigned int lowestblock=allocationinfo.lowestfreeblock;
  if (lowestblock==NOFREEBLOCK) {
    lowestblock=numblockspercore*NUMCORES4GC;
  }

  for(int i=0;i < NUMCORES4GC; i++) {
    if (gcrequiredmems[i]) {
      handleOneMemoryRequest(i, lowestblock);
      lowestblock=allocationinfo.lowestfreeblock;
    }
  }
}

/* this function should be invoked with interrupts turned off */
void * gcfindSpareMem_I(unsigned INTPTR requiredmem, unsigned INTPTR desiredmem,unsigned int requiredcore) {
  if (allocationinfo.lowestfreeblock!=NOFREEBLOCK) {
    //There are spare blocks
    unsigned int topblock=numblockspercore*NUMCORES4GC;
    void *memblock;

    if ((memblock=checkNeighbors_I(requiredcore, requiredmem, desiredmem))) {
      return memblock;
    } else if ((memblock=globalSearch_I(topblock, requiredmem, desiredmem))) {
      return memblock;
    }
  }

  // If we cannot find spare mem right now, hold the request
  gcrequiredmems[requiredcore] = requiredmem;
  maxusefulmems[requiredcore]=desiredmem;
  gcmovepending++;

  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  }
  return NULL;
}
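
//Deadlock note: every request that cannot be served immediately bumps
//gcmovepending.  When gcmovepending equals the number of cores that have
//not yet returned their top block (gc_countRunningCores), every core that
//is still compacting is blocked waiting on memory, so the master resolves
//all queued requests at once via handleMemoryRequests_I; both this
//function and handleReturnMem_I perform that check.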

#ifdef GC_CACHE_ADAPT
/* To compute the access rate per page, we need to compact to page
 * boundaries instead of block boundaries */
unsigned int compactpages(struct moveHelper * orig, struct moveHelper * to) {
  void *toptrinit=to->ptr;
  void *toptr=toptrinit;
  void *tobound=to->bound;
  void *topagebound=to->pagebound;
  void *origptr=orig->ptr;
  void *origpagebound=orig->pagebound;
  unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origpagebound-gcbaseva));
  unsigned int objlength;
  while((origptr<origpagebound)&&(toptr<topagebound)){
    //Try to skip over dead space quickly first
    unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
    unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
    if (!gcmarktbl[arrayoffset]) {
      do {
        arrayoffset++;
        if (arrayoffset>=origendoffset) {
          //finished with a page...
          origptr=origpagebound;
          to->ptr=toptr;
          orig->ptr=origptr;
          orig->pagebound+=BAMBOO_PAGE_SIZE;
          gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
          // close the last destination page
          CACHEADAPT_COMPLETE_PAGE_CONVERT(origptr, toptr, toptr);
          return 0;
        }
      } while(!gcmarktbl[arrayoffset]);
      origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
    }

    //Scan more carefully next
    objlength=getMarkedLength(origptr);

    if (objlength!=NOTMARKED) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);

      //debug-only consistency check...remove once the mark bitmap is trusted
#ifdef GC_DEBUG
      unsigned int size;
      unsigned int type;
      gettype_size(origptr, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;
      if (size!=length) {
        tprintf("BAD SIZE IN BITMAP: type=%u object=%x size=%u length=%u\n", type, origptr, size, length);
        unsigned INTPTR alignsize=ALIGNOBJSIZE((unsigned INTPTR)(origptr-gcbaseva));
        unsigned INTPTR hibits=alignsize>>4;
        unsigned INTPTR lobits=(alignsize&15)<<1;
        tprintf("hibits=%x lobits=%x\n", hibits, lobits);
        tprintf("hi=%x lo=%x\n", gcmarktbl[hibits], gcmarktbl[hibits+1]);
      }
#endif

      void *endtoptr=toptr+length;
      if (endtoptr>tobound) {
        //object does not fit...report how much space we need
        gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
        to->ptr=tobound;
        orig->ptr=origptr;
        // close a destination page, update the revised profiling info
        CACHEADAPT_COMPLETE_PAGE_CONVERT(origptr, tobound, toptr);
        return length;
      }
      //good to move this object and record its new location
      //tprintf("Decided to compact obj %x to %x\n", origptr, toptr);
      gcmappingtbl[OBJMAPPINGINDEX(origptr)]=toptr;
      origptr+=length;
      toptr=endtoptr;
    } else
      origptr+=ALIGNMENTSIZE;
  }
  to->ptr=toptr;
  orig->ptr=origptr;
  orig->pagebound=(void *)((int)(((int)origptr)&(~(BAMBOO_PAGE_SIZE-1)))+BAMBOO_PAGE_SIZE);
  to->pagebound=(void *)((int)(((int)toptr)&(~(BAMBOO_PAGE_SIZE-1)))+BAMBOO_PAGE_SIZE);
  gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
  // close the last destination page
  CACHEADAPT_COMPLETE_PAGE_CONVERT(origptr, toptr, toptr);
  return 0;
}
#endif

/* This function is performance critical...spend more time optimizing it */
unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
  void *toptrinit=to->ptr;
  void *toptr=toptrinit;
  void *tobound=to->bound;
  void *origptr=orig->ptr;
  void *origbound=orig->bound;
  unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbaseva));
  unsigned int objlength;
  while(origptr<origbound) {
    //Try to skip over dead space quickly first
    unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
    unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
    if (!gcmarktbl[arrayoffset]) {
      do {
        arrayoffset++;
        if (arrayoffset>=origendoffset) {
          //finished with this block...
          origptr=origbound;
          to->ptr=toptr;
          orig->ptr=origptr;
          gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
          return 0;
        }
      } while(!gcmarktbl[arrayoffset]);
      origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
    }

    //Scan more carefully next
    objlength=getMarkedLength(origptr);

    if (objlength!=NOTMARKED) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);

      //debug-only consistency check...remove once the mark bitmap is trusted
#ifdef GC_DEBUG
      unsigned int size;
      unsigned int type;
      gettype_size(origptr, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;
      if (size!=length) {
        tprintf("BAD SIZE IN BITMAP: type=%u object=%x size=%u length=%u\n", type, origptr, size, length);
        unsigned INTPTR alignsize=ALIGNOBJSIZE((unsigned INTPTR)(origptr-gcbaseva));
        unsigned INTPTR hibits=alignsize>>4;
        unsigned INTPTR lobits=(alignsize&15)<<1;
        tprintf("hibits=%x lobits=%x\n", hibits, lobits);
        tprintf("hi=%x lo=%x\n", gcmarktbl[hibits], gcmarktbl[hibits+1]);
      }
#endif

      void *endtoptr=toptr+length;
      if (endtoptr>tobound) {
        //object does not fit...report how much space we need
        gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
        to->ptr=tobound;
        orig->ptr=origptr;
        return length;
      }
      //good to move this object and record its new location
      gcmappingtbl[OBJMAPPINGINDEX(origptr)]=toptr;
      origptr+=length;
      toptr=endtoptr;
    } else
      origptr+=ALIGNMENTSIZE;
  }
  to->ptr=toptr;
  orig->ptr=origptr;
  gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
  return 0;
}
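
//The fast-skip loops in compactpages/compactblocks exploit the fact that a
//zero gcmarktbl entry means "no live data in this ALIGNMENTSIZE-granular
//slot", so whole slots can be jumped without touching the heap.  An
//equivalent sketch (illustrative only; assumes CONVERTTABLEINDEXTOPTR
//inverts ALIGNTOTABLEINDEX, as their paired uses above suggest):
static void *sketch_skipDead(void *ptr, void *bound) {
  unsigned INTPTR idx=ALIGNTOTABLEINDEX((unsigned INTPTR)(ptr-gcbaseva));
  unsigned INTPTR end=ALIGNTOTABLEINDEX((unsigned INTPTR)(bound-gcbaseva));
  if (gcmarktbl[idx])
    return ptr;                          //live data right here
  do {
    idx++;
    if (idx>=end)
      return bound;                      //rest of the range is dead
  } while(!gcmarktbl[idx]);
  return CONVERTTABLEINDEXTOPTR(idx);    //first slot containing live data
}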

void compact() {
  BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);

  // initialize structs for compacting
  struct moveHelper orig;
  struct moveHelper to;
  initOrig_Dst(&orig, &to);

  CACHEADAPT_SAMPLING_DATA_REVISE_INIT(&orig, &to);
  compacthelper(&orig, &to);
}

void master_compact() {
  // predict the number of blocks to fill for each core
  numblockspercore = loadbalance()+1;

  GC_PRINTF("mark phase finished \n");

  gc_resetCoreStatus();
  //initialize local data structures first....we don't want remote requests messing data up
  unsigned int initblocks=numblockspercore*NUMCORES4GC;
  allocationinfo.lowestfreeblock=NOFREEBLOCK;

  //blocks assigned to cores
  for(int i=0;i<initblocks;i++) {
    allocationinfo.blocktable[i].status=BS_USED;
  }

  //free blocks
  for(int i=initblocks;i<GCNUMBLOCK;i++) {
    allocationinfo.blocktable[i].status=BS_FREE;
    allocationinfo.blocktable[i].usedspace=0;
    //this is true because all cores have at least one block already...
    allocationinfo.blocktable[i].freespace=BLOCKSIZE(1);
  }

  //start all of the cores
  for(int i = 0; i < NUMCORES4GC; i++) {
    // init some data structures for the compact phase
    gcrequiredmems[i] = 0;
    returnedmem[i] = 1;
    //send start compact messages to all cores
    if(i != STARTUPCORE) {
      send_msg_2(i, GCSTARTCOMPACT, numblockspercore);
    } else {
      gcblock2fill = numblockspercore;
    }
  }

  //the master core compacts its own heap, too
  compact();

  /* wait for all cores to finish compacting */
  while(!gc_checkCoreStatus())
    ;

#ifdef GC_DEBUG
  //validate that the mark bitmap and the forwarding table agree
  void *nextvalid=gcbaseva;
  for(void *tmp=gcbaseva; tmp<gcbaseva+BAMBOO_SHARED_MEM_SIZE;tmp+=ALIGNMENTSIZE) {
    unsigned int objlength=getMarkedLength(tmp);
    void *forwarding=gcmappingtbl[OBJMAPPINGINDEX(tmp)];
    if (tmp>=nextvalid&&((objlength!=0)!=(forwarding!=NULL))) {
      tprintf("Maps disagree tmp=%x olength=%u forwarding=%x\n",tmp, objlength, forwarding);
    }
    if (tmp<nextvalid&&forwarding!=NULL) {
      tprintf("Weird forwarding pointer\n");
    }
    if (tmp>=nextvalid&&(objlength!=0||forwarding!=NULL)) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);
      unsigned int size;
      unsigned int type;
      nextvalid=tmp+length;
      gettype_size(tmp, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;
      if (size!=length) {
        tprintf("Bad size in bitmap: tmp=%x length=%u size=%u type=%u\n", tmp, length, size, type);
      }
      block_t blockindex;
      BLOCKINDEX(blockindex, forwarding);
      struct blockrecord * block=&allocationinfo.blocktable[blockindex];
      void *blockptr=OFFSET2BASEVA(blockindex)+gcbaseva;
      if (block->status==BS_FREE) {
        if (forwarding>(blockptr+block->usedspace)) {
          tprintf("Pointer references free space forwarding=%x tmp=%x length=%u type=%u blockindex=%u, baseptr=%x, usedspace=%u, status=%u\n", forwarding, tmp, length, type,blockindex, blockptr, block->usedspace, block->status);
        }
      }
    }
  }
#endif

  //just in case we didn't get blocks back...
  if (allocationinfo.lowestfreeblock==NOFREEBLOCK)
    allocationinfo.lowestfreeblock=numblockspercore*NUMCORES4GC;

  GC_PRINTF("compact phase finished \n");
}

#endif // MULTICORE_GC