#ifdef MULTICORE_GC
#include "multicoregccompact.h"
#include "runtime_arch.h"
#include "multicoreruntime.h"
#include "multicoregarbage.h"
#include "markbit.h"
#include "multicoremem_helper.h"

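// Count the cores that have not yet returned their unused memory to the master.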
int gc_countRunningCores() {
  int count=0;
  for(int i = 0; i < NUMCORES4GC; i++) {
    if(returnedmem[i]) {
      count++;
    }
  }
  return count;
}

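// Point both the destination (to) and source (orig) helpers at the base of
// this core's first local block before compaction begins.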
void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
  // init the dst ptr
  to->localblocknum = 0;
  BASEPTR(to->base, BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr = to->base;
  to->bound=to->base+BLOCKSIZE(to->localblocknum);

  // init the orig ptr
  orig->localblocknum = 0;
  orig->ptr=orig->base = to->base;
  orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
}

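// Advance the destination helper to the next block owned by this core.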
void getSpaceLocally(struct moveHelper *to) {
  //we have space on our core...just keep going
  to->localblocknum++;
  BASEPTR(to->base,BAMBOO_NUM_OF_CORE, to->localblocknum);
  to->ptr=to->base;
  to->bound = to->base + BLOCKSIZE(to->localblocknum);
}

//This function is called on the master core only...and typically by
//the message interrupt handler

void handleReturnMem_I(unsigned int cnum, void *heaptop) {
  unsigned int blockindex;
  BLOCKINDEX(blockindex, heaptop);
  unsigned INTPTR localblocknum=GLOBALBLOCK2LOCAL(blockindex);

  //this core is done as far as memory usage is concerned
  returnedmem[cnum]=0;

  struct blockrecord * blockrecord=&allocationinfo.blocktable[blockindex];

  blockrecord->status=BS_FREE;
  blockrecord->usedspace=(unsigned INTPTR)(heaptop-OFFSET2BASEVA(blockindex)-gcbaseva);
  blockrecord->freespace=BLOCKSIZE(localblocknum)-blockrecord->usedspace;
  /* Update the lowest free block */
  if (blockindex < allocationinfo.lowestfreeblock) {
    allocationinfo.lowestfreeblock=blockindex;
  }

  /* This is our own block...means we should mark other blocks above us as free */
  if (cnum==blockrecord->corenum) {
    unsigned INTPTR nextlocalblocknum=localblocknum+1;
    for(;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
      unsigned INTPTR blocknum=BLOCKINDEX2(cnum, nextlocalblocknum);
      struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
      nextblockrecord->status=BS_FREE;
      nextblockrecord->usedspace=0;
      //this is true because this cannot be the lowest block
      nextblockrecord->freespace=BLOCKSIZE(1);
    }
  }

  //this could be the last one....
  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  } else {
    //see if returned memory blocks let us resolve requests
    //useReturnedMem expects a per-core block number, so convert
    useReturnedMem(cnum, GLOBALBLOCK2LOCAL(allocationinfo.lowestfreeblock));
  }
}

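// A core (corenum) has just returned memory.  Scan its blocks, starting at
// localblockindex, for free space that can satisfy another core's pending
// memory request.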
void useReturnedMem(unsigned int corenum, block_t localblockindex) {
  for(int i=0;i<NUMCORES4GC;i++) {
    unsigned INTPTR requiredmem=gcrequiredmems[i];
    if (requiredmem) {
      unsigned INTPTR desiredmem=maxusefulmems[i];
      unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
      unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

      for(block_t nextlocalblocknum=localblockindex;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
        unsigned INTPTR blocknum=BLOCKINDEX2(corenum, nextlocalblocknum);
        struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
        if (nextblockrecord->status==BS_FREE) {
          unsigned INTPTR freespace=nextblockrecord->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (freespace>=memcheck) {
            nextblockrecord->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(blocknum)+gcbaseva;
            unsigned INTPTR usedspace=((nextblockrecord->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            //taken care of one block
            gcmovepending--;
            void *startaddr=blockptr+usedspace;
            gcrequiredmems[i]=0;
            maxusefulmems[i]=0;
            //wake the requesting core (i), not the core that returned memory
            if(BAMBOO_CHECK_SEND_MODE()) {
              cache_msg_2_I(i,GCMOVESTART,startaddr);
            } else {
              send_msg_2_I(i,GCMOVESTART,startaddr);
            }
            //this request is satisfied...stop scanning blocks for it
            break;
          }
        }
      }
    }
  }
}

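// Wrapper for handleReturnMem_I for callers that are not already in the
// interrupt-disabled runtime mode.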
void handleReturnMem(unsigned int cnum, void *heaptop) {
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  handleReturnMem_I(cnum, heaptop);
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
}

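// Out of local blocks...ask the master core for somewhere to compact into and
// spin until a start address arrives.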
void getSpaceRemotely(struct moveHelper *to, unsigned int minimumbytes) {
  //need to get another block from elsewhere
  //set flag to wait for memory
  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    gctomove=false;
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    void *startaddr=handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    if (startaddr) {
      gcmovestartaddr=startaddr;
    } else {
      while(!gctomove)
        ;
    }
  } else {
    gctomove=false;
    //send request for memory
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
    //wait for flag to be set that we received message
    while(!gctomove)
      ;
  }

  //store pointer
  to->ptr = gcmovestartaddr;

  //set localblock number to high number to indicate this block isn't local
  to->localblocknum = MAXBLOCK;
  unsigned int globalblocknum;
  BLOCKINDEX(globalblocknum, to->ptr);
  to->base = gcbaseva + OFFSET2BASEVA(globalblocknum);
  to->bound = gcbaseva + BOUNDPTR(globalblocknum);
}

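// Pick the next destination block: local if this core still has blocks below
// its fill quota, remote otherwise.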
void getSpace(struct moveHelper *to, unsigned int minimumbytes) {
  //need more space to compact into
  if (to->localblocknum < gcblock2fill) {
    getSpaceLocally(to);
  } else {
    getSpaceRemotely(to, minimumbytes);
  }
}

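// Drive compaction for this core: compact block by block, notify the master
// of our final heap top once the remaining live data fits in the current
// destination block, and report completion when the source region is
// exhausted.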
void compacthelper(struct moveHelper * orig,struct moveHelper * to) {
  bool senttopmessage=false;
  while(true) {
    if ((gccurr_heaptop < ((unsigned INTPTR)(to->bound-to->ptr)))&&!senttopmessage) {
      //This block is the last for this core...let the startup core know
      if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
        handleReturnMem(BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      } else {
        send_msg_3(STARTUPCORE, GCRETURNMEM, BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
      }
      //Only send the message once
      senttopmessage=true;
    }
    unsigned int minimumbytes=compactblocks(orig, to);

    if (orig->ptr==orig->bound) {
      //need more data to compact
      //increment the block number
      orig->localblocknum++;
      BASEPTR(orig->base,BAMBOO_NUM_OF_CORE, orig->localblocknum);
      orig->ptr=orig->base;
      orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
      if (orig->base >= gcbaseva+BAMBOO_SHARED_MEM_SIZE)
        break;
    }
    if (minimumbytes!=0) {
      getSpace(to, minimumbytes);
    }
  }

  if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, 0, 0);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } else {
    send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, 0, 0);
  }
}

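// Try to find a free block on one of the requesting core's neighbors
// (core2test) so the handed-out memory stays close to its user.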
void * checkNeighbors_I(int corenum, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  int minblockindex=allocationinfo.lowestfreeblock/NUMCORES4GC;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(int i=0;i<NUM_CORES2TEST;i++) {
    int neighborcore=core2test[corenum][i];
    if (neighborcore!=-1) {
      for(block_t lblock=minblockindex;lblock<numblockspercore;lblock++) {
        block_t globalblockindex=BLOCKINDEX2(neighborcore, lblock);
        struct blockrecord * block=&allocationinfo.blocktable[globalblockindex];
        if (block->status==BS_FREE) {
          unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
          if (memcheck<=freespace) {
            //we have a block
            //mark block as used
            block->status=BS_USED;
            void *blockptr=OFFSET2BASEVA(globalblockindex)+gcbaseva;
            unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
            return blockptr+usedspace;
          }
        }
      }
    }
  }
  return NULL;
}

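// Fall back to scanning the whole block table (up to topblock) for a free
// block with enough space, tracking the lowest free block seen along the way.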
void * globalSearch_I(unsigned int topblock, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
  unsigned int firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t i=allocationinfo.lowestfreeblock;i<topblock;i++) {
    struct blockrecord * block=&allocationinfo.blocktable[i];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=i;
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (memcheck<=freespace) {
        //we have a block
        //mark block as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(i)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        return blockptr+usedspace;
      }
    }
  }
  allocationinfo.lowestfreeblock=firstfree;
  return NULL;
}

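// Hand one core's pending request a free block (searching upward from
// lowestblock) and send it a GCMOVESTART; aborts if no block is big enough.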
void handleOneMemoryRequest(int core, unsigned int lowestblock) {
  unsigned INTPTR requiredmem=gcrequiredmems[core];
  unsigned INTPTR desiredmem=maxusefulmems[core];
  block_t firstfree=NOFREEBLOCK;
  unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
  unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;

  for(block_t searchblock=lowestblock;searchblock<GCNUMBLOCK;searchblock++) {
    struct blockrecord * block=&allocationinfo.blocktable[searchblock];
    if (block->status==BS_FREE) {
      if(firstfree==NOFREEBLOCK)
        firstfree=searchblock;
      unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
      if (freespace>=memcheck) {
        //TODO: should check memory block at same level on our own core...if that works, use it to preserve locality

        //we have a block
        //mark block as used
        block->status=BS_USED;
        void *blockptr=OFFSET2BASEVA(searchblock)+gcbaseva;
        unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
        allocationinfo.lowestfreeblock=firstfree;
        //taken care of one block
        gcmovepending--;
        void *startaddr=blockptr+usedspace;
        if(BAMBOO_CHECK_SEND_MODE()) {
          cache_msg_2_I(core,GCMOVESTART,startaddr);
        } else {
          send_msg_2_I(core,GCMOVESTART,startaddr);
        }
        return;
      }
    }
  }
  //this is bad...ran out of memory
  printf("Out of memory.  Was trying for %u bytes\n", threshold);
  BAMBOO_EXIT();
}

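// Called once every still-running core is blocked on memory: service all
// outstanding requests in core order.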
void handleMemoryRequests_I() {
  unsigned int lowestblock=allocationinfo.lowestfreeblock;
  if (lowestblock==NOFREEBLOCK) {
    lowestblock=numblockspercore*NUMCORES4GC;
  }

  for(int i=0;i < NUMCORES4GC; i++) {
    if (gcrequiredmems[i]) {
      handleOneMemoryRequest(i, lowestblock);
      lowestblock=allocationinfo.lowestfreeblock;
    }
  }
}

/* should be invoked with interrupt turned off */

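// Find a spare block for requiredcore, preferring its neighbors; if nothing
// is free right now, queue the request until memory is returned.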
void * gcfindSpareMem_I(unsigned INTPTR requiredmem, unsigned INTPTR desiredmem,unsigned int requiredcore) {
  if (allocationinfo.lowestfreeblock!=NOFREEBLOCK) {
    //There are spare blocks
    unsigned int topblock=numblockspercore*NUMCORES4GC;
    void *memblock;

    if ((memblock=checkNeighbors_I(requiredcore, requiredmem, desiredmem))) {
      return memblock;
    } else if ((memblock=globalSearch_I(topblock, requiredmem, desiredmem))) {
      return memblock;
    }
  }

  // If we cannot find spare mem right now, hold the request
  gcrequiredmems[requiredcore] = requiredmem;
  maxusefulmems[requiredcore]=desiredmem;
  gcmovepending++;

  int count=gc_countRunningCores();
  if (gcmovepending==count) {
    // All cores have stopped...hand out memory as necessary to handle all requests
    handleMemoryRequests_I();
  }

  return NULL;
}

/* This function is performance critical...  spend more time optimizing it */

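// Walk the marked objects in orig's current block and assign each one a new
// address in to's block, recording the forwarding pointer in gcmappingtbl.
// Returns 0 once the source block is fully processed, or the size of the
// first object that did not fit in the destination block.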
unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
  void *toptrinit=to->ptr;
  void *toptr=toptrinit;
  void *tobound=to->bound;
  void *origptr=orig->ptr;
  void *origbound=orig->bound;
  unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbaseva));
  unsigned int objlength;
  while(origptr<origbound) {
    //Try to skip over stuff fast first
    unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
    unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
    if (!gcmarktbl[arrayoffset]) {
      do {
        arrayoffset++;
        if (arrayoffset>=origendoffset) {
          //finished with block...
          origptr=origbound;
          to->ptr=toptr;
          orig->ptr=origptr;
          gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
          return 0;
        }
      } while(!gcmarktbl[arrayoffset]);
      origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
    }

    //Scan more carefully next
    objlength=getMarkedLength(origptr);

    if (objlength!=NOTMARKED) {
      unsigned int length=ALIGNSIZETOBYTES(objlength);

      unsigned int size;
      unsigned int type;
      gettype_size(origptr, &type, &size);
      size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;

      if (size!=length) {
        tprintf("BAD SIZE IN BITMAP: type=%u object=%x size=%u length=%u\n", type, origptr, size, length);
      }

      void *endtoptr=toptr+length;
      if (endtoptr>tobound) {
        gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
        to->ptr=tobound;
        orig->ptr=origptr;
        return length;
      }
      //good to move objects and update pointers
      //tprintf("Decided to compact obj %x to %x\n", origptr, toptr);

      gcmappingtbl[OBJMAPPINGINDEX(origptr)]=toptr;
      origptr+=length;
      toptr=endtoptr;
    } else
      origptr+=ALIGNMENTSIZE;
  }
  to->ptr=toptr;
  orig->ptr=origptr;
  gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
  return 0;
}

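// Per-core entry point for the compaction phase.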
void compact() {
  BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);

  // initialize structs for compacting
  struct moveHelper orig={0,NULL,NULL,0,NULL,0,0,0,0};
  struct moveHelper to={0,NULL,NULL,0,NULL,0,0,0,0};
  initOrig_Dst(&orig, &to);

  CACHEADAPT_SAMPLING_DATA_REVISE_INIT(&orig, &to);

  compacthelper(&orig, &to);
}

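// Master-side driver for the compaction phase: balance blocks across cores,
// reset the block table, start every core compacting, and wait for all of
// them to finish.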
void master_compact() {
  // predict number of blocks to fill for each core
  numblockspercore = loadbalance()+1;

  GC_PRINTF("mark phase finished \n");

  gc_resetCoreStatus();
  //initialize local data structures first....we don't want remote requests messing data up
  unsigned int initblocks=numblockspercore*NUMCORES4GC;
  allocationinfo.lowestfreeblock=NOFREEBLOCK;

  //assigned blocks
  for(int i=0;i<initblocks;i++) {
    allocationinfo.blocktable[i].status=BS_USED;
  }

  //free blocks
  for(int i=initblocks;i<GCNUMBLOCK;i++) {
    allocationinfo.blocktable[i].status=BS_FREE;
    allocationinfo.blocktable[i].usedspace=0;
    //this is true because all cores have at least one block already...
    allocationinfo.blocktable[i].freespace=BLOCKSIZE(1);
  }

  //start all of the cores
  for(int i = 0; i < NUMCORES4GC; i++) {
    // init some data structures for compact phase
    gcrequiredmems[i] = 0;
    gccorestatus[i] = 1;
    returnedmem[i] = 1;
    //send start compact messages to all cores
    if(i != STARTUPCORE) {
      send_msg_2(i, GCSTARTCOMPACT, numblockspercore);
    } else {
      gcblock2fill = numblockspercore;
    }
  }
  GCPROFILE_ITEM();
  // compact phase
  compact();

  /* wait for all cores to finish compacting */
  tprintf("MASTER WAITING\n");
  while(!gc_checkCoreStatus())
    ;
  tprintf("POST_WAIT\n");
  GCPROFILE_ITEM();

  //just in case we didn't get blocks back...
  if (allocationinfo.lowestfreeblock==NOFREEBLOCK)
    allocationinfo.lowestfreeblock=numblockspercore*NUMCORES4GC;

  GC_PRINTF("compact phase finished \n");
}

#endif // MULTICORE_GC