changes, large objs processing not finished yet
[IRC.git] / Robust / src / Runtime / multicoregarbage.c
1 #ifdef MULTICORE_GC
2 #include "multicoregarbage.h"
3 #include "multicoreruntime.h"
4 #include "runtime_arch.h"
5 #include "SimpleHash.h"
6 #include "GenericHashtable.h"
7
8 extern struct genhashtable * activetasks;
9 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
10 extern struct taskparamdescriptor *currtpdo;
11
12 struct largeObjList {
13         struct largeObjItem * head;
14         struct largeObjItem * tail;
15 };
16
17 struct largeObjList lObjList;
18
19 #define NUMPTRS 100
20
21 void gc_enqueue(void *ptr) {
22   if (gcheadindex==NUMPTRS) {
23     struct pointerblock * tmp;
24     if (gcspare!=NULL) {
25       tmp=gcspare;
26       gcspare=NULL;
27     } else
28       tmp=malloc(sizeof(struct pointerblock));
29     gchead->next=tmp;
30     gchead=tmp;
31     gcheadindex=0;
32   }
33   gchead->ptrs[gcheadindex++]=ptr;
34 }
35
36 // dequeue and destroy the queue
37 void * gc_dequeue() {
38   if (gctailindex==NUMPTRS) {
39     struct pointerblock *tmp=tail;
40     gctail=gctail->next;
41     gctailindex=0;
42     if (gcspare!=NULL)
43       free(tmp);
44     else
45       gcspare=tmp;
46   }
47   return gctail->ptrs[gctailindex++];
48 }
49
50 // dequeue and do not destroy the queue
51 void * gc_dequeue2() {
52         if (gctailindex2==NUMPTRS) {
53     struct pointerblock *tmp=tail;
54     gctail2=gctail2->next;
55     gctailindex2=0;
56   }
57   return gctail2->ptrs[gctailindex2++];
58 }
59
60 int gc_moreItems() {
61   if ((gchead==gctail)&&(gctailindex==gcheadindex))
62     return 0;
63   return 1;
64 }
65
66 int gc_moreItems2() {
67   if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
68     return 0;
69   return 1;
70 }
71
72 INTPTR curr_heaptop = 0;
73 INTPTR curr_heapbound = 0;
74
75 bool isLarge(void * ptr, 
76                          int * ttype, 
77                                                  int * tsize) {
78         // check if a pointer is referring to a large object
79         int type = ((int *)ptr)[0];
80         int size = 0;
81         if(type < NUMCLASSES) {
82                 // a normal object
83                 size = classsize[type];
84         } else {        
85                 // an array 
86                 struct ArrayObject *ao=(struct ArrayObject *)ptr;
87                 int elementsize=classsize[type];
88                 int length=ao->___length___; 
89                 size=sizeof(struct ArrayObject)+length*elementsize;
90         }
91         *ttype = type;
92         *tsize = size;
93         return(!isLocal(ptr + size));
94 }
95
96 int hostcore(void * ptr) {
97         // check the host core of ptr
98         int host = 0;
99         int x = 0;
100         int y = 0;
101         RESIDECORE(ptr, &x, &y);
102         host = (x==0)?(x*bamboo_height+y):(x*bamboo_height+y-2);
103         return host;
104 }
105
106 bool isLocal(void * ptr) {
107         // check if a pointer is in shared heap on this core
108         return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
109 }
110
111 void transferMarkResults() {
112         // TODO, need distiguish between send and cache
113         // invoked inside interruptiong handler
114         int msgsize = 4;
115   int i = 0;
116
117         // TODO check large objs here
118
119   isMsgSending = true;
120   DynamicHeader msgHdr = tmc_udn_header_from_cpu(STARTUPCORE);
121
122         // send header
123   __tmc_udn_send_header_with_size_and_tag(msgHdr, msgsize, 
124                                                             UDN0_DEMUX_TAG);  
125 #ifdef DEBUG
126   BAMBOO_DEBUGPRINT(0xbbbb);
127   BAMBOO_DEBUGPRINT(0xb000 + STARTUPCORE);       // targetcore
128 #endif
129   udn_send(GCLOBJINFO);
130 #ifdef DEBUG
131   BAMBOO_DEBUGPRINT(GCLOBJINFO);
132 #endif
133   udn_send(msgsize);
134 #ifdef DEBUG
135   BAMBOO_DEBUGPRINT_REG(msgsize);
136 #endif
137         udn_send(BAMBOO_NUM_OF_CORE);
138 #ifdef DEBUG
139   BAMBOO_DEBUGPRINT_REG(BAMBOO_NUM_OF_CORE);
140 #endif
141         udn_send(curr_heaptop);
142 #ifdef DEBUG
143   BAMBOO_DEBUGPRINT_REG(curr_heaptop);
144 #endif
145         // TODO large objs here
146         
147 #ifdef DEBUG
148   BAMBOO_DEBUGPRINT(0xffff);
149 #endif
150
151   // end of sending this msg, set sand msg flag false
152   isMsgSending = false;
153   send_hanging_msg();
154 }
155
156 void transferCompactStart(int core) {
157         // send start compact messages to all cores
158         // TODO no large obj info
159   int msgsize = 3;
160   int i = 0;
161         int ismove = 0;
162         int movenum = 0;
163
164         // both lcore and rcore have the same action: either 
165         // move objs or have incoming objs
166         if(gcdeltal[core] > 0) {
167                 ismove = 0; // have incoming objs
168                 movenum++;
169         } else if(gcdeltal[core] < 0) {
170                 ismove = 1; // have objs to move
171                 movenum++;
172         } 
173         if(gcdeltar[core] > 0) {
174                 ismove = 0; // have incoming objs
175                 movenum++;
176         } else if(gcdeltar[core] < 0) {
177                 ismove = 1; // have objs to move
178                 movenum++;
179         }
180         msgsize += (movenum == 0) ? 0 : 2 + movenum * 2;
181
182   isMsgSending = true;
183   DynamicHeader msgHdr = tmc_udn_header_from_cpu(core);
184
185         // send header
186   __tmc_udn_send_header_with_size_and_tag(msgHdr, msgsize, 
187                                                             UDN0_DEMUX_TAG);  
188 #ifdef DEBUG
189   BAMBOO_DEBUGPRINT(0xbbbb);
190   BAMBOO_DEBUGPRINT(0xb000 + core);       // targetcore
191 #endif
192   udn_send(GCSTARTCOMPACT);
193 #ifdef DEBUG
194   BAMBOO_DEBUGPRINT(GCSTARTCOMPACT);
195 #endif
196   udn_send(msgsize);
197 #ifdef DEBUG
198   BAMBOO_DEBUGPRINT_REG(msgsize);
199 #endif
200         udn_send(gcreloads[core]);
201 #ifdef DEBUG
202   BAMBOO_DEBUGPRINT_REG(gcreloads[core]);
203 #endif
204         if(movenum > 0) {
205                 udn_send(movenum);
206 #ifdef DEBUG
207                 BAMBOO_DEBUGPRINT_REG(movenum);
208 #endif
209                 udn_send(ismove);
210 #ifdef DEBUG
211                 BAMBOO_DEBUGPRINT_REG(ismove);
212 #endif
213                 int dst = 0;
214                 if(gcdeltal[core] != 0) {
215                         LEFTNEIGHBOUR(core, &dst);
216                         udn_send(abs(gcdeltal[core]));
217 #ifdef DEBUG
218                         BAMBOO_DEBUGPRINT_REG(abs(gcdeltal[core]));
219 #endif
220                         udn_send(dst);
221 #ifdef DEBUG
222                         BAMBOO_DEBUGPRINT_REG(dst);
223 #endif
224                 }
225                 if(gcdeltar[core] != 0) {
226                         RIGHTNEIGHBOUR(core, &dst);
227                         udn_send(abs(gcdeltar[core]));
228 #ifdef DEBUG
229                         BAMBOO_DEBUGPRINT_REG(abs(gcdeltar[core]));
230 #endif
231                         udn_send(dst);
232 #ifdef DEBUG
233                         BAMBOO_DEBUGPRINT_REG(dst);
234 #endif
235                 }
236         }
237 #ifdef DEBUG
238   BAMBOO_DEBUGPRINT(0xffff);
239 #endif
240
241   // end of sending this msg, set sand msg flag false
242   isMsgSending = false;
243   send_hanging_msg();
244 }
245
246 void checkMarkStatue() {
247         if((!gcwaitconfirm) || 
248                         (waitconfirm && (numconfirm == 0))) {
249                 BAMBOO_START_CRITICAL_SECTION_STATUS();  
250                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
251                 gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
252                 gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
253                 // check the status of all cores
254                 bool allStall = true;
255                 for(i = 0; i < NUMCORES; ++i) {
256                         if(gccorestatus[i] != 0) {
257                                 allStall = false;
258                                 break;
259                         }
260                 }
261                 if(allStall) {
262                         // check if the sum of send objs and receive obj are the same
263                         // yes->check if the info is the latest; no->go on executing
264                         int sumsendobj = 0;
265                         for(i = 0; i < NUMCORES; ++i) {
266                                 sumsendobj += gcnumsendobjs[i];
267                         }               
268                         for(i = 0; i < NUMCORES; ++i) {
269                                 sumsendobj -= gcnumreceiveobjs[i];
270                         }
271                         if(0 == sumsendobj) {
272                                 if(!waitconfirm) {
273                                         // the first time found all cores stall
274                                         // send out status confirm msg to all other cores
275                                         // reset the corestatus array too
276                                         gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
277                                         waitconfirm = true;
278                                         numconfirm = NUMCORES - 1;
279                                         for(i = 1; i < NUMCORES; ++i) { 
280                                                 gccorestatus[i] = 1;
281                                                 // send mark phase finish confirm request msg to core i
282                                                 send_msg_1(i, GCMARKCONFIRM);
283                                         }
284                                 } else {
285                                         // all the core status info are the latest
286                                         // stop mark phase
287                                         gcphase = COMPACTPHASE;
288                                         // restore the gcstatus for all cores
289                                         for(i = 0; i < NUMCORES; ++i) {
290                                                 gccorestatus[i] = 1;
291                                         }
292                                 } // if(!gcwautconfirm) else()
293                         } // if(0 == sumsendobj)
294                 } // if(allStall)
295                 BAMBOO_CLOSE_CRITICAL_SECTION_STATUS();
296         } // if((!gcwaitconfirm)...
297 }
298
299 bool preGC() {
300         // preparation for gc
301         // make sure to clear all incoming msgs espacially transfer obj msgs
302         int i;
303         if((!waitconfirm) || 
304                                                   (waitconfirm && (numconfirm == 0))) {
305                 // send out status confirm msgs to all cores to check if there are
306                 // transfer obj msgs on-the-fly
307                 waitconfirm = true;
308                 numconfirm = NUMCORES - 1;
309                 for(i = 1; i < NUMCORES; ++i) { 
310                         corestatus[i] = 1;
311                         // send status confirm msg to core i
312                         send_msg_1(i, STATUSCONFIRM);
313                 }
314
315                 while(numconfirm != 0) {} // wait for confirmations
316                 numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
317                 numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
318                 int sumsendobj = 0;
319                 for(i = 0; i < NUMCORES; ++i) {
320                         sumsendobj += numsendobjs[i];
321                 }               
322                 for(i = 0; i < NUMCORES; ++i) {
323                         sumsendobj -= numreceiveobjs[i];
324                 }
325                 if(0 == sumsendobj) {
326                         return true;
327                 } else {
328                         // still have some transfer obj msgs on-the-fly, can not start gc
329                         return false;
330                 }
331         } else {
332                 // previously asked for status confirmation and do not have all the 
333                 // confirmations yet, can not start gc
334                 return false;
335         }
336 }
337
338 // compute load balance for all cores
339 void loadbalance() {
340         // compute load balance
341         // initialize the deltas
342         int i;
343         int delta = 1 << 32 -1;
344         int deltanew = 1 << 32 - 1;
345         int lcore = 0;
346         int rcore = 0;
347         bool stop = true;
348         for(i = 0; i < NUMCORES; i++) {
349                 gcdeltal[i] = gcdeltar[i] = 0;
350                 gcreloads[i] = gcloads[i];
351         }
352
353         // iteratively balance the loads
354         do {
355                 stop = true;
356                 delta = deltanew;
357                 // compute load balance
358                 for(i = 0; i < NUMCORES; i++) {
359                         if(gcreloads[i] > BAMBOO_SMEM_SIZE_L) {
360                                 // too much load, try to redirect some of it to its neighbours
361                                 LEFTNEIGHBOUR(i, &lcore);
362                                 RIGHTNEIGHBOUR(i, &rcore);
363                                 if(lcore != -1) {
364                                         int tmp = (gcreloads[lcore] - gcreloads[i]) / 2;
365                                         gcdeltal[i] = tmp;
366                                         gcdeltar[lcore] = 0-tmp;
367                                         deltanew += abs(gcreloads[lcore] - gcreloads[i]);
368                                 }
369                                 if(rcore != -1) {
370                                         int tmp = (gcreloads[rcore] - gcreloads[i]) / 2;
371                                         gcdeltar[i] = tmp;
372                                         gcdeltal[rcore] = 0-tmp;
373                                         deltanew += abs(gcreloads[rcore] - gcreloads[i]);
374                                 }
375                         }
376                 }
377                 deltanew /= 2;
378                 if((deltanew == 0) || (delta == deltanew)) {
379                         break;
380                 }
381                 // flush for new loads
382                 for(i = 0; i < NUMCORES; i++) {
383                         if((gcdeltal[i] != 0) || (gcdeltar[i] != 0)) {
384                                 stop = false;
385                                 gcreloads[i] += gcdeltal[i] + gcdeltar[i];
386                                 gcdeltal[i] = gcdeltar[i] = 0;
387                         }
388                 }
389         } while(!stop);
390
391         // decide how to do load balance
392         for(i = 0; i < NUMCORES; i++) {
393                 gcdeltal[i] = gcdeltar[i] = 0;
394         }
395         for(i = 0; i < NUMCORES; i++) {
396                 int tomove = (gcloads[i] - gcreloads[i]);
397                 if(tomove > 0) {
398                         LEFTNEIGHBOUR(i, &lcore);
399                         RIGHTNEIGHBOUR(i, &rcore);
400                         int lmove = 0;
401                         int rmove = 0;
402                         if(lcore != -1) {
403                                 lmove = (gcreloads[lcore] - gcloads[lcore] - gcdeltal[lcore]);
404                                 if(lmove < 0) {
405                                         lmove = 0;
406                                 }
407                         }
408                         if(rcore != -1) {
409                                 rmove = (gcreloads[rcore] - gcloads[rcore] - gcdeltar[rcore]);
410                                 if(rmove < 0) {
411                                         rmove = 0;
412                                 }
413                         }
414                         // the one with bigger gap has higher priority
415                         if(lmove > rmove) {
416                                 int ltomove = (lmove > tomove)? tomove:lmove;
417                                 gcdeltar[lcore] = ltomove;
418                                 gcdeltal[i] = 0-ltomove;
419                                 gcdeltal[rcore] = tomove - ltomove;
420                                 gcdeltar[i] = ltomove - tomove;
421                         } else {
422                                 int rtomove = (rmove > tomove)? tomove:rmove;
423                                 gcdeltal[rcore] = rtomove;
424                                 gcdeltar[i] = 0-rtomove;
425                                 gcdeltar[lcore] = tomove - rtomove;
426                                 gcdeltal[i] = rtomove - tomove;
427                         }
428                 }
429         }
430 }
431
432 void gc(struct garbagelist * stackptr) {
433         // check if do gc
434         if(!gcflag) {
435                 return;
436         }
437
438         // core coordinator routine
439         if(0 == BAMBOO_NUM_OF_CORE) {
440                 if(!preGC()) {
441                         // not ready to do gc
442                         gcflag = true;
443                         return;
444                 }
445
446                 gcprocessing = true;
447                 int i = 0;
448                 gcwaitconfirm = false;
449                 gcwaitconfirm = 0;
450                 gcphase = MARKPHASE;
451                 for(i = 1; i < NUMCORES - 1; i++) {
452                         // send GC start messages to all cores
453                         send_msg_1(i, GCSTART);
454                 }
455                 bool isfirst = true;
456                 bool allStall = false;
457
458                 // mark phase
459                 while(MARKPHASE == gcphase) {
460                         mark(isfirst, stackptr);
461                         if(isfirst) {
462                                 isfirst = false;
463                         }
464
465                         // check gcstatus
466                         checkMarkStatue(); 
467                 }  // while(MARKPHASE == gcphase)
468                 // send msgs to all cores requiring large objs info
469                 numconfirm = NUMCORES - 1;
470                 for(i = 1; i < NUMCORES; ++i) {
471                         send_msg_1(i, GCLOBJREQUEST);
472                 }       
473                 while(numconfirm != 0) {} // wait for responses
474                 loadbalance();
475                 // TODO need to decide where to put large objects
476                 // TODO cache all large objects
477
478                 for(i = 1; i < NUMCORES; ++i) {
479                         //send start compact messages to all cores
480                         transferCompactStart(i);
481                 }
482
483                 // compact phase
484                 compact();
485                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
486                 while(COMPACTPHASE == gcphase) {
487                         // check the status of all cores
488                         allStall = true;
489                         for(i = 0; i < NUMCORES; ++i) {
490                                 if(gccorestatus[i] != 0) {
491                                         allStall = false;
492                                         break;
493                                 }
494                         }       
495                         if(allStall) {
496                                 // restore the gcstatus of all cores
497                                 for(i = 0; i < NUMCORES; ++i) {
498                                         gccorestatus[i] = 1;
499                                 }
500                                 break;
501                         }
502                 } // while(COMPACTPHASE == gcphase)
503
504                 gcphase = FLUSHPHASE;
505                 for(i = 1; i < NUMCORES; ++i) {
506                         // send start flush messages to all cores
507                         send_msg_1(i, GCSTARTFLUSH);
508                 }
509
510                 // flush phase
511                 flush();
512                 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
513                 while(FLUSHPHASE == gcphase) {
514                         // check the status of all cores
515                         allStall = true;
516                         for(i = 0; i < NUMCORES; ++i) {
517                                 if(gccorestatus[i] != 0) {
518                                         allStall = false;
519                                         break;
520                                 }
521                         }       
522                         if(allStall) {
523                                 break;
524                         }
525                 } // while(FLUSHPHASE == gcphase)
526                 gcphase = FINISHPHASE;
527                 for(i = 1; i < NUMCORES; ++i) {
528                         // send gc finish messages to all cores
529                         send_msg_1(i, GCFINISH);
530                 }
531
532                 // need to create free memory list and invalidate all 
533                 // shared mem pointers TODO
534
535                 gcflag = false;
536                 gcprocessing = false;
537                 return;
538         } else {
539                 gcprocessing = true;
540                 gc_collect(stackptr);
541         }
542         // invalidate all shared mem pointers
543         bamboo_cur_msp = NULL;
544         bamboo_smem_size = 0;
545         gcflag = false;
546         gcprocessing = false;
547
548 }
549
550 // enqueue root objs
551 void tomark(struct garbagelist * stackptr) {
552         if(MARKPHASE != gcphase) {
553                 BAMBOO_EXIT(0xb002);
554         }
555         gcbusystatus = 1;
556         // initialize queue
557         if (gchead==NULL) {
558                 gcheadindex=0;
559                 gctailindex=0;
560                 gctailindex2 = 0;
561                 gchead=gctail=gctail2=malloc(sizeof(struct pointerblock));
562         }
563         int i;
564         // enqueue current stack 
565         while(stackptr!=NULL) {
566                 for(i=0; i<stackptr->size; i++) {
567                         gc_enqueue(stackptr->array[i]);
568                 }
569                 stackptr=stackptr->next;
570         }
571         // enqueue objectsets
572         for(i=0; i<NUMCLASSES; i++) {
573                 struct parameterwrapper ** queues = 
574                         objectqueues[BAMBOO_NUM_OF_CORE][i];
575                 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
576                 for(j = 0; j < length; ++j) {
577                         struct parameterwrapper * parameter = queues[j];
578                         struct ObjectHash * set=parameter->objectset;
579                         struct ObjectNode * ptr=set->listhead;
580                         while(ptr!=NULL) {
581                                 gc_enqueue((void *)ptr->key);
582                                 ptr=ptr->lnext;
583                         }
584                 }
585         }
586         // euqueue current task descriptor
587         for(i=0; i<currtpd->numParameters; i++) {
588                 gc_enqueue(currtpd->parameterArray[i]);
589         }
590         // euqueue active tasks
591         struct genpointerlist * ptr=activetasks->list;
592         while(ptr!=NULL) {
593                 struct taskparamdescriptor *tpd=ptr->src;
594                 int i;
595                 for(i=0; i<tpd->numParameters; i++) {
596                         gc_enqueue(tpd->parameterArray[i]);
597                 }
598                 ptr=ptr->inext;
599         }
600         // enqueue cached transferred obj
601         struct QueueItem * tmpobjptr =  getHead(&objqueue);
602         while(tmpobjptr != NULL) {
603                 struct transObjInfo * objInfo = 
604                         (struct transObjInfo *)(tmpobjptr->objectptr); 
605                 gc_enqueue(objInfo->objptr);
606                 getNextQueueItem(tmpobjptr);
607         }
608 }
609
610 void mark(bool isfirst, 
611                       struct garbagelist * stackptr) {
612         if(isfirst) {
613                 // enqueue root objs
614                 tomark(stackptr);
615                 curr_heaptop = BAMBOO_CACHE_LINE_SIZE;
616                 curr_heapbound = BAMBOO_SMEM_SIZE_L;
617                 markedptrbound = 0;
618         }
619
620         int isize = 0;
621         // mark phase
622         while(MARKPHASE == gcphase) {
623                 while(gc_moreItems2()) {
624                         voit * ptr = gc_dequeue2();
625                         int size = 0;
626                         int type = 0;
627                         if(isLarge(ptr, &type, &size)) {
628                                 // ptr is a large object
629                                 struct largeObjItem * loi = 
630                                         (struct largeObjItem*)RUNMALLOC(sizeof(struct largeObjItem)); 
631                                 loi->orig = (INTPTR)ptr;
632                                 loi->dst = (INTPTR)0;
633                                 loi->length = size;
634                                 if(lObjList.head == NULL) {
635                                         lObjList.head = lObjList.tail = loi;
636                                 } else {
637                                         lObjList.tail->next = loi;
638                                         lObjList.tail = loi;
639                                 }
640                         } else if (isLocal(ptr)) {
641                                 // ptr is an active object on this core
642                                 if(type == -1) {
643                                         // nothing to do 
644                                 }
645                                 ALIGNSIZE(size, &isize);
646                                 curr_heaptop += isize;
647                                 if(curr_heaptop > curr_heapbound) {
648                                         // change to another block
649                                         curr_heaptop = curr_heapbound+BAMBOO_CACHE_LINE_SIZE+isize;
650                                         curr_heapbound += BAMBOO_SMEM_SIZE;
651                                 }
652                                 // mark this obj
653                                 ((int *)ptr)[6] = 1;
654                                 if(ptr > markedptrbound) {
655                                         markedptrbound = ptr;
656                                 }
657                         }
658                         // scan all pointers in ptr
659                         unsigned INTPTR * pointer;
660                         pointer=pointerarray[type];
661                         if (pointer==0) {
662                                 /* Array of primitives */
663                                 /* Do nothing */
664                         } else if (((INTPTR)pointer)==1) {
665                                 /* Array of pointers */
666                                 struct ArrayObject *ao=(struct ArrayObject *) ptr;
667                                 int length=ao->___length___;
668                                 int j;
669                                 for(j=0; j<length; j++) {
670                                         void *objptr = 
671                                                 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
672                                         int host = hostcore(objptr);
673                                         if(BAMBOO_NUM_OF_CORE == host) {
674                                                 // on this core
675                                                 gc_enqueue(objptr);  
676                                         } else {
677                                                 // send a msg to host informing that objptr is active
678                                                 send_msg_2(host, GCMARKEDOBJ, objptr);
679                                                 gcself_numsendobjs++;
680                                         }
681                                 }
682                         } else {
683                                 INTPTR size=pointer[0];
684                                 int i;
685                                 for(i=1; i<=size; i++) {
686                                         unsigned int offset=pointer[i];
687                                         void * objptr=*((void **)(((char *)ptr)+offset));
688                                         int host = hostcore(objptr);
689                                         if(BAMBOO_NUM_OF_CORE == host) {
690                                                 // on this core
691                                                 gc_enqueue(objptr);  
692                                         } else {
693                                                 // send a msg to host informing that objptr is active
694                                                 send_msg_2(host, GCMARKEDOBJ, objptr);
695                                                 gcself_numsendobjs++;
696                                         }
697                                 }
698                         }
699                 } // while(!isEmpty(gctomark))
700                 gcbusystatus = false;
701                 // send mark finish msg to core coordinator
702                 send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
703                                        gcself_numsendobjs, gcself_numreceiveobjs); 
704
705                 if(BAMBOO_NUM_OF_CORE == 0) {
706                         return;
707                 }
708         } // while(MARKPHASE == gcphase)
709 } // mark()
710
711 struct moveHelper {
712         int numblocks; // block num for heap
713         INTPTR base; // base virtual address of current heap block
714         INTPTR ptr; // virtual address of current heap top
715         int offset; // offset in current heap block
716         int blockbase; // virtual address of current small block to check
717         int blockbound; // bound virtual address of current small blcok 
718         int top; // real size of current heap block to check
719         int bound; // bound size of current heap block to check
720 };
721
722 void nextSBlock(struct moveHelper * orig) {
723         orig->blockbase = orig->blockbound;
724         if(orig->blockbase == orig->bound) {
725                 // end of current heap block, jump to next one
726                 orig->numblocks++;
727                 BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
728                 orig->bound = orig->base + BAMBOO_SMEM_SIZE;
729                 orig->blockbase = orig->base;
730         }
731         orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
732         orig->offset = BAMBOO_CACHE_LINE_SIZE;
733         orig->ptr = orig->blockbase + orig->offset;
734 }
735
736 void nextBlock(struct moveHelper * to) {
737         to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
738         to->bound += BAMBOO_SMEM_SIZE;
739         to->numblocks++;
740         BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
741         to->offset = BAMBOO_CACHE_LINE_SIZE;
742         to->ptr = to->base + to->offset;
743 }
744
745 // endaddr does not contain spaces for headers
746 bool moveobj(struct moveHelper * orig, 
747                          struct moveHelper * to, 
748                                                  INTPTR * endaddr) {
749         int type = 0;
750         int size = 0;
751         int mark = 0;
752         int isize = 0;
753 innermoveobj:
754         while((*((int*)(orig->ptr))) == -2) {
755                 orig->ptr++;
756                 if(orig->ptr == orig->blockbound) {
757                         nextSBlock(orig);
758                         goto innermoveobj;
759                 }
760         }
761         // check the obj's type, size and mark flag
762         type = ((int *)(orig->ptr))[0];
763         size = 0;
764         if(type == -1) {
765                 // end of this block, go to next one
766                 nextSBlock(orig);
767                 goto innermoveobj;
768         } else if(type < NUMCLASSES) {
769                 // a normal object
770                 size = classsize[type];
771         } else {        
772                 // an array 
773                 struct ArrayObject *ao=(struct ArrayObject *)(orig->ptr);
774                 int elementsize=classsize[type];
775                 int length=ao->___length___; 
776                 size=sizeof(struct ArrayObject)+length*elementsize;
777         }
778         mark = ((int *)(orig->ptr))[6];
779         if(mark == 1) {
780                 // marked obj, copy it to current heap top
781                 // check to see if remaining space is enough
782                 ALIGNSIZE(size, &isize);
783                 if((endaddr != NULL) && (to->top + isize > *endaddr)) {
784                         // reached the endaddr 
785                         // fill offset to the endaddr for later configuration of header
786                         to->offset += *endaddr - to->top;
787                         to->top += *endaddr - to->top;
788                         return true;
789                 }
790                 if(to->top + isize > to->bound) {
791                         // fill the header of this block and then go to next block
792         to->offset += to->bound - to->top;
793                         (*((int*)(to->base))) = to->offset;
794                         if(endaddr != NULL) {
795                                 *endaddr = *endaddr + BAMBOO_CACHE_LINE_SIZE; 
796                         }
797                         nextBlock(to);
798                 }
799                 memcpy(to->ptr, orig->ptr, size);
800                 // store mapping info
801                 RuntimeHashadd(pointertbl, orig->ptr, to->ptr); 
802                 to->ptr += isize;
803                 to->offset += isize;
804                 to->top += isize;
805         } 
806         // move to next obj
807         orig->ptr += size;
808         if(orig->ptr == orig->blockbound) {
809                 nextSBlock(orig);
810         }
811         return false;
812 }
813
814 void migrateobjs(struct moveHelper * orig) {
815         int num_dsts = cinstruction->movenum;
816         while(num_dsts > 0) {
817                 while(!gctomove) {}
818                 // start moving objects to other cores
819                 gctomove = false;
820                 struct moveHelper * into = 
821                         (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
822                 for(int j = 0; j < cinstruction->movenum; j++) {
823                         if(cinstruction->moveflag[j] == 1) {
824                                 // can start moving to corresponding core
825                                 int dst = cinstruction->dsts[j];
826                                 num_dsts--;
827                                 into->ptr = cinstruction->startaddrs[j];
828                                 BLOCKINDEX(into->ptr, &(into->numblocks));
829                                 into->bound = (into->numblocks==0)?
830                                         BAMBOO_SMEM_SIZE_L:
831                                         BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*into->numblocks;
832                                 BASEPTR(BAMBOO_NUM_OF_CORE, into->numblocks, &(into->base));
833                                 into->offset = into->ptr - into->base;
834                                 into->top = (into->numblocks==0)?
835                                         (into->offset):(into->bound-BAMBOO_SMEM_SIZE+into->offset);
836                                 into->base = into->ptr;
837                                 into->offset = BAMBOO_CACHE_LINE_SIZE;
838                                 into->ptr += into->offset; // for header
839                                 into->top += into->offset;
840                                 int endaddr = into->top + cinstruction->endaddrs[j];
841                                 do {
842                                         bool stop = moveobj(orig, into, &endaddr);
843                                         if(stop) {
844                                                 // all objs before endaddr have been moved
845                                                 // STOP the loop
846                                                 break;
847                                         }                                                       
848                                 } while(orig->ptr < markedptrbound + 1);
849                                 // set the flag indicating move finished
850                                 cinstruction->moveflag[j] = 2; 
851                                 // fill the header of this blockk
852                                 (*((int*)(into->base))) = into->offset;
853                         } // if(cinstruction->moveflag[j] == 1)
854                 } // for(int j = 0; j < cinstruction->movenum; j++)
855                 RUNFREE(into);
856         } // while(num_dsts > 0)
857 }
858
859 void compact() {
860         if(COMPACTPHASE != gcphase) {
861                 BAMBOO_EXIT(0xb003);
862         }
863
864         INTPTR heaptopptr = 0;
865
866         // initialize pointers for comapcting
867         struct moveHelper * orig = 
868                 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
869         struct moveHelper * to = 
870                 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
871         to->numblocks = 0;
872         to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
873         to->bound = BAMBOO_SMEM_SIZE_L;
874         BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
875         to->ptr = to->base + to->offset;
876         orig->numblocks = 0;
877         orig->ptr = to->ptr;
878         orig->base = to->base;
879         orig->bound = to->bound;
880         orig->blockbase = to->base;
881         orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
882
883         // scan over all objs in this block, compact those scheduled to 
884         // reside on this core
885         // loop stop when finishing either scanning all active objs or moving
886         // all objs to reside on this core
887         int endaddr = cinstruction->loads;
888         do {
889                 bool stop = moveobj(orig, to, &endaddr);
890                 curr_heaptop = to->top;
891                 curr_heapbound = to->bound;
892                 if(stop && (cinstruction->movenum != 0)) {
893                         // all objs to reside on this core have been moved
894                         // the remainging objs should be moved to other cores
895                         // STOP the loop
896                         break;
897                 }
898         } while(orig->ptr < markedptrbound + 1); 
899         // fill the header of this block
900         (*((int*)(to->base))) = to->offset;
901         heaptopptr = to->ptr;
902
903         // move objs
904         if(cinstruction->movenum != 0) {
905                 if(cinstruction->ismove) {
906                         // have objs to move to other cores
907                         migrateobjs(orig);
908
909                         // might still have objs left, compact them to this core
910                         // leave space for header
911                         if(orig->ptr < markedptrbound + 1) {
912                                 if(to->top + BAMBOO_CACHE_LINE_SIZE > to->bound) {
913                                         // fill the left part of current block
914                                         memset(to->top, -2, to->bound - to->top);
915                                         // go to next block
916                                         nextBlock(to);
917                                 } else {
918                                         to->top += BAMBOO_CACHE_LINE_SIZE; // for header
919                                         to->offset = BAMBOO_CACHE_LINE_SIZE;
920                                         to->base = to->ptr;
921                                         to->ptr += BAMBOO_CACHE_LINE_SIZE;
922                                 }
923                                 while(orig->ptr < markedptrbound + 1) {
924                                         moveobj(orig, to, NULL);
925                                         curr_heaptop = to->top;
926                                         curr_heapbound = to->bound;
927                                 }
928                                 // fill the header of this blockk
929                                 (*((int*)(to->base))) = to->offset;
930                         }
931                         heaptopptr = to->ptr;
932                 } else {
933                         // have incoming objs, send messages to corresponding cores 
934                         // to start moving
935                         INTPTR startaddr = 0;
936                         INTPTR endaddr = 0;
937                         int heapptr = curr_heapptr;
938                         int top = curr_heaptop;
939                         int bound = curr_heapbound;
940                         for(int j = 0; j < cinstruction->movenum; j++) {
941                                 startaddr = heapptr;
942                                 top = top+cinstruction->size2move[j]+BAMBOO_CACHE_LINE_SIZE;
943                                 if(top > bound) {
944                                         // will cross block boundary
945                                         int numb = (top - bound) / BAMBOO_SMEM_SIZE + 1;
946                                         top += numb * BAMBOO_CACHE_LINE_SIZE;
947                                         BASEPTR(BAMBOO_NUM_OF_CORE, numblocks + numb, &endaddr);
948                                         endaddr += 
949                                                 (top-bound)%BAMBOO_SMEM_SIZE+BAMBOO_CACHE_LINE_SIZE;
950                                         heapptr = endaddr;
951                                         bound += BAMBOO_SMEM_SIZE * numb;
952                                 } else {
953                                         endaddr = 
954                                                 heapptr+cinstruction->size2move[j]+BAMBOO_CACHE_LINE_SIZE;
955                                         heapptr = endaddr;
956                                 }
957                                 send_msg_4(cinstruction->dsts[j], GCMOVESTART, 
958                                                        BAMBOO_NUM_OF_CORE, startaddr, 
959                                                                          cinstruction->size2move[j]);
960                         }
961                         heaptopptr = heapptr;
962                 } // if(cinstruction->ismove) 
963         } // if(cinstruction->movenum != 0)
964         
965         // TODO large obj
966         /*
967         if((cinstruction != NULL) && (cinstruction->largeobjs != NULL)) {
968                 // move all large objects
969                 do {
970                         // dequeue the first large obj
971                         struct largeObjItem * loi = cinstruction->largeobjs;
972                         cinstruction->largeobjs = loi->next;
973                         // move this large obj
974                         memcpy(loi->dst, loi->orig, loi->length);
975                         RuntimeHashadd(pointertbl, loi->orig, loi->dst);
976                         RUNFREE(loi);
977                 }while(cinstruction->largeobjs != NULL);
978         }*/
979         // send compact finish message to core coordinator
980         send_msg_3(STARTUPCORE, GCFINISHCOMPACT, 
981                                BAMBOO_NUM_OF_CORE, to->ptr);
982
983         RUNFREE(orig);
984         RUNFREE(to);
985 } // compact()
986
987 void flush() {
988         while(gc_moreItems()) {
989                 voit * ptr = gc_dequeue();
990                 int type = ((int *)(ptr))[0];
991                 // scan all pointers in ptr
992                 unsigned INTPTR * pointer;
993                 pointer=pointerarray[type];
994                 if (pointer==0) {
995                         /* Array of primitives */
996                         /* Do nothing */
997                 } else if (((INTPTR)pointer)==1) {
998                         /* Array of pointers */
999                         struct ArrayObject *ao=(struct ArrayObject *) ptr;
1000                         int length=ao->___length___;
1001                         int j;
1002                         for(j=0; j<length; j++) {
1003                                 void *objptr=
1004                                         ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
1005                                 // change to new address
1006                                 void *dstptr = NULL;
1007                                 RuntimeHashget(pointertbl, objptr, &dstptr);
1008                                 if(NULL == dstptr) {
1009                                         // send msg to host core for the mapping info
1010                                         obj2map = (int)objptr;
1011                                         ismapped = false;
1012                                         mappedobj = NULL;
1013                                         send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr, 
1014                                                                BAMBOO_NUM_OF_CORE);
1015                                         while(!ismapped) {}
1016                                         RuntimeHashget(pointertbl, objptr, &dstptr);
1017                                 }
1018                                 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j]=dstptr;
1019                         }
1020                 } else {
1021                         INTPTR size=pointer[0];
1022                         int i;
1023                         for(i=1; i<=size; i++) {
1024                                 unsigned int offset=pointer[i];
1025                                 void * objptr=*((void **)(((char *)ptr)+offset));
1026                                 // change to new address
1027                                 void *dstptr = NULL;
1028                                 RuntimeHashget(pointertbl, objptr, &dstptr);
1029                                 if(NULL == dstptr) {
1030                                         // send msg to host core for the mapping info
1031                                         obj2map = (int)objptr;
1032                                         ismapped = false;
1033                                         mappedobj = NULL;
1034                                         send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr, 
1035                                                                BAMBOO_NUM_OF_CORE);
1036                                         while(!ismapped) {}
1037                                         RuntimeHashget(pointertbl, objptr, &dstptr);
1038                                 }
1039                                 *((void **)(((char *)ptr)+offset)) = dstptr;
1040                         }
1041                 }
1042         } // while(moi != NULL)
1043         // send flush finish message to core coordinator
1044         send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE);
1045 } // flush()
1046
1047 void gc_collect(struct garbagelist * stackptr) {
1048         // core collector routine
1049         mark(true, stackptr);
1050         compact();
1051         while(FLUSHPHASE != gcphase) {}
1052         flush();
1053
1054         while(FINISHPHASE != gcphase) {}
1055 }
1056
1057 #endif