From 73b1f4b8d531caef574e78431481837a7490bcd3 Mon Sep 17 00:00:00 2001
From: jzhou
Date: Thu, 20 Aug 2009 01:29:31 +0000
Subject: [PATCH] finish GC code and fix some bugs

---
 Robust/src/IR/Flat/BuildCode.java     |  10 +-
 Robust/src/Main/Main.java             |  10 +-
 Robust/src/Runtime/RAW/task_arch.c    |  52 ---
 Robust/src/Runtime/multicoregarbage.c | 640 ++++++++++++++++++--------
 Robust/src/Runtime/multicoregarbage.h |  24 +-
 Robust/src/Runtime/multicoreruntime.c |   4 +
 Robust/src/Runtime/multicoreruntime.h |  26 +-
 Robust/src/Runtime/multicoretask.c    | 156 +++++--
 Robust/src/buildscript                |  19 +-
 9 files changed, 631 insertions(+), 310 deletions(-)

diff --git a/Robust/src/IR/Flat/BuildCode.java b/Robust/src/IR/Flat/BuildCode.java
index 240e6c5f..d0db3db7 100644
--- a/Robust/src/IR/Flat/BuildCode.java
+++ b/Robust/src/IR/Flat/BuildCode.java
@@ -594,7 +594,7 @@ public class BuildCode {
     } else {
       outclassdefs.println(" int version;");
       outclassdefs.println(" int * lock;");  // lock entry for this obj
-      outclassdefs.println(" void * mutex;");
+      outclassdefs.println(" int mutex;");
       outclassdefs.println(" int lockcount;");
       if(state.MULTICOREGC) {
         outclassdefs.println(" int marked;");
@@ -1303,13 +1303,13 @@ public class BuildCode {
     if((!state.MULTICORE) || (cn.getSymbol().equals("TagDescriptor"))) {
       classdefout.println(" void * flagptr;");
     } else if (state.MULTICORE) {
-      if(state.MULTICOREGC) {
-        classdefout.println(" int marked;");
-      }
       classdefout.println(" int version;");
       classdefout.println(" int * lock;");  // lock entry for this obj
-      classdefout.println(" void * mutex;");
+      classdefout.println(" int mutex;");
       classdefout.println(" int lockcount;");
+      if(state.MULTICOREGC) {
+        classdefout.println(" int marked;");
+      }
     }
     if (state.OPTIONAL) {
       classdefout.println(" int numfses;");
diff --git a/Robust/src/Main/Main.java b/Robust/src/Main/Main.java
index 6df19dfc..b1b34092 100644
--- a/Robust/src/Main/Main.java
+++ b/Robust/src/Main/Main.java
@@ -401,16 +401,16 @@ public class Main {
       if(isDistributeInfo) {
         mcImplSynthesis.distribution(isDisAll, startnum);
       } else {
-        double timeStartAnalysis = (double) System.nanoTime();
+        //double timeStartAnalysis = (double) System.nanoTime();
         mcImplSynthesis.setScheduleThreshold(20);
         mcImplSynthesis.setProbThreshold(0);
         mcImplSynthesis.setGenerateThreshold(30);
         Vector scheduling = mcImplSynthesis.synthesis();
-        double timeEndAnalysis = (double) System.nanoTime();
-        double dt = (timeEndAnalysis - timeStartAnalysis)/(Math.pow( 10.0, 9.0 ) );
-        System.err.println("The analysis took" + dt + "sec.");
-        System.exit(0);
+        //double timeEndAnalysis = (double) System.nanoTime();
+        //double dt = (timeEndAnalysis - timeStartAnalysis)/(Math.pow( 10.0, 9.0 ) );
+        //System.err.println("The analysis took" + dt + "sec.");
+        //System.exit(0);

         // generate multicore codes
         if(state.MULTICORE) {
diff --git a/Robust/src/Runtime/RAW/task_arch.c b/Robust/src/Runtime/RAW/task_arch.c
index 7751ece9..40c42655 100644
--- a/Robust/src/Runtime/RAW/task_arch.c
+++ b/Robust/src/Runtime/RAW/task_arch.c
@@ -838,9 +838,7 @@ bool getreadlock(void * ptr) {
   return true;
 }

-void releasewritelock_r(void * lock, void * redirectlock);
 bool getreadlock_I_r(void * ptr, void * redirectlock, int core, bool cache);
-
 bool getwritelock_I_r(void* lock, void* redirectlock, int core, bool cache);

 void releasereadlock(void * ptr) {
@@ -1063,56 +1061,6 @@ void releasewritelock(void * ptr) {
   }
 }

-void releasewritelock_r(void * lock, void * redirectlock) {
-  int targetcore = 0;
-  int reallock = (int)lock;
-  targetcore = (reallock >> 5) % BAMBOO_TOTALCORE;
-
-#ifdef DEBUG
-  BAMBOO_DEBUGPRINT(0xe671);
-  BAMBOO_DEBUGPRINT_REG((int)lock);
-  BAMBOO_DEBUGPRINT_REG(reallock);
-  BAMBOO_DEBUGPRINT_REG(targetcore);
-#endif
-
-  if(targetcore == BAMBOO_NUM_OF_CORE) {
-    BAMBOO_START_CRITICAL_SECTION_LOCK();
-#ifdef DEBUG
-    BAMBOO_DEBUGPRINT(0xf001);
-#endif
-    // reside on this core
-    if(!RuntimeHashcontainskey(locktbl, reallock)) {
-      // no locks for this object, something is wrong
-      BAMBOO_EXIT(0xa01d);
-    } else {
-      int rwlock_obj = 0;
-      struct LockValue * lockvalue = NULL;
-#ifdef DEBUG
-      BAMBOO_DEBUGPRINT(0xe672);
-#endif
-      RuntimeHashget(locktbl, reallock, &rwlock_obj);
-      lockvalue = (struct LockValue *)rwlock_obj;
-#ifdef DEBUG
-      BAMBOO_DEBUGPRINT_REG(lockvalue->value);
-#endif
-      lockvalue->value++;
-      lockvalue->redirectlock = (int)redirectlock;
-#ifdef DEBUG
-      BAMBOO_DEBUGPRINT_REG(lockvalue->value);
-#endif
-    }
-    BAMBOO_CLOSE_CRITICAL_SECTION_LOCK();
-#ifdef DEBUG
-    BAMBOO_DEBUGPRINT(0xf000);
-#endif
-    return;
-  } else {
-    // send lock release with redirect info msg
-    // for 32 bit machine, the size is always 4 words
-    send_msg_4(targetcore, REDIRECTRELEASE, 1, (int)lock, (int)redirectlock);
-  }
-}
-
 bool getwritelock_I(void * ptr) {
   int targetcore = 0;
   lockobj = (int)ptr;
diff --git a/Robust/src/Runtime/multicoregarbage.c b/Robust/src/Runtime/multicoregarbage.c
index d609d2a8..22a6783e 100644
--- a/Robust/src/Runtime/multicoregarbage.c
+++ b/Robust/src/Runtime/multicoregarbage.c
@@ -250,7 +250,18 @@ inline void transferMarkResults() {
     BAMBOO_DEBUGPRINT(0xffff);
 #endif
   } // if(isMsgSending)
-} // void transferMarkResults()
+} // void transferMarkResults()
+
+inline bool gc_checkCoreStatus() {
+  bool allStall = true;
+  for(int i = 0; i < NUMCORES; ++i) {
+    if(gccorestatus[i] != 0) {
+      allStall = false;
+      break;
+    } // if(gccorestatus[i] != 0)
+  } // for(i = 0; i < NUMCORES; ++i)
+  return allStall;
+}

 inline void checkMarkStatue() {
   if((!waitconfirm) || (waitconfirm && (numconfirm == 0))) {
     gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
     gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
     // check the status of all cores
-    bool allStall = true;
-    for(i = 0; i < NUMCORES; ++i) {
-      if(gccorestatus[i] != 0) {
-        allStall = false;
-        break;
-      } // if(gccorestatus[i] != 0)
-    } // for(i = 0; i < NUMCORES; ++i)
+    bool allStall = gc_checkCoreStatus();
     if(allStall) {
       // check if the sum of send objs and receive obj are the same
       // yes->check if the info is the latest; no->go on executing
@@ -344,14 +349,58 @@ inline bool preGC() {
   } // if((!waitconfirm) ||
 } // bool preGC()

+inline void initGC() {
+  int i;
+  for(i = 0; i < NUMCORES; ++i) {
+    gccorestatus[i] = 1;
+    gcnumsendobjs[i] = 0;
+    gcnumreceiveobjs[i] = 0;
+    gcloads[i] = 0;
+    gcrequiredmems[i] = 0;
+    gcfilledblocks[i] = 0;
+    gcstopblock[i] = 0;
+  } // for(i = 0; i < NUMCORES; ++i)
+  gcself_numsendobjs = 0;
+  gcself_numreceiveobjs = 0;
+  gcmarkedptrbound = 0;
+  gcobj2map = 0;
+  gcmappedobj = 0;
+  gcismapped = false;
+  gcnumlobjs = 0;
+  gcheaptop = 0;
+  gctopcore = 0;
+  gcheapdirection = 1;
+  gcreservedsb = 0;
+  gcmovestartaddr = 0;
+  gctomove = false;
+  gcblock2fill = 0;
+  gcmovepending = 0;
+
+  // initialize queue
+  if (gchead==NULL) {
+    gcheadindex=0;
+    gctailindex=0;
+    gctailindex2 = 0;
+    gchead=gctail=gctail2=malloc(sizeof(struct pointerblock));
+  }
+  // initialize the large obj queues
+  if (gclobjhead==NULL) {
+    gclobjheadindex=0;
+    gclobjtailindex=0;
+    gclobjtailindex2 = 0;
+    gclobjhead=gclobjtail=gclobjtail2=
+      malloc(sizeof(struct lobjpointerblock));
+  }
+} // void initGC()
+
 // compute load balance for all cores
 inline int loadbalance(int heaptop) {
   // compute load balance
   int i;
   // get the total loads
-  gcloads[0]+=BAMBOO_SMEM_SIZE*gcreservedsb;//reserved sblocks for sbstartbl
-  int tloads = gcloads[0];
+  gcloads[STARTUPCORE]+=
+    BAMBOO_SMEM_SIZE*gcreservedsb;//reserved sblocks for sbstartbl
+  int tloads = gcloads[STARTUPCORE];
   for(i = 1; i < NUMCORES; i++) {
     tloads += gcloads[i];
   }
@@ -409,8 +458,22 @@ inline bool cacheLObjs()

 inline void moveLObjs() {
   // find current heap top
+  // flush all gcloads to indicate the real heap top on one core
+  // previously it represented the next available ptr on a core
+  if((gcloads[0] > BAMBOO_BASE_VA+BAMBOO_SMEM_SIZE_L)
+     && (gcloads[0] % BAMBOO_SMEM_SIZE == 0)) {
+    // edge of a block, check if this is exactly the heaptop
+    BASEPTR(0, gcfilledblocks[0]-1, &gcloads[0]);
+    gcloads[0]+=(gcfilledblocks[0]>1?BAMBOO_SMEM_SIZE:BAMBOO_SMEM_SIZE_L);
+  }
   int tmpheaptop = gcloads[0];
   for(int i = 1; i < NUMCORES; i++) {
+    if((gcloads[i] > BAMBOO_BASE_VA+BAMBOO_SMEM_SIZE_L)
+       && (gcloads[i] % BAMBOO_SMEM_SIZE == 0)) {
+      // edge of a block, check if this is exactly the heaptop
+      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
+      gcloads[i]+=(gcfilledblocks[i]>1?BAMBOO_SMEM_SIZE:BAMBOO_SMEM_SIZE_L);
+    }
     if(tmpheaptop < gcloads[i]) {
       tmpheaptop = gcloads[i];
     }
@@ -470,7 +533,7 @@ inline void moveLObjs()

 inline void updateFreeMemList() {
   int i = 0;
-  int tmptop = gcloads[0];
+  int tmptop = gcloads[0];
   struct freeMemItem * tochange = bamboo_free_mem_list->head;
   if(tochange == NULL) {
     bamboo_free_mem_list->head = tochange =
@@ -507,137 +570,6 @@
   bamboo_free_mem_list->tail = tochange;
 } // void updateFreeMemList()

-inline void gc(struct garbagelist * stackptr) {
-  // check if do gc
-  if(!gcflag) {
-    return;
-  }
-
-  // core coordinator routine
-  if(0 == BAMBOO_NUM_OF_CORE) {
-    if(!preGC()) {
-      // not ready to do gc
-      gcflag = true;
-      return;
-    }
-
-    gcprocessing = true;
-    int i = 0;
-    waitconfirm = false;
-    waitconfirm = 0;
-    gcphase = MARKPHASE;
-    for(i = 1; i < NUMCORES - 1; i++) {
-      // send GC start messages to all cores
-      send_msg_1(i, GCSTART);
-    }
-    bool isfirst = true;
-    bool allStall = false;
-
-    // mark phase
-    while(MARKPHASE == gcphase) {
-      mark(isfirst, stackptr);
-      if(isfirst) {
-        isfirst = false;
-      }
-
-      // check gcstatus
-      checkMarkStatue();
-    } // while(MARKPHASE == gcphase)
-    // send msgs to all cores requiring large objs info
-    numconfirm = NUMCORES - 1;
-    for(i = 1; i < NUMCORES; ++i) {
-      send_msg_1(i, GCLOBJREQUEST);
-    }
-    while(numconfirm != 0) {} // wait for responses
-    if(!cacheLObjs()) {
-      // no enough space to cache large objs
-      BAMBOO_EXIT(0xd001);
-    }
-    int numpbc = loadbalance();
-
-    if((gcheapdirection) && (0 <= gctopcore)
-       || ((!gcheapdirection) && (0 == gctopcore))) {
-      gcstopblock = numpbc + 1;
-    } else {
-      gcstopblock = numpbc;
-    }
-    for(i = 1; i < NUMCORES; ++i) {
-      //send start compact messages to all cores
-      if((gcheapdirection) && (i <= gctopcore)
-         || ((!gcheapdirection) && (i >= gctopcore))) {
-        send_msg_2(i, GCSTARTCOMPACT, numpbc+1);
-      } else {
-        send_msg_2(i, GCSTARTCOMPACT, numpbc);
-      }
-    }
-
-    // compact phase
-    compact();
-    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-    while(COMPACTPHASE == gcphase) {
-      // check the status of all cores
-      allStall = true;
-      for(i = 0; i < NUMCORES; ++i) {
-        if(gccorestatus[i] != 0) {
-          allStall = false;
-          break;
-        }
-      }
-      if(allStall) {
-        // restore the gcstatus of all cores
-        for(i = 0; i < NUMCORES; ++i) {
-          gccorestatus[i] = 1;
-        }
-        break;
-      }
-    } // while(COMPACTPHASE == gcphase)
-    // move largeObjs
-    moveLObjs();
-
-    gcphase = FLUSHPHASE;
-    for(i = 1; i < NUMCORES; ++i) {
-      // send start flush messages to all cores
-      send_msg_1(i, GCSTARTFLUSH);
-    }
-
-    // flush phase
-    flush();
-    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-    while(FLUSHPHASE == gcphase) {
-      // check the status of all cores
-      allStall = true;
-      for(i = 0; i < NUMCORES; ++i) {
-        if(gccorestatus[i] != 0) {
-          allStall = false;
-          break;
-        }
-      }
-      if(allStall) {
-        break;
-      }
-    } // while(FLUSHPHASE == gcphase)
-    gcphase = FINISHPHASE;
-    for(i = 1; i < NUMCORES; ++i) {
-      // send gc finish messages to all cores
-      send_msg_1(i, GCFINISH);
-    }
-
-    // need to create free memory list
-    updateFreeMemList();
-  } else {
-    gcprocessing = true;
-    gc_collect(stackptr);
-  }
-
-  // invalidate all shared mem pointers
-  bamboo_cur_msp = NULL;
-  bamboo_smem_size = 0;
-
-  gcflag = false;
-  gcprocessing = false;
-
-} // void gc(struct garbagelist * stackptr)
-
 // enqueue root objs
 inline void tomark(struct garbagelist * stackptr) {
   if(MARKPHASE != gcphase) {
@@ -790,6 +722,126 @@ inline void mark(bool isfirst,
 } // while(MARKPHASE == gcphase)
 } // mark()

+inline void compact2Heaptop() {
+  // no cores with spare mem and some cores are blocked with pending move
+  // find the current heap top and make them move to the heap top
+  int p;
+  if(gcheapdirection) {
+    gctopcore++;
+  } else {
+    gctopcore--;
+  }
+  int numblocks = gcfilledblocks[gctopcore];
+  BASEPTR(gctopcore, numblocks, &p);
+  int b;
+  BLOCKINDEX(p, &b);
+  int remain = (b<NUMCORES) ?
+               ((b+1)*BAMBOO_SMEM_SIZE_L+BAMBOO_BASE_VA-p) :
+               ((b-NUMCORES+1)*BAMBOO_SMEM_SIZE+BAMBOO_LARGE_SMEM_BOUND+BAMBOO_BASE_VA-p);
+  int i;
+  for(i = 0; i < NUMCORES; i++) {
+    if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
+      int memneed = gcrequiredmems[i] + BAMBOO_CACHE_LINE_SIZE;
+      if(STARTUPCORE == i) {
+        gctomove = true;
+        gcmovestartaddr = p;
+        gcdstcore = gctopcore;
+        gcblock2fill = numblocks + 1;
+      } else {
+        send_msg_4(i, GCMOVESTART, gctopcore, p, numblocks + 1);
+      }
+      if(memneed < remain) {
+        p += memneed;
+        gcrequiredmems[i] = 0;
+        gcmovepending--;
+        gcloads[gctopcore] += memneed;
+      } else {
+        // next available block
+        p += remain;
+        gcfilledblocks[gctopcore] += 1;
+        int newbase = 0;
+        BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
+        gcloads[gctopcore] = newbase;
+        gcrequiredmems[i] -= remain - BAMBOO_CACHE_LINE_SIZE;
+        gcstopblock[gctopcore]++;
+        if(gcheapdirection) {
+          gctopcore++;
+        } else {
+          gctopcore--;
+        }
+        numblocks = gcstopblock[gctopcore];
+        BASEPTR(gctopcore, numblocks, &p);
+        BLOCKINDEX(p, &b);
+        remain = (b<NUMCORES) ?
+                 ((b+1)*BAMBOO_SMEM_SIZE_L+BAMBOO_BASE_VA-p) :
+                 ((b-NUMCORES+1)*BAMBOO_SMEM_SIZE+BAMBOO_LARGE_SMEM_BOUND+BAMBOO_BASE_VA-p);
+      } // if(memneed < remain) else ...
+    } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
+  } // for(i = 0; i < NUMCORES; i++)
+} // void compact2Heaptop()
+
+inline void resolvePendingMoveRequest() {
+  int i;
+  int j;
+  bool nosparemem = true;
+  bool haspending = false;
+  bool hasrunning = false;
+  bool noblock = false;
+  int dstcore = 0;
+  int sourcecore = 0;
+  for(i = j = 0; (i < NUMCORES) && (j < NUMCORES);) {
+    if(nosparemem) {
+      // check if there are cores with spare mem
+      if(gccorestatus[i] == 0) {
+        // finished working, check if it still has spare mem
+        if(gcfilledblocks[i] < gcstopblock[i]) {
+          // still have spare mem
+          nosparemem = false;
+          dstcore = i;
+        } else {
+          i++;
+        } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
+      }
+    } // if(nosparemem)
+    if(!haspending) {
+      if(gccorestatus[j] != 0) {
+        // not finished, check if it has pending move requests
+        if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
+          sourcecore = j;
+          haspending = true;
+        } else {
+          j++;
+          hasrunning = true;
+        } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
+      } // if(gccorestatus[i] == 0) else ...
+    } // if(!haspending)
+    if(!nosparemem && haspending) {
+      // find match
+      int tomove = 0;
+      int startaddr = 0;
+      gcrequiredmems[dstcore] = assignSpareMem(sourcecore,
+                                               gcrequiredmems[dstcore],
+                                               &tomove,
+                                               &startaddr);
+      if(STARTUPCORE == dstcore) {
+        gcdstcore = sourcecore;
+        gctomove = true;
+        gcmovestartaddr = startaddr;
+        gcblock2fill = tomove;
+      } else {
+        send_msg_4(dstcore, GCMOVESTART, sourcecore, startaddr, tomove);
+      }
+      if(gcrequiredmems[dstcore] == 0) {
+        gcmovepending--;
+      }
+      nosparemem = true;
+      haspending = false;
+      noblock = true;
+    }
+  } // for(i = 0; i < NUMCORES; i++)
+
+  if(!hasrunning && !noblock) {
+    gcphase = SUBTLECOMPACTPHASE;
+    compact2Heaptop();
+  }
+
+} // void resolvePendingMoveRequest()
+
 struct moveHelper {
   int numblocks;       // block num for heap
   INTPTR base;         // base virtual address of current heap block
@@ -951,99 +1003,110 @@ innermoveobj:
   return false;
 } //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int* endaddr)

+inline int assignSpareMem(int sourcecore,
+                          int requiredmem,
+                          int * tomove,
+                          int * startaddr) {
+  int b = 0;
+  BLOCKINDEX(gcloads[sourcecore], &b);
+  int boundptr = (b<NUMCORES) ?
+                 ((b+1)*BAMBOO_SMEM_SIZE_L+BAMBOO_BASE_VA) :
+                 ((b-NUMCORES+1)*BAMBOO_SMEM_SIZE+BAMBOO_LARGE_SMEM_BOUND+BAMBOO_BASE_VA);
+  int remain = boundptr - gcloads[sourcecore];
+  int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
+  *startaddr = gcloads[sourcecore];
+  *tomove = gcfilledblocks[sourcecore] + 1;
+  if(memneed < remain) {
+    gcloads[sourcecore] += memneed;
+    return 0;
+  } else {
+    // next available block
+    gcfilledblocks[sourcecore] += 1;
+    int newbase = 0;
+    BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
+    gcloads[sourcecore] = newbase;
+    return requiredmem-remain;
+  }
+} // int assignSpareMem(...)
+
+inline bool findSpareMem(int * startaddr,
+                         int * tomove,
+                         int * dstcore,
+                         int requiredmem,
+                         int requiredcore) {
+  int i;
+  for(i = 0; i < NUMCORES; i++) {
+    if((gccorestatus[i] == 0) && (gcfilledblocks[i] < gcstopblock[i])) {
+      // found a finished core with spare mem, assign it to the request
+      *dstcore = i;
+      assignSpareMem(i, requiredmem, tomove, startaddr);
+      return true;
+    }
+  }
+  // no spare mem available right now, hold the request
+  gcrequiredmems[requiredcore] = requiredmem;
+  gcmovepending++;
+  return false;
+} // bool findSpareMem(...)
+
+inline bool compacthelper(struct moveHelper * orig,
+                          struct moveHelper * to,
+                          int * filledblocks,
+                          INTPTR * heaptopptr,
+                          bool * localcompact) {
+  int curr_heaptop = 0;
 innercompact:
   do {
     bool stop = moveobj(orig, to, &curr_heaptop);
     if(stop) {
       break;
     }
   } while(orig->ptr < gcmarkedptrbound);
   // fill the header of this block
   (*((int*)(to->base))) = to->offset;
-  heaptopptr = to->ptr;
+  if(*localcompact) {
+    *heaptopptr = to->ptr;
+    *filledblocks = to->numblocks;
+  }
   // send msgs to core coordinator indicating that the compact is finishing
   // send compact finish message to core coordinator
   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-    gcnumblocks[0] = to->numblocks;
+    gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
+    gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
     if(orig->ptr < gcmarkedptrbound) {
       // ask for more mem
       gctomove = false;
-      if(findSpareMem(&gcmovestartaddr, &gcstopblock, curr_heaptop)) {
+      if(findSpareMem(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
+                      curr_heaptop, BAMBOO_NUM_OF_CORE)) {
         gctomove = true;
       } else {
-        // TODO hold the request
+        return false;
       }
     } else {
       gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-      gcloads[BAMBOO_NUM_OF_CORE] = to->ptr;
+      return true;
     }
   } else {
     if(orig->ptr < gcmarkedptrbound) {
       // ask for more mem
       gctomove = false;
       send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
-                 to->numblocks, 0, curr_heaptop);
+                 *filledblocks, *heaptopptr, curr_heaptop);
     } else {
       // finish compacting
       send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
-                 to->numblocks, 1, to->ptr);
+                 *filledblocks, *heaptopptr, 0);
     }
   } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)

   if(orig->ptr < gcmarkedptrbound) {
     // still have unpacked obj
     while(!gctomove) {};
+    gctomove = false;
     to->ptr = gcmovestartaddr;
-    to->numblocks = gcstopblock - 1;
+    to->numblocks = gcblock2fill - 1;
     to->bound = (to->numblocks==0)?
                 BAMBOO_SMEM_SIZE_L:
                 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
@@ -1055,9 +1118,33 @@ innercompact:
     to->offset = BAMBOO_CACHE_LINE_SIZE;
     to->ptr += to->offset;   // for header
     to->top += to->offset;
+    if(gcdstcore == BAMBOO_NUM_OF_CORE) {
+      *localcompact = true;
+    } else {
+      *localcompact = false;
+    }
     goto innercompact;
   }
-  // TODO finish?
+  return true;
+} // void compacthelper()
+
+inline void compact() {
+  if(COMPACTPHASE != gcphase) {
+    BAMBOO_EXIT(0xb003);
+  }
+
+  // initialize pointers for compacting
+  struct moveHelper * orig =
+    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
+  struct moveHelper * to =
+    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
+
+  initOrig_Dst(orig, to);
+
+  int filledblocks = 0;
+  INTPTR heaptopptr = 0;
+  bool localcompact = true;
+  compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
   RUNFREE(orig);
   RUNFREE(to);
@@ -1135,4 +1222,183 @@ inline void gc_collect(struct garbagelist * stackptr)
   while(FINISHPHASE != gcphase) {}
 } // void gc_collect(struct garbagelist * stackptr)

+inline void gc(struct garbagelist * stackptr) {
+  // check if do gc
+  if(!gcflag) {
+    return;
+  }
+
+  // core coordinator routine
+  if(0 == BAMBOO_NUM_OF_CORE) {
+    if(!preGC()) {
+      // not ready to do gc
+      gcflag = true;
+      return;
+    }
+
+    initGC();
+
+    gcprocessing = true;
+    int i = 0;
+    waitconfirm = false;
+    numconfirm = 0;
+    gcphase = MARKPHASE;
+    for(i = 1; i < NUMCORES; i++) {
+      // send GC start messages to all cores
+      send_msg_1(i, GCSTART);
+    }
+    bool isfirst = true;
+    bool allStall = false;
+
+    // mark phase
+    while(MARKPHASE == gcphase) {
+      mark(isfirst, stackptr);
+      if(isfirst) {
+        isfirst = false;
+      }
+
+      // check gcstatus
+      checkMarkStatue();
+    } // while(MARKPHASE == gcphase)
+    // send msgs to all cores requiring large objs info
+    numconfirm = NUMCORES - 1;
+    for(i = 1; i < NUMCORES; ++i) {
+      send_msg_1(i, GCLOBJREQUEST);
+    }
+    while(numconfirm != 0) {} // wait for responses
+    // cache all large objs
+    if(!cacheLObjs()) {
+      // not enough space to cache large objs
+      BAMBOO_EXIT(0xd001);
+    }
+    // predict number of blocks to fill for each core
+    int numpbc = loadbalance();
+    for(i = 0; i < NUMCORES; ++i) {
+      //send start compact messages to all cores
+      if((gcheapdirection) && (i < gctopcore)
+         || ((!gcheapdirection) && (i > gctopcore))) {
+        gcstopblock[i] = numpbc + 1;
+        if(i != STARTUPCORE) {
+          send_msg_2(i, GCSTARTCOMPACT, numpbc+1);
+        }
+      } else {
+        gcstopblock[i] = numpbc;
+        if(i != STARTUPCORE) {
+          send_msg_2(i, GCSTARTCOMPACT, numpbc);
+        }
+      }
+      // init some data structures for compact phase
+      gcloads[i] = 0;
+      gcfilledblocks[i] = 0;
+      gcrequiredmems[i] = 0;
+    }
+
+    // compact phase
+    bool finalcompact = false;
+    // initialize pointers for compacting
+    struct moveHelper * orig =
+      (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
+    struct moveHelper * to =
+      (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
+    initOrig_Dst(orig, to);
+    int filledblocks = 0;
+    INTPTR heaptopptr = 0;
+    bool localcompact = true;
+    bool finishcompact = false;
+    bool iscontinue = true;
+    while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
+      if((!finishcompact) && iscontinue) {
+        finishcompact = compacthelper(orig, to, &filledblocks,
+                                      &heaptopptr, &localcompact);
+      }
+
+      if(gc_checkCoreStatus()) {
+        // all cores have finished compacting
+        // restore the gcstatus of all cores
+        for(i = 0; i < NUMCORES; ++i) {
+          gccorestatus[i] = 1;
+        }
+        break;
+      } else {
+        // check if there are spare mem for pending move requests
+        if(COMPACTPHASE == gcphase) {
+          resolvePendingMoveRequest();
+        } else {
+          compact2Heaptop();
+        }
+      } // if(gc_checkCoreStatus()) else ...
+
+      if(gctomove) {
+        to->ptr = gcmovestartaddr;
+        to->numblocks = gcblock2fill - 1;
+        to->bound = (to->numblocks==0)?
+                    BAMBOO_SMEM_SIZE_L:
+                    BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
+        BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
+        to->offset = to->ptr - to->base;
+        to->top = (to->numblocks==0)?
+                  (to->offset):(to->bound-BAMBOO_SMEM_SIZE+to->offset);
+        to->base = to->ptr;
+        to->offset = BAMBOO_CACHE_LINE_SIZE;
+        to->ptr += to->offset;   // for header
+        to->top += to->offset;
+        if(gcdstcore == BAMBOO_NUM_OF_CORE) {
+          localcompact = true;
+        } else {
+          localcompact = false;
+        }
+        gctomove = false;
+        iscontinue = true;
+      } else if(!finishcompact) {
+        // still pending
+        iscontinue = false;
+      } // if(gctomove)
+
+    } // while(COMPACTPHASE == gcphase)
+    // move largeObjs
+    moveLObjs();
+
+    gcphase = FLUSHPHASE;
+    for(i = 1; i < NUMCORES; ++i) {
+      // send start flush messages to all cores
+      send_msg_1(i, GCSTARTFLUSH);
+    }
+
+    // flush phase
+    flush();
+    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+    while(FLUSHPHASE == gcphase) {
+      // check the status of all cores
+      allStall = true;
+      for(i = 0; i < NUMCORES; ++i) {
+        if(gccorestatus[i] != 0) {
+          allStall = false;
+          break;
+        }
+      }
+      if(allStall) {
+        break;
+      }
+    } // while(FLUSHPHASE == gcphase)
+    gcphase = FINISHPHASE;
+    for(i = 1; i < NUMCORES; ++i) {
+      // send gc finish messages to all cores
+      send_msg_1(i, GCFINISH);
+    }
+
+    // need to create free memory list
+    updateFreeMemList();
+  } else {
+    gcprocessing = true;
+    gc_collect(stackptr);
+  }
+
+  // invalidate all shared mem pointers
+  bamboo_cur_msp = NULL;
+  bamboo_smem_size = 0;
+
+  gcflag = false;
+  gcprocessing = false;
+
+} // void gc(struct garbagelist * stackptr)
+
 #endif
diff --git a/Robust/src/Runtime/multicoregarbage.h b/Robust/src/Runtime/multicoregarbage.h
index 5908fae9..f081e765 100644
--- a/Robust/src/Runtime/multicoregarbage.h
+++ b/Robust/src/Runtime/multicoregarbage.h
@@ -3,12 +3,7 @@
 #include "Queue.h"

 // data structures for GC
-#define BAMBOO_NUM_PAGES 1024 * 512
-#define BAMBOO_PAGE_SIZE 4096
-#define BAMBOO_SHARED_MEM_SIZE BAMBOO_PAGE_SIZE * BAMBOO_NUM_PAGES
-#define BAMBOO_BASE_VA 0xd000000
-#define BAMBOO_SMEM_SIZE 16 * BAMBOO_PAGE_SIZE
-#define BAMBOO_SMEM_SIZE_L 512 * BAMBOO_PAGE_SIZE
+#define BAMBOO_SMEM_SIZE_L 32 * BAMBOO_SMEM_SIZE
 #define BAMBOO_LARGE_SMEM_BOUND BAMBOO_SMEM_SIZE_L*NUMCORES  // NUMCORES=62

 #define NUMPTRS 100
@@ -58,12 +53,13 @@ int gclobjtailindex2=0;
 struct lobjpointerblock *gclobjspare=NULL;
 int gcnumlobjs = 0;

-enum GCPHASETYPE {
+typedef enum {
   MARKPHASE = 0x0,      // 0x0
   COMPACTPHASE,         // 0x1
-  FLUSHPHASE,           // 0x2
-  FINISHPHASE           // 0x3
-};
+  SUBTLECOMPACTPHASE,   // 0x2
+  FLUSHPHASE,           // 0x3
+  FINISHPHASE           // 0x4
+} GCPHASETYPE;

 volatile bool gcflag;
 volatile bool gcprocessing;
@@ -86,11 +82,15 @@ bool gcheapdirection; // 0: decrease; 1: increase

 // compact instruction
 INTPTR gcmarkedptrbound;
-int gcstopblock; // indicate when to stop compact phase
-int gcnumblocks[NUMCORES]; // indicate how many blocks have been fulfilled
+int gcblock2fill;
+int gcstopblock[NUMCORES];    // indicate when to stop compact phase
+int gcfilledblocks[NUMCORES]; // indicate how many blocks have been fulfilled
 // move instruction;
 INTPTR gcmovestartaddr;
+int gcdstcore;
 bool gctomove;
+int gcrequiredmems[NUMCORES]; // record pending mem requests
+int gcmovepending;

 // mapping of old address to new address
 struct RuntimeHash * gcpointertbl;
diff --git a/Robust/src/Runtime/multicoreruntime.c b/Robust/src/Runtime/multicoreruntime.c
index 0e84d7e3..3b1c9cf4 100644
--- a/Robust/src/Runtime/multicoreruntime.c
+++ b/Robust/src/Runtime/multicoreruntime.c
@@ -194,6 +194,7 @@ void * allocate_new(void * ptr, int type) {
   v->type=type;
   v->version = 0;
   v->lock = NULL;
+  initlock(v);
   return v;
 }
@@ -208,6 +209,7 @@ struct ArrayObject * allocate_newarray(void * ptr, int type, int length) {
     return NULL;
   }
   v->___length___=length;
+  initlock(v);
   return v;
 }
@@ -218,6 +220,7 @@ void * allocate_new(int type) {
   v->version = 0;
   //v->numlocks = 0;
   v->lock = NULL;
+  initlock(v);
   return v;
 }
@@ -230,6 +233,7 @@ struct ArrayObject * allocate_newarray(int type, int length) {
   //v->numlocks = 0;
   v->lock = NULL;
   v->___length___=length;
+  initlock(v);
   return v;
 }
 #endif
diff --git a/Robust/src/Runtime/multicoreruntime.h b/Robust/src/Runtime/multicoreruntime.h
index 15255eaf..64cae114 100644
--- a/Robust/src/Runtime/multicoreruntime.h
+++ b/Robust/src/Runtime/multicoreruntime.h
@@ -130,7 +130,7 @@ volatile bool isMsgSending;
  *          20 + orig large obj ptr + new large obj ptr
  *            (size is always 3 * sizeof(int))
  */
-enum MSGTYPE {
+typedef enum {
   TRANSOBJ = 0x0,     // 0x0
   TRANSTALL,          // 0x1
   LOCKREQUEST,        // 0x2
@@ -167,7 +167,7 @@
   GCLOBJMAPPING,      // 0x20
 #endif
   MSGEND
-};
+} MSGTYPE;

 // data structures of status for termination
 int corestatus[NUMCORES];   // records status of each core
@@ -202,6 +202,12 @@ bool lockflag;
 struct Queue objqueue;

 // data structures for shared memory allocation
+#define BAMBOO_NUM_PAGES 1024 * 512
+#define BAMBOO_PAGE_SIZE 4096
+#define BAMBOO_SHARED_MEM_SIZE BAMBOO_PAGE_SIZE * BAMBOO_NUM_PAGES
+#define BAMBOO_BASE_VA 0xd000000
+#define BAMBOO_SMEM_SIZE 16 * BAMBOO_PAGE_SIZE
+
 #ifdef MULTICORE_GC
 #include "multicoregarbage.h"
@@ -221,15 +227,9 @@
 struct freeMemList * bamboo_free_mem_list;
 INTPTR bamboo_cur_msp;
 int bamboo_smem_size;
 #else
-#define BAMBOO_NUM_PAGES 1024 * 512
-#define BAMBOO_PAGE_SIZE 4096
-#define BAMBOO_SHARED_MEM_SIZE BAMBOO_PAGE_SIZE * BAMBOO_PAGE_SIZE
-#define BAMBOO_BASE_VA 0xd000000
-#define BAMBOO_SMEM_SIZE 16 * BAMBOO_PAGE_SIZE
-
 bool smemflag;
 mspace bamboo_free_msp;
-mspace bamboo_cur_msp;
+INTPTR bamboo_cur_msp;
 int bamboo_smem_size;
 #endif
@@ -285,6 +285,7 @@ inline void initialization(void) __attribute__((always_inline));
 inline void initCommunication(void) __attribute__((always_inline));
 inline void fakeExecution(void) __attribute__((always_inline));
 inline void terminate(void) __attribute__((always_inline));
+inline void initlock(struct ___Object___ * v) __attribute__((always_inline));

 // lock related functions
 bool getreadlock(void* ptr);
@@ -293,6 +294,9 @@ bool getwritelock(void* ptr);
 void releasewritelock(void* ptr);
 bool getwritelock_I(void* ptr);
 void releasewritelock_I(void * ptr);
+#ifndef MULTICORE_GC
+void releasewritelock_r(void * lock, void * redirectlock);
+#endif

 /* this function is to process lock requests.
  * can only be invoked in receiveObject() */
 // if return -1: the lock request is redirected
@@ -303,11 +307,11 @@ inline int processlockrequest(int locktype,
                               int obj,
                               int requestcore,
                               int rootrequestcore,
-                              bool cache) __attribute_((always_inline));
+                              bool cache) __attribute__((always_inline));
 inline void processlockrelease(int locktype,
                                int lock,
                                int redirectlock,
-                               bool isredirect) __attribute_((always_inline));
+                               bool isredirect) __attribute__((always_inline));

 // msg related functions
 inline void send_hanging_msg() __attribute__((always_inline));
diff --git a/Robust/src/Runtime/multicoretask.c b/Robust/src/Runtime/multicoretask.c
index 924f7021..71d8acc5 100644
--- a/Robust/src/Runtime/multicoretask.c
+++ b/Robust/src/Runtime/multicoretask.c
@@ -19,6 +19,7 @@ int enqueuetasks_I(struct parameterwrapper *parameter,
                    int numenterflags);

 inline void initruntimedata() {
+  int i;
   // initialize the arrays
   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
     // startup core to initialize corestatus[]
@@ -35,6 +36,8 @@ inline void initruntimedata() {
       gcnumsendobjs[i] = 0;
       gcnumreceiveobjs[i] = 0;
       gcloads[i] = 0;
+      gcrequiredmems[i] = 0;
+      gcstopblock[i] = 0;
 #endif
     } // for(i = 0; i < NUMCORES; ++i)
     numconfirm = 0;
@@ -82,27 +85,20 @@ inline void initruntimedata() {
   gcheaptop = 0;
   gctopcore = 0;
   gcheapdirection = 1;
-  gcstopblock = 0;
   gcreservedsb = 0;
   gcmovestartaddr = 0;
   gctomove = false;
-  gcstopblock = 0;
-
-  // initialize queue
-  if (gchead==NULL) {
-    gcheadindex=0;
-    gctailindex=0;
-    gctailindex2 = 0;
-    gchead=gctail=gctail2=malloc(sizeof(struct pointerblock));
-  }
-  // initialize the large obj queues
-  if (gclobjhead==NULL) {
-    gclobjheadindex=0;
-    gclobjtailindex=0;
-    gclobjtailindex2 = 0;
-    gclobjhead=gclobjtail=gclobjtail2=
-      malloc(sizeof(struct lobjpointerblock));
-  }
+  gchead = gctail = gctail2 = NULL;
+  gclobjhead = gclobjtail = gclobjtail2 = NULL;
+  gcheadindex=0;
+  gctailindex=0;
+  gctailindex2 = 0;
+  gclobjheadindex=0;
+  gclobjtailindex=0;
+  gclobjtailindex2 = 0;
+  gcmovepending = 0;
+  gcblock2fill = 0;
 #else
   // create the lock table, lockresult table and obj queue
   locktable.size = 20;
@@ -282,7 +278,7 @@ objqueuebreak:
   return rflag;
 }

-inline void checkCoreStatue() {
+inline void checkCoreStatus() {
   bool allStall = false;
   int i = 0;
   int sumsendobj = 0;
@@ -824,7 +820,6 @@ struct ___TagDescriptor___ * allocate_tag(void *ptr,
 struct ___TagDescriptor___ * allocate_tag(int index) {
   struct ___TagDescriptor___ * v=FREEMALLOC(classsize[TAGTYPE]);
 #endif
-  struct ___TagDescriptor___ * v=FREEMALLOC(classsize[TAGTYPE]);
   v->type=TAGTYPE;
   v->flag=index;
   return v;
@@ -1167,7 +1162,7 @@ void * smemalloc(int size,
   if(freemem != NULL) {
     void * mem = (void *)(freemem->ptr);
     *allocsize = size;
-    freemem->ptr += size;
+    freemem->ptr = ((void*)freemem->ptr) + size;
     freemem->size -= size;
     // check how many blocks it acrosses
     int b = 0;
@@ -1700,7 +1695,7 @@ msg:
   case GCSTARTCOMPACT: {
     // a compact phase start msg
-    gcstopblock = msgdata[1];
+    gcblock2fill = msgdata[1];
     gcphase = COMPACTPHASE;
     break;
   }
@@ -1737,23 +1732,57 @@ msg:
       BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
       BAMBOO_EXIT(0xb006);
-    }
-    if(msgdata[1] < NUMCORES) {
-      gcnumblocks[msgdata[1]] = msgdata[2];
-      if(msgdata[3] == 0) {
+    }
+    int cnum = msgdata[1];
+    int filledblocks = msgdata[2];
+    int heaptop = msgdata[3];
+    int data4 = msgdata[4];
+    if(cnum < NUMCORES) {
+      if(COMPACTPHASE == gcphase) {
+        gcfilledblocks[cnum] = filledblocks;
+        gcloads[cnum] = heaptop;
+      }
+      if(data4 > 0) {
         // ask for more mem
         int startaddr = 0;
         int tomove = 0;
-        if(findSpareMem(&startaddr, &tomove, msgdata[2])) {
-          send_msg_4(msgdata[1], GCMOVESTART, k, startaddr, tomove);
-        } else {
-          // TODO if not success
+        int dstcore = 0;
+        if(findSpareMem(&startaddr, &tomove, &dstcore, data4, cnum)) {
+          send_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove);
         }
       } else {
-        gccorestatus[msgdata[1]] = 0;
-        gcloads[msgdata[1]] = msgdata[4];
-      }
-    }
+        gccorestatus[cnum] = 0;
+        // check if there is pending move request
+        if(gcmovepending > 0) {
+          int j;
+          for(j = 0; j < NUMCORES; j++) {
+            if(gcrequiredmems[j]>0) {
+              break;
+            }
+          }
+          if(j < NUMCORES) {
+            // find match
+            int tomove = 0;
+            int startaddr = 0;
+            gcrequiredmems[j] = assignSpareMem(cnum,
+                                               gcrequiredmems[j],
+                                               &tomove,
+                                               &startaddr);
+            if(STARTUPCORE == j) {
+              gcdstcore = cnum;
+              gctomove = true;
+              gcmovestartaddr = startaddr;
+              gcblock2fill = tomove;
+            } else {
+              send_msg_4(j, GCMOVESTART, cnum, startaddr, tomove);
+            } // if(STARTUPCORE == j)
+            if(gcrequiredmems[j] == 0) {
+              gcmovepending--;
+            }
+          } // if(j < NUMCORES)
+        } // if(gcmovepending > 0)
+      } // if(data4 > 0) else ...
+    } // if(cnum < NUMCORES)
     break;
   }
@@ -1829,8 +1858,9 @@ msg:
   case GCMOVESTART: {
     // received a start moving objs msg
     gctomove = true;
+    gcdstcore = msgdata[1];
     gcmovestartaddr = msgdata[2];
-    gcstopblock = msgdata[3];
+    gcblock2fill = msgdata[3];
     break;
   }
@@ -2122,6 +2152,58 @@ backtrackinc:

 int containstag(struct ___Object___ *ptr,
                 struct ___TagDescriptor___ *tag);

+#ifndef MULTICORE_GC
+void releasewritelock_r(void * lock, void * redirectlock) {
+  int targetcore = 0;
+  int reallock = (int)lock;
+  targetcore = (reallock >> 5) % BAMBOO_TOTALCORE;
+
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xe671);
+  BAMBOO_DEBUGPRINT_REG((int)lock);
+  BAMBOO_DEBUGPRINT_REG(reallock);
+  BAMBOO_DEBUGPRINT_REG(targetcore);
+#endif
+
+  if(targetcore == BAMBOO_NUM_OF_CORE) {
+    BAMBOO_START_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xf001);
+#endif
+    // reside on this core
+    if(!RuntimeHashcontainskey(locktbl, reallock)) {
+      // no locks for this object, something is wrong
+      BAMBOO_EXIT(0xa01d);
+    } else {
+      int rwlock_obj = 0;
+      struct LockValue * lockvalue = NULL;
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xe672);
+#endif
+      RuntimeHashget(locktbl, reallock, &rwlock_obj);
+      lockvalue = (struct LockValue *)rwlock_obj;
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT_REG(lockvalue->value);
+#endif
+      lockvalue->value++;
+      lockvalue->redirectlock = (int)redirectlock;
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT_REG(lockvalue->value);
+#endif
+    }
+    BAMBOO_CLOSE_CRITICAL_SECTION_LOCK();
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xf000);
+#endif
+    return;
+  } else {
+    // send lock release with redirect info msg
+    // for 32 bit machine, the size is always 4 words
+    send_msg_4(targetcore, REDIRECTRELEASE, 1, (int)lock, (int)redirectlock);
+  }
+}
+#endif
+
 void executetasks() {
   void * taskpointerarray[MAXTASKPARAMS+OFFSET];
   int numparams=0;
@@ -2424,18 +2506,22 @@ execute:
       BAMBOO_DEBUGPRINT(0xe999);
 #endif
     for(i = 0; i < locklen; ++i) {
-      void * ptr = (void *)(locks[i].redirectlock);
+      void * ptr = (void *)(locks[i].redirectlock);
       int * lock = (int *)(locks[i].value);
 #ifdef DEBUG
       BAMBOO_DEBUGPRINT_REG((int)ptr);
       BAMBOO_DEBUGPRINT_REG((int)lock);
 #endif
+#ifndef MULTICORE_GC
       if(RuntimeHashcontainskey(lockRedirectTbl, (int)lock)) {
        int redirectlock;
        RuntimeHashget(lockRedirectTbl, (int)lock, &redirectlock);
        RuntimeHashremovekey(lockRedirectTbl, (int)lock);
        releasewritelock_r(lock, (int *)redirectlock);
      } else {
+#else
+      {
+#endif
        releasewritelock(ptr);
      }
    }
diff --git a/Robust/src/buildscript b/Robust/src/buildscript
index 6d9ae67e..d87399f2 100755
--- a/Robust/src/buildscript
+++ b/Robust/src/buildscript
@@ -40,6 +40,7 @@ echo "-tilera generate tilera version binary (should be used together with -multicore)"
 echo "-tileraconfig config tilera simulator/pci as nxm (should be used together with -tilera)"
 echo "-raw generate raw version binary (should be used together with -multicore)"
 echo "-rawconfig config raw simulator as 4xn (should be used together with -raw)"
+echo "-multicoregc enable garbage collection in multicore version"
 echo -threadsimulate generate multi-thread simulate version binary
 echo -optional enable optional
 echo -debug generate debug symbols
@@ -83,6 +84,7 @@ RECOVERFLAG=false
 MLP_ON=false
 MLPDEBUG=false
 MULTICOREFLAG=false
+MULTICOREGCFLAG=false
 RAWFLAG=false
 TILERAFLAG=false
 TILERACONFIG=''
@@ -237,6 +239,10 @@ elif [[ $1 = '-multicore' ]]
 then
 MULTICOREFLAG=true
 JAVAOPTS="$JAVAOPTS -multicore"
+elif [[ $1 = '-multicoregc' ]]
+then
+MULTICOREGCFLAG=true
+JAVAOPTS="$JAVAOPTS -multicoregc"
 elif [[ $1 = '-numcore' ]]
 then
 JAVAOPTS="$JAVAOPTS -numcore $2"
@@ -604,10 +610,18 @@ then #INTERRUPT version
 TILERACFLAGS="${TILERACFLAGS} -DINTERRUPT"
 fi #INTERRUPT version

+if $MULTICOREGCFLAG
+then #MULTICORE_GC version
+TILERACFLAGS="${TILERACFLAGS} -DMULTICORE_GC"
+fi #MULTICORE_GC version
+
 cp $ROBUSTROOT/Tilera/Runtime/$MAKEFILE ./Makefile
 cp $ROBUSTROOT/Tilera/Runtime/$SIMHVC ./sim.hvc
 cp $ROBUSTROOT/Tilera/Runtime/$PCIHVC ./pci.hvc
 cp $ROBUSTROOT/Tilera/Runtime/bamboo-vmlinux-pci.hvc ./bamboo-vmlinux-pci.hvc
+cp ../Tilera/Runtime/*.c ./
+cp ../Tilera/Runtime/*.h ./
+cp ../Tilera/lib/* ./
 cp ../Runtime/multicoretask.c ./
 cp ../Runtime/multicoreruntime.c ./
 cp ../Runtime/Queue.c ./
@@ -619,6 +633,7 @@ cp ../Runtime/SimpleHash.c ./
 cp ../Runtime/ObjectHash.c ./
 cp ../Runtime/socket.c ./
 cp ../Runtime/mem.c ./
+cp ../Runtime/multicoregarbage.c ./
 cp ../Runtime/GenericHashtable.h ./
 cp ../Runtime/mem.h ./
 cp ../Runtime/multicoreruntime.h ./
@@ -627,9 +642,7 @@ cp ../Runtime/ObjectHash.h ./
 cp ../Runtime/Queue.h ./
 cp ../Runtime/runtime.h ./
 cp ../Runtime/SimpleHash.h ./
-cp ../Tilera/Runtime/*.c ./
-cp ../Tilera/Runtime/*.h ./
-cp ../Tilera/lib/* ./
+cp ../Runtime/multicoregarbage.h ./
 cp ../tmpbuilddirectory/*.c ./
 cp ../tmpbuilddirectory/*.h ./
-- 
2.34.1
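
Note (commentary, not part of the commit itself): with this patch applied, the Tilera build enables the collector by defining MULTICORE_GC, which links in multicoregarbage.c and swaps the mspace allocator for the free-memory-list allocator. A build that exercises the new GC path would be invoked roughly as follows; the benchmark file name and the core/config values are only illustrative:

    ./buildscript -multicore -numcore 56 -tilera -tileraconfig 8x7 -multicoregc MyBenchmark.java

The coordinator-side control flow this commit completes can be summarized by the sketch below. It only restates the structure of gc() in multicoregarbage.c; compact_own_blocks() is a hypothetical stand-in for the compacthelper() call with its moveHelper bookkeeping:

    /* Sketch of the compact-phase coordination on the startup core.
     * gcphase advances MARKPHASE -> COMPACTPHASE -> SUBTLECOMPACTPHASE
     * (entered only when no core has spare blocks left) -> FLUSHPHASE
     * -> FINISHPHASE. */
    while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
      compact_own_blocks();          /* compacthelper() on this core */
      if(gc_checkCoreStatus()) {
        break;                       /* every gccorestatus[i] == 0: done */
      }
      if(COMPACTPHASE == gcphase) {
        resolvePendingMoveRequest(); /* match spare blocks to pending moves */
      } else {
        compact2Heaptop();           /* no spare blocks: pack to the heap top */
      }
    }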