From a1f3184e7150ec8e135005d5498b21e8585b56fa Mon Sep 17 00:00:00 2001 From: jjenista Date: Tue, 19 Oct 2010 00:04:52 +0000 Subject: [PATCH] switch to a thread local stall sem for parent to give to children, tinker with prefetch in pool alloc, tinker with safe and fast garbage collection at scheduler level --- Robust/src/Benchmarks/oooJava/raytracer/runs | 2 + Robust/src/IR/Flat/BuildCode.java | 104 +++++++++++-------- Robust/src/Runtime/memPool.h | 10 +- Robust/src/Runtime/mlp_runtime.c | 3 + Robust/src/Runtime/mlp_runtime.h | 11 +- Robust/src/Runtime/psemaphore.c | 42 +++++--- Robust/src/Runtime/psemaphore.h | 8 +- Robust/src/Runtime/workschedule.c | 64 ++++++++---- 8 files changed, 154 insertions(+), 90 deletions(-) create mode 100755 Robust/src/Benchmarks/oooJava/raytracer/runs diff --git a/Robust/src/Benchmarks/oooJava/raytracer/runs b/Robust/src/Benchmarks/oooJava/raytracer/runs new file mode 100755 index 00000000..bc53383d --- /dev/null +++ b/Robust/src/Benchmarks/oooJava/raytracer/runs @@ -0,0 +1,2 @@ +#!/bin/bash +time ./tests.bin 1 diff --git a/Robust/src/IR/Flat/BuildCode.java b/Robust/src/IR/Flat/BuildCode.java index c8ad8077..f38efb4c 100644 --- a/Robust/src/IR/Flat/BuildCode.java +++ b/Robust/src/IR/Flat/BuildCode.java @@ -1991,7 +1991,7 @@ public class BuildCode { } } - } + } /* Check to see if we need to do a GC if this is a @@ -2349,7 +2349,7 @@ public class BuildCode { // initialize thread-local var to a the task's record, which is fused // with the param list output.println(" "); - output.println(" /* code of this task's body should use this to access the running task record */"); + output.println(" // code of this task's body should use this to access the running task record"); output.println(" runningSESE = &(___params___->common);"); output.println(" "); @@ -2963,21 +2963,23 @@ public class BuildCode { output.println(" "+pair.getSESE().getSESErecordName()+"* child = ("+ pair.getSESE().getSESErecordName()+"*) "+pair+";"); - output.println(" SESEcommon* common = (SESEcommon*) "+pair+";"); + output.println(" SESEcommon* childCom = (SESEcommon*) "+pair+";"); + if( state.COREPROF ) { output.println("#ifdef CP_EVENTID_TASKSTALLVAR"); output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLVAR, CP_EVENTTYPE_BEGIN );"); output.println("#endif"); } - output.println(" pthread_mutex_lock( &(common->lock) );"); - output.println(" if( common->doneExecuting == FALSE ) {"); - output.println(" stopforgc((struct garbagelist *)&___locals___);"); - output.println(" do {"); - output.println(" pthread_cond_wait( &(common->doneCond), &(common->lock) );"); - output.println(" } while( common->doneExecuting == FALSE );"); - output.println(" restartaftergc();"); - output.println(" }"); - output.println(" pthread_mutex_unlock( &(common->lock) );"); + + output.println(" pthread_mutex_lock( &(childCom->lock) );"); + output.println(" if( childCom->doneExecuting == FALSE ) {"); + output.println(" psem_reset( &runningSESEstallSem );"); + output.println(" childCom->parentsStallSem = &runningSESEstallSem;"); + output.println(" pthread_mutex_unlock( &(childCom->lock) );"); + output.println(" psem_take( &runningSESEstallSem, (struct garbagelist *)&___locals___ );"); + output.println(" } else {"); + output.println(" pthread_mutex_unlock( &(childCom->lock) );"); + output.println(" }"); // copy things we might have stalled for Iterator tdItr = cp.getCopySet( vst ).iterator(); @@ -2992,11 +2994,13 @@ public class BuildCode { output.println(" "+generateTemp( fmContext, td, null )+ " = child->"+vst.getAddrVar().getSafeSymbol()+";"); } + if( state.COREPROF ) { output.println("#ifdef CP_EVENTID_TASKSTALLVAR"); output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLVAR, CP_EVENTTYPE_END );"); output.println("#endif"); } + output.println(" }"); } @@ -3009,13 +3013,24 @@ public class BuildCode { // otherwise the dynamic write nodes will have the local var up-to-date output.println(" {"); output.println(" if( "+dynVar+"_srcSESE != NULL ) {"); - output.println(" SESEcommon* common = (SESEcommon*) "+dynVar+"_srcSESE;"); + + output.println(" SESEcommon* childCom = (SESEcommon*) "+dynVar+"_srcSESE;"); + if( state.COREPROF ) { output.println("#ifdef CP_EVENTID_TASKSTALLVAR"); output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLVAR, CP_EVENTTYPE_BEGIN );"); output.println("#endif"); } - output.println(" psem_take( &(common->stallSem) );"); + + output.println(" pthread_mutex_lock( &(childCom->lock) );"); + output.println(" if( childCom->doneExecuting == FALSE ) {"); + output.println(" psem_reset( &runningSESEstallSem );"); + output.println(" childCom->parentsStallSem = &runningSESEstallSem;"); + output.println(" pthread_mutex_unlock( &(childCom->lock) );"); + output.println(" psem_take( &runningSESEstallSem, (struct garbagelist *)&___locals___ );"); + output.println(" } else {"); + output.println(" pthread_mutex_unlock( &(childCom->lock) );"); + output.println(" }"); FlatMethod fmContext; if( currentSESE.getIsCallerSESEplaceholder() ) { @@ -3024,7 +3039,7 @@ public class BuildCode { fmContext = currentSESE.getfmBogus(); } - TypeDescriptor type=dynVar.getType(); + TypeDescriptor type = dynVar.getType(); String typeStr; if( type.isNull() ) { typeStr = "void*"; @@ -3037,11 +3052,13 @@ public class BuildCode { output.println(" "+generateTemp( fmContext, dynVar, null )+ " = *(("+typeStr+"*) ((void*)"+ dynVar+"_srcSESE + "+dynVar+"_srcOffset));"); + if( state.COREPROF ) { output.println("#ifdef CP_EVENTID_TASKSTALLVAR"); output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLVAR, CP_EVENTTYPE_END );"); output.println("#endif"); } + output.println(" }"); output.println(" }"); } @@ -3137,7 +3154,7 @@ public class BuildCode { output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLMEM, CP_EVENTTYPE_BEGIN );"); output.println("#endif"); } - output.println(" psem_take( &(rentry->parentStallSem) );"); + output.println(" psem_take( &(rentry->parentStallSem), (struct garbagelist *)&___locals___ );"); if( state.COREPROF ) { output.println("#ifdef CP_EVENTID_TASKSTALLMEM"); output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLMEM, CP_EVENTTYPE_END );"); @@ -3192,7 +3209,7 @@ public class BuildCode { output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLMEM, CP_EVENTTYPE_BEGIN );"); output.println("#endif"); } - output.println(" psem_take( &(rentry->parentStallSem) );"); + output.println(" psem_take( &(rentry->parentStallSem), (struct garbagelist *)&___locals___ );"); if( state.COREPROF ) { output.println("#ifdef CP_EVENTID_TASKSTALLMEM"); output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLMEM, CP_EVENTTYPE_END );"); @@ -3813,10 +3830,9 @@ public class BuildCode { // fill in common data output.println(" int localCount=0;"); output.println(" seseToIssue->common.classID = "+fsen.getIdentifier()+";"); - output.println(" psem_init( &(seseToIssue->common.stallSem) );"); + output.println(" seseToIssue->common.parentsStallSem = NULL;"); output.println(" seseToIssue->common.forwardList = createQueue();"); output.println(" seseToIssue->common.unresolvedDependencies = 10000;"); - output.println(" pthread_cond_init( &(seseToIssue->common.doneCond), NULL );"); output.println(" seseToIssue->common.doneExecuting = FALSE;"); output.println(" pthread_cond_init( &(seseToIssue->common.runningChildrenCond), NULL );"); output.println(" seseToIssue->common.numRunningChildren = 0;"); @@ -4323,17 +4339,16 @@ public class BuildCode { output.println("#endif"); } - String com = paramsprefix+"->common"; // this SESE cannot be done until all of its children are done // so grab your own lock with the condition variable for watching // that the number of your running children is greater than zero - output.println(" pthread_mutex_lock( &("+com+".lock) );"); - output.println(" if ( "+com+".numRunningChildren > 0 ) {"); - output.println(" stopforgc((struct garbagelist *)&___locals___);"); + output.println(" pthread_mutex_lock( &(runningSESE->lock) );"); + output.println(" if( runningSESE->numRunningChildren > 0 ) {"); + output.println(" stopforgc( (struct garbagelist *)&___locals___ );"); output.println(" do {"); - output.println(" pthread_cond_wait( &("+com+".runningChildrenCond), &("+com+".lock) );"); - output.println(" } while( "+com+".numRunningChildren > 0 );"); + output.println(" pthread_cond_wait( &(runningSESE->runningChildrenCond), &(runningSESE->lock) );"); + output.println(" } while( runningSESE->numRunningChildren > 0 );"); output.println(" restartaftergc();"); output.println(" }"); @@ -4375,16 +4390,25 @@ public class BuildCode { " = "+from+";"); } - // mark yourself done, your SESE data is now read-only - output.println(" "+com+".doneExecuting = TRUE;"); - output.println(" pthread_cond_signal( &("+com+".doneCond) );"); - output.println(" pthread_mutex_unlock( &("+com+".lock) );"); + // mark yourself done, your task data is now read-only + output.println(" runningSESE->doneExecuting = TRUE;"); + + // if parent is stalling on you, let them know you're done + if( (state.MLP && fsexn.getFlatEnter() != mlpa.getMainSESE()) || + (state.OOOJAVA && fsexn.getFlatEnter() != oooa.getMainSESE()) + ) { + output.println(" if( runningSESE->parentsStallSem != NULL ) {"); + output.println(" psem_give( runningSESE->parentsStallSem );"); + output.println(" }"); + } + + output.println(" pthread_mutex_unlock( &(runningSESE->lock) );"); // decrement dependency count for all SESE's on your forwarding list // FORWARD TODO - output.println(" while( !isEmpty( "+com+".forwardList ) ) {"); - output.println(" SESEcommon* consumer = (SESEcommon*) getItem( "+com+".forwardList );"); + output.println(" while( !isEmpty( runningSESE->forwardList ) ) {"); + output.println(" SESEcommon* consumer = (SESEcommon*) getItem( runningSESE->forwardList );"); output.println(" if(consumer->rentryIdx>0){"); @@ -4396,7 +4420,7 @@ public class BuildCode { output.println(" }"); - output.println(" if( atomic_sub_and_test(1, &(consumer->unresolvedDependencies)) ){"); + output.println(" if( atomic_sub_and_test( 1, &(consumer->unresolvedDependencies) ) ){"); output.println(" workScheduleSubmit( (void*)consumer );"); output.println(" }"); output.println(" }"); @@ -4420,19 +4444,13 @@ public class BuildCode { } - // if parent is stalling on you, let them know you're done - if( (state.MLP && fsexn.getFlatEnter() != mlpa.getMainSESE()) || - (state.OOOJAVA && fsexn.getFlatEnter() != oooa.getMainSESE()) - ) { - output.println(" psem_give( &("+paramsprefix+"->common.stallSem) );"); - } // last of all, decrement your parent's number of running children - output.println(" if( "+paramsprefix+"->common.parent != NULL ) {"); - output.println(" if (atomic_sub_and_test(1, &"+paramsprefix+"->common.parent->numRunningChildren)) {"); - output.println(" pthread_mutex_lock( &("+paramsprefix+"->common.parent->lock) );"); - output.println(" pthread_cond_signal( &("+paramsprefix+"->common.parent->runningChildrenCond) );"); - output.println(" pthread_mutex_unlock( &("+paramsprefix+"->common.parent->lock) );"); + output.println(" if( runningSESE->parent != NULL ) {"); + output.println(" if( atomic_sub_and_test( 1, &(runningSESE->parent->numRunningChildren) ) ) {"); + output.println(" pthread_mutex_lock ( &(runningSESE->parent->lock) );"); + output.println(" pthread_cond_signal ( &(runningSESE->parent->runningChildrenCond) );"); + output.println(" pthread_mutex_unlock( &(runningSESE->parent->lock) );"); output.println(" }"); output.println(" }"); diff --git a/Robust/src/Runtime/memPool.h b/Robust/src/Runtime/memPool.h index 90102e4e..51fbc1f2 100644 --- a/Robust/src/Runtime/memPool.h +++ b/Robust/src/Runtime/memPool.h @@ -109,14 +109,14 @@ static inline void* poolalloc( MemPool* p ) { // executed by the thread that owns the pool, so // it doesn't require an atomic op MemPoolItem* headCurrent = p->head; + MemPoolItem* next=headCurrent->next; - if( headCurrent->next == NULL ) { + if(next == NULL) { // only one item, so don't take from pool return RUNMALLOC( p->itemSize ); } - p->head = headCurrent->next; - + p->head = next; ////////////////////////////////////////////////////////// // @@ -131,8 +131,8 @@ static inline void* poolalloc( MemPool* p ) { // // but this built-in gcc one seems the most portable: ////////////////////////////////////////////////////////// - __builtin_prefetch( &(p->head->next) ); - + //__builtin_prefetch( &(p->head->next) ); + asm volatile( "prefetcht0 %0" :: "m" (next)); return headCurrent; } diff --git a/Robust/src/Runtime/mlp_runtime.c b/Robust/src/Runtime/mlp_runtime.c index ed2a11f7..3fab39ad 100644 --- a/Robust/src/Runtime/mlp_runtime.c +++ b/Robust/src/Runtime/mlp_runtime.c @@ -12,6 +12,9 @@ __thread SESEcommon* runningSESE; +__thread psemaphore runningSESEstallSem; + + void* mlpAllocSESErecord( int size ) { void* newrec = RUNMALLOC( size ); diff --git a/Robust/src/Runtime/mlp_runtime.h b/Robust/src/Runtime/mlp_runtime.h index e6482ba0..c7dce30f 100644 --- a/Robust/src/Runtime/mlp_runtime.h +++ b/Robust/src/Runtime/mlp_runtime.h @@ -85,9 +85,10 @@ typedef struct SESEcommon_t { // the task record so task dispatch works correctly! int classID; + // a parent waits on this semaphore when stalling on // this child, the child gives it at its SESE exit - psemaphore stallSem; + psemaphore* parentsStallSem; // the lock guards the following data SESE's @@ -110,9 +111,6 @@ typedef struct SESEcommon_t { struct SESEcommon_t* parent; - //psemaphore parentStallSem; - //pthread_cond_t stallDone; - int numMemoryQueue; int rentryIdx; int unresolvedRentryIdx; @@ -138,6 +136,11 @@ typedef struct SESEcommon_t { // running task extern __thread SESEcommon* runningSESE; +// there only needs to be one stall semaphore +// per thread, just give a reference to it to +// the task you are about to block on +extern __thread psemaphore runningSESEstallSem; + typedef struct REntry_t{ diff --git a/Robust/src/Runtime/psemaphore.c b/Robust/src/Runtime/psemaphore.c index 59ee4990..9ba7daa8 100644 --- a/Robust/src/Runtime/psemaphore.c +++ b/Robust/src/Runtime/psemaphore.c @@ -1,28 +1,42 @@ +#include +#include #include "psemaphore.h" -int psem_init( psemaphore* sem ) { - if( pthread_mutex_init( &(sem->lock), NULL ) == -1 ) { return -1; } - if( pthread_cond_init ( &(sem->cond), NULL ) == -1 ) { return -1; } +void psem_init( psemaphore* sem ) { + pthread_mutex_init( &(sem->lock), NULL ); + pthread_cond_init ( &(sem->cond), NULL ); sem->signaled = 0; - return 0; } -int psem_take( psemaphore* sem ) { - if( pthread_mutex_lock ( &(sem->lock) ) == -1 ) { return -1; } - while( !sem->signaled ) { - if( pthread_cond_wait ( &(sem->cond), &(sem->lock) ) == -1 ) { return -1; } +void psem_take( psemaphore* sem, struct garbagelist* gl ) { + pthread_mutex_lock( &(sem->lock) ); + if( !sem->signaled ) { + stopforgc( gl ); + do { + pthread_cond_wait( &(sem->cond), &(sem->lock) ); + } while( !sem->signaled ); + restartaftergc(); } - if( pthread_mutex_unlock( &(sem->lock) ) == -1 ) { return -1; } - return 0; + pthread_mutex_unlock( &(sem->lock) ); } -int psem_give( psemaphore* sem ) { - if( pthread_mutex_lock ( &(sem->lock) ) == -1 ) { return -1; } +void psem_give( psemaphore* sem ) { + pthread_mutex_lock ( &(sem->lock) ); sem->signaled = 1; - if( pthread_cond_signal ( &(sem->cond) ) == -1 ) { return -1; } - if( pthread_mutex_unlock( &(sem->lock) ) == -1 ) { return -1; } + pthread_cond_signal ( &(sem->cond) ); + pthread_mutex_unlock( &(sem->lock) ); +} + +void psem_reset( psemaphore* sem ) { + // this should NEVER BE CALLED if it is possible + // the semaphore is still in use, NEVER + if( pthread_mutex_trylock( &(sem->lock) ) == EBUSY ) { + exit( -1 ); + } + pthread_mutex_unlock( &(sem->lock) ); + sem->signaled = 0; } diff --git a/Robust/src/Runtime/psemaphore.h b/Robust/src/Runtime/psemaphore.h index 1575ff87..11bea196 100644 --- a/Robust/src/Runtime/psemaphore.h +++ b/Robust/src/Runtime/psemaphore.h @@ -2,6 +2,7 @@ #define ___PSEMAPHORE_H__ #include +#include "garbage.h" typedef struct psemaphore_t { @@ -11,9 +12,10 @@ typedef struct psemaphore_t { } psemaphore; -int psem_init( psemaphore* sem ); -int psem_take( psemaphore* sem ); -int psem_give( psemaphore* sem ); +void psem_init ( psemaphore* sem ); +void psem_take ( psemaphore* sem, struct garbagelist* gl ); +void psem_give ( psemaphore* sem ); +void psem_reset( psemaphore* sem ); #endif // ___PSEMAPHORE_H__ diff --git a/Robust/src/Runtime/workschedule.c b/Robust/src/Runtime/workschedule.c index 8ed10ef0..3595cb7e 100644 --- a/Robust/src/Runtime/workschedule.c +++ b/Robust/src/Runtime/workschedule.c @@ -5,6 +5,7 @@ #include "mem.h" #include "workschedule.h" #include "mlp_runtime.h" +#include "psemaphore.h" #include "coreprof/coreprof.h" #ifdef RCR #include "rcr_runtime.h" @@ -70,6 +71,7 @@ void* workerMain( void* arg ) { WorkerData* myData = (WorkerData*) arg; int oldState; int haveWork; + struct garbagelist emptygarbagelist={0,NULL}; // once-per-thread stuff CP_CREATE(); @@ -80,6 +82,12 @@ void* workerMain( void* arg ) { // oid with value 0 indicates an invalid object oid = myData->id + 1; + // each thread has a single semaphore that a running + // task should hand off to children threads it is + // going to stall on + psem_init( &runningSESEstallSem ); + + #ifdef RCR //allocate task record queue pthread_t thread; @@ -100,6 +108,20 @@ void* workerMain( void* arg ) { //pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, &oldState ); // then continue to process work + //NOTE: ADD US TO THE GC LIST + + pthread_mutex_lock(&gclistlock); + threadcount++; + litem.prev=NULL; + litem.next=list; + if(list!=NULL) + list->prev=&litem; + list=&litem; + pthread_mutex_unlock(&gclistlock); + + + //ALSO CREATE EMPTY GARBAGELIST TO PASS TO COLLECTOR + while( 1 ) { // wait for work @@ -108,9 +130,12 @@ void* workerMain( void* arg ) { #endif haveWork = FALSE; while( !haveWork ) { + //NOTE...Fix these things... pthread_mutex_lock( &systemLockOut ); if( headqi->next == NULL ) { pthread_mutex_unlock( &systemLockOut ); + //NOTE: Do a check to see if we need to collect.. + if (unlikely(needtocollect)) checkcollect(&emptygarbagelist); sched_yield(); continue; } else { @@ -126,32 +151,29 @@ void* workerMain( void* arg ) { CP_LOGEVENT( CP_EVENTID_WORKSCHEDGRAB, CP_EVENTTYPE_END ); #endif - pthread_mutex_lock(&gclistlock); - threadcount++; + //let GC see current work litem.seseCommon=(void*)workUnit; - litem.prev=NULL; - litem.next=list; - if(list!=NULL) - list->prev=&litem; - list=&litem; - seseCommon=(SESEcommon*)workUnit; - pthread_mutex_unlock(&gclistlock); + + //unclear how useful this is + if (unlikely(needtocollect)) checkcollect(&emptygarbagelist); workFunc( workUnit ); - - pthread_mutex_lock(&gclistlock); - threadcount--; - if (litem.prev==NULL) { - list=litem.next; - } else { - litem.prev->next=litem.next; - } - if (litem.next!=NULL) { - litem.next->prev=litem.prev; - } - pthread_mutex_unlock(&gclistlock); } + //NOTE: Remove from GC LIST DOWN HERE.... + pthread_mutex_lock(&gclistlock); + threadcount--; + if (litem.prev==NULL) { + list=litem.next; + } else { + litem.prev->next=litem.next; + } + if (litem.next!=NULL) { + litem.next->prev=litem.prev; + } + pthread_mutex_unlock(&gclistlock); + + //pthread_cleanup_pop( 0 ); return NULL; -- 2.34.1