From f8c7aef268e9007ecf8d55b9d4a522e81d6f8ad9 Mon Sep 17 00:00:00 2001 From: jzhou Date: Fri, 23 Jul 2010 16:28:06 +0000 Subject: [PATCH] some bug fixes --- .../Scheduling/GC/Fibheaps/FibHeapsBench.java | 2 +- .../Scheduling/GC/GCBench/GCBench.java | 31 +++++++--- .../Scheduling/GC/MTree/macrotest_tree.java | 4 +- .../Scheduling/GC/RayTracer/Composer.java | 16 +++-- .../Scheduling/GC/RayTracer/RayTracer.java | 4 +- .../GC/RayTracer/RayTracerBench.java | 5 +- .../Scheduling/GC/RayTracer/Scene.java | 2 +- .../Scheduling/GC/RayTracer/Sphere.java | 2 +- .../Scheduling/GC/RayTracer/TestRunner.java | 2 +- Robust/src/Runtime/GCSharedHash.c | 11 ++-- Robust/src/Runtime/MGCHash.c | 44 +++++++------ Robust/src/Runtime/mem.c | 8 ++- Robust/src/Runtime/multicoregarbage.c | 62 ++++++++++++++----- Robust/src/Runtime/multicoregarbage.h | 6 +- Robust/src/Runtime/multicoreruntime.c | 15 +++-- Robust/src/Runtime/multicoreruntime.h | 16 ++++- Robust/src/Runtime/multicoretask.c | 14 +++-- Robust/src/buildscript | 17 ++++- 18 files changed, 180 insertions(+), 81 deletions(-) diff --git a/Robust/src/Benchmarks/Scheduling/GC/Fibheaps/FibHeapsBench.java b/Robust/src/Benchmarks/Scheduling/GC/Fibheaps/FibHeapsBench.java index 056a870a..38497c76 100644 --- a/Robust/src/Benchmarks/Scheduling/GC/Fibheaps/FibHeapsBench.java +++ b/Robust/src/Benchmarks/Scheduling/GC/Fibheaps/FibHeapsBench.java @@ -7,7 +7,7 @@ task t1(StartupObject s{initialstate}) { //System.printString("task t1\n"); - int threadnum = 62; + int threadnum = 56; //62; for(int i = 0; i < threadnum; ++i) { TestRunner tr = new TestRunner(){run}; } diff --git a/Robust/src/Benchmarks/Scheduling/GC/GCBench/GCBench.java b/Robust/src/Benchmarks/Scheduling/GC/GCBench/GCBench.java index 52376d3c..f5f5958c 100644 --- a/Robust/src/Benchmarks/Scheduling/GC/GCBench/GCBench.java +++ b/Robust/src/Benchmarks/Scheduling/GC/GCBench/GCBench.java @@ -50,7 +50,7 @@ task t1(StartupObject s{initialstate}) { //System.printString("task t1\n"); - int 
threadnum = 62; + int threadnum = 56; //62; for(int i = 0; i < threadnum; ++i) { TestRunner gcb = new TestRunner(){run}; } @@ -83,11 +83,11 @@ public class TestRunner { public static final int kMaxTreeDepth;// = 16; public TestRunner() { - kStretchTreeDepth = 15;// 1Mb 18; // about 16Mb - kLongLivedTreeDepth = 14; // 1/4Mb 16; // about 4Mb - kArraySize = 125000; // 1/4Mb 500000; // about 4Mb + kStretchTreeDepth = 16;// 4Mb 18; // about 16Mb + kLongLivedTreeDepth = 14; // 1Mb 16; // about 4Mb + kArraySize = 250000; // 1Mb 500000; // about 4Mb kMinTreeDepth = 4; - kMaxTreeDepth = 16; + kMaxTreeDepth = 14; } // Nodes used by a tree of a given size @@ -131,6 +131,17 @@ public class TestRunner { + lTotalMemory + " bytes"); System.out.println(" Free memory=" + lFreeMemory + " bytes"); }*/ + + void tc1(int depth) { + Node tempTree = new Node(); + Populate(depth, tempTree); + tempTree = null; + } + + void tc2(int depth) { + Node tempTree = MakeTree(depth); + tempTree = null; + } void TimeConstruction(int depth) { Node root; @@ -142,17 +153,19 @@ public class TestRunner { " trees of depth " + depth); tStart = System.currentTimeMillis();*/ for (int i = 0; i < iNumIters; ++i) { - tempTree = new Node(); + /*tempTree = new Node(); Populate(depth, tempTree); - tempTree = null; + tempTree = null;*/ + tc1(depth); } /*tFinish = System.currentTimeMillis(); System.out.println("\tTop down construction took " + (tFinish - tStart) + "msecs"); tStart = System.currentTimeMillis();*/ for (int i = 0; i < iNumIters; ++i) { - tempTree = MakeTree(depth); - tempTree = null; + /*tempTree = MakeTree(depth); + tempTree = null;*/ + tc2(depth); } /*tFinish = System.currentTimeMillis(); System.out.println("\tBottom up construction took " diff --git a/Robust/src/Benchmarks/Scheduling/GC/MTree/macrotest_tree.java b/Robust/src/Benchmarks/Scheduling/GC/MTree/macrotest_tree.java index aeee2353..ecae8ed6 100644 --- a/Robust/src/Benchmarks/Scheduling/GC/MTree/macrotest_tree.java +++ 
b/Robust/src/Benchmarks/Scheduling/GC/MTree/macrotest_tree.java @@ -1,8 +1,8 @@ task t1(StartupObject s{initialstate}) { //System.printString("task t1\n"); - int threadnum = 62; - int size = 20000; + int threadnum = 56; //62; + int size = 40000; int nodenum = size*10; for(int i = 0; i < threadnum; ++i) { TestRunner tr = new TestRunner(i, size, nodenum){run}; diff --git a/Robust/src/Benchmarks/Scheduling/GC/RayTracer/Composer.java b/Robust/src/Benchmarks/Scheduling/GC/RayTracer/Composer.java index 0e1d97af..87cb8353 100644 --- a/Robust/src/Benchmarks/Scheduling/GC/RayTracer/Composer.java +++ b/Robust/src/Benchmarks/Scheduling/GC/RayTracer/Composer.java @@ -4,8 +4,10 @@ public class Composer { int numCore; int num_composed; - int image[][]; + //int image[][]; int heightPerCore; + public long result; + public long result1; public Composer(int numCore, int size) { @@ -14,16 +16,22 @@ public class Composer { heightPerCore = size/this.numCore; // set image size - this.image=new int[size][]; + //this.image=new int[size][]; + this.result = 0; + this.result1 = 0; } public boolean compose(TestRunner tr) { this.num_composed++; - int startidx=heightPerCore * tr.id; - int endidx=startidx + heightPerCore; + int startidx=0; //heightPerCore * tr.id; + int endidx=this.heightPerCore; //startidx + heightPerCore; for(int i = startidx; i < endidx; i++) { //this.image[i] = tr.image[i]; + for(int j = 0; j < this.heightPerCore*this.numCore; j++) { + this.result += tr.image[i][j]; + } } + this.result1 += tr.checksum; return this.num_composed == this.numCore; } } \ No newline at end of file diff --git a/Robust/src/Benchmarks/Scheduling/GC/RayTracer/RayTracer.java b/Robust/src/Benchmarks/Scheduling/GC/RayTracer/RayTracer.java index c457ec74..99f94bc1 100644 --- a/Robust/src/Benchmarks/Scheduling/GC/RayTracer/RayTracer.java +++ b/Robust/src/Benchmarks/Scheduling/GC/RayTracer/RayTracer.java @@ -71,7 +71,7 @@ public class RayTracer { // int datasizes[] = { 150, 500 }; int datasizes[]; - long 
checksum; + public long checksum; int size; @@ -232,7 +232,7 @@ public class RayTracer { row[x]= alpha | (red << 16) | (green << 8) | (blue); } // end for (x) - image[y]=row; + image[y-interval.yfrom]=row; } // end for (y) diff --git a/Robust/src/Benchmarks/Scheduling/GC/RayTracer/RayTracerBench.java b/Robust/src/Benchmarks/Scheduling/GC/RayTracer/RayTracerBench.java index 82b5ee74..ff9d1c4d 100644 --- a/Robust/src/Benchmarks/Scheduling/GC/RayTracer/RayTracerBench.java +++ b/Robust/src/Benchmarks/Scheduling/GC/RayTracer/RayTracerBench.java @@ -1,8 +1,8 @@ task t1(StartupObject s{initialstate}) { //System.printString("task t1\n"); - int threadnum = 62; - int size = threadnum * 30; + int threadnum = 56; //62; + int size = threadnum * 25; Composer comp = new Composer(threadnum, size){compose}; RayTracer rt = new RayTracer(); Scene scene = rt.createScene(); @@ -22,6 +22,7 @@ task t2(TestRunner tr{run}) { task t3(Composer comp{compose}, TestRunner tr{compose}) { //System.printString("task t3\n"); if(comp.compose(tr)) { + long r = comp.result; taskexit(comp{!compose}, tr{!compose}); } else { taskexit(tr{!compose}); diff --git a/Robust/src/Benchmarks/Scheduling/GC/RayTracer/Scene.java b/Robust/src/Benchmarks/Scheduling/GC/RayTracer/Scene.java index 483b1c99..38b3e409 100644 --- a/Robust/src/Benchmarks/Scheduling/GC/RayTracer/Scene.java +++ b/Robust/src/Benchmarks/Scheduling/GC/RayTracer/Scene.java @@ -23,7 +23,7 @@ -import java.util.Vector; +//import java.util.Vector; public class Scene //implements java.io.Serializable diff --git a/Robust/src/Benchmarks/Scheduling/GC/RayTracer/Sphere.java b/Robust/src/Benchmarks/Scheduling/GC/RayTracer/Sphere.java index 8fc39a83..24957b77 100644 --- a/Robust/src/Benchmarks/Scheduling/GC/RayTracer/Sphere.java +++ b/Robust/src/Benchmarks/Scheduling/GC/RayTracer/Sphere.java @@ -61,7 +61,7 @@ public class Sphere extends Primitive if (disc < 0.0) { return null; } - disc = (float) Math.sqrt(disc); + disc = (float) Math.sqrtf((float)disc); t = 
(b - disc < 1e-6) ? b + disc : b - disc; if (t < 1e-6) { return null; diff --git a/Robust/src/Benchmarks/Scheduling/GC/RayTracer/TestRunner.java b/Robust/src/Benchmarks/Scheduling/GC/RayTracer/TestRunner.java index fd54ffd0..6adaf50f 100644 --- a/Robust/src/Benchmarks/Scheduling/GC/RayTracer/TestRunner.java +++ b/Robust/src/Benchmarks/Scheduling/GC/RayTracer/TestRunner.java @@ -41,7 +41,7 @@ public class TestRunner extends RayTracer { } public void init() { - this.image=new int[this.size][]; + this.image=new int[this.size/this.numCore][]; } public void JGFvalidate() { diff --git a/Robust/src/Runtime/GCSharedHash.c b/Robust/src/Runtime/GCSharedHash.c index 92f1a61c..04e92ffb 100755 --- a/Robust/src/Runtime/GCSharedHash.c +++ b/Robust/src/Runtime/GCSharedHash.c @@ -21,6 +21,7 @@ #define INLINE inline __attribute__((always_inline)) #endif // #ifndef INLINE +#define GC_SHIFT_BITS 4 /* GCSHARED HASH ********************************************************/ @@ -302,7 +303,7 @@ mgcsharedhashtbl_t * mgcsharedhashCreate(unsigned int size, ctable->loadfactor = loadfactor; ctable->threshold = size*loadfactor; - ctable->mask = (size << 6)-1; + ctable->mask = (size << (GC_SHIFT_BITS))-1; ctable->structs = NULL ; //FREEMALLOC_NGC(1*sizeof(mgcliststruct_t)); ctable->numelements = 0; // Initial number of elements in the hash @@ -334,7 +335,7 @@ mgcsharedhashtbl_t * mgcsharedhashCreate_I(unsigned int size, ctable->loadfactor = loadfactor; ctable->threshold = size*loadfactor; - ctable->mask = (size << 6)-1; + ctable->mask = (size << (GC_SHIFT_BITS))-1; ctable->structs = NULL ; //FREEMALLOC_NGC(1*sizeof(mgcliststruct_t)); ctable->numelements = 0; // Initial number of elements in the hash @@ -386,7 +387,7 @@ int mgcsharedhashInsert(mgcsharedhashtbl_t * tbl, void * key, void * val) { //int keyto = ((unsigned INTPTR)key) % (tbl->size); //ptr=&tbl->table[keyto]; - ptr=&tbl->table[(((unsigned INTPTR)key)&tbl->mask)>>6]; + ptr=&tbl->table[(((unsigned 
INTPTR)key)&tbl->mask)>>(GC_SHIFT_BITS)]; if(ptr->key==0) { // the first time insert a value for the key @@ -420,7 +421,7 @@ int mgcsharedhashInsert_I(mgcsharedhashtbl_t * tbl, void * key, void * val) { //int keyto = ((unsigned INTPTR)key) % (tbl->size); //ptr=&tbl->table[keyto]; - ptr=&tbl->table[(((unsigned INTPTR)key)&tbl->mask)>>6]; + ptr=&tbl->table[(((unsigned INTPTR)key)&tbl->mask)>>(GC_SHIFT_BITS)]; if(ptr->key==0) { // the first time insert a value for the key @@ -454,7 +455,7 @@ INLINE void * mgcsharedhashSearch(mgcsharedhashtbl_t * tbl, void * key) { //int keyto = ((unsigned INTPTR)key) % (tbl->size); //mgcsharedhashlistnode_t * node=&tbl->table[keyto]; mgcsharedhashlistnode_t * node = - &tbl->table[(((unsigned INTPTR)key)&tbl->mask)>>6]; + &tbl->table[(((unsigned INTPTR)key)&tbl->mask)>>(GC_SHIFT_BITS)]; mgcsharedhashlistnode_t *top = &tbl->table[tbl->size]; do { diff --git a/Robust/src/Runtime/MGCHash.c b/Robust/src/Runtime/MGCHash.c index 6a5b2cc3..d765c02f 100755 --- a/Robust/src/Runtime/MGCHash.c +++ b/Robust/src/Runtime/MGCHash.c @@ -18,6 +18,7 @@ #endif #endif +#define GC_SHIFT_BITS 4 /* mgchash ********************************************************/ mgchashtable_t * mgchashCreate(unsigned int size, double loadfactor) { @@ -49,7 +50,7 @@ mgchashtable_t * mgchashCreate(unsigned int size, double loadfactor) { ctable->size = size; ctable->threshold=size*loadfactor; - ctable->mask = (size << 6)-1; + ctable->mask = (size << (GC_SHIFT_BITS))-1; //ctable->list = NULL; ctable->structs = (mgcliststruct_t*)RUNMALLOC(1*sizeof(mgcliststruct_t)); ctable->numelements = 0; // Initial number of elements in the hash @@ -77,13 +78,13 @@ void mgchashreset(mgchashtable_t * tbl) { BAMBOO_MEMSET_WH(tbl->table, '\0', sizeof(mgchashlistnode_t)*tbl->size); //} // TODO now never release any allocated memory, may need to be changed - mgcliststruct_t * next = tbl->structs; - while(/*tbl->structs->*/next!=NULL) { - /*mgcliststruct_t * next = tbl->structs->next; + 
//mgcliststruct_t * next = tbl->structs; + while(tbl->structs->next!=NULL) { + mgcliststruct_t * next = tbl->structs->next; RUNFREE(tbl->structs); - tbl->structs=next;*/ - next->num = 0; - next = next->next; + tbl->structs=next; + /*next->num = 0; + next = next->next;*/ } //tbl->structs->num = 0; tbl->numelements = 0; @@ -99,7 +100,7 @@ void mgchashInsert(mgchashtable_t * tbl, void * key, void *val) { mgchashResize(tbl, newsize); } - ptr=&tbl->table[(((unsigned INTPTR)key)&tbl->mask)>>6]; + ptr=&tbl->table[(((unsigned INTPTR)key)&tbl->mask)>>(GC_SHIFT_BITS)]; tbl->numelements++; if(ptr->key==0) { @@ -160,7 +161,7 @@ mgchashtable_t * mgchashCreate_I(unsigned int size, double loadfactor) { ctable->size = size; ctable->threshold=size*loadfactor; - ctable->mask = (size << 6)-1; + ctable->mask = (size << (GC_SHIFT_BITS))-1; //ctable->list = NULL; ctable->structs = (mgcliststruct_t*)RUNMALLOC_I(1*sizeof(mgcliststruct_t)); ctable->numelements = 0; // Initial number of elements in the hash @@ -177,7 +178,7 @@ void mgchashInsert_I(mgchashtable_t * tbl, void * key, void *val) { mgchashResize_I(tbl, newsize); } - ptr = &tbl->table[(((unsigned INTPTR)key)&tbl->mask)>>6]; + ptr = &tbl->table[(((unsigned INTPTR)key)&tbl->mask)>>(GC_SHIFT_BITS)]; tbl->numelements++; if(ptr->key==0) { @@ -212,7 +213,8 @@ void mgchashInsert_I(mgchashtable_t * tbl, void * key, void *val) { // Search for an address for a given oid INLINE void * mgchashSearch(mgchashtable_t * tbl, void * key) { //REMOVE HASH FUNCTION CALL TO MAKE SURE IT IS INLINED HERE] - mgchashlistnode_t *node = &tbl->table[(((unsigned INTPTR)key)&tbl->mask)>>6]; + mgchashlistnode_t *node = + &tbl->table[(((unsigned INTPTR)key)&tbl->mask)>>(GC_SHIFT_BITS)]; do { if(node->key == key) { @@ -245,7 +247,7 @@ unsigned int mgchashResize(mgchashtable_t * tbl, unsigned int newsize) { tbl->table = node; //Update the global hashtable upon resize() tbl->size = newsize; tbl->threshold = newsize * tbl->loadfactor; - mask = tbl->mask = (newsize 
<< 6) - 1; + mask = tbl->mask = (newsize << (GC_SHIFT_BITS)) - 1; //tbl->list = NULL; for(i = 0; i < oldsize; i++) { //Outer loop for each bin in hash table @@ -260,7 +262,7 @@ unsigned int mgchashResize(mgchashtable_t * tbl, unsigned int newsize) { break; //key = val =0 for element if not present within the hash table } - index = (((unsigned INTPTR)key) & mask) >> 6; + index = (((unsigned INTPTR)key) & mask) >> (GC_SHIFT_BITS); tmp=&node[index]; next = curr->next; // Insert into the new table @@ -320,7 +322,7 @@ unsigned int mgchashResize_I(mgchashtable_t * tbl, unsigned int newsize) { tbl->table = node; //Update the global hashtable upon resize() tbl->size = newsize; tbl->threshold = newsize * tbl->loadfactor; - mask = tbl->mask = (newsize << 6)-1; + mask = tbl->mask = (newsize << (GC_SHIFT_BITS))-1; //tbl->list = NULL; for(i = 0; i < oldsize; i++) { //Outer loop for each bin in hash table @@ -335,7 +337,7 @@ unsigned int mgchashResize_I(mgchashtable_t * tbl, unsigned int newsize) { break; //key = val =0 for element if not present within the hash table } - index = (((unsigned INTPTR)key) & mask) >>6; + index = (((unsigned INTPTR)key) & mask) >> (GC_SHIFT_BITS); tmp=&node[index]; next = curr->next; // Insert into the new table @@ -426,7 +428,9 @@ int MGCHashadd(struct MGCHash * thisvar, int data) { unsigned int hashkey; struct MGCNode *ptr; - hashkey = (unsigned int)data % thisvar->size; + int mask = (thisvar->size << (GC_SHIFT_BITS))-1; + hashkey = (((unsigned INTPTR)data)&mask)>>(GC_SHIFT_BITS); + //hashkey = (unsigned int)data % thisvar->size; ptr = &thisvar->bucket[hashkey]; struct MGCNode * prev = NULL; @@ -476,7 +480,9 @@ int MGCHashadd_I(struct MGCHash * thisvar, int data) { unsigned int hashkey; struct MGCNode *ptr; - hashkey = (unsigned int)data % thisvar->size; + int mask = (thisvar->size << (GC_SHIFT_BITS))-1; + hashkey = (((unsigned INTPTR)data)&mask)>>(GC_SHIFT_BITS); + //hashkey = (unsigned int)data % thisvar->size; ptr = &thisvar->bucket[hashkey]; 
struct MGCNode * prev = NULL; @@ -502,7 +508,9 @@ int MGCHashadd_I(struct MGCHash * thisvar, int data) { #endif int MGCHashcontains(struct MGCHash *thisvar, int data) { - unsigned int hashkey = (unsigned int)data % thisvar->size; + int mask = (thisvar->size << (GC_SHIFT_BITS))-1; + unsigned int hashkey = (((unsigned INTPTR)data)&mask)>>(GC_SHIFT_BITS); + //unsigned int hashkey = (unsigned int)data % thisvar->size; struct MGCNode *ptr = thisvar->bucket[hashkey].next; struct MGCNode *prev = NULL; diff --git a/Robust/src/Runtime/mem.c b/Robust/src/Runtime/mem.c index 0277ab7b..2263113c 100644 --- a/Robust/src/Runtime/mem.c +++ b/Robust/src/Runtime/mem.c @@ -22,12 +22,13 @@ void * mycalloc_share(struct garbagelist * stackptr, int m, int size) { void * p = NULL; - int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK); + //int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK); + int isize = (size & (~(BAMBOO_CACHE_LINE_MASK))) + (BAMBOO_CACHE_LINE_SIZE); bool hasgc = false; memalloc: BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); #ifdef DEBUG - tprintf("ask for shared mem: %x \n", isize); + tprintf("ask for shared mem: %x, %x, %x \n", isize, size, BAMBOO_CACHE_LINE_MASK); #endif p = BAMBOO_SHARE_MEM_CALLOC_I(m, isize); // calloc(m, isize); #ifdef DEBUG @@ -97,7 +98,8 @@ void mycalloc_free_ngc_I(void * ptr) { void * mycalloc_share(int m, int size) { void * p = NULL; - int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK); + //int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK); + int isize = (size & (~(BAMBOO_CACHE_LINE_MASK))) + (BAMBOO_CACHE_LINE_SIZE); BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); p = BAMBOO_SHARE_MEM_CALLOC_I(m, isize); // calloc(m, isize); if(p == NULL) { diff --git a/Robust/src/Runtime/multicoregarbage.c b/Robust/src/Runtime/multicoregarbage.c index 59525ed0..aeadde7e 100644 --- a/Robust/src/Runtime/multicoregarbage.c +++ b/Robust/src/Runtime/multicoregarbage.c @@ -666,7 +666,11 
@@ inline void initGC() { freeRuntimeHash(gcpointertbl); gcpointertbl = allocateRuntimeHash(20); #else + // TODO + //tprintf("++local hash table element: %x \n", gcpointertbl->numelements); mgchashreset(gcpointertbl); + // TODO + //tprintf("==local hash table element: %x \n", gcpointertbl->numelements); #endif //gcpointertbl = allocateMGCHash(20); @@ -682,7 +686,9 @@ inline void initGC() { } // Zero out the remaining bamboo_cur_msp // Only zero out the first 4 bytes of the remaining memory - if((bamboo_cur_msp != 0) && (bamboo_smem_zero_top == bamboo_cur_msp)) { + if((bamboo_cur_msp != 0) + && (bamboo_smem_zero_top == bamboo_cur_msp) + && (bamboo_smem_size > 0)) { *((int *)bamboo_cur_msp) = 0; } #ifdef GC_PROFILE @@ -697,8 +703,10 @@ inline void initGC() { gc_num_freespace = 0; gc_num_lobj = 0; gc_num_lobjspace = 0; -#endif -#ifdef GC_PROFILE_S +//#endif +//#ifdef GC_PROFILE_S + gc_num_liveobj = 0; + gc_num_forwardobj = 0; gc_num_profiles = NUMCORESACTIVE - 1; #endif } // void initGC() @@ -714,6 +722,7 @@ inline int loadbalance(int * heaptop) { tloads += gcloads[i]; } *heaptop = gcbaseva + tloads; + #ifdef DEBUG BAMBOO_DEBUGPRINT(0xdddd); BAMBOO_DEBUGPRINT_REG(tloads); @@ -802,12 +811,13 @@ inline bool cacheLObjs() { #endif // check if there are enough space to cache these large objs INTPTR dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -sumsize; - if(gcheaptop > dst) { + if((unsigned long long)gcheaptop > (unsigned long long)dst) { // do not have enough room to cache large objs #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe802); BAMBOO_DEBUGPRINT_REG(dst); BAMBOO_DEBUGPRINT_REG(gcheaptop); + BAMBOO_DEBUGPRINT_REG(sumsize); #endif return false; } @@ -1232,9 +1242,9 @@ inline void markObj(void * objptr) { /* marktime += BAMBOO_GET_EXE_TIME() - ttime; num_markrequest++;*/ -#ifdef GC_PROFILE_S +//#ifdef GC_PROFILE_S gc_num_forwardobj++; -#endif // GC_PROFILE_S +//#endif // GC_PROFILE_S #endif // GC_PROFILE gcself_numsendobjs++; MGCHashadd(gcforwardobjtbl, (int)objptr); @@ -1988,7 
+1998,7 @@ innermoveobj: #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe204); #endif -#ifdef GC_PROFILE_S +#ifdef GC_PROFILE//_S gc_num_liveobj++; #endif // marked obj, copy it to current heap top @@ -2827,7 +2837,7 @@ inline void gc_collect(struct garbagelist * stackptr) { printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(), udn_tile_coord_y()); #endif -#ifdef GC_PROFILE_S +#ifdef GC_PROFILE//_S /*BAMBOO_DEBUGPRINT(0xaaaa); BAMBOO_DEBUGPRINT_REG(gc_num_obj); BAMBOO_DEBUGPRINT_REG(gc_num_liveobj); @@ -2838,6 +2848,7 @@ inline void gc_collect(struct garbagelist * stackptr) { send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj, gc_num_liveobj, gc_num_forwardobj, false); } + gc_num_obj = 0; #endif // GC_PROFLIE_S flush(stackptr); #ifdef RAWPATH // TODO GC_DEBUG @@ -2891,7 +2902,7 @@ inline void gc_nocollect(struct garbagelist * stackptr) { printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(), udn_tile_coord_y()); #endif -#ifdef GC_PROFILE_S +#ifdef GC_PROFILE//_S /*BAMBOO_DEBUGPRINT(0xaaaa); BAMBOO_DEBUGPRINT_REG(gc_num_obj); BAMBOO_DEBUGPRINT_REG(gc_num_liveobj); @@ -2901,6 +2912,7 @@ inline void gc_nocollect(struct garbagelist * stackptr) { send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj, gc_num_liveobj, gc_num_forwardobj, false); } + gc_num_obj = 0; #endif // GC_PROFLIE_S flush(stackptr); #ifdef RAWPATH // TODO GC_DEBUG @@ -3290,7 +3302,8 @@ inline void gc(struct garbagelist * stackptr) { udn_tile_coord_y()); //dumpSMem(); #endif -#ifdef GC_PROFILE_S + //BAMBOO_DEBUGPRINT(0x1111); // TODO +/*#ifdef GC_PROFILE_S BAMBOO_DEBUGPRINT(0xaaaa); BAMBOO_DEBUGPRINT_REG(gc_num_obj); BAMBOO_DEBUGPRINT_REG(gc_num_liveobj); @@ -3303,7 +3316,10 @@ inline void gc(struct garbagelist * stackptr) { BAMBOO_DEBUGPRINT_REG(gc_num_freespace); BAMBOO_DEBUGPRINT(0xaaad); } -#endif // GC_PROFLIE_S + gc_num_obj = gc_num_liveobj; + gc_num_liveobj = 0; + gc_num_forwardobj = 0; +#endif // GC_PROFLIE_S*/ } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) { gcprocessing = true; gc_collect(stackptr); @@ -3353,6 
+3369,9 @@ inline void gc_profileEnd(void) { gcInfo->time[gcInfo->index++] = gc_num_freespace; gcInfo->time[gcInfo->index++] = gc_num_lobj; gcInfo->time[gcInfo->index++] = gc_num_lobjspace; + gcInfo->time[gcInfo->index++] = gc_num_obj; + gcInfo->time[gcInfo->index++] = gc_num_liveobj; + gcInfo->time[gcInfo->index++] = gc_num_forwardobj; gc_infoIndex++; if(gc_infoIndex == GCINFOLENGTH) { gc_infoOverflow = true; @@ -3363,7 +3382,7 @@ inline void gc_profileEnd(void) { // output the profiling data void gc_outputProfileData() { -#ifdef USEIO +/*#ifdef USEIO int i,j; unsigned long long totalgc = 0; @@ -3386,18 +3405,23 @@ void gc_outputProfileData() { } printf("\n\n total gc time: %lld \n", totalgc); -#else +#else*/ int i = 0; int j = 0; unsigned long long totalgc = 0; +#ifndef BAMBOO_MEMPROF BAMBOO_DEBUGPRINT(0xdddd); +#endif // output task related info for(i= 0; i < gc_infoIndex; i++) { GCInfo * gcInfo = gc_infoArray[i]; - unsigned long long tmp = 0; +#ifdef BAMBOO_MEMPROF + unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; //0; +#else + unsigned long long tmp = 0; BAMBOO_DEBUGPRINT(0xddda); - for(j = 0; j < gcInfo->index - 4; j++) { + for(j = 0; j < gcInfo->index - 7; j++) { BAMBOO_DEBUGPRINT(gcInfo->time[j]); BAMBOO_DEBUGPRINT(gcInfo->time[j]-tmp); BAMBOO_DEBUGPRINT(0xdddb); @@ -3406,22 +3430,30 @@ void gc_outputProfileData() { tmp = (tmp-gcInfo->time[0]); BAMBOO_DEBUGPRINT_REG(tmp); BAMBOO_DEBUGPRINT(0xdddc); + BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 7]); + BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 6]); + BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 5]); BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 4]); BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 3]); BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 2]); BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 1]); BAMBOO_DEBUGPRINT(0xddde); +#endif totalgc += tmp; } +#ifndef BAMBOO_MEMPROF BAMBOO_DEBUGPRINT(0xdddf); +#endif BAMBOO_DEBUGPRINT_REG(totalgc); if(gc_infoOverflow) { 
BAMBOO_DEBUGPRINT(0xefee); } +#ifndef BAMBOO_MEMPROF BAMBOO_DEBUGPRINT(0xeeee); #endif +//#endif } #endif // #ifdef GC_PROFILE diff --git a/Robust/src/Runtime/multicoregarbage.h b/Robust/src/Runtime/multicoregarbage.h index 5e5849f8..b7f73b9a 100644 --- a/Robust/src/Runtime/multicoregarbage.h +++ b/Robust/src/Runtime/multicoregarbage.h @@ -27,7 +27,7 @@ #define GCINFOLENGTH 100 typedef struct gc_info { - unsigned long long time[12]; + unsigned long long time[15]; int index; } GCInfo; @@ -43,12 +43,12 @@ unsigned int gc_num_lobj; /*unsigned long long flushstalltime; unsigned long long flushstalltime_i; int num_mapinforequest_i;*/ -#ifdef GC_PROFILE_S +//#ifdef GC_PROFILE_S unsigned int gc_num_liveobj; unsigned int gc_num_obj; unsigned int gc_num_forwardobj; int gc_num_profiles; -#endif // GC_PROFILE_S +//#endif // GC_PROFILE_S #endif // GC_PROFILE diff --git a/Robust/src/Runtime/multicoreruntime.c b/Robust/src/Runtime/multicoreruntime.c index 7d278e2c..1746c33f 100644 --- a/Robust/src/Runtime/multicoreruntime.c +++ b/Robust/src/Runtime/multicoreruntime.c @@ -212,15 +212,15 @@ void CALL01(___System______printString____L___String___,struct ___String___ * __ void * allocate_new(void * ptr, int type) { struct ___Object___ * v=(struct ___Object___ *)FREEMALLOC((struct garbagelist *) ptr, classsize[type]); #ifdef DEBUG - printf("(%x,%x): new object: %x \n", udn_tile_coord_x(), - udn_tile_coord_y(), v); + printf("(%x,%x): new object: %x (%d, %x) \n", udn_tile_coord_x(), + udn_tile_coord_y(), (int)v, type, classsize[type]); #endif v->type=type; v->version = 0; v->lock = NULL; v->lockcount = 0; initlock(v); -#ifdef GC_PROFILE_S +#ifdef GC_PROFILE//_S extern unsigned int gc_num_obj; gc_num_obj++; #endif @@ -232,8 +232,9 @@ void * allocate_new(void * ptr, int type) { struct ArrayObject * allocate_newarray(void * ptr, int type, int length) { struct ArrayObject * v=(struct ArrayObject *)FREEMALLOC((struct garbagelist *) ptr, sizeof(struct ArrayObject)+length*classsize[type]); 
#ifdef DEBUG - printf("(%x,%x): new array object: %x \n", udn_tile_coord_x(), - udn_tile_coord_y(), v); + printf("(%x,%x): new array object: %x (%d, %x)\n", udn_tile_coord_x(), + udn_tile_coord_y(), (int)v, type, + sizeof(struct ArrayObject)+length*classsize[type]); #endif v->type=type; v->version = 0; @@ -243,7 +244,7 @@ struct ArrayObject * allocate_newarray(void * ptr, int type, int length) { } v->___length___=length; initlock(v); -#ifdef GC_PROFILE_S +#ifdef GC_PROFILE//_S extern unsigned int gc_num_obj; gc_num_obj++; #endif @@ -316,6 +317,8 @@ void failedboundschk() { #ifndef MULTICORE printf("Array out of bounds\n"); longjmp(error_handler,2); +#else + BAMBOO_EXIT(0xa001); #endif #endif } diff --git a/Robust/src/Runtime/multicoreruntime.h b/Robust/src/Runtime/multicoreruntime.h index 304f2ced..80e77da3 100644 --- a/Robust/src/Runtime/multicoreruntime.h +++ b/Robust/src/Runtime/multicoreruntime.h @@ -213,7 +213,7 @@ typedef enum { GCLOBJREQUEST, // 0xF4 GCLOBJINFO, // 0xF5 GCLOBJMAPPING, // 0xF6 -#ifdef GC_PROFILE_S +#ifdef GC_PROFILE//_S GCPROFILES, // 0xF7 #endif #endif @@ -278,6 +278,13 @@ struct Queue * totransobjqueue; // queue to hold objs to be transferred #define BAMBOO_BASE_VA 0xd000000 #endif // MULTICORE_GC #endif // TILERA_BME + +#ifdef BAMBOO_MEMPROF +#define GC_BAMBOO_NUMCORES 56 +#else +#define GC_BAMBOO_NUMCORES 62 +#endif + #ifdef GC_DEBUG #include "structdefs.h" #define BAMBOO_NUM_PAGES (NUMCORES4GC*(2+1)+3) @@ -286,9 +293,9 @@ struct Queue * totransobjqueue; // queue to hold objs to be transferred #define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_PAGE_SIZE) *(BAMBOO_NUM_PAGES)) #else #ifdef GC_LARGESHAREDHEAP -#define BAMBOO_NUM_PAGES (62*(2+7)) +#define BAMBOO_NUM_PAGES ((GC_BAMBOO_NUMCORES)*(2+2)) #else -#define BAMBOO_NUM_PAGES (62*(2+3)) //(15 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5) 3G +#define BAMBOO_NUM_PAGES ((GC_BAMBOO_NUMCORES)*(2+3)) //(15 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5) 3G #endif #ifdef GC_LARGEPAGESIZE #define 
BAMBOO_PAGE_SIZE (4 * 1024 * 1024) // (4096) @@ -410,6 +417,9 @@ INLINE void initCommunication(void); INLINE void fakeExecution(void); INLINE void terminate(void); INLINE void initlock(struct ___Object___ * v); +#ifdef BAMBOO_MEMPROF +INLINE void terminatememprof(void); +#endif // lock related functions bool getreadlock(void* ptr); diff --git a/Robust/src/Runtime/multicoretask.c b/Robust/src/Runtime/multicoretask.c index 84e54abf..f7a830b5 100644 --- a/Robust/src/Runtime/multicoretask.c +++ b/Robust/src/Runtime/multicoretask.c @@ -303,7 +303,7 @@ void initruntimedata() { -bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8); gcmem_mixed_usedmem = 0; #endif -#ifdef GC_PROFILE_S +#ifdef GC_PROFILE//_S gc_num_obj = 0; gc_num_liveobj = 0; gc_num_forwardobj = 0; @@ -599,7 +599,9 @@ void checkCoreStatus() { BAMBOO_DEBUGPRINT(BAMBOO_GET_EXE_TIME() - bamboo_start_time); //BAMBOO_DEBUGPRINT_REG(total_num_t6); // TODO for test +#ifndef BAMBOO_MEMPROF BAMBOO_DEBUGPRINT(0xbbbbbbbb); +#endif #endif // profile mode, send msgs to other cores to request pouring // out progiling data @@ -650,6 +652,9 @@ void checkCoreStatus() { #endif // gc_profile mode, ourput gc prfiling data +#ifdef BAMBOO_MEMPROF + //terminatememprof(); +#endif // #ifndef BAMBOO_MEMPROF #ifdef MULTICORE_GC #ifdef GC_PROFILE gc_outputProfileData(); @@ -698,6 +703,7 @@ inline void run(void * arg) { BAMBOO_DEBUGPRINT_REG(corenum); BAMBOO_DEBUGPRINT(STARTUPCORE); #endif + //BAMBOO_DEBUGPRINT(0xeeee); // TODO // initialize runtime data structures initruntimedata(); @@ -1940,7 +1946,7 @@ INLINE int checkMsgLength_I(int size) { #ifdef MULTICORE_GC case GCFINISHMARK: case GCMOVESTART: -#ifdef GC_PROFILE_S +#ifdef GC_PROFILE//_S case GCPROFILES: #endif #endif @@ -2892,7 +2898,7 @@ INLINE void processmsg_gclobjmapping_I() { mgcsharedhashInsert_I(gcsharedptbl, data1, data2); } -#ifdef GC_PROFILE_S +#ifdef GC_PROFILE//_S INLINE void processmsg_gcprofiles_I() { int data1 = msgdata[msgdataindex]; MSG_INDEXINC_I(); @@ -3181,7 +3187,7 
@@ processmsg: break; } // case GCLOBJMAPPING -#ifdef GC_PROFILE_S +#ifdef GC_PROFILE//_S case GCPROFILES: { // received a gcprofiles msg processmsg_gcprofiles_I(); diff --git a/Robust/src/buildscript b/Robust/src/buildscript index 9424807f..54fd2b2c 100755 --- a/Robust/src/buildscript +++ b/Robust/src/buildscript @@ -52,7 +52,7 @@ echo "-gcsmallpagesize set the gc shared memory to use small page size (should b echo "-gclargepagesize set the gc shared memory to use large page size (should be used together with -multicoregc)" echo "-gclargesharedheap set the gc shared memory as large (should be used together with -multicoregc)" echo -gcprofile build with gcprofile options -echo -gcprofile_s build with gcprofile_s options +echo "-tilera_memprof build the memprof version (should be used together with -tilera_xx) " echo -accurateprofile build with accurate profile information including pre/post task processing info echo "-useio use standard io to output profiling data (should be used together with -raw and -profile), it only works with single core version" echo @@ -128,6 +128,7 @@ RAWFLAG=false TILERAFLAG=false TILERABMEFLAG=false TILERAZLINUXFLAG=false +TILERAMEMPROFFLAG=false TILERACONFIG='' CACHEFLUSHFLAG=false RAWCONFIG='' @@ -363,6 +364,9 @@ elif [[ $1 = '-tilera_zlinux' ]] then TILERAFLAG=true TILERAZLINUXFLAG=true +elif [[ $1 = '-tilera_memprof' ]] +then +TILERAMEMPROFFLAG=true elif [[ $1 = '-tileraconfig' ]] then TILERACONFIG="$2" @@ -746,6 +750,7 @@ then # TILERAZLINUXFLAG TILERA_INDIR="ZLinux" MAKEFILE="Makefile.tilera.$TILERACONFIG" fi + mkdir $TILERADIR cd $TILERADIR make clean @@ -753,6 +758,12 @@ rm ./* export TILERACFLAGS="-DTASK -DMULTICORE -DCLOSE_PRINT -DTILERA" +if $TILERAMEMPROFFLAG +then # not only with 1 core + PCIHVC="$PCIHVC.memprof" + TILERACFLAGS="${TILERACFLAGS} -DBAMBOO_MEMPROF" +fi + if $TILERABMEFLAG then # TILERABMEFLAG TILERACFLAGS="${TILERACFLAGS} -DTILERA_BME" @@ -909,6 +920,10 @@ if $TILERAZLINUXFLAG then # TILERAZLINUXFLAG cp
../Tilera/Runtime/$TILERA_INDIR/*.S ./ fi +#if $TILERAMEMPROFFLAG +#then # TILERAMEMPROFFLAG +cp ../Tilera/Runtime/$TILERA_INDIR/linux_client.c ./ +#fi cp ../Tilera/lib/* ./ cp ../$tmpbuilddirectory/*.c ./ cp ../$tmpbuilddirectory/*.h ./ -- 2.34.1