Add code to change the cache policy for the mutator. In each round of gc, decide...

author jzhou <jzhou>

Wed, 15 Sep 2010 22:38:32 +0000 (22:38 +0000)

committer jzhou <jzhou>

Wed, 15 Sep 2010 22:38:32 +0000 (22:38 +0000)
author jzhou <jzhou>
Wed, 15 Sep 2010 22:38:32 +0000 (22:38 +0000)
committer jzhou <jzhou>
Wed, 15 Sep 2010 22:38:32 +0000 (22:38 +0000)
diff --git a/Robust/src/Runtime/bamboo/multicorecache.h b/Robust/src/Runtime/bamboo/multicorecache.h

new file mode 100644 (file)

index 0000000..ee85342
--- /dev/null
+++ b/Robust/src/Runtime/bamboo/multicorecache.h
@@ -0,0 +1,41 @@
+#ifndef BAMBOO_MULTICORE_CACHE_H
+#define BAMBOO_MULTICORE_CACHE_H
+
+#ifdef MULTICORE_GC
+#ifdef GC_CACHE_ADAPT
+#define GC_TILE_TIMER_EVENT_SETTING 100000000 // value handed to bamboo_tile_timer_set_next_event() at the end of gc();
+                                              // should be consistent with runtime_arch.h
+#define GC_NUM_SAMPLING 24
+#define GC_CACHE_ADAPT_HOTPAGE_THRESHOLD 1000 // a core is counted as "hot" for a page when its sampled count exceeds this
+#define GC_CACHE_ADAPT_ACCESS_THRESHOLD  30 // a page with more hot cores than this gets BAMBOO_CACHE_MODE_HASH
+
+// Cache policy of one page, packed into a single word; should be consistent with multicoreruntime.h
+typedef union
+{
+  unsigned int word; // the whole policy as one value (this is what is stored in gccachepolicytbl)
+  struct // bit-field view of the same word; the field widths below add up to 32 bits
+  {
+    // policy type: one of the BAMBOO_CACHE_MODE_* values defined below
+    unsigned int cache_mode   : 2;
+    // Reserved.
+    unsigned int __reserved_0 : 6;
+    // Location Override Target Y (cacheAdapt_master stores the core's y coordinate + 1)
+    unsigned int lotar_y      : 4;
+    // Reserved.
+    unsigned int __reserved_1 : 4;
+    // Location Override Target X (cacheAdapt_master stores the core's x coordinate + 1)
+    unsigned int lotar_x      : 4;
+    // Reserved.
+    unsigned int __reserved_2 : 12;
+  };
+} bamboo_cache_policy_t;
+
+#define BAMBOO_CACHE_MODE_LOCAL 0
+#define BAMBOO_CACHE_MODE_HASH 1 // chosen by cacheAdapt_master for pages hot on many cores ("hfh")
+#define BAMBOO_CACHE_MODE_NONE 2
+#define BAMBOO_CACHE_MODE_COORDS 3 // chosen by cacheAdapt_master to cache a page on the core given by lotar_x/lotar_y
+
+#endif // GC_CACHE_ADAPT
+#endif // MULTICORE_GC
+
+#endif // BAMBOO_MULTICORE_CACHE_H
diff --git a/Robust/src/Runtime/bamboo/multicoregarbage.c b/Robust/src/Runtime/bamboo/multicoregarbage.c

index c0027dee9facdd45c68945c72c9aad200728094c..530a289e2df19968ad9e6b6f062412bd30a7d4fe 100644 (file)
--- a/Robust/src/Runtime/bamboo/multicoregarbage.c
+++ b/Robust/src/Runtime/bamboo/multicoregarbage.c
@@ -8,13 +8,6 @@
  #include "ObjectHash.h"
  #include "GCSharedHash.h"
  
-// TODO for profiling the flush phase
-#ifdef GC_PROFILE
-/*int num_mapinforequest;
-int num_markrequest;
-unsigned long long marktime;*/
-#endif
-
  extern int corenum;
  extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
  extern int numqueues[][NUMCLASSES];
@@ -64,6 +57,19 @@ struct lobjpointerblock *gclobjtail2=NULL;
  int gclobjtailindex2=0;
  struct lobjpointerblock *gclobjspare=NULL;
  
+#ifdef GC_CACHE_ADAPT
+typedef struct gc_cache_revise_info { // bookkeeping used during compaction to carry per-page sampling counts from source ("orig") pages over to destination ("to") pages
+  int orig_page_start_va; // source address at which accounting for the current orig page began (taken from orig->ptr; initOrig_Dst sets it to -1)
+  int orig_page_end_va; // end address of the page containing that source position; crossing it starts a new orig page
+  int orig_page_index; // index of the current orig page, used to index each core's gccachesamplingtbl
+  int to_page_start_va; // destination address at which accounting for the current to page began (taken from to->ptr)
+  int to_page_end_va; // end address of the current destination page
+  int to_page_index; // index of the current to page, used to index each core's gccachesamplingtbl_r
+  int revised_sampling[NUMCORESACTIVE]; // NOTE(review): not referenced by the code visible in this change -- confirm whether it is still needed
+} gc_cache_revise_info_t;
+gc_cache_revise_info_t gc_cache_revise_infomation; // single global instance; the name is misspelled ("infomation") but is used under this spelling throughout the file
+#endif// GC_CACHE_ADAPT
+
  #ifdef GC_DEBUG
  // dump whole mem in blocks
  inline void dumpSMem() {
@@ -688,13 +694,6 @@ inline void initGC() {
         *((int *)bamboo_cur_msp) = 0;
    }*/
  #ifdef GC_PROFILE
-  // TODO
-  /*num_mapinforequest = 0;
-  num_mapinforequest_i = 0;
-  flushstalltime = 0;
-  flushstalltime_i = 0;
-  num_markrequest = 0;
-  marktime = 0;*/
    gc_num_livespace = 0;
    gc_num_freespace = 0;
    gc_num_lobj = 0;
@@ -1114,8 +1113,6 @@ inline void moveLObjs() {
                 }
                 // fill the remaining space with -2 padding
                 BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
-               // zero out original mem caching the lobj
-               //BAMBOO_MEMSET_WH(gcheaptop, '\0', size); // TODO ??
  #ifdef DEBUG
                 BAMBOO_DEBUGPRINT(0xea06);
                 BAMBOO_DEBUGPRINT_REG(gcheaptop);
@@ -1188,14 +1185,6 @@ inline void moveLObjs() {
      }
    } while(true);
  
-  // TODO
-  /*unsigned long long gc_num_livespace = 0;
-  for(int tmpi = 0; tmpi < gcnumblock; tmpi++) {
-       gc_num_livespace += bamboo_smemtbl[tmpi];
-  }
-  BAMBOO_DEBUGPRINT_REG(gc_num_livespace);
-  BAMBOO_DEBUGPRINT_REG(bamboo_free_block);*/
-
  #ifdef GC_PROFILE
    // check how many live space there are
    gc_num_livespace = 0;
@@ -1235,16 +1224,9 @@ inline void markObj(void * objptr) {
  #endif
        // check if this obj has been forwarded
        if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
-#ifdef GC_PROFILE
-               // TODO unsigned long long ttime = BAMBOO_GET_EXE_TIME();
-#endif
                 // send a msg to host informing that objptr is active
                 send_msg_2(host, GCMARKEDOBJ, objptr, /*BAMBOO_NUM_OF_CORE,*/ false);
  #ifdef GC_PROFILE
-               // TODO
-               /*
-               marktime += BAMBOO_GET_EXE_TIME() - ttime;
-               num_markrequest++;*/
                 gc_num_forwardobj++;
  #endif // GC_PROFILE
                 gcself_numsendobjs++;
@@ -1845,7 +1827,7 @@ innernextSBlock:
  #endif
      // not start from the very beginning
      orig->blockbase = gcsbstarttbl[orig->sblockindex];
-  }       // if(gcsbstarttbl[orig->sblockindex] == -1) else ...
+  }  // if(gcsbstarttbl[orig->sblockindex] == -1) else ...
  
    // setup information for this sblock
    orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
@@ -1882,6 +1864,15 @@ inline bool initOrig_Dst(struct moveHelper * orig,
    BAMBOO_DEBUGPRINT_REG(to->base);
  #endif
    to->ptr = to->base + to->offset;
+#ifdef GC_CACHE_ADAPT
+  // initialize the gc_cache_revise_information
+  gc_cache_revise_infomation.to_page_start_va = to->ptr;
+  gc_cache_revise_infomation.to_page_end_va = (BAMBOO_PAGE_SIZE)*
+       ((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
+  gc_cache_revise_infomation.to_page_index = 
+       (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
+  gc_cache_revise_infomation.orig_page_start_va = -1; 
+#endif // GC_CACHE_ADAPT
  
    // init the orig ptr
    orig->numblocks = 0;
@@ -1924,11 +1915,12 @@ inline bool initOrig_Dst(struct moveHelper * orig,
    BAMBOO_DEBUGPRINT(0xef06);
    BAMBOO_DEBUGPRINT_REG(orig->base);
  #endif
+
    return true;
  } // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
  
  inline void nextBlock(struct moveHelper * to) {
-  to->top = to->bound + BAMBOO_CACHE_LINE_SIZE;       // header!
+  to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
    to->bound += BAMBOO_SMEM_SIZE;
    to->numblocks++;
    BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
@@ -1945,11 +1937,9 @@ inline bool moveobj(struct moveHelper * orig,
    }
  
  #ifdef DEBUG
-  //if((int)orig->ptr > 0x10767a00) {
    BAMBOO_DEBUGPRINT(0xe201);
    BAMBOO_DEBUGPRINT_REG(orig->ptr);
    BAMBOO_DEBUGPRINT_REG(to->ptr);
-  //}
  #endif
  
    int type = 0;
@@ -1960,6 +1950,32 @@ innermoveobj:
    while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
      orig->ptr = (int*)(orig->ptr) + 1;
    }
+#ifdef GC_CACHE_ADAPT
+  if(orig->ptr >= gc_cache_revise_infomation.orig_page_end_va) {
+       // end of an orig page
+       // compute the impact of this page for the new page
+       float tmp_factor = 
+         ((float)(to->ptr-gc_cache_revise_infomation.to_page_start_va))/
+         ((float)(BAMBOO_PAGE_SIZE));
+       for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
+         ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r)
+                )[gc_cache_revise_infomation.to_page_index] += (int)(
+                  ((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
+               gc_cache_revise_infomation.orig_page_index]*tmp_factor);
+         // TODO
+/*       if(((gc_cache_revise_infomation.orig_page_start_va-gcbaseva)/(BAMBOO_PAGE_SIZE))*(BAMBOO_PAGE_SIZE)+gcbaseva == 0xd180000) {
+               tprintf("0xd180000 -> %x %d, %d, %d\n",(int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.to_page_index), (int)(((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index]*tmp_factor), (int)(tmp_factor*100000), (int)(to->ptr-gc_cache_revise_infomation.to_page_start_va));
+         }*/
+       }
+       // prepare for an new orig page
+       gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
+       gc_cache_revise_infomation.orig_page_end_va = gcbaseva + 
+         (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
+       gc_cache_revise_infomation.orig_page_index = 
+         (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
+       gc_cache_revise_infomation.to_page_start_va = to->ptr;
+  }
+#endif
    if((orig->ptr >= orig->bound) || (orig->ptr == orig->blockbound)) {
      if(!nextSBlock(orig)) {
        // finished, no more data
@@ -2003,9 +2019,7 @@ innermoveobj:
                                   // should be able to across it
    if((mark & MARKED) != 0) {
  #ifdef DEBUG
-//if((int)orig->ptr > 0x10760f00) {
      BAMBOO_DEBUGPRINT(0xe204);
-//}
  #endif
  #ifdef GC_PROFILE
         gc_num_liveobj++;
@@ -2019,7 +2033,39 @@ innermoveobj:
        to->offset += to->bound - to->top;
        BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
        (*((int*)(to->base))) = to->offset;
+#ifdef GC_CACHE_ADAPT
+         int tmp_ptr = to->ptr;
+#endif // GC_CACHE_ADAPT
        nextBlock(to);
+#ifdef GC_CACHE_ADAPT
+         if((to->base+to->bound) >= gc_cache_revise_infomation.to_page_end_va) {
+         // end of an to page, wrap up its information
+         float tmp_factor = 
+               ((float)(tmp_ptr-gc_cache_revise_infomation.to_page_start_va))/
+               ((float)(BAMBOO_PAGE_SIZE));
+         for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
+               ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r)
+                )[gc_cache_revise_infomation.to_page_index] += (int)(
+                 ((int*)((void*)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
+                 gc_cache_revise_infomation.orig_page_index]*tmp_factor);
+               // TODO
+/*       if(((gc_cache_revise_infomation.orig_page_start_va-gcbaseva)/(BAMBOO_PAGE_SIZE))*(BAMBOO_PAGE_SIZE)+gcbaseva == 0xd180000) {
+               tprintf("0xd180000 -> %x %d, %d, %d\n",(int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.to_page_index), (int)(((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index]*tmp_factor), (int)(tmp_factor*100000), (int)(to->ptr-gc_cache_revise_infomation.to_page_start_va));
+         }*/
+         }
+         // prepare for an new to page
+         gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
+         gc_cache_revise_infomation.orig_page_end_va = gcbaseva + 
+               (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
+         gc_cache_revise_infomation.orig_page_index = 
+               (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
+         gc_cache_revise_infomation.to_page_start_va = to->ptr;
+         gc_cache_revise_infomation.to_page_end_va = gcbaseva + 
+               (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
+         gc_cache_revise_infomation.to_page_index = 
+               (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
+       }
+#endif // GC_CACHE_ADAPT
        if(stopblock == to->numblocks) {
                 // already fulfilled the block
                 return true;
@@ -2047,18 +2093,11 @@ innermoveobj:
  #endif
         //MGCHashadd_I(gcpointertbl, orig->ptr, to->ptr);
         if(isremote) {
-#ifdef GC_PROFILE
-       //unsigned long long ttimet = BAMBOO_GET_EXE_TIME();
-#endif
           // add to the sharedptbl
           if(gcsharedptbl != NULL) {
                 //GCSharedHashadd_I(gcsharedptbl, orig->ptr, to->ptr);
                 mgcsharedhashInsert_I(gcsharedptbl, orig->ptr, to->ptr);
-               //num_mapinforequest++; // TODO
           }
-#ifdef GC_PROFILE
-       //flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
-#endif
         }
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      //}
@@ -2072,13 +2111,45 @@ innermoveobj:
      to->ptr += isize;
      to->offset += isize;
      to->top += isize;
+#ifdef GC_CACHE_ADAPT
+       int tmp_ptr = to->ptr;
+#endif // GC_CACHE_ADAPT
      if(to->top == to->bound) {
        // fill the header of this block and then go to next block
        BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
        (*((int*)(to->base))) = to->offset;
        nextBlock(to);
      }
-  }       // if(mark == 1)
+#ifdef GC_CACHE_ADAPT
+       if((to->base+to->bound) >= gc_cache_revise_infomation.to_page_end_va) {
+         // end of an to page, wrap up its information
+         float tmp_factor = 
+               ((float)(tmp_ptr-gc_cache_revise_infomation.to_page_start_va))/
+               ((float)(BAMBOO_PAGE_SIZE));
+         for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
+               ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r)
+                )[gc_cache_revise_infomation.to_page_index] += (int)(
+                 ((int*)((void*)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
+                 gc_cache_revise_infomation.orig_page_index]*tmp_factor);
+      // TODO
+/*       if(((gc_cache_revise_infomation.orig_page_start_va-gcbaseva)/(BAMBOO_PAGE_SIZE))*(BAMBOO_PAGE_SIZE)+gcbaseva == 0xd180000) {
+               tprintf("0xd180000 -> %x %d, %d, %d\n",(int)(gcbaseva+(BAMBOO_PAGE_SIZE)*gc_cache_revise_infomation.to_page_index), (int)(((int*)((void *)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[gc_cache_revise_infomation.orig_page_index]*tmp_factor), (int)(tmp_factor*100000), (int)(to->ptr-gc_cache_revise_infomation.to_page_start_va));
+         }*/
+         }
+         // prepare for an new to page
+         gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
+         gc_cache_revise_infomation.orig_page_end_va = gcbaseva + 
+               (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
+         gc_cache_revise_infomation.orig_page_index = 
+               (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
+         gc_cache_revise_infomation.to_page_start_va = to->ptr;
+         gc_cache_revise_infomation.to_page_end_va = gcbaseva + 
+               (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
+         gc_cache_revise_infomation.to_page_index = 
+               (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
+       }
+#endif // GC_CACHE_ADAPT
+  } // if(mark == 1)
  #ifdef DEBUG
    BAMBOO_DEBUGPRINT(0xe205);
  #endif
@@ -2173,6 +2244,18 @@ innercompact:
        break;
      }
    }
+#ifdef GC_CACHE_ADAPT
+       // end of an to page, wrap up its information
+       float tmp_factor = 
+         ((float)(to->ptr-gc_cache_revise_infomation.to_page_start_va))/
+         ((float)(BAMBOO_PAGE_SIZE));
+       for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
+         ((int*)((void*)gccachesamplingtbl_r+tt*size_cachesamplingtbl_local_r)
+          )[gc_cache_revise_infomation.to_page_index] += (int)(
+               ((int*)((void*)gccachesamplingtbl+tt*size_cachesamplingtbl_local))[
+               gc_cache_revise_infomation.orig_page_index]*tmp_factor);
+       }
+#endif // GC_CACHE_ADAPT
    // if no objs have been compact, do nothing,
    // otherwise, fill the header of this block
    if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
@@ -2182,7 +2265,7 @@ innercompact:
      to->offset = 0;
      to->ptr = to->base;
      to->top -= BAMBOO_CACHE_LINE_SIZE;
-  }       // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
+  }  // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
    if(*localcompact) {
      *heaptopptr = to->ptr;
      *filledblocks = to->numblocks;
@@ -2284,6 +2367,19 @@ innercompact:
      } else {
        *localcompact = false;
      }
+#ifdef GC_CACHE_ADAPT
+       // initialize the gc_cache_revise_information
+       gc_cache_revise_infomation.to_page_start_va = to->ptr;
+       gc_cache_revise_infomation.to_page_end_va = gcbaseva + 
+         (BAMBOO_PAGE_SIZE)*((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
+       gc_cache_revise_infomation.to_page_index = 
+         (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
+       gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
+       gc_cache_revise_infomation.orig_page_end_va = gcbaseva + 
+         (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
+       gc_cache_revise_infomation.orig_page_index = 
+         orig->blockbase/(BAMBOO_PAGE_SIZE);
+#endif // GC_CACHE_ADAPT
      goto innercompact;
    }
  #ifdef DEBUG
@@ -2316,6 +2412,13 @@ inline void compact() {
      RUNFREE(to);
      return;
    }
+#ifdef GC_CACHE_ADAPT
+  gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
+  gc_cache_revise_infomation.orig_page_end_va = gcbaseva +  
+       (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
+  gc_cache_revise_infomation.orig_page_index = 
+       orig->blockbase/(BAMBOO_PAGE_SIZE);
+#endif
  
    int filledblocks = 0;
    INTPTR heaptopptr = 0;
@@ -2407,14 +2510,6 @@ inline void * flushObj(void * objptr) {
                   gcobj2map = (int)objptr;
                   gcismapped = false;
                   gcmappedobj = NULL;
-#ifdef GC_PROFILE
-                 // TODO
-                 //num_mapinforequest++;
-                 //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
-#endif
-#ifdef GC_PROFILE
-                 //unsigned long long ttimet = BAMBOO_GET_EXE_TIME();
-#endif
                   // the first time require the mapping, send msg to the hostcore
                   // for the mapping info
                   send_msg_3(hostc, GCMAPREQUEST, (int)objptr,
@@ -2426,10 +2521,6 @@ inline void * flushObj(void * objptr) {
                   }
  #ifdef GC_PROFILE
                   //flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
-#endif
-#ifdef GC_PROFILE
-                 // TODO
-                 //flushstalltime += BAMBOO_GET_EXE_TIME() - ttime;
  #endif
                   BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  #ifdef LOCALHASHTBL_TEST
@@ -2571,17 +2662,8 @@ inline void transmappinginfo() {
  }
  
  inline void flush(struct garbagelist * stackptr) {
-#ifdef GC_PROFILE
-  /* TODO if(BAMBOO_NUM_OF_CORE == 0) {
-    BAMBOO_DEBUGPRINT(0xcccc);
-    BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
-  }*/
-#endif
  
    flushRuntimeObj(stackptr);
-#ifdef GC_PROFILE
-  // TODO if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
-#endif
  
    while(true) {
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
@@ -2678,9 +2760,6 @@ inline void flush(struct garbagelist * stackptr) {
  #ifdef DEBUG
    BAMBOO_DEBUGPRINT(0xe308);
  #endif
-#ifdef GC_PROFILE
-  // TODO if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
-#endif
  
    // TODO bug here: the startup core contains all lobjs' info, thus all the
    // lobjs are flushed in sequence.
@@ -2767,9 +2846,6 @@ inline void flush(struct garbagelist * stackptr) {
  #ifdef DEBUG
    BAMBOO_DEBUGPRINT(0xe310);
  #endif
-#ifdef GC_PROFILE
-  // TODO if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
-#endif
  
    // send flush finish message to core coordinator
    if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
@@ -2777,17 +2853,6 @@ inline void flush(struct garbagelist * stackptr) {
    } else {
      send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
    }
-#ifdef GC_PROFILE
-  // TODO 
-  //if(BAMBOO_NUM_OF_CORE == 0) {
-    //BAMBOO_DEBUGPRINT(0xffff);
-    //BAMBOO_DEBUGPRINT_REG(num_mapinforequest);
-    //BAMBOO_DEBUGPRINT_REG(flushstalltime);
-    //BAMBOO_DEBUGPRINT_REG(num_mapinforequest_i);
-    //BAMBOO_DEBUGPRINT_REG(flushstalltime_i);
-  //}
-  //BAMBOO_DEBUGPRINT_REG(flushstalltime);
-#endif
  #ifdef DEBUG
    BAMBOO_DEBUGPRINT(0xe311);
  #endif
@@ -2798,7 +2863,7 @@ inline void flush(struct garbagelist * stackptr) {
  //   -- flush the shared heap
  //   -- clean dtlb entries
  //   -- change cache strategy
-void cacheAdapt(bool isgccachestage) {
+void cacheAdapt_gc(bool isgccachestage) {
    // flush the shared heap
    BAMBOO_CACHE_FLUSH_L2();
  
@@ -2807,11 +2872,94 @@ void cacheAdapt(bool isgccachestage) {
  
    // change the cache strategy
    gccachestage = isgccachestage;
+} // cacheAdapt_gc(bool isgccachestage)
+
+// The master core decides how to adapt the cache strategy for the mutator
+// according to the collected statistic data.
+//
+// For every page of the shared heap it scans the per-core revised sampling
+// tables (gccachesamplingtbl_r), finds the core with the highest count and
+// counts how many cores exceed GC_CACHE_ADAPT_HOTPAGE_THRESHOLD, then picks
+// a policy for the page.
+//
+// Output layout of gccachepolicytbl:
+//   [0]        number of (page index, policy word) pairs that follow
+//   [1 + 2*k]  page index of the k-th changed page
+//   [2 + 2*k]  bamboo_cache_policy_t.word chosen for that page
+// Pages for which no core has a positive count are not listed, i.e. their
+// cache strategy is left unchanged.
+// NOTE(review): entries are written without checking size_cachepolicytbl;
+// the table is assumed to be large enough for one pair per page -- confirm
+// at the place where gccachepolicytbl is allocated.
+extern int gc_num_sampling;
+void cacheAdapt_master() {
+  unsigned int page_index = 0;
+  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+  int numchanged = 0;
+  int * tmp_p = gccachepolicytbl+1;
+  for(page_index = 0; page_index < page_num; page_index++) {
+    int hottestcore = 0;
+    int num_hotcore = 0;
+    int hotfreq = 0;
+    for(int i = 0; i < NUMCORESACTIVE; i++) {
+      // Each core's revised table occupies size_cachesamplingtbl_local_r
+      // bytes; step with that stride, exactly as the compaction code does
+      // when it fills these tables.
+      int * local_tbl = (int *)((void *)gccachesamplingtbl_r
+          +size_cachesamplingtbl_local_r*i);
+      int freq = local_tbl[page_index];
+      // check the frequency, decide if this page is hot for the core
+      if(hotfreq < freq) {
+        hotfreq = freq;
+        hottestcore = i;
+      }
+      if(freq > GC_CACHE_ADAPT_HOTPAGE_THRESHOLD) {
+        num_hotcore++;
+      }
+    }
+    // Decide the cache strategy for this page
+    if(hotfreq == 0) {
+      // this page has not been accessed, do not change its cache strategy
+      continue;
+    }
+    bamboo_cache_policy_t policy = {0};
+    if(num_hotcore > GC_CACHE_ADAPT_ACCESS_THRESHOLD) {
+      // hot on many cores: use hfh
+      policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
+    } else {
+      // locally cache the page in the hottest core
+      // NOTE: (x,y) should be changed to (x+1, y+1)!!!
+      policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
+      policy.lotar_x = bamboo_cpu2coords[2*hottestcore]+1;
+      policy.lotar_y = bamboo_cpu2coords[2*hottestcore+1]+1;
+    }
+    // append the (page index, policy word) pair
+    *tmp_p = page_index;
+    tmp_p++;
+    *tmp_p = policy.word;
+    tmp_p++;
+    numchanged++;
+  }
+  *gccachepolicytbl = numchanged;
+}
+
+// Adapt the cache strategy for the mutator: read the list stored in gccachepolicytbl ([0] = number of entries, followed by (page index, policy word) pairs) and apply each policy to its page with bamboo_adapt_cache_policy().
+void cacheAdapt_mutator() {
+  int numchanged = *gccachepolicytbl;
+  // check the changes and adapt them
+  int * tmp_p = gccachepolicytbl+1;
+  // bounded loop: a non-positive count applies nothing
+  for(int i = 0; i < numchanged; i++) {
+    // read out the policy
+    int page_index = *tmp_p;
+    // Load the raw word through the union member instead of casting to the
+    // union type: the cast is a GNU extension and needs the source type to
+    // match a member type exactly (int vs. unsigned int here).
+    bamboo_cache_policy_t policy = {0};
+    policy.word = *(tmp_p+1);
+    // adapt the policy
+    bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva, 
+        policy, BAMBOO_PAGE_SIZE);
+
+    tmp_p += 2;
+  }
  }
  #endif // GC_CACHE_ADAPT
  
  inline void gc_collect(struct garbagelist * stackptr) {
-  //BAMBOO_DEBUGPRINT(0xcccc); // TODO 
    // inform the master that this core is at a gc safe point and is ready to 
    // do gc
    send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs, 
@@ -2829,7 +2977,7 @@ inline void gc_collect(struct garbagelist * stackptr) {
    initGC();
  #ifdef GC_CACHE_ADAPT
    // prepare for cache adaption:
-  cacheAdapt(true);
+  cacheAdapt_gc(true);
  #endif // GC_CACHE_ADAPT
    //send init finish msg to core coordinator
    send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
@@ -2879,11 +3027,6 @@ inline void gc_collect(struct garbagelist * stackptr) {
              udn_tile_coord_y());
  #endif
  #ifdef GC_PROFILE
-  /*BAMBOO_DEBUGPRINT(0xaaaa);
-  BAMBOO_DEBUGPRINT_REG(gc_num_obj);
-  BAMBOO_DEBUGPRINT_REG(gc_num_liveobj);
-  BAMBOO_DEBUGPRINT_REG(gc_num_forwardobj);
-  BAMBOO_DEBUGPRINT(0xaaab);*/
    // send the num of obj/liveobj/forwardobj to the startupcore
    if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
         send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj, 
@@ -2907,7 +3050,9 @@ inline void gc_collect(struct garbagelist * stackptr) {
    printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(), 
              udn_tile_coord_y());
  #endif
-  cacheAdapt(false);
+  // cache adapt phase
+  cacheAdapt_mutator();
+  cacheAdapt_gc(false);
    //send init finish msg to core coordinator
    send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
  #ifdef RAWPATH // TODO GC_DEBUG
@@ -2927,7 +3072,6 @@ inline void gc_collect(struct garbagelist * stackptr) {
  } // void gc_collect(struct garbagelist * stackptr)
  
  inline void gc_nocollect(struct garbagelist * stackptr) {
-  //BAMBOO_DEBUGPRINT(0xcccc); // TODO
    // inform the master that this core is at a gc safe point and is ready to 
    // do gc
    send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs, 
@@ -2944,7 +3088,7 @@ inline void gc_nocollect(struct garbagelist * stackptr) {
    initGC();
  #ifdef GC_CACHE_ADAPT
    // prepare for cache adaption:
-  cacheAdapt(true);
+  cacheAdapt_gc(true);
  #endif // GC_CACHE_ADAPT
    //send init finish msg to core coordinator
    send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
@@ -2975,11 +3119,6 @@ inline void gc_nocollect(struct garbagelist * stackptr) {
              udn_tile_coord_y());
  #endif
  #ifdef GC_PROFILE
-  /*BAMBOO_DEBUGPRINT(0xaaaa);
-  BAMBOO_DEBUGPRINT_REG(gc_num_obj);
-  BAMBOO_DEBUGPRINT_REG(gc_num_liveobj);
-  BAMBOO_DEBUGPRINT_REG(gc_num_forwardobj);
-  BAMBOO_DEBUGPRINT(0xaaab);*/
    if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
         send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj, 
                 gc_num_liveobj, gc_num_forwardobj, false);
@@ -3002,7 +3141,9 @@ inline void gc_nocollect(struct garbagelist * stackptr) {
    printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(), 
              udn_tile_coord_y());
  #endif
-  cacheAdapt(false);
+  // cache adapt phase
+  cacheAdapt_mutator();
+  cacheAdapt_gc(false);
    //send init finish msg to core coordinator
    send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
  #ifdef RAWPATH // TODO GC_DEBUG
@@ -3039,7 +3180,7 @@ inline void gc_master(struct garbagelist * stackptr) {
  
  #ifdef GC_CACHE_ADAPT
    // prepare for cache adaption:
-  cacheAdapt(true);
+  cacheAdapt_gc(true);
  #endif // GC_CACHE_ADAPT
  
  #ifdef RAWPATH // TODO GC_DEBUG
@@ -3102,12 +3243,6 @@ inline void gc_master(struct garbagelist * stackptr) {
    }
  #ifdef GC_PROFILE
    gc_profileItem();
-  // TODO
-  /*if(BAMBOO_NUM_OF_CORE == 0) {
-       BAMBOO_DEBUGPRINT(0xeeee);
-       BAMBOO_DEBUGPRINT_REG(num_markrequest);
-       BAMBOO_DEBUGPRINT_REG(marktime);
-  }*/
  #endif
  #ifdef RAWPATH // TODO GC_DEBUG
    printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
@@ -3156,7 +3291,7 @@ inline void gc_master(struct garbagelist * stackptr) {
                 send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
           } else {
                 gcblock2fill = numpbc;
-         }    // if(i != STARTUPCORE)
+         }  // if(i != STARTUPCORE)
         }
  #ifdef DEBUG
         BAMBOO_DEBUGPRINT(0xf000+i);
@@ -3354,6 +3489,9 @@ inline void gc_master(struct garbagelist * stackptr) {
  #endif
  
  #ifdef GC_CACHE_ADAPT
+  // now the master core need to decide the new cache strategy
+  cacheAdapt_master();
+
    gcphase = PREFINISHPHASE;
    gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
    // Note: all cores should flush their runtime data including non-gc
@@ -3370,8 +3508,9 @@ inline void gc_master(struct garbagelist * stackptr) {
    printf("(%x,%x) Start prefinish phase \n", udn_tile_coord_x(), 
                  udn_tile_coord_y());
  #endif
-  // flush phase
-  cacheAdapt(false);
+  // cache adapt phase
+  cacheAdapt_mutator();
+  cacheAdapt_gc(false);
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    while(PREFINISHPHASE == gcphase) {
         // check the status of all cores
@@ -3410,24 +3549,6 @@ inline void gc_master(struct garbagelist * stackptr) {
                  udn_tile_coord_y());
    //dumpSMem();
  #endif
-  //BAMBOO_DEBUGPRINT(0x1111); // TODO
-/*#ifdef GC_PROFILE_S
-  BAMBOO_DEBUGPRINT(0xaaaa);
-  BAMBOO_DEBUGPRINT_REG(gc_num_obj);
-  BAMBOO_DEBUGPRINT_REG(gc_num_liveobj);
-  BAMBOO_DEBUGPRINT_REG(gc_num_forwardobj);
-  BAMBOO_DEBUGPRINT_REG(gc_num_profiles);
-  BAMBOO_DEBUGPRINT(0xaaab);
-  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-       BAMBOO_DEBUGPRINT(0xaaac);
-       BAMBOO_DEBUGPRINT_REG(gc_num_livespace);
-       BAMBOO_DEBUGPRINT_REG(gc_num_freespace);
-       BAMBOO_DEBUGPRINT(0xaaad);
-  }
-  gc_num_obj = gc_num_liveobj;
-  gc_num_liveobj = 0;
-  gc_num_forwardobj = 0;
-#endif // GC_PROFLIE_S*/
  } // void gc_master(struct garbagelist * stackptr)
  
  inline bool gc(struct garbagelist * stackptr) {
@@ -3463,11 +3584,9 @@ inline bool gc(struct garbagelist * stackptr) {
           gcflag = true;
           return false;
         } else {
-         // TODO
  #ifdef GC_PROFILE
      gc_profileStart();
  #endif
-         //BAMBOO_DEBUGPRINT(0x1111); // TODO
  pregccheck:
           //BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
           gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
@@ -3501,7 +3620,6 @@ pregccheck:
                 // are some update pregc information coming and check it again
                 gcprecheck = false;
                 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-               //BAMBOO_DEBUGPRINT(0x2222); // TODO
                 while(true) {
                   if(gcprecheck) {
                         break;
@@ -3512,11 +3630,6 @@ pregccheck:
                 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
           }
         }
-/*
-#ifdef GC_PROFILE
-    gc_profileStart();
-#endif
-*/
  #ifdef RAWPATH // TODO GC_DEBUG
      printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
      //dumpSMem();
@@ -3539,10 +3652,9 @@ pregccheck:
         }
  #endif
  #ifdef GC_CACHE_ADAPT
-    //BAMBOO_DEBUGPRINT(BAMBOO_GET_EXE_TIME());
      // disable the timer interrupt
      bamboo_mask_timer_intr();
-    // get the sampling data TODO
+    // get the sampling data 
      bamboo_output_dtlb_sampling();
  #endif // GC_CACHE_ADAPT
         gcprocessing = true;
@@ -3568,8 +3680,10 @@ pregccheck:
  #ifdef GC_CACHE_ADAPT
         // disable the timer interrupt
         bamboo_mask_timer_intr();
-       // get the sampling data TODO
-       bamboo_output_dtlb_sampling();
+       if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
+         // get the sampling data 
+         bamboo_output_dtlb_sampling();
+       }
  #endif // GC_CACHE_ADAPT
      gcprocessing = true;
      gc_collect(stackptr);
@@ -3601,8 +3715,10 @@ pregccheck:
  #ifdef GC_CACHE_ADAPT
         // disable the timer interrupt
         bamboo_mask_timer_intr();
-       // get the sampling data TODO
-       bamboo_output_dtlb_sampling();
+       if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
+         // get the sampling data 
+         bamboo_output_dtlb_sampling();
+       }
  #endif // GC_CACHE_ADAPT
      // not a gc core, should wait for gcfinish msg
      gcprocessing = true;
@@ -3618,11 +3734,19 @@ pregccheck:
  #ifdef GC_CACHE_ADAPT
    // reset the sampling arrays
    bamboo_dtlb_sampling_reset();
+  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
+       // zero out the gccachesamplingtbl
+       BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
+       BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
+               size_cachesamplingtbl_local_r);
+       if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+         BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
+       }
+  }
    // enable the timer interrupt
-  bamboo_tile_timer_set_next_event(500000000); // TODO
+  bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING); 
    bamboo_unmask_timer_intr();
  #endif // GC_CACHE_ADAPT
-  //if(STARTUPCORE == BAMBOO_NUM_OF_CORE) BAMBOO_DEBUGPRINT(0xeeee); // TODO 
    return true;
  } // void gc(struct garbagelist * stackptr)
  
diff --git a/Robust/src/Runtime/bamboo/multicoregarbage.h b/Robust/src/Runtime/bamboo/multicoregarbage.h

index a824bf751153d7e5c96475e9af5da2d94ef237cc..1189f38650a653a0345962246c5a801e7785e364 100644 (file)
--- a/Robust/src/Runtime/bamboo/multicoregarbage.h
+++ b/Robust/src/Runtime/bamboo/multicoregarbage.h
@@ -5,6 +5,9 @@
  #include "structdefs.h"
  #include "MGCHash.h"
  #include "GCSharedHash.h"
+#ifdef GC_CACHE_ADAPT
+#include "multicorecache.h"
+#endif // GC_CACHE_ADAPT
  
  #ifndef bool
  #define bool int
@@ -35,7 +38,7 @@ int gc_num_flush_dtlb;
  #define GC_PROFILE_NUM_FIELD 16
  #else
  #define GC_PROFILE_NUM_FIELD 15
-#endif
+#endif // GC_CACHE_ADAPT
  
  typedef struct gc_info {
    unsigned long long time[GC_PROFILE_NUM_FIELD];
@@ -171,8 +174,8 @@ volatile bool gcismapped;
  // table recording the starting address of each small block
  // (size is BAMBOO_SMEM_SIZE)
  // Note: 1. this table always resides on the very bottom of the shared memory
-//       2. the first two blocks are reserved for this table, would never be
-//          moved or garbage collected.
+//       2. it is not counted as part of the shared heap and is never
+//          garbage collected
  INTPTR * gcsbstarttbl;
  int gcreservedsb;  // number of reserved sblock for sbstarttbl
  int gcnumblock; // number of total blocks in the shared mem
@@ -180,6 +183,15 @@ int gcbaseva; // base va for shared memory without reserved sblocks
  #ifdef GC_CACHE_ADAPT
  int gctopva; // top va for shared memory without reserved sblocks
  volatile bool gccachestage;
+// table recording the sampling data collected for cache adaptation
+int * gccachesamplingtbl;
+int * gccachesamplingtbl_local;
+unsigned int size_cachesamplingtbl_local;
+int * gccachesamplingtbl_r;
+int * gccachesamplingtbl_local_r;
+unsigned int size_cachesamplingtbl_local_r;
+int * gccachepolicytbl;
+unsigned int size_cachepolicytbl;
  #endif // GC_CACHE_ADAPT
  
  #define ISSHAREDOBJ(p) \
diff --git a/Robust/src/Runtime/bamboo/multicoreruntime.h b/Robust/src/Runtime/bamboo/multicoreruntime.h

index 958f9f657fc29377e919b1026d664df38b7c8098..0b34c18feaf7e3d42d1e267b0151cafd7cc49900 100644 (file)
--- a/Robust/src/Runtime/bamboo/multicoreruntime.h
+++ b/Robust/src/Runtime/bamboo/multicoreruntime.h
@@ -369,6 +369,28 @@ int bamboo_reserved_smem; // reserved blocks on the top of the shared heap
                            // otherwise gc is invoked
  volatile INTPTR bamboo_smem_zero_top;
  #define BAMBOO_SMEM_ZERO_UNIT_SIZE (4 * 1024) // 4KB
+
+#ifdef GC_CACHE_ADAPT
+typedef union
+{
+  unsigned int word;
+  struct
+  {
+    // policy type
+    unsigned int cache_mode   : 2;
+       // Reserved.
+    unsigned int __reserved_0 : 6;
+       // Location Override Target Y
+    unsigned int lotar_y      : 4;
+    // Reserved.
+    unsigned int __reserved_1 : 4;
+    // Location Override Target X
+    unsigned int lotar_x      : 4;
+    // Reserved.
+    unsigned int __reserved_2 : 12;
+  };
+} bamboo_cache_policy_t;
+#endif // GC_CACHE_ADAPT
  #else
  //volatile mspace bamboo_free_msp;
  INTPTR bamboo_free_smemp;
diff --git a/Robust/src/Runtime/bamboo/multicoretask.c b/Robust/src/Runtime/bamboo/multicoretask.c

index cfcb41d813f4d860ccc8ecd306e8a642cc3e4391..647394bb0dda0618fadcfb42151a4e42c351e3b0 100644 (file)
--- a/Robust/src/Runtime/bamboo/multicoretask.c
+++ b/Robust/src/Runtime/bamboo/multicoretask.c
@@ -380,7 +380,6 @@ void initruntimedata() {
    // enable the timer interrupt
    bamboo_tile_timer_set_next_event(500000000); // TODO
    bamboo_unmask_timer_intr();
-  //BAMBOO_DEBUGPRINT(BAMBOO_GET_EXE_TIME());
    bamboo_dtlb_sampling_process();
  #endif // GC_CACHE_ADAPT
  #else
@@ -823,10 +822,6 @@ inline void run(void * arg) {
      while(true) {
  
  #ifdef MULTICORE_GC
-//#ifdef GC_CACHE_ADAPT
-         // do dtlb sampling if necessary
-//       bamboo_dtlb_sampling_process();
-//#endif // GC_CACHE_ADAPT
        // check if need to do GC
        if(gcflag) {
                 gc(NULL);
@@ -3715,10 +3710,6 @@ void executetasks() {
  newtask:
    while(hashsize(activetasks)>0) {
  #ifdef MULTICORE_GC
-//#ifdef GC_CACHE_ADAPT
-         // do dtlb sampling if necessary
-//       bamboo_dtlb_sampling_process();
-//#endif // GC_CACHE_ADAPT
      if(gcflag) gc(NULL);
  #endif
  #ifdef DEBUG
diff --git a/Robust/src/buildscript b/Robust/src/buildscript

index dd5f0823ef06141faec6f0af7490b2ba8a94e6a0..334cd15d03cf2e59833116d839985d70319b323d 100755 (executable)
--- a/Robust/src/buildscript
+++ b/Robust/src/buildscript
@@ -1015,6 +1015,7 @@ cp $BAMBOORUNTIME/multicoregc.h ./
  cp $BAMBOORUNTIME/multicoregarbage.h ./
  cp $BAMBOORUNTIME/multicorehelper.h ./
  cp $BAMBOORUNTIME/MGCHash.h ./
+cp $BAMBOORUNTIME/multicorecache.h ./
  cp ../Tilera/Runtime/*.c ./
  cp ../Tilera/Runtime/*.h ./
  cp ../Tilera/Runtime/$TILERA_INDIR/*.c ./
author	jzhou <jzhou>
	Wed, 15 Sep 2010 22:38:32 +0000 (22:38 +0000)
committer	jzhou <jzhou>
	Wed, 15 Sep 2010 22:38:32 +0000 (22:38 +0000)
Robust/src/Runtime/bamboo/multicorecache.h	[new file with mode: 0644]	patch \| blob
Robust/src/Runtime/bamboo/multicoregarbage.c		patch \| blob \| history
Robust/src/Runtime/bamboo/multicoregarbage.h		patch \| blob \| history
Robust/src/Runtime/bamboo/multicoreruntime.h		patch \| blob \| history
Robust/src/Runtime/bamboo/multicoretask.c		patch \| blob \| history
Robust/src/buildscript		patch \| blob \| history