377bc698a11271e5b41eecf1dd11a9972d1d9aeb
[IRC.git] / Robust / src / Runtime / bamboo / multicorecache.c
1 #ifdef GC_CACHE_ADAPT
2 #include "multicorecache.h"
3 #include "multicoremsg.h"
4 #include "multicoregcprofile.h"
5
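// Fold the per-core access samples recorded for the source page into the
// revised-table entry of the destination page the data was copied to,
// weighted by the copied size expressed in 1/64ths of a page.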
6 void cacheadapt_finish_src_page(void *srcptr, void *tostart, void *tofinish) {
7   unsigned int srcpage=(srcptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
8   unsigned int dstpage=(tostart-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
9   unsigned int numbytes=tofinish-tostart;
10   
11   unsigned int * oldtable=&gccachesamplingtbl[srcpage*NUMCORESACTIVE];
12   unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
13   
14   unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
15
16   for(int core = 0; core < NUMCORESACTIVE; core++) {
17     (*newtable)+=page64th*(*oldtable);
18     newtable++;
19     oldtable++;
20   }  
21 }
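// Illustration of the 1/64-page weighting above (hypothetical page size): with
// BAMBOO_PAGE_SIZE_BITS == 12 (4 KB pages), copying numbytes == 1024 gives
// page64th = 1024 >> (12-6) = 16, so the destination page inherits
// 16/64 = 1/4 of the source page's per-core counts.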
22
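// Walk the destination and source pages spanned by a copy of bytesneeded
// bytes, folding source-page samples into the destination pages' revised
// entries.  The loop below alternates between two cases: when the destination
// page fills up first, its entry is closed out (scaled down by 64); when the
// source page runs out first, its remaining samples are accumulated without
// closing out the destination entry.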
23 void cacheadapt_finish_dst_page(void *origptr, void *tostart, void *toptr, unsigned int bytesneeded) {
24   unsigned int numbytes=toptr-tostart;
25
26   void *tobound=(void *)(((unsigned int)tostart&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
27   void *origbound=(void *)(((unsigned int)origptr&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
28   
29   unsigned int topage=(tostart-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
30   unsigned int origpage=(origptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
31
32   unsigned int * totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
33   unsigned int * origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
34
35   unsigned int remaintobytes=tobound-toptr;
36   unsigned int remainorigbytes=origbound-origptr;
37
38   do {
39     // clamp the source bytes to what is still needed; don't close out the orig page unnecessarily
40     remainorigbytes=(remainorigbytes>bytesneeded)?bytesneeded:remainorigbytes;
41
42     if (remaintobytes<=remainorigbytes) {
43       // need to close out the destination (to) page
44
45       numbytes+=remaintobytes;
46       unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
47
48       for(int core = 0; core < NUMCORESACTIVE; core++) {
49         (*totable)=(*totable+page64th*(*origtable))>>6;
50         totable++;
51         origtable++;
52       }
53       toptr+=remaintobytes;
54       origptr+=remaintobytes;
55       bytesneeded-=remaintobytes;
56       topage++;//to page is definitely done
57       tobound+=BAMBOO_PAGE_SIZE;
58       origpage=(origptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;//handle exact match case
59       origbound=(void *)(((unsigned int)origptr&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
60     } else {
61       // finishing off the orig (source) page
62
63       numbytes+=remainorigbytes;
64       unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
65       
66       for(int core = 0; core < NUMCORESACTIVE; core++) {
67         (*totable)+=page64th*(*origtable);
68         totable++;
69         origtable++;
70       }
71       toptr+=remainorigbytes;
72       origptr+=remainorigbytes;
73       bytesneeded-=remainorigbytes;
74       origpage++;//just orig page is done
75       origbound+=BAMBOO_PAGE_SIZE;
76     }
77     totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
78     origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
79     
80     remaintobytes=tobound-toptr;
81     remainorigbytes=origbound-origptr;
82     
83     numbytes=0;
84   } while(bytesneeded!=0);
85 }
86
87 // prepare for cache adaptation:
88 //   -- flush the shared heap
89 //   -- clean dtlb entries
90 //   -- change cache strategy
91 void cacheAdapt_gc(bool isgccachestage) {
92   // flush the shared heap
93   BAMBOO_CACHE_FLUSH_L2();
94
95   // clean the dtlb entries
96   BAMBOO_CLEAN_DTLB();
97
98   if(isgccachestage) {
99     bamboo_install_dtlb_handler_for_gc();
100   } else {
101     bamboo_install_dtlb_handler_for_mutator();
102   }
103 }
104
105 // the master core decides how to adapt the cache strategy for the mutator
106 // according to the collected statistics
107
108 // find the core that accesses the page #page_index the most
109 #define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
110   { \
111     unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE];   \
112     for(int i = 0; i < NUMCORESACTIVE; i++) { \
113       int freq = *local_tbl; \
114       local_tbl++; \
115       if(hotfreq < freq) { \
116         hotfreq = freq; \
117         hottestcore = i; \
118       } \
119     } \
120   }
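// Note: hottestcore and hotfreq are in/out arguments; callers must initialize
// them before invoking the macro (the policy functions below start from 0).
// Minimal usage sketch, kept out of the build on purpose; "some_page_index"
// is a placeholder:
#if 0
{
  unsigned int hottestcore = 0;
  unsigned int hotfreq = 0;
  CACHEADAPT_FIND_HOTTEST_CORE(some_page_index, hottestcore, hotfreq);
  // hotfreq == 0 afterwards means no core sampled an access to that page
}
#endif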
121 // find the core that accesses the page #page_index the most and compute
122 // the total access count of the page at the same time
123 #define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
124   { \
125     unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE];   \
126     for(int i = 0; i < NUMCORESACTIVE; i++) { \
127       int freq = *local_tbl; \
128       local_tbl++; \
129       totalfreq += freq; \
130       if(hotfreq < freq) { \
131         hotfreq = freq; \
132         hottestcore = i; \
133       } \
134     } \
135   }
136 // Set the policy as hosted by coren
137 // NOTE: the (x,y) coordinates must be stored as (x+1, y+1)!
138 #define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
139   { \
140     (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \    
141     (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
142     (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
143   }
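// Example of the coordinate translation (hypothetical values): if
// bamboo_cpu2coords[2*coren] == 1 and bamboo_cpu2coords[2*coren+1] == 2,
// the page is homed at lotar coordinates (2, 3).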
144 // store the new policy information at tmp_p in gccachepolicytbl
145 #define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
146   { \
147     ((int*)(tmp_p))[page_index] = (policy).word; \
148   }
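// Because the macro indexes from the table base by the absolute page_index,
// the tmp_p cursor in the policy functions below never needs to be advanced.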
149
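// All cacheAdapt_policy_* functions below split the page range evenly across
// the active cores, with the last core also taking the remainder.  For
// illustration (hypothetical sizes): page_num == 1000 and NUMCORESACTIVE == 62
// give page_gap == 16, so core 0 handles pages 0-15 and core 61 handles
// pages 976-999.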
150 // make all pages hash-for-home (h4h)
151 void cacheAdapt_policy_h4h(int coren){
152   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
153   unsigned int page_gap=page_num/NUMCORESACTIVE;
154   unsigned int page_index=page_gap*coren;
155   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
156   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
157   unsigned int * tmp_p = gccachepolicytbl;
158   for(; page_index < page_index_end; page_index++) {
159     bamboo_cache_policy_t policy = {0};
160     policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
161     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
162     page_sva += BAMBOO_PAGE_SIZE;
163   }
164 }
165
166 // home each page on the core that owns its block (the non-cache-adaptive local mode)
167 void cacheAdapt_policy_local(int coren){
168   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
169   unsigned int page_gap=page_num/NUMCORESACTIVE;
170   unsigned int page_index=page_gap*coren;
171   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
172   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
173   unsigned int * tmp_p = gccachepolicytbl;
174   for(; page_index < page_index_end; page_index++) {
175     bamboo_cache_policy_t policy = {0};
176     unsigned int block = 0;
177     BLOCKINDEX(block, (void *) page_sva);
178     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
179     CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
180     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
181     page_sva += BAMBOO_PAGE_SIZE;
182   }
183 }
184
185 void cacheAdapt_policy_hottest(int coren){
186   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
187   unsigned int page_gap=page_num/NUMCORESACTIVE;
188   unsigned int page_index=page_gap*coren;
189   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
190   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
191   unsigned int * tmp_p = gccachepolicytbl;
192   for(; page_index < page_index_end; page_index++) {
193     bamboo_cache_policy_t policy = {0};
194     unsigned int hottestcore = 0;
195     unsigned int hotfreq = 0;
196     CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
197     // TODO
198     // Decide the cache strategy for this page.
199     // If a new cache strategy is adopted, write it into the shared block of
200     // the gcsharedsamplingtbl; the memory already reserved for the sampling
201     // data is large enough to hold this information.
202     // Format: page start va + cache strategy (hfh / host core [x,y])
203     if(hotfreq != 0) {
204       // locally cache the page in the hottest core
205       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
206     }
207     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
208     page_sva += BAMBOO_PAGE_SIZE;
209   }
210 }
211
212 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  1
213 // cache the page on the core that accesses it the most, provided that core
214 // accounts for at least 1/2^GC_CACHE_ADAPT_DOMINATE_THRESHOLD (here, half) of
215 // the page's total accesses.  Otherwise, h4h the page.
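// With the threshold of 1, totalfreq is halved, so the hottest core must
// account for at least half of the page's accesses to win it.  For example
// (hypothetical counts): totalfreq == 100 with hotfreq == 60 homes the page on
// the hottest core, while hotfreq == 40 falls back to hash-for-home.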
216 void cacheAdapt_policy_dominate(int coren){
217   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
218   unsigned int page_gap=page_num/NUMCORESACTIVE;
219   unsigned int page_index=page_gap*coren;
220   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
221   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
222   unsigned int * tmp_p = gccachepolicytbl;
223   for(; page_index < page_index_end; page_index++) {
224     bamboo_cache_policy_t policy = {0};
225     unsigned int hottestcore = 0;
226     unsigned int totalfreq = 0;
227     unsigned int hotfreq = 0;
228     CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
229     // Decide the cache strategy for this page.
230     // If a new policy is chosen, write it into the shared block of
231     // the gccachepolicytbl.
232     // Format: page start va + cache policy
233     if(hotfreq != 0) {
234       totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
235       if((unsigned int)hotfreq < (unsigned int)totalfreq) {
236         // use hfh
237         policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
238         /*unsigned int block = 0;
239         BLOCKINDEX(block, (void *) page_sva);
240         unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
241         CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);*/
242       } else {
243         // locally cache the page in the hottest core
244         CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
245       }     
246     }
247     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
248     page_sva += BAMBOO_PAGE_SIZE;
249   }
250 }
251
252 void cacheAdapt_decision(int coren) {
253   BAMBOO_CACHE_MF();
254   // check the statistic data
255   // for each page, decide the new cache strategy
256 #ifdef GC_CACHE_ADAPT_POLICY1
257   cacheAdapt_policy_h4h(coren);
258 #elif defined GC_CACHE_ADAPT_POLICY2
259   cacheAdapt_policy_local(coren);
260 #elif defined GC_CACHE_ADAPT_POLICY3
261   cacheAdapt_policy_hottest(coren);
262 #elif defined GC_CACHE_ADAPT_POLICY4
263   cacheAdapt_policy_dominate(coren);
264 #endif
265 }
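// Which policy runs is selected at compile time by defining exactly one of
// GC_CACHE_ADAPT_POLICY1..4 (presumably passed as -D flags by the build).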
266
267 // adapt the cache strategy for the mutator
268 void cacheAdapt_mutator() {
269   BAMBOO_CACHE_MF();
270   // check the changes and adapt them
271   unsigned int * tmp_p = gccachepolicytbl;
272   unsigned int page_sva = gcbaseva;
273   for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
274     // read out the policy
275     bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
276     // adapt the policy
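    // a zero policy word means no new policy was recorded for this page (see
    // the {0} initializers in the cacheAdapt_policy_* functions), so the
    // current mapping is left untouched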
277     if(policy.word != 0) {
278       bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
279     }
280     tmp_p += 1;
281   }
282 }
283
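// Cache-policy handshake implemented below: the master raises CACHEPOLICYPHASE
// and every core runs cacheAdapt_decision() on its share of pages, clients
// acknowledging with GCFINISHCACHEPOLICY; the master then raises
// PREFINISHPHASE and every core applies the new per-page policies
// (cacheAdapt_mutator) and switches caches/DTLB back to mutator mode
// (cacheAdapt_gc(false)), clients acknowledging with GCFINISHPREF.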
284 // Cache adapt phase process for clients
285 void cacheAdapt_phase_client() {
286   WAITFORGCPHASE(CACHEPOLICYPHASE);
287   GC_PRINTF("Start cachepolicy phase\n");
288   cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
289   // notify the coordinator core that the cachepolicy phase is finished
290   send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
291   GC_PRINTF("Finish cachepolicy phase\n");
292
293   WAITFORGCPHASE(PREFINISHPHASE);
294   GC_PRINTF("Start prefinish phase\n");
295   // cache adapt phase
296   cacheAdapt_mutator();
297   cacheAdapt_gc(false);
298   // notify the coordinator core that the prefinish phase is finished
299   send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
300   GC_PRINTF("Finish prefinish phase\n");
301   CACHEADAPT_SAMPLING_RESET();
302   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
303     // zero out this core's local sampling tables
304     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);  
305     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
306   }
307 }
308
309 extern unsigned long long gc_output_cache_policy_time;
310
311 // Cache adapt phase process for the master
312 void cacheAdapt_phase_master() {
313   GCPROFILE_ITEM();
314   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
315   CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
316   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
317   // let all cores process the revised profile data in parallel and decide
318   // the cache policy for each page
319   gc_status_info.gcphase = CACHEPOLICYPHASE;
320   GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
321   GC_PRINTF("Start cachepolicy phase \n");
322   // cache adapt phase
323   cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
324   GC_CHECK_ALL_CORE_STATUS();
325   BAMBOO_CACHE_MF();
326
327   // let all cores adopt the new policies
328   gc_status_info.gcphase = PREFINISHPHASE;
329   // Note: all cores, including non-gc cores, should flush their runtime data
330   GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
331   GC_PRINTF("Start prefinish phase \n");
332   // cache adapt phase
333   cacheAdapt_mutator();
334   cacheAdapt_gc(false);
335   GC_CHECK_ALL_CORE_STATUS();
336   
337   CACHEADAPT_SAMPLING_RESET();
338   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
339     // zero out this core's local sampling tables and the shared policy table
340     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
341     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
342     BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
343   }
344 }
345
346 // output original cache sampling data for each page
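// Each printed line is: page start va (hex), page index, the page's home core,
// followed by one access count per active core; pages nobody accessed are
// skipped.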
347 void gc_output_cache_sampling() {
348   extern volatile bool gc_profile_flag;
349   if(!gc_profile_flag) return;
350   unsigned int page_index = 0;
351   VA page_sva = 0;
352   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
353   for(page_index = 0; page_index < page_num; page_index++) {
354     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
355     unsigned int block = 0;
356     BLOCKINDEX(block, (void *) page_sva);
357     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
358     //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
359     unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
360     int accesscore = 0;
361     for(int i = 0; i < NUMCORESACTIVE; i++) {
362       int freq = *local_tbl;
363       local_tbl++;
364       if(freq != 0) {
365         accesscore++;
366         //printf("%d,  ", freq);
367       }
368     }
369     if(accesscore!=0) {
370       printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
371       unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
372       for(int i = 0; i < NUMCORESACTIVE; i++) {
373         int freq = *local_tbl;
374         local_tbl++;
375         printf("%d,  ", freq);
376       }
377       printf("\n");
378     }
379     //printf("\n");
380   }
381   printf("=================\n");
382 }
383
384 // output revised cache sampling data for each page after compaction
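// Per-page lines use the same format as gc_output_cache_sampling() above; a
// summary matrix is printed at the end (one row per core, one column per
// page-sharing degree).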
385 void gc_output_cache_sampling_r() {
386   extern volatile bool gc_profile_flag;
387   if(!gc_profile_flag) return;
388   // summary data: sumdata[k][i] sums core i's access counts over pages touched by exactly k+1 cores
389   unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
390   for(int i = 0; i < NUMCORESACTIVE; i++) {
391     for(int j = 0; j < NUMCORESACTIVE; j++) {
392       sumdata[i][j] = 0;
393     }
394   }
395   tprintf("cache sampling_r \n");
396   unsigned int page_index = 0;
397   VA page_sva = 0;
398   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
399   for(page_index = 0; page_index < page_num; page_index++) {
400     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
401     unsigned int block = 0;
402     BLOCKINDEX(block, (void *)page_sva);
403     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
404     //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
405     int accesscore = 0; // number of cores that accessed this page
406     unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
407     for(int i = 0; i < NUMCORESACTIVE; i++) {
408       int freq = *local_tbl; 
409       //printf("%d,  ", freq);
410       if(freq != 0) {
411         accesscore++;
412       }
413       local_tbl++;
414     }
415     if(accesscore!=0) {
416       printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
417       unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
418       for(int i = 0; i < NUMCORESACTIVE; i++) {
419         int freq = *local_tbl;
420         printf("%d,  ", freq);
421         sumdata[accesscore-1][i]+=freq;
422         local_tbl++;
423       }
424       printf("\n");
425     }  
426     //printf("\n");
427   }
428   printf("+++++\n");
429   // print the summary data: one row per core, one column per page-sharing degree
430   for(int i = 0; i < NUMCORESACTIVE; i++) {
431     printf("%d  ", i);
432     for(int j = 0; j < NUMCORESACTIVE; j++) {
433       printf(" %d  ", sumdata[j][i]);
434     }
435     printf("\n");
436   }
437   printf("=================\n");
438 }
439 #endif // GC_CACHE_ADAPT