2 #include "multicorecache.h"
3 #include "multicoremsg.h"
4 #include "multicoregcprofile.h"
6 void cacheadapt_finish_src_page(void *srcptr, void *tostart, void *tofinish) {
7 unsigned int srcpage=(srcptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
8 unsigned int dstpage=(tostart-gcbase)>>BAMBOO_PAGE_SIZE_BITS;
9 unsigned int numbytes=tofinish-tostart;
11 unsigned int * oldtable=&gccachesamplingtbl[srcpage*NUMCORESACTIVE];
12 unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
14 unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
16 for(int core = 0; core < NUMCORESACTIVE; core++) {
17 (*newtable)+=page64th*(*oldtable);
23 void cacheadapt_finish_dst_page(void *origptr, void *tostart, void *toptr, unsigned int bytesneeded) {
24 unsigned int numbytes=toptr-tostart;
26 void *tobound=(tostart&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE;
27 void *origbound=(origstart&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE;
29 unsigned int topage=(tostart-gcbase)>>BAMBOO_PAGE_SIZE_BITS;
30 unsigned int origpage=(origptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
32 unsigned int * totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
33 unsigned int * origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
35 unsigned int remaintobytes=tobound-toptr;
36 unsigned int remainorigbytes=origbound-origptr;
39 //round source bytes down....don't want to close out page if not necessary
40 remainorigbytes=(remainorigbytes>bytesneeded)?bytesneeded:remainorigbytes;
42 if (remaintobytes<=remainorigbytes) {
43 //Need to close out to page
45 numbytes+=remaintobytes;
46 unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
48 for(int core = 0; core < NUMCORESACTIVE; core++) {
49 (*totable)=(*totable+page64th*(*origtable))>>6;
54 origptr+=remaintobytes;
55 bytesneeded-=remaintobytes;
56 topage++;//to page is definitely done
57 tobound+=BAMBOO_PAGE_SIZE;
58 origpage=(origptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;//handle exact match case
59 origbound=(origptr&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE;
61 //Finishing off orig page
63 numbytes+=remainorigbytes;
64 unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
66 for(int core = 0; core < NUMCORESACTIVE; core++) {
67 (*totable)+=page64th*(*origtable);
71 toptr+=remainorigbytes;
72 origptr+=remainorigbytes;
73 bytesneeded-=remainorigbytes;
74 origpage++;//just orig page is done
75 origbound+=BAMBOO_PAGE_SIZE;
77 totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
78 origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
80 remaintobytes=tobound-toptr;
81 remainorigbytes=origbound-origptr;
84 } while(bytesneeded!=0);
87 // prepare for cache adaption:
88 // -- flush the shared heap
89 // -- clean dtlb entries
90 // -- change cache strategy
/* Prepare the cache hardware for a phase change: flush the shared heap out of
 * L2, drop stale DTLB entries, and install the DTLB handler appropriate to
 * the phase (GC sampling vs. normal mutator execution).
 * isgccachestage: true when entering the GC cache-sampling stage. */
void cacheAdapt_gc(bool isgccachestage) {
  // flush the shared heap
  BAMBOO_CACHE_FLUSH_L2();

  // clean the dtlb entries
  BAMBOO_CLEAN_DTLB();

  if(isgccachestage) {
    bamboo_install_dtlb_handler_for_gc();
  } else {
    bamboo_install_dtlb_handler_for_mutator();
  }
}
105 // the master core decides how to adapt cache strategy for the mutator
106 // according to collected statistic data
108 // find the core that accesses the page #page_index most
/* Scan the revised sampling row of page_index and record the core with the
 * highest access frequency in hottestcore/hotfreq (both must be
 * zero-initialized by the caller). */
#define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
  { \
    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE]; \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl++; \
      if(hotfreq < freq) { \
        hotfreq = freq; \
        hottestcore = i; \
      } \
    } \
  }
121 // find the core that accesses the page #page_index most and comput the total
122 // access time of the page at the same time
/* Like CACHEADAPT_FIND_HOTTEST_CORE, but also sums every core's frequency for
 * the page into totalfreq (caller zero-initializes all three outputs). */
#define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
  { \
    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE]; \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl++; \
      totalfreq += freq; \
      if(hotfreq < freq) { \
        hotfreq = freq; \
        hottestcore = i; \
      } \
    } \
  }
136 // Set the policy as hosted by coren
137 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
// Set the policy as hosted by coren
// NOTE: (x,y) should be changed to (x+1, y+1)!!!
#define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
  { \
    (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
    (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
    (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
  }
144 // store the new policy information at tmp_p in gccachepolicytbl
// store the new policy information at tmp_p in gccachepolicytbl
#define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
  { \
    ((int*)(tmp_p))[page_index] = (policy).word; \
  }
150 // make all pages hfh
151 void cacheAdapt_policy_h4h(int coren){
152 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
153 unsigned int page_gap=page_num/NUMCORESACTIVE;
154 unsigned int page_index=page_gap*coren;
155 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
156 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
157 unsigned int * tmp_p = gccachepolicytbl;
158 for(; page_index < page_index_end; page_index++) {
159 bamboo_cache_policy_t policy = {0};
160 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
161 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
162 page_sva += BAMBOO_PAGE_SIZE;
166 // make all pages local as non-cache-adaptable gc local mode
167 void cacheAdapt_policy_local(int coren){
168 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
169 unsigned int page_gap=page_num/NUMCORESACTIVE;
170 unsigned int page_index=page_gap*coren;
171 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
172 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
173 unsigned int * tmp_p = gccachepolicytbl;
174 for(; page_index < page_index_end; page_index++) {
175 bamboo_cache_policy_t policy = {0};
176 unsigned int block = 0;
177 BLOCKINDEX(block, (void *) page_sva);
178 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
179 CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
180 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
181 page_sva += BAMBOO_PAGE_SIZE;
185 void cacheAdapt_policy_hottest(int coren){
186 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
187 unsigned int page_gap=page_num/NUMCORESACTIVE;
188 unsigned int page_index=page_gap*coren;
189 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
190 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
191 unsigned int * tmp_p = gccachepolicytbl;
192 for(; page_index < page_index_end; page_index++) {
193 bamboo_cache_policy_t policy = {0};
194 unsigned int hottestcore = 0;
195 unsigned int hotfreq = 0;
196 CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
198 // Decide the cache strategy for this page
199 // If decide to adapt a new cache strategy, write into the shared block of
200 // the gcsharedsamplingtbl. The mem recording information that has been
201 // written is enough to hold the information.
202 // Format: page start va + cache strategy(hfh/(host core+[x,y]))
204 // locally cache the page in the hottest core
205 CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
207 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
208 page_sva += BAMBOO_PAGE_SIZE;
212 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 1
213 // cache the page on the core that accesses it the most if that core accesses
214 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total. Otherwise,
216 void cacheAdapt_policy_dominate(int coren){
217 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
218 unsigned int page_gap=page_num/NUMCORESACTIVE;
219 unsigned int page_index=page_gap*coren;
220 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
221 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
222 unsigned int * tmp_p = gccachepolicytbl;
223 for(; page_index < page_index_end; page_index++) {
224 bamboo_cache_policy_t policy = {0};
225 unsigned int hottestcore = 0;
226 unsigned int totalfreq = 0;
227 unsigned int hotfreq = 0;
228 CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
229 // Decide the cache strategy for this page
230 // If decide to adapt a new cache strategy, write into the shared block of
232 // Format: page start va + cache policy
234 totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
235 if((unsigned int)hotfreq < (unsigned int)totalfreq) {
237 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
238 /*unsigned int block = 0;
239 BLOCKINDEX(block, (void *) page_sva);
240 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
241 CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);*/
243 // locally cache the page in the hottest core
244 CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
247 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
248 page_sva += BAMBOO_PAGE_SIZE;
/* Dispatch to the compile-time-selected cache policy routine for this core's
 * slice of pages.
 * NOTE(review): the original fell off the end of a non-void function (UB if
 * the result is used); callers in this file ignore the result, so return 0. */
unsigned int cacheAdapt_decision(int coren) {
  // check the statistic data
  // for each page, decide the new cache strategy
#ifdef GC_CACHE_ADAPT_POLICY1
  cacheAdapt_policy_h4h(coren);
#elif defined GC_CACHE_ADAPT_POLICY2
  cacheAdapt_policy_local(coren);
#elif defined GC_CACHE_ADAPT_POLICY3
  cacheAdapt_policy_hottest(coren);
#elif defined GC_CACHE_ADAPT_POLICY4
  cacheAdapt_policy_dominate(coren);
#endif
  return 0;
}
267 // adapt the cache strategy for the mutator
268 void cacheAdapt_mutator() {
270 // check the changes and adapt them
271 unsigned int * tmp_p = gccachepolicytbl;
272 unsigned int page_sva = gcbaseva;
273 for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
274 // read out the policy
275 bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
277 if(policy.word != 0) {
278 bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
284 // Cache adapt phase process for clients
285 void cacheAdapt_phase_client() {
286 WAITFORGCPHASE(CACHEPOLICYPHASE);
287 GC_PRINTF("Start cachepolicy phase\n");
288 cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
289 //send init finish msg to core coordinator
290 send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
291 GC_PRINTF("Finish cachepolicy phase\n");
293 WAITFORGCPHASE(PREFINISHPHASE);
294 GC_PRINTF("Start prefinish phase\n");
296 cacheAdapt_mutator();
297 cacheAdapt_gc(false);
298 //send init finish msg to core coordinator
299 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
300 GC_PRINTF("Finish prefinish phase\n");
301 CACHEADAPT_SAMPLING_RESET();
302 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
303 // zero out the gccachesamplingtbl
304 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
305 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
309 extern unsigned long long gc_output_cache_policy_time;
311 // Cache adpat phase process for the master
312 void cacheAdapt_phase_master() {
314 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
315 CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
316 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
317 // let all cores to parallelly process the revised profile data and decide
318 // the cache policy for each page
319 gc_status_info.gcphase = CACHEPOLICYPHASE;
320 GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
321 GC_PRINTF("Start cachepolicy phase \n");
323 cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
324 GC_CHECK_ALL_CORE_STATUS();
327 // let all cores to adopt new policies
328 gc_status_info.gcphase = PREFINISHPHASE;
329 // Note: all cores should flush their runtime data including non-gc cores
330 GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
331 GC_PRINTF("Start prefinish phase \n");
333 cacheAdapt_mutator();
334 cacheAdapt_gc(false);
335 GC_CHECK_ALL_CORE_STATUS();
337 CACHEADAPT_SAMPLING_RESET();
338 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
339 // zero out the gccachesamplingtbl
340 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
341 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
342 BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
346 // output original cache sampling data for each page
347 void gc_output_cache_sampling() {
348 extern volatile bool gc_profile_flag;
349 if(!gc_profile_flag) return;
350 unsigned int page_index = 0;
352 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
353 for(page_index = 0; page_index < page_num; page_index++) {
354 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
355 unsigned int block = 0;
356 BLOCKINDEX(block, (void *) page_sva);
357 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
358 //printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
359 unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
361 for(int i = 0; i < NUMCORESACTIVE; i++) {
362 int freq = *local_tbl;
366 //printf("%d, ", freq);
370 printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
371 unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
372 for(int i = 0; i < NUMCORESACTIVE; i++) {
373 int freq = *local_tbl;
375 printf("%d, ", freq);
381 printf("=================\n");
384 // output revised cache sampling data for each page after compaction
385 void gc_output_cache_sampling_r() {
386 extern volatile bool gc_profile_flag;
387 if(!gc_profile_flag) return;
389 unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
390 for(int i = 0; i < NUMCORESACTIVE; i++) {
391 for(int j = 0; j < NUMCORESACTIVE; j++) {
395 tprintf("cache sampling_r \n");
396 unsigned int page_index = 0;
398 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
399 for(page_index = 0; page_index < page_num; page_index++) {
400 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
401 unsigned int block = 0;
402 BLOCKINDEX(block, (void *)page_sva);
403 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
404 //printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
405 int accesscore = 0; // TODO
406 unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
407 for(int i = 0; i < NUMCORESACTIVE; i++) {
408 int freq = *local_tbl;
409 //printf("%d, ", freq);
416 printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
417 unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
418 for(int i = 0; i < NUMCORESACTIVE; i++) {
419 int freq = *local_tbl;
420 printf("%d, ", freq);
421 sumdata[accesscore-1][i]+=freq;
429 // TODO printout the summary data
430 for(int i = 0; i < NUMCORESACTIVE; i++) {
432 for(int j = 0; j < NUMCORESACTIVE; j++) {
433 printf(" %d ", sumdata[j][i]);
437 printf("=================\n");
439 #endif // GC_CACHE_ADAPT