1 #ifndef MULTICORE_RUNTIME
2 #define MULTICORE_RUNTIME
// INLINE expands to `inline` plus the GCC-style always_inline attribute; it
// is used below as the leading specifier on the runtime's function prototypes.
5 #define INLINE inline __attribute__((always_inline))
14 ////////////////////////////////////////////////////////////////
15 // global variables //
16 ///////////////////////////////////////////////////////////////
18 // record the starting time
19 unsigned long long bamboo_start_time;
21 // data structures for msgs
// Both buffer lengths are 2048 and both masks are 0x7FF (2048 - 1), which is
// what lets the *_INDEXINC macros wrap an index with a single bitwise AND.
22 #define BAMBOO_OUT_BUF_LENGTH 2048
23 #define BAMBOO_OUT_BUF_MASK (0x7FF)
24 #define BAMBOO_MSG_BUF_LENGTH 2048
25 #define BAMBOO_MSG_BUF_MASK (0x7FF)
// NOTE(review): the objects below are defined (not declared extern) in this
// header; if more than one translation unit includes it, linking relies on
// common-symbol merging -- confirm how the header is built.
// msgdata is written at position msgdatalast by MSG_CACHE_I; msgdataindex and
// msgdatalast are the two positions advanced by MSG_INDEXINC_I and
// MSG_LASTINDEXINC_I respectively.
26 int msgdata[BAMBOO_MSG_BUF_LENGTH];
27 volatile int msgdataindex;
28 volatile int msgdatalast;
// msgdatafull is tested by MSG_REMAINSIZE_I when the two indices are equal.
30 volatile bool msgdatafull;
// outmsgdata is written at position outmsglast by OUTMSG_CACHE.
31 int outmsgdata[BAMBOO_OUT_BUF_LENGTH];
// presumably related to send_hanging_msg() declared below -- TODO confirm
35 volatile bool isMsgHanging;
// Advance msgdataindex by one slot, wrapping with BAMBOO_MSG_BUF_MASK.
// Expands to a bare assignment expression with no trailing semicolon.
37 #define MSG_INDEXINC_I() \
38 msgdataindex = (msgdataindex + 1) & (BAMBOO_MSG_BUF_MASK)
// Advance msgdatalast by one slot, wrapping with BAMBOO_MSG_BUF_MASK.
// Expands to a bare assignment expression with no trailing semicolon.
40 #define MSG_LASTINDEXINC_I() \
41 msgdatalast = (msgdatalast + 1) & (BAMBOO_MSG_BUF_MASK)
43 #define MSG_CACHE_I(n) \
44 msgdata[msgdatalast] = (n); \
47 // NOTE: if msgdataindex == msgdatalast, it always means that the buffer is
48 // full. In the case that the buffer is empty, should never call this
50 #define MSG_REMAINSIZE_I(s) \
51 if(msgdataindex < msgdatalast) { \
52 (*(int*)s) = msgdatalast - msgdataindex; \
53 } else if((msgdataindex == msgdatalast) && (!msgdatafull)) { \
56 (*(int*)s) = (BAMBOO_MSG_BUF_LENGTH) - msgdataindex + msgdatalast; \
// Advance outmsgindex by one slot, wrapping with BAMBOO_OUT_BUF_MASK.
// Expands to a bare assignment expression with no trailing semicolon.
59 #define OUTMSG_INDEXINC() \
60 outmsgindex = (outmsgindex + 1) & (BAMBOO_OUT_BUF_MASK)
62 #define OUTMSG_LASTINDEXINC() \
63 outmsglast = (outmsglast + 1) & (BAMBOO_OUT_BUF_MASK); \
64 if(outmsglast == outmsgindex) { \
65 BAMBOO_EXIT(0xd101); \
// Store n into outmsgdata at position outmsglast, then advance outmsglast
// with OUTMSG_LASTINDEXINC().
// NOTE(review): the expansion is more than one statement and already ends in
// ';', so it is not safe as the sole body of an unbraced if/else.
68 #define OUTMSG_CACHE(n) \
69 outmsgdata[outmsglast] = (n); \
70 OUTMSG_LASTINDEXINC();
72 #define MAX_PACKET_WORDS 5
76 * type: 1 -- transfer object
77 * 2 -- transfer stall msg
82 * // add for profile info
83 * 7 -- transfer profile output msg
84 * 8 -- transfer profile output finish msg
85 * // add for alias lock strategy
86 * 9 -- redirect lock request
87 * a -- lock grant with redirect info
88 * b -- lock deny with redirect info
89 * c -- lock release with redirect info
90 * d -- status confirm request
91 * e -- status report msg
93 * 10 -- requiring for new memory
94 * 11 -- response for new memory request
95 * 12 -- GC init phase start
97 * 14 -- compact phase start
98 * 15 -- flush phase start
99 * 16 -- init phase finish
100 * 17 -- mark phase finish
101 * 18 -- compact phase finish
102 * 19 -- flush phase finish
104 * 1b -- marked phase finish confirm request
105 * 1c -- marked phase finish confirm response
106 * 1d -- markedObj msg
107 * 1e -- start moving objs msg
108 * 1f -- ask for mapping info of a markedObj
109 * 20 -- mapping info of a markedObj
110 * 21 -- large objs info request
111 * 22 -- large objs info response
112 * 23 -- large objs mapping info
114 * ObjMsg: 1 + size of msg + obj's address + (task index + param index)+
115 * StallMsg: 2 + corenum + sendobjs + receiveobjs
116 * (size is always 4 * sizeof(int))
117 * LockMsg: 3 + lock type + obj pointer + lock + request core
118 * (size is always 5 * sizeof(int))
119 * 4/5/6 + lock type + obj pointer + lock
120 * (size is always 4 * sizeof(int))
121 * 9 + lock type + obj pointer + redirect lock + root request core
123 * (size is always 6 * sizeof(int))
124 * a/b + lock type + obj pointer + redirect lock
125 * (size is always 4 * sizeof(int))
126 * c + lock type + lock + redirect lock
127 * (size is always 4 * sizeof(int))
128 * lock type: 0 -- read; 1 -- write
129 * ProfileMsg: 7 + totalexetime
130 * (size is always 2 * sizeof(int))
132 * (size is always 2 * sizeof(int))
133 * StatusMsg: d (size is always 1 * sizeof(int))
134 * e + status + corenum + sendobjs + receiveobjs
135 * (size is always 5 * sizeof(int))
136 * status: 0 -- stall; 1 -- busy
137 * TerminateMsg: f (size is always 1 * sizeof(int))
138 * MemoryMsg: 10 + size + corenum
139 * (size is always 3 * sizeof(int))
140 * 11 + base_va + size
141 * (size is always 3 * sizeof(int))
142 * GCMsg: 12/13 (size is always 1 * sizeof(int))
143 * 14 + size of msg + (num of objs to move + (start address
144 * + end address + dst core + start dst)+)?
145 * + (num of incoming objs + (start dst + orig core)+)?
146 * + (num of large obj lists + (start address + length
148 * 15 (size is always 1 * sizeof(int))
150 * (size is always 2 * sizeof(int))
151 * 17 + corenum + gcsendobjs + gcreceiveobjs
152 * (size is always 4 * sizeof(int))
153 * 18 + corenum + fulfilled blocks num + (finish compact(1) + current
154 * heap top)/(need mem(0) + mem need)
155 * (size is always 5 * sizeof(int))
157 * (size is always 2 * sizeof(int))
158 * 1a (size is always 1 * sizeof(int))
159 * 1b (size is always 1 * sizeof(int))
160 * 1c + size of msg + corenum + gcsendobjs + gcreceiveobjs
161 * (size is always 5 * sizeof(int))
162 * 1d + obj's address + request core
163 * (size is always 3 * sizeof(int))
164 * 1e + corenum + start addr + end addr
165 * (size is always 4 * sizeof(int))
166 * 1f + obj's address + corenum
167 * (size is always 3 * sizeof(int))
168 * 20 + obj's address + dst address
169 * (size is always 3 * sizeof(int))
170 * 21 (size is always 1 * sizeof(int))
171 * 22 + size of msg + corenum + current heap size
172 * + (num of large obj lists + (start address + length)+)?
173 * 23 + orig large obj ptr + new large obj ptr
174 * (size is always 3 * sizeof(int))
177 MSGSTART = 0xD0, // 0xD0
184 PROFILEOUTPUT, // 0xD7
185 PROFILEFINISH, // 0xD8
186 REDIRECTLOCK, // 0xD9
187 REDIRECTGROUNT, // 0xDa
188 REDIRECTDENY, // 0xDb
189 REDIRECTRELEASE, // 0xDc
190 STATUSCONFIRM, // 0xDd
191 STATUSREPORT, // 0xDe
199 GCSTARTCOMPACT, // 0xE5
200 GCSTARTMAPINFO, // 0xE6
201 GCSTARTFLUSH, // 0xE7
203 GCFINISHINIT, // 0xE9
204 GCFINISHMARK, // 0xEa
205 GCFINISHCOMPACT, // 0xEb
206 GCFINISHMAPINFO, // 0xEc
207 GCFINISHFLUSH, // 0xEd
209 GCMARKCONFIRM, // 0xEf
210 GCMARKREPORT, // 0xF0
213 GCMAPREQUEST, // 0xF3
216 GCLOBJREQUEST, // 0xF6
218 GCLOBJMAPPING, // 0xF8
222 #ifdef GC_CACHE_ADAPT
223 GCSTARTPOSTINIT, // 0xFa
225 GCFINISHPOSTINIT, // 0xFc
226 GCFINISHPREF, // 0xFd
227 #endif // GC_CACHE_ADAPT
232 /////////////////////////////////////////////////////////////////////////////////
233 // NOTE: BAMBOO_TOTALCORE -- number of the available cores in the processor.
234 // No greater than the number of all the cores in
236 // NUMCORES -- number of cores chosen to deploy the application. It can
237 // be greater than that required to fully parallelize the
238 // application. The same as NUMCORES.
239 // NUMCORESACTIVE -- number of cores that really execute the
240 // application. No greater than NUMCORES
241 // NUMCORES4GC -- number of cores for gc. No greater than NUMCORES.
242 // NOTE: currently only supports continuous cores as gc
243 // cores, i.e. 0~NUMCORES4GC-1
244 ////////////////////////////////////////////////////////////////////////////////
245 // data structures of status for termination
246 // only check working cores
247 volatile int corestatus[NUMCORESACTIVE]; // records status of each core
250 volatile int numsendobjs[NUMCORESACTIVE]; // records how many objects a core
252 volatile int numreceiveobjs[NUMCORESACTIVE]; // records how many objects a
254 volatile int numconfirm;
255 volatile bool waitconfirm;
257 int self_numsendobjs;
258 int self_numreceiveobjs;
260 // get rid of lock msgs for GC version
262 // data structures for locking
263 struct RuntimeHash locktable;
264 static struct RuntimeHash* locktbl = &locktable;
265 struct RuntimeHash * lockRedirectTbl;
266 struct RuntimeHash * objRedirectLockTbl;
277 // data structures for waiting objs
278 struct Queue objqueue;
279 struct Queue * totransobjqueue; // queue to hold objs to be transferred
280 // should be cleared whenever enter a task
282 // data structures for shared memory allocation
284 #define BAMBOO_BASE_VA 0xd000000
285 #elif defined TILERA_ZLINUX
287 #define BAMBOO_BASE_VA 0xd000000
288 #endif // MULTICORE_GC
291 #ifdef BAMBOO_MEMPROF
292 #define GC_BAMBOO_NUMCORES 56
294 #define GC_BAMBOO_NUMCORES 62
298 #include "structdefs.h"
299 #define BAMBOO_NUM_BLOCKS (NUMCORES4GC*(2+1)+3)
300 #define BAMBOO_PAGE_SIZE (64 * 64)
301 #define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE)
302 #define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_SMEM_SIZE) *(BAMBOO_NUM_BLOCKS))
304 #elif defined GC_CACHE_ADAPT
305 #define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+14))
306 #define BAMBOO_PAGE_SIZE (64 * 1024) // 64K
307 #ifdef GC_LARGEPAGESIZE
308 #define BAMBOO_PAGE_SIZE (4 * 64 * 1024)
309 #define BAMBOO_SMEM_SIZE (4 * (BAMBOO_PAGE_SIZE))
310 #elif defined GC_SMALLPAGESIZE
311 #define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE)
312 #elif defined GC_SMALLPAGESIZE2
313 #define BAMBOO_PAGE_SIZE (16 * 1024) // (4096)
314 #define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE)
315 #elif defined GC_LARGEPAGESIZE2
316 #define BAMBOO_PAGE_SIZE (4 * 64 * 1024) // 64K
317 #define BAMBOO_SMEM_SIZE ((BAMBOO_PAGE_SIZE))
319 #define BAMBOO_SMEM_SIZE (4 * (BAMBOO_PAGE_SIZE))
320 #endif // GC_LARGEPAGESIZE
321 #define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_SMEM_SIZE) * (BAMBOO_NUM_BLOCKS))
324 #ifdef GC_LARGESHAREDHEAP
325 #define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+2))
326 #elif defined GC_LARGESHAREDHEAP2
327 #define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+2))
329 #define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+3)) //(15 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5) 3G
331 #ifdef GC_LARGEPAGESIZE
332 #define BAMBOO_PAGE_SIZE (4 * 1024 * 1024) // (4096)
333 #define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE)
334 #elif defined GC_SMALLPAGESIZE
335 #define BAMBOO_PAGE_SIZE (256 * 1024) // (4096)
336 #define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE)
337 #elif defined GC_SMALLPAGESIZE2
338 #define BAMBOO_PAGE_SIZE (64 * 1024) // (4096)
339 #define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE)
341 #define BAMBOO_PAGE_SIZE (1024 * 1024) // (4096)
342 #define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE)
343 #endif // GC_LARGEPAGESIZE
344 #define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_SMEM_SIZE) * (BAMBOO_NUM_BLOCKS)) //(1024 * 1024 * 240) //((unsigned long long int)(3.0 * 1024 * 1024 * 1024)) // 3G
348 volatile bool gc_localheap_s;
352 #include "multicoregarbage.h"
355 SMEMLOCAL = 0x0,// 0x0, using local mem only
356 SMEMFIXED, // 0x1, use local mem in lower address space(1 block only)
357 // and global mem in higher address space
358 SMEMMIXED, // 0x2, like FIXED mode but use a threshold to control
359 SMEMGLOBAL, // 0x3, using global mem only
363 SMEMSTRATEGY bamboo_smem_mode; //-DSMEML: LOCAL; -DSMEMF: FIXED;
364 //-DSMEMM: MIXED; -DSMEMG: GLOBAL;
371 struct freeMemItem * next;
375 struct freeMemItem * head;
376 struct freeMemItem * backuplist; // hold removed freeMemItem for reuse;
377 // only maintain 1 freeMemItem
380 // table recording the number of allocated bytes on each block
381 // Note: this table resides on the bottom of the shared heap for all cores
383 volatile int * bamboo_smemtbl;
384 volatile int bamboo_free_block;
385 int bamboo_reserved_smem; // reserved blocks on the top of the shared heap
386 // e.g. 20% of the heap and should not be allocated
387 // otherwise gc is invoked
388 volatile INTPTR bamboo_smem_zero_top;
389 #define BAMBOO_SMEM_ZERO_UNIT_SIZE (4 * 1024) // 4KB
391 //volatile mspace bamboo_free_msp;
392 INTPTR bamboo_free_smemp;
393 int bamboo_free_smem_size;
395 volatile bool smemflag;
396 volatile INTPTR bamboo_cur_msp;
397 volatile int bamboo_smem_size;
402 // data structures for profile mode
405 #define TASKINFOLENGTH 3000 // 0
406 #ifdef PROFILE_INTERRUPT
407 #define INTERRUPTINFOLENGTH 50 //0
408 #endif // PROFILE_INTERRUPT
413 typedef struct task_info {
415 unsigned long long startTime;
416 unsigned long long endTime;
417 unsigned long long exitIndex;
418 struct Queue * newObjs;
421 TaskInfo * taskInfoArray[TASKINFOLENGTH];
423 bool taskInfoOverflow;
424 #ifdef PROFILE_INTERRUPT
425 typedef struct interrupt_info {
426 unsigned long long startTime;
427 unsigned long long endTime;
430 InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH];
431 int interruptInfoIndex;
432 bool interruptInfoOverflow;
433 #endif // PROFILE_INTERRUPT
434 volatile int profilestatus[NUMCORESACTIVE]; // records status of each core
437 #endif // #ifdef PROFILE
442 /////////////////////////////////////////////////////////////
444 ////////////////////////////////////////////////////////////
445 // these are functions should be implemented in //
446 // multicore runtime for any multicore processors //
447 ////////////////////////////////////////////////////////////
450 INLINE void initialization(void);
451 INLINE void initCommunication(void);
452 INLINE void fakeExecution(void);
453 INLINE void terminate(void);
454 INLINE void initlock(struct ___Object___ * v);
455 #ifdef BAMBOO_MEMPROF
456 INLINE void terminatememprof(void);
459 // lock related functions
460 bool getreadlock(void* ptr);
461 void releasereadlock(void* ptr);
462 bool getwritelock(void* ptr);
463 void releasewritelock(void* ptr);
464 bool getwritelock_I(void* ptr);
465 void releasewritelock_I(void * ptr);
467 void releasewritelock_r(void * lock, void * redirectlock);
469 /* this function is to process lock requests.
470 * can only be invoked in receiveObject() */
471 // if return -1: the lock request is redirected
472 // 0: the lock request is approved
473 // 1: the lock request is denied
474 INLINE int processlockrequest(int locktype,
480 INLINE void processlockrelease(int locktype,
485 // msg related functions
486 INLINE void send_hanging_msg(bool isInterrupt);
487 INLINE void send_msg_1(int targetcore,
490 INLINE void send_msg_2(int targetcore,
494 INLINE void send_msg_3(int targetcore,
499 INLINE void send_msg_4(int targetcore,
505 INLINE void send_msg_5(int targetcore,
512 INLINE void send_msg_6(int targetcore,
520 INLINE void cache_msg_1(int targetcore,
522 INLINE void cache_msg_2(int targetcore,
525 INLINE void cache_msg_3(int targetcore,
529 INLINE void cache_msg_4(int targetcore,
534 INLINE void cache_msg_5(int targetcore,
540 INLINE void cache_msg_6(int targetcore,
547 INLINE void transferObject(struct transObjInfo * transObj);
548 INLINE int receiveMsg(uint32_t send_port_pending);
551 INLINE void transferMarkResults();
555 INLINE void profileTaskStart(char * taskname);
556 INLINE void profileTaskEnd(void);
557 void outputProfileData();
558 #endif // #ifdef PROFILE
559 ///////////////////////////////////////////////////////////
561 /////////////////////////////////////////////////////////////////////////////
562 // For each version of BAMBOO runtime, there should be a header file named //
563 // runtime_arch.h defining the following MACROS: //
564 // BAMBOO_NUM_OF_CORE: the # of current residing core //
565 // BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core //
566 // BAMBOO_COORDS(c, x, y): convert the cpu # to coords (*x, *y) //
567 // BAMBOO_DEBUGPRINT(x): print out integer x //
568 // BAMBOO_DEBUGPRINT_REG(x): print out value of variable x //
569 // BAMBOO_EXIT_APP(x): exit the whole application //
570 // BAMBOO_EXIT(x): error exit routine with error # //
571 // BAMBOO_DIE(x): error exit routine with error msg //
572 // BAMBOO_GET_EXE_TIME(): routine to get current clock cycle number //
573 // BAMBOO_MSG_AVAIL(): checking if there are msgs coming in //
574 // BAMBOO_GCMSG_AVAIL(): checking if there are gcmsgs coming in //
575 // BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(): change to runtime mode from //
577 // BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(): change to client mode from //
579 // BAMBOO_ENTER_SEND_MODE_FROM_CLIENT(): change to send mode from //
581 // BAMBOO_ENTER_CLIENT_MODE_FROM_SEND(): change to client mode from //
583 // BAMBOO_ENTER_RUNTIME_MODE_FROM_SEND(): change to runtime mode from //
585 // BAMBOO_ENTER_SEND_MODE_FROM_RUNTIME(): change to send mode from //
587 // BAMBOO_WAITING_FOR_LOCK(): routine executed while waiting for lock //
588 // request response //
589 // BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of //
590 // whose size in bytes is y on local memory //
591 // which is given by the hypervisor //
592 // BAMBOO_LOCAL_MEM_FREE(x): free space with ptr x on local memory //
593 // BAMBOO_LOCAL_MEM_CLOSE(): close the local heap //
594 // BAMBOO_LOCAL_MEM_CALLOC_S(x, y): allocate an array of x elements each of//
595 // whose size in bytes is y on local //
596 // memory which is not from the hypervisor//
597 // but is allocated from the free memory //
598 // BAMBOO_LOCAL_MEM_FREE_S(x): free space with ptr x on self-allocated //
600 // BAMBOO_LOCAL_MEM_CLOSE_S(): close the self-allocated local heap //
601 // BAMBOO_SHARE_MEM_CALLOC_I(x, y): allocate an array of x elements each of//
602 // whose size in bytes is y on shared memory//
603 // BAMBOO_SHARE_MEM_CLOSE(): close the shared heap //
604 // BAMBOO_CACHE_LINE_SIZE: the cache line size //
605 // BAMBOO_CACHE_LINE_MASK: mask for a cache line //
606 // BAMBOO_CACHE_FLUSH_RANGE(x, y): flush cache lines started at x with //
608 // BAMBOO_CACHE_FLUSH_ALL(): flush the whole cache of a core if necessary //
609 // BAMBOO_MEMSET_WH(x, y, z): memset the specified region of memory (start //
610 // address x, size z) to value y with write //
611 // hint, the processor will not fetch the //
612 // current content of the memory and directly //
614 // BAMBOO_CLEAN_DTLB(): zero-out all the dtlb entries //
615 // BAMBOO_CACHE_FLUSH_L2(): Flush the contents of this tile's L2 back to //
617 // BAMBOO_CACHE_FLUSH_RANGE_NO_FENCE(x, y): flush a range of mem without //
619 // BAMBOO_CACHE_MEM_FENCE_INCOHERENT(): fence to guarantee visibility of //
620 // stores to incoherent memory //
621 /////////////////////////////////////////////////////////////////////////////
623 #endif // #ifdef MULTICORE
624 #endif // #ifdef TASK
625 #endif // #ifndef MULTICORE_RUNTIME