bug fixes in multicore gc
[IRC.git] / Robust / src / Runtime / multicoreruntime.h
1 #ifndef MULTICORE_RUNTIME
2 #define MULTICORE_RUNTIME
3
4 ////////////////////////////////////////////////////////////////
5 // global variables                                          //
6 ///////////////////////////////////////////////////////////////
7
// Buffers and cursors shared by the inter-core message code.
#define BAMBOO_MSG_BUF_LENGTH 30
#define BAMBOO_OUT_BUF_LENGTH 300

// message buffer of BAMBOO_MSG_BUF_LENGTH words plus its bookkeeping
// (presumably the message currently being received -- confirm against
// the receive path)
int msgdata[BAMBOO_MSG_BUF_LENGTH];
int msgtype, msgdataindex, msglength;

// cache of BAMBOO_OUT_BUF_LENGTH outgoing words; outmsgindex and outmsglast
// are advanced modulo the buffer length by the OUTMSG_* macros
int outmsgdata[BAMBOO_OUT_BUF_LENGTH];
int outmsgindex, outmsglast, outmsgleft;

bool isMsgHanging;
volatile bool isMsgSending;
21
/*
 * Helpers for the circular outgoing-message cache (outmsgdata).
 *
 * OUTMSG_LASTINDEXINC() and OUTMSG_CACHE() each expand to exactly one
 * statement (do { ... } while(0)), so they stay intact when used as the
 * unbraced body of an if/else or a loop. Call them with a trailing
 * semicolon.
 */

// advance outmsgindex by one slot, wrapping around at the end of the buffer
#define OUTMSG_INDEXINC() \
        (outmsgindex = (outmsgindex + 1) % (BAMBOO_OUT_BUF_LENGTH))

// advance outmsglast by one slot, wrapping around at the end of the buffer;
// if it then equals outmsgindex, call BAMBOO_EXIT(0xd001)
#define OUTMSG_LASTINDEXINC() \
        do { \
                outmsglast = (outmsglast + 1) % (BAMBOO_OUT_BUF_LENGTH); \
                if(outmsglast == outmsgindex) { \
                        BAMBOO_EXIT(0xd001); \
                } \
        } while(0)

// store n at outmsgdata[outmsglast], then advance outmsglast
#define OUTMSG_CACHE(n) \
        do { \
                outmsgdata[outmsglast] = (n); \
                OUTMSG_LASTINDEXINC(); \
        } while(0)
34
/* Message format:
 *      type + Msgbody
 *
 * The type word is one of the MSGTYPE values defined below (listed here
 * in hex). Layout of each message, as the sequence of words it carries:
 *
 * ObjMsg: 0 + size of msg + obj's address + (task index + param index)+
 * StallMsg: 1 + corenum + sendobjs + receiveobjs
 *             (size is always 4 * sizeof(int))
 * LockMsg: 2 + lock type + obj pointer + lock + request core
 *            (size is always 5 * sizeof(int))
 *          3/4/5 + lock type + obj pointer + lock
 *            (size is always 4 * sizeof(int))
 *          8 + lock type + obj pointer + redirect lock + root request core
 *            + request core
 *            (size is always 6 * sizeof(int))
 *          9/a + lock type + obj pointer + redirect lock
 *            (size is always 4 * sizeof(int))
 *          b + lock type + lock + redirect lock
 *            (size is always 4 * sizeof(int))
 *          lock type: 0 -- read; 1 -- write
 * ProfileMsg: 6 + totalexetime
 *               (size is always 2 * sizeof(int))
 *             7 + corenum
 *               (size is always 2 * sizeof(int))
 * StatusMsg: c (size is always 1 * sizeof(int))
 *            d + status + corenum + sendobjs + receiveobjs
 *              (size is always 5 * sizeof(int))
 *            status: 0 -- stall; 1 -- busy
 * TerminateMsg: e (size is always 1 * sizeof(int))
 * MemoryMsg: f + size + corenum
 *              (size is always 3 * sizeof(int))
 *           10 + base_va + size
 *              (size is always 3 * sizeof(int))
 * GCMsg: 11/12 (size is always 1 * sizeof(int))
 *        13 + size of msg + (num of objs to move + (start address
 *           + end address + dst core + start dst)+)?
 *           + (num of incoming objs + (start dst + orig core)+)?
 *           + (num of large obj lists + (start address + length
 *           + start dst)+)?
 *        14 (size is always 1 * sizeof(int))
 *        15 + corenum
 *           (size is always 2 * sizeof(int))
 *        16 + corenum + gcsendobjs + gcreceiveobjs
 *           (size is always 4 * sizeof(int))
 *        17 + corenum + fulfilled blocks num + (finish compact(1) + current
 *           heap top)/(need mem(0) + mem need)
 *           (size is always 5 * sizeof(int))
 *        18 + corenum
 *           (size is always 2 * sizeof(int))
 *        19 (size is always 1 * sizeof(int))
 *        1a (size is always 1 * sizeof(int))
 *        1b + size of msg + corenum + gcsendobjs + gcreceiveobjs
 *           (size is always 5 * sizeof(int))
 *        1c + obj's address
 *           (size is always 2 * sizeof(int))
 *        1d + corenum + start addr + end addr
 *           (size is always 4 * sizeof(int))
 *        1e + obj's address + corenum
 *           (size is always 3 * sizeof(int))
 *        1f + obj's address + dst address
 *           (size is always 3 * sizeof(int))
 *        20 (size is always 1 * sizeof(int))
 *        21 + size of msg + corenum + current heap size
 *           + (num of large obj lists + (start address + length)+)?
 *        22 + orig large obj ptr + new large obj ptr
 *           (size is always 3 * sizeof(int))
 */
typedef enum {
        TRANSOBJ        = 0x00,  // transfer object
        TRANSTALL       = 0x01,  // transfer stall msg
        LOCKREQUEST     = 0x02,  // lock request
        LOCKGROUNT      = 0x03,  // lock grant
        LOCKDENY        = 0x04,  // lock deny
        LOCKRELEASE     = 0x05,  // lock release
        // added for profile info
        PROFILEOUTPUT   = 0x06,  // transfer profile output msg
        PROFILEFINISH   = 0x07,  // transfer profile output finish msg
        // added for alias lock strategy
        REDIRECTLOCK    = 0x08,  // redirect lock request
        REDIRECTGROUNT  = 0x09,  // lock grant with redirect info
        REDIRECTDENY    = 0x0a,  // lock deny with redirect info
        REDIRECTRELEASE = 0x0b,  // lock release with redirect info
        STATUSCONFIRM   = 0x0c,  // status confirm request
        STATUSREPORT    = 0x0d,  // status report msg
        TERMINATE       = 0x0e,  // terminate
        MEMREQUEST      = 0x0f,  // requiring for new memory
        MEMRESPONSE     = 0x10,  // response for new memory request
#ifdef MULTICORE_GC
        GCSTARTINIT     = 0x11,  // GC init phase start
        GCSTART         = 0x12,  // GC start
        GCSTARTCOMPACT  = 0x13,  // compact phase start
        GCSTARTFLUSH    = 0x14,  // flush phase start
        GCFINISHINIT    = 0x15,  // init phase finish
        GCFINISHMARK    = 0x16,  // mark phase finish
        GCFINISHCOMPACT = 0x17,  // compact phase finish
        GCFINISHFLUSH   = 0x18,  // flush phase finish
        GCFINISH        = 0x19,  // GC finish
        GCMARKCONFIRM   = 0x1a,  // marked phase finish confirm request
        GCMARKREPORT    = 0x1b,  // marked phase finish confirm response
        GCMARKEDOBJ     = 0x1c,  // markedObj msg
        GCMOVESTART     = 0x1d,  // start moving objs msg
        GCMAPREQUEST    = 0x1e,  // ask for mapping info of a markedObj
        GCMAPINFO       = 0x1f,  // mapping info of a markedObj
        GCLOBJREQUEST   = 0x20,  // large objs info request
        GCLOBJINFO      = 0x21,  // large objs info response
        GCLOBJMAPPING   = 0x22,  // large objs mapping info
#endif
        MSGEND                   // last enumerator + 1: 0x23 with
                                 // MULTICORE_GC, 0x11 without
} MSGTYPE;
177
178 // data structures of status for termination
179 int corestatus[NUMCORES]; // records status of each core
180                           // 1: running tasks
181                           // 0: stall
182 int numsendobjs[NUMCORES]; // records how many objects a core has sent out
183 int numreceiveobjs[NUMCORES]; // records how many objects a core has received
184 int numconfirm;
185 bool waitconfirm;
186 bool busystatus;
187 int self_numsendobjs;
188 int self_numreceiveobjs;
189
190 // get rid of lock msgs for GC version
191 #ifndef MULTICORE_GC
192 // data structures for locking
193 struct RuntimeHash locktable;
194 static struct RuntimeHash* locktbl = &locktable;
195 struct RuntimeHash * lockRedirectTbl;
196 struct RuntimeHash * objRedirectLockTbl;
197 #endif
// Pair of ints kept per lock: a redirect lock and a value.
// NOTE(review): exact meaning of each field is not visible here -- confirm
// against the lock table code.
struct LockValue {
        int redirectlock, value;
};
// State of the lock request in flight.
// NOTE(review): presumably filled in when a lock response arrives --
// confirm against the lock handling code.
int lockobj, lock2require, lockresult;
bool lockflag;
206
207 // data structures for waiting objs
208 struct Queue objqueue;
209 struct Queue * totransobjqueue; // queue to hold objs to be transferred
210                                 // should be cleared whenever enter a task
211
// data structures for shared memory allocation
#define BAMBOO_BASE_VA 0xd000000
#ifdef GC_DEBUG
// debug configuration: tiny pages, two pages per core
#include "structdefs.h"
#define BAMBOO_NUM_PAGES (NUMCORES*(1+1))
#define BAMBOO_PAGE_SIZE (16 * 16)
#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE)
#else
#define BAMBOO_NUM_PAGES (1024 * 512)
#define BAMBOO_PAGE_SIZE (4096)
#define BAMBOO_SMEM_SIZE (16 * BAMBOO_PAGE_SIZE)
#endif
// Page size times number of pages. The leading 1u forces unsigned
// arithmetic: in the non-debug configuration the product is
// 4096 * (1024 * 512) = 2^31, which does not fit in a 32-bit signed int
// (signed overflow is undefined behavior). The macro therefore has type
// unsigned int; it remains usable in #if expressions.
#define BAMBOO_SHARED_MEM_SIZE \
        (1u * (BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES))
225
226 #ifdef MULTICORE_GC
227 #include "multicoregarbage.h"
228
229 struct freeMemItem {
230         INTPTR ptr;
231         int size;
232         struct freeMemItem * next;
233 };
234
235 struct freeMemList {
236         struct freeMemItem * head;
237         struct freeMemItem * tail;
238 };
239
240 bool smemflag;
241 struct freeMemList * bamboo_free_mem_list;
242 INTPTR bamboo_cur_msp;
243 int bamboo_smem_size;
244 #else
245 bool smemflag;
246 mspace bamboo_free_msp;
247 INTPTR bamboo_cur_msp;
248 int bamboo_smem_size;
249 #endif
250
251 // for test TODO
252 int total_num_t6;
253
// data structures for profile mode
#ifdef PROFILE

#define TASKINFOLENGTH 30000
//#define INTERRUPTINFOLENGTH 500

// One profiling record: task name, start/end time, exit index and a
// queue of new objs.
struct task_info {
        char *taskName;
        unsigned long long startTime;
        unsigned long long endTime;
        unsigned long long exitIndex;
        struct Queue *newObjs;
};
typedef struct task_info TaskInfo;

/*typedef struct interrupt_info {
   int startTime;
   int endTime;
   } InterruptInfo;*/

bool stall;
//bool isInterrupt;
int totalexetime;

// table of up to TASKINFOLENGTH record pointers plus its cursor and
// overflow flag
TaskInfo *taskInfoArray[TASKINFOLENGTH];
int taskInfoIndex;
bool taskInfoOverflow;
/*InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH];
   int interruptInfoIndex;
   bool interruptInfoOverflow;*/

int profilestatus[NUMCORES]; // status of each core: 1 = running tasks,
                             //                      0 = stall
#endif // #ifdef PROFILE
287
#if !defined(INTERRUPT)
bool reside; // exists only in builds without INTERRUPT
#endif
291 /////////////////////////////////////////////////////////////
292
293 ////////////////////////////////////////////////////////////
294 // these are functions should be implemented in           //
295 // multicore runtime for any multicore processors         //
296 ////////////////////////////////////////////////////////////
297 #ifdef TASK
298 #ifdef MULTICORE
// Core runtime entry points. They are declared here and are to be
// implemented by the multicore runtime of each processor; all of them are
// forced inline.
__attribute__((always_inline)) inline void initialization(void);
__attribute__((always_inline)) inline void initCommunication(void);
__attribute__((always_inline)) inline void fakeExecution(void);
__attribute__((always_inline)) inline void terminate(void);
__attribute__((always_inline)) inline void initlock(struct ___Object___ *v);
304
// lock related functions

// read locks
bool getreadlock(void *ptr);
void releasereadlock(void *ptr);

// write locks
// NOTE(review): the "_I" variants are presumably meant for a different
// calling context than the plain ones -- confirm against the definitions.
bool getwritelock(void *ptr);
bool getwritelock_I(void *ptr);
void releasewritelock(void *ptr);
void releasewritelock_I(void *ptr);

// release with redirect lock; available in the non-GC build only
#if !defined(MULTICORE_GC)
void releasewritelock_r(void *lock, void *redirectlock);
#endif
/* Processes a lock request.
 * Can only be invoked in receiveObject().
 * Return value:
 *   -1: the lock request is redirected
 *    0: the lock request is approved
 *    1: the lock request is denied
 */
__attribute__((always_inline))
inline int processlockrequest(int locktype,
                              int lock,
                              int obj,
                              int requestcore,
                              int rootrequestcore,
                              bool cache);

/* Counterpart of processlockrequest() for lock releases. */
__attribute__((always_inline))
inline void processlockrelease(int locktype,
                               int lock,
                               int redirectlock,
                               bool redirect);
330
// msg related functions
//
// send_msg_N() takes the target core followed by N message words
// n0 .. n(N-1). According to the message-format table in this header a
// message starts with its type word, so n0 is presumably the MSGTYPE
// value -- confirm against the definitions.

// Takes no arguments. Declared with (void) rather than (): in C an empty
// parameter list in a declaration leaves the parameters unspecified, so
// the compiler would not reject a call that passes arguments.
inline void send_hanging_msg(void) __attribute__((always_inline));
inline void send_msg_1(int targetcore,
                       unsigned long n0) __attribute__((always_inline));
inline void send_msg_2(int targetcore,
                       unsigned long n0,
                       unsigned long n1) __attribute__((always_inline));
inline void send_msg_3(int targetcore,
                       unsigned long n0,
                       unsigned long n1,
                       unsigned long n2) __attribute__((always_inline));
inline void send_msg_4(int targetcore,
                       unsigned long n0,
                       unsigned long n1,
                       unsigned long n2,
                       unsigned long n3) __attribute__((always_inline));
inline void send_msg_5(int targetcore,
                       unsigned long n0,
                       unsigned long n1,
                       unsigned long n2,
                       unsigned long n3,
                       unsigned long n4) __attribute__((always_inline));
inline void send_msg_6(int targetcore,
                       unsigned long n0,
                       unsigned long n1,
                       unsigned long n2,
                       unsigned long n3,
                       unsigned long n4,
                       unsigned long n5) __attribute__((always_inline));
// cache_msg_N() mirrors send_msg_N(): the target core followed by N message
// words n0 .. n(N-1).
// NOTE(review): presumably the words are queued in outmsgdata for a later
// send rather than sent immediately -- confirm against the definitions.
__attribute__((always_inline))
inline void cache_msg_2(int targetcore,
                        unsigned long n0, unsigned long n1);
__attribute__((always_inline))
inline void cache_msg_3(int targetcore,
                        unsigned long n0, unsigned long n1,
                        unsigned long n2);
__attribute__((always_inline))
inline void cache_msg_4(int targetcore,
                        unsigned long n0, unsigned long n1,
                        unsigned long n2, unsigned long n3);
__attribute__((always_inline))
inline void cache_msg_5(int targetcore,
                        unsigned long n0, unsigned long n1,
                        unsigned long n2, unsigned long n3,
                        unsigned long n4);
__attribute__((always_inline))
inline void cache_msg_6(int targetcore,
                        unsigned long n0, unsigned long n1,
                        unsigned long n2, unsigned long n3,
                        unsigned long n4, unsigned long n5);

// object transfer and message reception; transferObject is the only
// function of this group that is not forced inline
inline void transferObject(struct transObjInfo *transObj);
__attribute__((always_inline))
inline int receiveMsg(void);
387
#ifdef MULTICORE_GC
// GC build only. Takes no arguments: declared with (void) because an empty
// parameter list "()" in a C declaration leaves the parameters unspecified
// and turns off argument checking at call sites.
inline void transferMarkResults(void) __attribute__((always_inline));
#endif
391
#ifdef PROFILE
// Profiling hooks, available only when PROFILE is defined.
inline void profileTaskStart(char * taskname) __attribute__((always_inline));
inline void profileTaskEnd(void) __attribute__((always_inline));
// Takes no arguments: declared with (void) because an empty parameter
// list "()" in a C declaration leaves the parameters unspecified.
void outputProfileData(void);
#endif  // #ifdef PROFILE
397 ///////////////////////////////////////////////////////////
398
399 /////////////////////////////////////////////////////////////////////////////
400 // For each version of BAMBOO runtime, there should be a header file named //
401 // runtime_arch.h defining following MACROS:                               //
402 // BAMBOO_TOTALCORE: the total # of cores available in the processor       //
403 // BAMBOO_NUM_OF_CORE: the # of current residing core                      //
404 // BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core        //
405 // BAMBOO_DEBUGPRINT(x): print out integer x                               //
406 // BAMBOO_DEBUGPRINT_REG(x): print out value of variable x                 //
407 // BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of  //
408 //                                whose size in bytes is y on local memory //
409 // BAMBOO_LOCAL_MEM_FREE(x): free space with ptr x on local memory         //
410 // BAMBOO_SHARE_MEM_CALLOC(x, y): allocate an array of x elements each of  //
411 //                                whose size in bytes is y on shared memory//
412 // BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE()                               //
413 // BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE(): locks for global data        //
414 //                                            structures related to obj    //
415 //                                            queue                        //
416 // BAMBOO_START_CRITICAL_SECTION_STATUS()                                  //
417 // BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(): locks for global data structures//
418 //                                         related to status data          //
419 // BAMBOO_START_CRITICAL_SECTION_MSG()                                     //
420 // BAMBOO_CLOSE_CRITICAL_SECTION_MSG(): locks for global data structures   //
421 //                                      related to msg data                //
422 // BAMBOO_START_CRITICAL_SECTION_LOCK()                                    //
423 // BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(): locks for global data structures  //
424 //                                       related to lock table             //
425 // BAMBOO_START_CRITICAL_SECTION_MEM()                                     //
426 // BAMBOO_CLOSE_CRITICAL_SECTION_MEM(): locks for allocating memory        //
427 // BAMBOO_START_CRITICAL_SECTION()                                         //
428 // BAMBOO_CLOSE_CRITICAL_SECTION(): locks for all global data structures   //
429 // BAMBOO_WAITING_FOR_LOCK(): routine executed while waiting for lock      //
430 //                            request response                             //
431 // BAMBOO_CACHE_LINE_SIZE: the cache line size                             //
432 // BAMBOO_CACHE_LINE_MASK: mask for a cache line                           //
433 // BAMBOO_CACHE_FLUSH_RANGE(x, y): flush cache lines started at x with     //
434 //                                 length y                                //
435 // BAMBOO_CACHE_FLUSH_ALL(): flush the whole cache of a core if necessary  //
436 // BAMBOO_EXIT(x): exit routine                                            //
437 // BAMBOO_MSG_AVAIL(): checking if there are msgs coming in                //
438 // BAMBOO_GCMSG_AVAIL(): checking if there are gcmsgs coming in            //
439 // BAMBOO_GET_EXE_TIME(): routine to get current clock cycle number        //
440 /////////////////////////////////////////////////////////////////////////////
441
442 #endif  // #ifdef MULTICORE
443 #endif  // #ifdef TASK
444 #endif  // #ifndef MULTICORE_RUNTIME