2 #include "debugmacro.h"
4 #include "machinepile.h"
5 #include "altmlookup.h"
8 #include "altprelookup.h"
9 #include "threadnotify.h"
11 #include "addUdpEnhance.h"
12 #include "addPrefetchEnhance.h"
20 #include "abortreaders.h"
25 #define CONFIG_FILENAME "dstm.conf"
27 //#define LOGEVENTS //turn on Logging events
29 char bigarray[16*1024*1024];
31 #define LOGEVENT(x) { \
41 char bigarray1[6*1024*1024];
42 unsigned int bigarray2[6*1024*1024];
43 unsigned int bigarray3[6*1024*1024];
44 long long bigarray4[6*1024*1024];
45 int bigarray5[6*1024*1024];
47 #define LOGTIME(x,y,z,a,b) { \
57 #define LOGTIME(x,y,z,a,b)
60 /* Thread transaction variables */
61 __thread objstr_t *t_cache;
62 __thread struct ___Object___ *revertlist;
63 __thread struct timespec exponential_backoff;
64 __thread int count_exponential_backoff;
65 __thread const int max_exponential_backoff = 1000; // safety limit
67 __thread int trans_allocation_bytes;
73 __thread jmp_buf aborttrans;
76 int globalid=0; /* This variable is a unique global identifier for a sendPrefetch request */
78 /* Global Variables */
79 extern int classsize[];
80 pfcstats_t *evalPrefetch;
81 extern int numprefetchsites; //Global variable containing number of prefetch sites
82 extern pthread_mutex_t mainobjstore_mutex; // Mutex to lock main Object store
83 pthread_mutex_t prefetchcache_mutex; // Mutex to lock Prefetch Cache
84 pthread_mutexattr_t prefetchcache_mutex_attr; /* Attribute for lock to make it a recursive lock */
85 extern prehashtable_t pflookup; //Global Prefetch cache's lookup table
86 pthread_t wthreads[NUM_THREADS]; //Worker threads for working on the prefetch queue
87 pthread_t tPrefetch; /* Primary Prefetch thread that processes the prefetch queue */
88 extern objstr_t *mainobjstore;
89 unsigned int myIpAddr;
90 unsigned int *hostIpAddrs;
93 int myIndexInHostArray;
94 unsigned int oidsPerBlock;
97 sockPoolHashTable_t *transReadSockPool;
98 sockPoolHashTable_t *transPrefetchSockPool;
99 sockPoolHashTable_t *transRequestSockPool;
100 pthread_mutex_t notifymutex;
101 pthread_mutex_t atomicObjLock;
103 /***********************************
104 * Global Variables for statistics
105 **********************************/
106 int numTransCommit = 0;
107 int numTransAbort = 0;
108 int nchashSearch = 0;
109 int nmhashSearch = 0;
110 int nprehashSearch = 0;
111 int ndirtyCacheObj = 0;
116 int totalObjSize = 0;
117 int sendRemoteReq = 0;
120 void printhex(unsigned char *, int);
121 plistnode_t *createPiles();
122 plistnode_t *sortPiles(plistnode_t *pileptr);
124 /*******************************
125 * Send and Recv function calls
126 *******************************/
127 void send_data(int fd, void *buf, int buflen) {
128 char *buffer = (char *)(buf);
132 numbytes = send(fd, buffer, size, MSG_NOSIGNAL);
133 bytesSent = bytesSent + numbytes;
134 if (numbytes == -1) {
/* Buffered send: stages buflen bytes from buffer in sendbuffer and writes to
 * fd through send_data() when a size threshold is crossed.
 *   fd         - descriptor handed unchanged to send_data()
 *   sendbuffer - staging buffer; offset counts the bytes currently pending in buf
 *   buffer     - data to be sent
 *   buflen     - number of bytes in buffer
 * NOTE(review): the statements between the direct send and the memcpy are not
 * visible here; presumably the oversize branch returns before staging -- confirm. */
143 void send_buf(int fd, struct writestruct * sendbuffer, void *buffer, int buflen) {
/* Pending bytes plus the new data would exceed WMAXBUF: flush what is pending,
 * then send the new data directly without staging it. */
144 if (buflen+sendbuffer->offset>WMAXBUF) {
145 send_data(fd, sendbuffer->buf, sendbuffer->offset);
146 sendbuffer->offset=0;
147 send_data(fd, buffer, buflen);
/* Stage the new data behind the bytes already pending. */
150 memcpy(&sendbuffer->buf[sendbuffer->offset], buffer, buflen);
151 sendbuffer->offset+=buflen;
/* Flush once the pending byte count passes WTOP. */
152 if (sendbuffer->offset>WTOP) {
153 send_data(fd, sendbuffer->buf, sendbuffer->offset);
154 sendbuffer->offset=0;
/* Like send_buf(), but the staged data is always written out: after copying
 * buffer into sendbuffer the whole pending region is sent and offset is reset.
 *   fd         - descriptor handed unchanged to send_data()
 *   sendbuffer - staging buffer; offset counts the bytes currently pending in buf
 *   buffer     - data to be sent
 *   buflen     - number of bytes in buffer
 * NOTE(review): the statements between the direct send and the memcpy are not
 * visible here; presumably the oversize branch returns before staging -- confirm. */
158 void forcesend_buf(int fd, struct writestruct * sendbuffer, void *buffer, int buflen) {
/* Pending bytes plus the new data would exceed WMAXBUF: flush what is pending,
 * then send the new data directly. */
159 if (buflen+sendbuffer->offset>WMAXBUF) {
160 send_data(fd, sendbuffer->buf, sendbuffer->offset);
161 sendbuffer->offset=0;
162 send_data(fd, buffer, buflen);
/* Append the new data and flush everything unconditionally. */
165 memcpy(&sendbuffer->buf[sendbuffer->offset], buffer, buflen);
166 sendbuffer->offset+=buflen;
167 send_data(fd, sendbuffer->buf, sendbuffer->offset);
168 sendbuffer->offset=0;
/* Thin wrapper around recv(): forwards all four arguments unchanged and
 * returns recv()'s result (byte count, or -1 on error) to the caller. */
171 int recvw(int fd, void *buf, int len, int flags) {
172 return recv(fd, buf, len, flags);
175 void recv_data_buf(int fd, struct readstruct * readbuffer, void *buffer, int buflen) {
176 char *buf=(char *)buffer;
177 int numbytes=readbuffer->head-readbuffer->tail;
181 memcpy(buf, &readbuffer->buf[readbuffer->tail], numbytes);
182 readbuffer->tail+=numbytes;
189 if (buflen>=MAXBUF) {
190 recv_data(fd, buf, buflen);
199 int numbytes = recvw(fd, &readbuffer->buf[readbuffer->head], maxbuf, 0);
200 if (numbytes == -1) {
206 readbuffer->head+=numbytes;
209 memcpy(buf,readbuffer->buf,obufflen);
210 readbuffer->tail=obufflen;
213 int recv_data_errorcode_buf(int fd, struct readstruct * readbuffer, void *buffer, int buflen) {
214 char *buf=(char *)buffer;
216 int numbytes=readbuffer->head-readbuffer->tail;
220 memcpy(buf, &readbuffer->buf[readbuffer->tail], numbytes);
221 readbuffer->tail+=numbytes;
228 if (buflen>=MAXBUF) {
229 return recv_data_errorcode(fd, buf, buflen);
237 int numbytes = recvw(fd, &readbuffer->buf[readbuffer->head], maxbuf, 0);
247 readbuffer->head+=numbytes;
250 memcpy(buf,readbuffer->buf,obufflen);
251 readbuffer->tail=obufflen;
256 void recv_data(int fd, void *buf, int buflen) {
257 char *buffer = (char *)(buf);
261 numbytes = recvw(fd, buffer, size, 0);
262 bytesRecv = bytesRecv + numbytes;
263 if (numbytes == -1) {
272 int recv_data_errorcode(int fd, void *buf, int buflen) {
273 char *buffer = (char *)(buf);
277 numbytes = recvw(fd, buffer, size, 0);
280 if (numbytes == -1) {
291 void printhex(unsigned char *ptr, int numBytes) {
293 for (i = 0; i < numBytes; i++) {
295 printf("0%x ", ptr[i]);
297 printf("%x ", ptr[i]);
303 inline int arrayLength(int *array) {
305 for(i=0; array[i] != -1; i++)
310 inline int findmax(int *array, int arraylength) {
313 for(i = 0; i < arraylength; i++) {
321 #define INLINEPREFETCH
322 #define PREFTHRESHOLD 0
324 /* This function is a prefetch call generated by the compiler that
325 * populates the shared primary prefetch queue*/
326 void prefetch(int siteid, int ntuples, unsigned int *oids, unsigned short *endoffsets, short *arrayfields) {
327 /* Allocate for the queue node*/
328 int qnodesize = 2*sizeof(int) + ntuples * (sizeof(unsigned short) + sizeof(unsigned int)) + endoffsets[ntuples - 1] * sizeof(short);
330 #ifdef INLINEPREFETCH
334 node=getmemory(qnodesize);
335 if (node==NULL&&attempted)
339 char *node=getmemory(qnodesize);
341 int top=endoffsets[ntuples-1];
347 /* Set queue node values */
349 /* TODO: Remove this after testing */
350 evalPrefetch[siteid].callcount++;
352 *((int *)(node))=siteid;
353 *((int *)(node + sizeof(int))) = ntuples;
355 memcpy(node+len, oids, ntuples*sizeof(unsigned int));
356 memcpy(node+len+ntuples*sizeof(unsigned int), endoffsets, ntuples*sizeof(unsigned short));
357 memcpy(node+len+ntuples*(sizeof(unsigned int)+sizeof(short)), arrayfields, top*sizeof(short));
359 #ifdef INLINEPREFETCH
362 int numpref=numavailable();
365 if (node==NULL && numpref!=0 || numpref>=PREFTHRESHOLD) {
367 prefetchpile_t *pilehead = foundLocal(node,numpref,siteid);
368 if (pilehead!=NULL) {
369 // Get sock from shared pool
371 /* Send Prefetch Request */
372 prefetchpile_t *ptr = pilehead;
375 int sd = getSock2(transPrefetchSockPool, ptr->mid);
376 sendPrefetchReq(ptr, sd, globalid);
383 } //end do prefetch if condition
386 /* Lock and insert into primary prefetch queue */
391 /* This function starts up the transaction runtime. */
392 int dstmStartup(const char * option) {
393 pthread_t thread_Listen, udp_thread_Listen;
395 int master=option!=NULL && strcmp(option, "master")==0;
399 if (processConfigFile() != 0)
400 return 0; //TODO: return error value, cause main program to exit
407 printf("Trans stats is on\n");
414 //Initialize socket pool
415 transReadSockPool = createSockPool(transReadSockPool, DEFAULTSOCKPOOLSIZE);
416 transPrefetchSockPool = createSockPool(transPrefetchSockPool, DEFAULTSOCKPOOLSIZE);
417 transRequestSockPool = createSockPool(transRequestSockPool, DEFAULTSOCKPOOLSIZE);
423 pthread_attr_init(&attr);
424 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
427 pthread_create(&udp_thread_Listen, &attr, udpListenBroadcast, (void*)udpfd);
430 pthread_create(&thread_Listen, &attr, dstmListen, (void*)fd);
433 dstmListen((void *)fd);
438 //TODO Use this later
439 void *pCacheAlloc(objstr_t *store, unsigned int size) {
445 while(ptr->next != NULL) {
446 /* check if store is empty */
447 if(((unsigned int)ptr->top - (unsigned int)ptr - sizeof(objstr_t) + size) <= ptr->size) {
462 /* This function initiates the prefetch thread. A queue is shared
463 * between the main thread of execution and the prefetch thread to
464 * process the prefetch call. The call from the compiler populates the shared
465 * queue with prefetch requests while the prefetch thread processes the
466 * prefetch requests */
469 //Create and initialize prefetch cache structure
472 if((evalPrefetch = initPrefetchStats()) == NULL) {
473 printf("%s() Error allocating memory at %s, %d\n", __func__, __FILE__, __LINE__);
478 /* Initialize attributes for mutex */
479 pthread_mutexattr_init(&prefetchcache_mutex_attr);
480 pthread_mutexattr_settype(&prefetchcache_mutex_attr, PTHREAD_MUTEX_RECURSIVE_NP);
482 pthread_mutex_init(&prefetchcache_mutex, &prefetchcache_mutex_attr);
483 pthread_mutex_init(&notifymutex, NULL);
484 pthread_mutex_init(&atomicObjLock, NULL);
486 //Create prefetch cache lookup table
487 if(prehashCreate(PHASH_SIZE, PLOADFACTOR)) {
492 //Initialize primary shared queue
494 //Initialize machine pile w/prefetch oids and offsets shared queue
497 //Create the primary prefetch thread
501 retval=pthread_create(&tPrefetch, NULL, transPrefetchNew, NULL);
504 #ifndef INLINEPREFETCH
506 retval=pthread_create(&tPrefetch, NULL, transPrefetch, NULL);
510 #ifndef INLINEPREFETCH
511 pthread_detach(tPrefetch);
516 /* This function stops the threads spawned */
520 pthread_cancel(tPrefetch);
521 for(t = 0; t < NUM_THREADS; t++)
522 pthread_cancel(wthreads[t]);
528 /* This function inserts random wait delays on the order of microseconds.
529 * Mostly used when transaction commits retry */
536 req.tv_nsec = (long)(1000 + (t%10000)); //1-11 microsec
537 nanosleep(&req, NULL);
/* Sleeps for the current per-thread backoff interval after doubling it, and
 * counts how many times this thread has backed off. When the count reaches
 * max_exponential_backoff a diagnostic is printed.
 * NOTE(review): tv_nsec is doubled on every call with no upper bound, and
 * nanosleep() fails with EINVAL once tv_nsec exceeds 999999999; its return
 * value is ignored here, so the delay would silently stop happening. Consider
 * carrying the overflow into tv_sec or capping the interval. */
541 void exponentialdelay() {
/* Double the nanosecond component of the thread-local backoff interval. */
542 exponential_backoff.tv_nsec = exponential_backoff.tv_nsec * 2;
543 nanosleep(&exponential_backoff, NULL);
544 ++count_exponential_backoff;
/* Safety limit on the number of backoff rounds. */
545 if (count_exponential_backoff >= max_exponential_backoff) {
546 printf(" reached max_exponential_backoff at %s, %s(), %d\n", __FILE__, __func__, __LINE__);
552 /* This function initializes things required in the transaction start*/
554 t_cache = objstrCreate(1048576);
555 t_chashCreate(CHASH_SIZE, CLOADFACTOR);
558 trans_allocation_bytes = 0;
565 // Search for an address for a given oid
566 /*#define INLINE inline __attribute__((always_inline))
568 INLINE void * chashSearchI(chashtable_t *table, unsigned int key) {
569 //REMOVE HASH FUNCTION CALL TO MAKE SURE IT IS INLINED HERE
570 chashlistnode_t *node = &table->table[(key & table->mask)>>1];
573 if(node->key == key) {
577 } while(node != NULL);
585 /* This function finds the location of the objects involved in a transaction
586 * and returns the pointer to the object if found in a remote location */
587 __attribute__((pure)) objheader_t *transRead(unsigned int oid) {
588 unsigned int machinenumber;
589 objheader_t *tmp, *objheader;
590 objheader_t *objcopy;
593 chashlistnode_t *node;
600 node= &c_table[(oid & c_mask)>>1];
602 if(node->key == oid) {
607 return &((objheader_t*)node->val)[1];
613 } while(node != NULL);
617 if((objheader = chashSearchI(record->lookupTable, oid)) != NULL) {
622 return &objheader[1];
631 //abort this transaction
632 removetransactionhash();
633 objstrDelete(t_cache);
635 _longjmp(aborttrans,1);
640 if ((objheader = (objheader_t *) mhashSearch(oid)) != NULL) {
644 /* Look up in machine lookup table and copy into cache*/
645 GETSIZE(size, objheader);
646 size += sizeof(objheader_t);
647 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
648 memcpy(objcopy, objheader, size);
649 /* Insert into cache's lookup table */
651 t_chashInsert(OID(objheader), objcopy);
659 if((tmp = (objheader_t *) prehashSearch(oid)) != NULL) {
660 if(STATUS(tmp) & DIRTY) {
669 /* Look up in prefetch cache */
671 size+=sizeof(objheader_t);
672 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
673 memcpy(objcopy, tmp, size);
674 /* Insert into cache's lookup table */
675 t_chashInsert(OID(tmp), objcopy);
684 /* Get the object from the remote location */
685 if((machinenumber = lhashSearch(oid)) == 0) {
686 printf("Error: %s() No machine found for oid =% %s,%dx\n",__func__, machinenumber, __FILE__, __LINE__);
689 objcopy = getRemoteObj(machinenumber, oid);
691 if(objcopy == NULL) {
692 printf("Error: Object not found in Remote location %s, %d\n", __FILE__, __LINE__);
700 //Copy object to prefetch cache
701 pthread_mutex_lock(&prefetchcache_mutex);
702 objheader_t *headerObj;
704 GETSIZE(size, objcopy);
705 if((headerObj = prefetchobjstrAlloc(size + sizeof(objheader_t))) == NULL) {
706 printf("%s(): Error in getting memory from prefetch cache at %s, %d\n", __func__,
708 pthread_mutex_unlock(&prefetchcache_mutex);
711 pthread_mutex_unlock(&prefetchcache_mutex);
712 memcpy(headerObj, objcopy, size+sizeof(objheader_t));
713 //make an entry in prefetch lookup hashtable
714 prehashInsert(oid, headerObj);
726 /* This function finds the location of the objects involved in a transaction
727 * and returns the pointer to the object if found in a remote location */
728 __attribute__((pure)) objheader_t *transRead2(unsigned int oid) {
729 //DEBUG: __attribute__((pure)) objheader_t *transRead2(unsigned int oid, char tmpptr[]) {
730 unsigned int machinenumber;
731 objheader_t *tmp, *objheader;
732 objheader_t *objcopy;
737 //abort this transaction
738 removetransactionhash();
739 objstrDelete(t_cache);
741 _longjmp(aborttrans,1);
746 if ((objheader = (objheader_t *) mhashSearch(oid)) != NULL) {
750 /* Look up in machine lookup table and copy into cache*/
751 GETSIZE(size, objheader);
752 size += sizeof(objheader_t);
753 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
754 memcpy(objcopy, objheader, size);
755 /* Insert into cache's lookup table */
757 t_chashInsert(OID(objheader), objcopy);
765 if((tmp = (objheader_t *) prehashSearch(oid)) != NULL) {
766 if(STATUS(tmp) & DIRTY) {
776 /* Look up in prefetch cache */
778 size+=sizeof(objheader_t);
779 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
780 memcpy(objcopy, tmp, size);
781 LOGOIDTYPE("P",oid, TYPE(objcopy), myrdtsc());
782 /* Insert into cache's lookup table */
783 t_chashInsert(OID(tmp), objcopy);
792 /* Get the object from the remote location */
793 if((machinenumber = lhashSearch(oid)) == 0) {
794 printf("Error: %s() No machine found for oid =% %s,%dx\n",__func__, machinenumber, __FILE__, __LINE__);
797 objcopy = getRemoteObj(machinenumber, oid);
803 if(objcopy == NULL) {
804 printf("Error: Object %u not found in Remote location %s, %d\n", oid,__FILE__, __LINE__);
809 LOGOIDTYPE("RR",oid, TYPE(objcopy),myrdtsc());
810 LOGTIME('r', oid, TYPE(objcopy),myrdtsc(),0);
811 //Copy object to prefetch cache
812 pthread_mutex_lock(&prefetchcache_mutex);
813 objheader_t *headerObj;
815 GETSIZE(size, objcopy);
816 if((headerObj = prefetchobjstrAlloc(size+sizeof(objheader_t))) == NULL) {
817 printf("%s(): Error in getting memory from prefetch cache at %s, %d\n", __func__,
819 pthread_mutex_unlock(&prefetchcache_mutex);
822 pthread_mutex_unlock(&prefetchcache_mutex);
823 memcpy(headerObj, objcopy, size+sizeof(objheader_t));
824 //make an entry in prefetch lookup hashtable
825 prehashInsert(oid, headerObj);
836 /* This function creates objects in the transaction record */
837 objheader_t *transCreateObj(unsigned int size) {
838 objheader_t *tmp = (objheader_t *) objstrAlloc(&t_cache, (sizeof(objheader_t) + size));
839 OID(tmp) = getNewOID();
843 t_chashInsert(OID(tmp), tmp);
845 trans_allocation_bytes += size;
846 /* Validate the read set if the allocation exceeds the threshold */
847 if(trans_allocation_bytes > MEM_ALLOC_THRESHOLD) {
853 return &tmp[1]; //want space after object header
860 /* This function creates machine piles based on all machines involved in a
861 * transaction commit request */
862 plistnode_t *createPiles() {
864 plistnode_t *pile = NULL;
865 unsigned int machinenum;
866 objheader_t *headeraddr;
867 chashlistnode_t * ptr = c_table;
868 /* Represents number of bins in the chash table */
869 unsigned int size = c_size;
871 for(i = 0; i < size; i++) {
872 chashlistnode_t * curr = &ptr[i];
873 /* Inner loop to traverse the linked list of the cache lookupTable */
874 while(curr != NULL) {
875 //if the first bin in hash table is empty
878 headeraddr=(objheader_t *) curr->val;
879 //Get machine location for object id (and whether local or not)
880 if (STATUS(headeraddr) & NEW || (mhashSearch(curr->key) != NULL)) {
881 machinenum = myIpAddr;
882 } else if ((machinenum = lhashSearch(curr->key)) == 0) {
883 printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
887 //Make machine groups
888 pile = pInsert(pile, headeraddr, machinenum, c_numelements);
895 /* This function creates machine piles based on all machines involved in a
896 * transaction commit request */
897 plistnode_t *createPiles() {
899 plistnode_t *pile = NULL;
900 unsigned int machinenum;
901 objheader_t *headeraddr;
902 struct chashentry * ptr = c_table;
903 /* Represents number of bins in the chash table */
904 unsigned int size = c_size;
906 for(i = 0; i < size; i++) {
907 struct chashentry * curr = &ptr[i];
908 /* Inner loop to traverse the linked list of the cache lookupTable */
909 //if the first bin in hash table is empty
912 headeraddr=(objheader_t *) curr->ptr;
914 //Get machine location for object id (and whether local or not)
915 if (STATUS(headeraddr) & NEW || (mhashSearch(curr->key) != NULL)) {
916 machinenum = myIpAddr;
917 } else if ((machinenum = lhashSearch(curr->key)) == 0) {
918 printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
922 //Make machine groups
923 pile = pInsert(pile, headeraddr, machinenum, c_numelements);
929 /* This function initiates the transaction commit process
930 * Spawns threads for each of the new connections with Participants
931 * and creates new piles by calling the createPiles(),
932 * Sends a transrequest() to each remote machine for objects found remotely
933 * and calls handleLocalReq() to process objects found locally */
936 unsigned int tot_bytes_mod, *listmid;
937 plistnode_t *pile, *pile_ptr;
938 char treplyretry; /* keeps track of the common response that needs to be sent */
940 trans_commit_data_t transinfo; /* keeps track of objs locked during transaction */
945 struct writestruct writebuffer;
946 writebuffer.offset=0;
950 for(iii=0; iii<bigindex; iii++) {
951 printf("%c", bigarray[iii]);
957 for(jjj=0; jjj<bigindex1; jjj++) {
958 printf("[%c %u %u %lld %d]\n", bigarray1[jjj], bigarray2[jjj], bigarray3[jjj], bigarray4[jjj], bigarray5[jjj]);
964 //abort this transaction
965 removetransactionhash();
966 objstrDelete(t_cache);
973 int treplyretryCount = 0;
974 /* Initialize timeout for exponential delay */
975 exponential_backoff.tv_sec = 0;
976 exponential_backoff.tv_nsec = (long)(10000); //10 microsec
977 count_exponential_backoff = 0;
981 /* Look through all the objects in the transaction record and make piles
982 * for each machine involved in the transaction*/
984 pile_ptr = pile = createPiles();
985 pile_ptr = pile = sortPiles(pile);
990 /* Create the packet to be sent in TRANS_REQUEST */
992 /* Count the number of participants */
994 pilecount = pCount(pile);
996 /* Create a list of machine ids(Participants) involved in transaction */
997 listmid = calloc(pilecount, sizeof(unsigned int));
998 pListMid(pile, listmid);
1000 /* Create a socket and getReplyCtrl array, initialize */
1001 int socklist[pilecount];
1002 char getReplyCtrl[pilecount];
1004 for(loopcount = 0; loopcount < pilecount; loopcount++) {
1005 socklist[loopcount] = 0;
1006 getReplyCtrl[loopcount] = 0;
1009 /* Process each machine pile */
1011 trans_req_data_t *tosend;
1012 tosend = calloc(pilecount, sizeof(trans_req_data_t));
1013 while(pile != NULL) {
1014 tosend[sockindex].f.control = TRANS_REQUEST;
1015 tosend[sockindex].f.mcount = pilecount;
1016 tosend[sockindex].f.numread = pile->numread;
1017 tosend[sockindex].f.nummod = pile->nummod;
1018 tosend[sockindex].f.numcreated = pile->numcreated;
1019 tosend[sockindex].f.sum_bytes = pile->sum_bytes;
1020 tosend[sockindex].listmid = listmid;
1021 tosend[sockindex].objread = pile->objread;
1022 tosend[sockindex].oidmod = pile->oidmod;
1023 tosend[sockindex].oidcreated = pile->oidcreated;
1025 if(pile->mid != myIpAddr) {
1026 if((sd = getSock2WithLock(transRequestSockPool, pile->mid)) < 0) {
1027 printf("transRequest(): socket create error\n");
1032 socklist[sockindex] = sd;
1033 /* Send bytes of data with TRANS_REQUEST control message */
1034 send_buf(sd, &writebuffer, &(tosend[sockindex].f), sizeof(fixed_data_t));
1036 /* Send list of machines involved in the transaction */
1038 int size=sizeof(unsigned int)*(tosend[sockindex].f.mcount);
1039 send_buf(sd, &writebuffer, tosend[sockindex].listmid, size);
1042 /* Send oids and version number tuples for objects that are read */
1044 int size=(sizeof(unsigned int)+sizeof(unsigned short))*(tosend[sockindex].f.numread);
1045 send_buf(sd, &writebuffer, tosend[sockindex].objread, size);
1048 /* Send objects that are modified */
1050 if((modptr = calloc(1, tosend[sockindex].f.sum_bytes)) == NULL) {
1051 printf("Calloc error for modified objects %s, %d\n", __FILE__, __LINE__);
1058 for(i = 0; i < tosend[sockindex].f.nummod; i++) {
1060 objheader_t *headeraddr;
1061 if((headeraddr = t_chashSearch(tosend[sockindex].oidmod[i])) == NULL) {
1062 printf("%s() Error: No such oid %s, %d\n", __func__, __FILE__, __LINE__);
1068 GETSIZE(size,headeraddr);
1069 size+=sizeof(objheader_t);
1070 memcpy(modptr+offset, headeraddr, size);
1073 forcesend_buf(sd, &writebuffer, modptr, tosend[sockindex].f.sum_bytes);
1075 } else { //handle request locally
1076 handleLocalReq(&tosend[sockindex], &transinfo, &getReplyCtrl[sockindex]);
1080 } //end of pile processing
1082 /* Recv Ctrl msgs from all machines */
1084 for(i = 0; i < pilecount; i++) {
1085 int sd = socklist[i];
1088 recv_data(sd, &control, sizeof(char));
1089 //Update common data structure with new ctrl msg
1090 getReplyCtrl[i] = control;
1091 /* Recv Objects if participant sends TRANS_DISAGREE */
1093 if(control == TRANS_DISAGREE) {
1095 recv_data(sd, &length, sizeof(int));
1097 pthread_mutex_lock(&prefetchcache_mutex);
1098 if ((newAddr = prefetchobjstrAlloc((unsigned int)length)) == NULL) {
1099 printf("Error: %s() objstrAlloc error for copying into prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
1102 pthread_mutex_unlock(&prefetchcache_mutex);
1105 pthread_mutex_unlock(&prefetchcache_mutex);
1106 recv_data(sd, newAddr, length);
1108 while(length != 0) {
1109 unsigned int oidToPrefetch;
1110 objheader_t * header;
1111 header = (objheader_t *)(((char *)newAddr) + offset);
1112 oidToPrefetch = OID(header);
1115 GETSIZE(size, header);
1116 size += sizeof(objheader_t);
1117 //make an entry in prefetch hash table
1118 prehashInsert(oidToPrefetch, header);
1120 length = length - size;
1123 } //end of receiving objs
1128 /* Decide the final response */
1129 if((finalResponse = decideResponse(getReplyCtrl, &treplyretry, pilecount)) == 0) {
1130 printf("Error: %s() in updating prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
1137 if (finalResponse == TRANS_COMMIT) {
1138 /* Invalidate objects in other machine cache */
1140 if((retval = invalidateObj(tosend, pilecount,finalResponse,socklist)) != 0) {
1141 printf("Error: %s() in invalidating Objects %s, %d\n", __func__, __FILE__, __LINE__);
1148 /* Send responses to all machines */
1149 for(i = 0; i < pilecount; i++) {
1150 int sd = socklist[i];
1153 if(finalResponse == TRANS_COMMIT) {
1155 /* Update prefetch cache */
1156 if((retval = updatePrefetchCache(&(tosend[i]))) != 0) {
1157 printf("Error: %s() in updating prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
1163 removetransaction(tosend[i].oidmod,tosend[i].f.nummod);
1164 removethisreadtransaction(tosend[i].objread, tosend[i].f.numread);
1168 else if (!treplyretry) {
1169 removethistransaction(tosend[i].oidmod,tosend[i].f.nummod);
1170 removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
1174 send_data(sd, &finalResponse, sizeof(char));
1176 /* Complete local processing */
1177 doLocalProcess(finalResponse, &(tosend[i]), &transinfo);
1179 if(finalResponse == TRANS_COMMIT) {
1180 removetransaction(tosend[i].oidmod,tosend[i].f.nummod);
1181 removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
1182 } else if (!treplyretry) {
1183 removethistransaction(tosend[i].oidmod,tosend[i].f.nummod);
1184 removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
1190 /* Free resources */
1195 /* wait a random amount of time before retrying to commit transaction*/
1198 // if(treplyretryCount >= NUM_TRY_TO_COMMIT)
1199 // exponentialdelay();
1206 /* Retry trans commit procedure during soft_abort case */
1207 } while (treplyretry);
1209 if(finalResponse == TRANS_ABORT) {
1214 /* Free Resources */
1215 objstrDelete(t_cache);
1221 } else if(finalResponse == TRANS_COMMIT) {
1226 /* Free Resources */
1227 objstrDelete(t_cache);
1231 //TODO Add other cases
1232 printf("Error: in %s() THIS SHOULD NOT HAPPEN.....EXIT PROGRAM\n", __func__);
1238 /* This function handles the local objects involved in a transaction
1239 * committing process. It also makes a decision if this local machine
1240 * sends AGREE or DISAGREE or SOFT_ABORT to coordinator.
 *   tdata        - request data for this machine's pile: f.numread / f.nummod
 *                  give the entry counts, objread holds packed (oid, version)
 *                  records and oidmod holds the oids of modified objects
 *   transinfo    - filled in with the locked / not-found oid arrays and their
 *                  counts; the arrays are allocated here and released later by
 *                  doLocalProcess()
 *   getReplyCtrl - receives the control reply; also handed to the
 *                  commitCountForObj*() helpers, which may set TRANS_DISAGREE
 * NOTE(review): neither calloc() result is checked before use. */
1241 void handleLocalReq(trans_req_data_t *tdata, trans_commit_data_t *transinfo, char *getReplyCtrl) {
1242 unsigned int *oidnotfound = NULL, *oidlocked = NULL;
1243 int numoidnotfound = 0, numoidlocked = 0;
1244 int v_nomatch = 0, v_matchlock = 0, v_matchnolock = 0;
1247 unsigned short version;
1249 /* Counters and arrays to formulate decision on control message to be sent */
1250 oidnotfound = (unsigned int *) calloc((tdata->f.numread + tdata->f.nummod), sizeof(unsigned int));
1251 oidlocked = (unsigned int *) calloc((tdata->f.numread + tdata->f.nummod +1), sizeof(unsigned int)); // calloc one additional element for
1252 //setting a divider between read and write locks
1253 numread = tdata->f.numread;
1254 /* Process each oid in the machine pile/ group per thread */
1255 for (i = 0; i < tdata->f.numread + tdata->f.nummod; i++) {
/* The first numread iterations cover the objects that were read. */
1256 if (i < tdata->f.numread) {
1257 int incr = sizeof(unsigned int) + sizeof(unsigned short); // Size of one (oid, version) record in the objread array
/* Each record stores the oid first, followed immediately by the version. */
1259 oid = *((unsigned int *)(((char *)tdata->objread) + incr));
1260 version = *((unsigned short *)(((char *)tdata->objread) + incr + sizeof(unsigned int)));
1261 commitCountForObjRead(getReplyCtrl, oidnotfound, oidlocked, &numoidnotfound, &numoidlocked, &v_nomatch, &v_matchlock, &v_matchnolock, oid, version);
1262 } else { // Objects Modified
/* On the first modified object, store the -1 divider that separates
 * read-lock entries from write-lock entries in oidlocked. */
1263 if(i == tdata->f.numread) {
1264 oidlocked[numoidlocked++] = -1;
1267 objheader_t *headptr;
/* The version to validate comes from this transaction's cached copy. */
1268 headptr = (objheader_t *) t_chashSearch(tdata->oidmod[i-numread]);
1269 if (headptr == NULL) {
1270 printf("Error: handleLocalReq() returning NULL, no such oid %s, %d\n", __FILE__, __LINE__);
1274 version = headptr->version;
1275 commitCountForObjMod(getReplyCtrl, oidnotfound, oidlocked, &numoidnotfound, &numoidlocked, &v_nomatch, &v_matchlock, &v_matchnolock, oid, version);
1279 /* Fill out the trans_commit_data_t data structure. This is required for a trans commit process
1280 * if Participant receives a TRANS_COMMIT */
1281 transinfo->objlocked = oidlocked;
1282 transinfo->objnotfound = oidnotfound;
1283 transinfo->modptr = NULL;
1284 transinfo->numlocked = numoidlocked;
1285 transinfo->numnotfound = numoidnotfound;
1287 /* Condition to send TRANS_AGREE: every read and modified object matched its version and was lockable */
1288 if(v_matchnolock == tdata->f.numread + tdata->f.nummod) {
1289 *getReplyCtrl = TRANS_AGREE;
1291 /* Condition to send TRANS_SOFT_ABORT: no version mismatch, but some object was locked elsewhere or not found */
1292 if((v_matchlock > 0 && v_nomatch == 0) || (numoidnotfound > 0 && v_nomatch == 0)) {
1293 *getReplyCtrl = TRANS_SOFT_ABORT;
/* Completes the local side of a transaction once the final response is known,
 * then releases the bookkeeping arrays held in transinfo.
 *   finalResponse - TRANS_ABORT runs transAbortProcess(); TRANS_COMMIT runs
 *                   transComProcess()
 *   tdata         - request data, used only on the commit path
 *   transinfo     - lock bookkeeping; objlocked and objnotfound are freed here
 * A non-zero result from either helper is reported with printf. The condition
 * guarding the "No Decision" message is not visible here; presumably it is the
 * fall-through for any other response value -- confirm. */
1297 void doLocalProcess(char finalResponse, trans_req_data_t *tdata, trans_commit_data_t *transinfo) {
1298 if(finalResponse == TRANS_ABORT) {
1299 if(transAbortProcess(transinfo) != 0) {
1300 printf("Error in transAbortProcess() %s,%d\n", __FILE__, __LINE__);
1304 } else if(finalResponse == TRANS_COMMIT) {
1305 if(transComProcess(tdata, transinfo) != 0) {
1306 printf("Error in transComProcess() %s,%d\n", __FILE__, __LINE__);
1311 printf("ERROR...No Decision\n");
/* Release the oid arrays attached to transinfo. */
1315 if (transinfo->objlocked != NULL) {
1316 free(transinfo->objlocked);
1318 if (transinfo->objnotfound != NULL) {
1319 free(transinfo->objnotfound);
1323 /* This function decides the response that needs to be sent to
1324 * all Participant machines after the TRANS_REQUEST protocol */
1325 char decideResponse(char *getReplyCtrl, char *treplyretry, int pilecount) {
1326 int i, transagree = 0, transdisagree = 0, transsoftabort = 0; /* Counters to formulate decision of what
1328 for (i = 0; i < pilecount; i++) {
1330 control = getReplyCtrl[i];
1333 printf("Participant sent unknown message %d in %s, %d\n", control, __FILE__, __LINE__);
1335 /* treat as disagree, pass thru */
1336 case TRANS_DISAGREE:
1344 case TRANS_SOFT_ABORT:
1350 if(transdisagree > 0) {
1355 /* clear objects from prefetch cache */
1358 } else if(transagree == pilecount) {
1361 return TRANS_COMMIT;
1363 /* Send Abort in the soft abort case, then retry committing the transaction */
1370 /* This function opens a connection, places an object read request to
1371 * the remote machine, reads the control message and object if
1372 * available and copies the object and its header to the local
1375 void *getRemoteObj(unsigned int mnum, unsigned int oid) {
1377 struct sockaddr_in serv_addr;
1381 void *objcopy = NULL;
1383 int sd = getSock2(transReadSockPool, mnum);
1384 char readrequest[sizeof(char)+sizeof(unsigned int)];
1385 readrequest[0] = READ_REQUEST;
1386 *((unsigned int *)(&readrequest[1])) = oid;
1387 send_data(sd, readrequest, sizeof(readrequest));
1389 /* Read response from the Participant */
1390 recv_data(sd, &control, sizeof(char));
1392 if (control==OBJECT_NOT_FOUND) {
1395 /* Read object if found into local cache */
1396 recv_data(sd, &size, sizeof(int));
1397 objcopy = objstrAlloc(&t_cache, size);
1398 recv_data(sd, objcopy, size);
1400 /* Insert into cache's lookup table */
1401 t_chashInsert(oid, objcopy);
1403 totalObjSize += size;
1410 /* Commit info for objects modified */
1411 void commitCountForObjMod(char *getReplyCtrl, unsigned int *oidnotfound, unsigned int *oidlocked, int *numoidnotfound,
1412 int *numoidlocked, int *v_nomatch, int *v_matchlock, int *v_matchnolock, unsigned int oid, unsigned short version) {
1414 /* Check if object is still present in the machine since the beginning of TRANS_REQUEST */
1415 /* Save the oids not found and number of oids not found for later use */
1416 if ((mobj = mhashSearch(oid)) == NULL) { /* Obj not found */
1417 /* Save the oids not found and number of oids not found for later use */
1418 oidnotfound[*numoidnotfound] = oid;
1419 (*numoidnotfound)++;
1420 } else { /* If Obj found in machine (i.e. has not moved) */
1421 /* Check if Obj is locked by any previous transaction */
1422 if (write_trylock(STATUSPTR(mobj))) { // Can acquire write lock
1423 if (version == ((objheader_t *)mobj)->version) { /* match versions */
1425 //Keep track of what is locked
1426 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1427 } else { /* If versions don't match ...HARD ABORT */
1429 /* Send TRANS_DISAGREE to Coordinator */
1430 *getReplyCtrl = TRANS_DISAGREE;
1432 //Keep track of what is locked
1433 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1436 } else { //A lock is acquired some place else
1437 if (version == ((objheader_t *)mobj)->version) { /* Check if versions match */
1439 } else { /* If versions don't match ...HARD ABORT */
1441 /* Send TRANS_DISAGREE to Coordinator */
1442 *getReplyCtrl = TRANS_DISAGREE;
/*
 * commitCountForObjRead: read-set counterpart of commitCountForObjMod. Checks
 * one read object (oid, version) against the local main object store.
 *   - oid absent from mhashSearch(): appended to oidnotfound[] and
 *     *numoidnotfound is incremented.
 *   - read_trylock() succeeds: oid is appended to oidlocked[] in both the
 *     version-match and version-mismatch branches; a mismatch also sets
 *     *getReplyCtrl to TRANS_DISAGREE.
 *   - read_trylock() fails: a version mismatch sets *getReplyCtrl to
 *     TRANS_DISAGREE.
 * NOTE(review): updates to v_nomatch / v_matchlock / v_matchnolock are not
 * visible in this excerpt -- confirm against the full file.
 */
1449 /* Commit info for objects read */
1450 void commitCountForObjRead(char *getReplyCtrl, unsigned int *oidnotfound, unsigned int *oidlocked, int *numoidnotfound,
1451 int *numoidlocked, int *v_nomatch, int *v_matchlock, int *v_matchnolock, unsigned int oid, unsigned short version) {
1453 /* Check if object is still present in the machine since the beginning of TRANS_REQUEST */
1454 /* Save the oids not found and number of oids not found for later use */
1455 if ((mobj = mhashSearch(oid)) == NULL) { /* Obj not found */
1456 /* Save the oids not found and number of oids not found for later use */
1457 oidnotfound[*numoidnotfound] = oid;
1458 (*numoidnotfound)++;
1459 } else { /* If Obj found in machine (i.e. has not moved) */
1460 /* Check if Obj is locked by any previous transaction */
1461 if (read_trylock(STATUSPTR(mobj))) { // Can further acquire read locks
1462 if (version == ((objheader_t *)mobj)->version) { /* If locked then match versions */
1464 //Keep track of what is locked
1465 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1466 } else { /* If versions don't match ...HARD ABORT */
1468 /* Send TRANS_DISAGREE to Coordinator */
1469 *getReplyCtrl = TRANS_DISAGREE;
/* The read lock was taken by read_trylock() above, so the oid is recorded
 * in oidlocked[] on this abort path as well. */
1470 //Keep track of what is locked
1471 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1474 } else { //Has reached max number of readers or some other transaction
1475 //has acquired a lock on this object
1476 if (version == ((objheader_t *)mobj)->version) { /* Check if versions match */
1478 } else { /* If versions don't match ...HARD ABORT */
1480 /* Send TRANS_DISAGREE to Coordinator */
1481 *getReplyCtrl = TRANS_DISAGREE;
/*
 * transAbortProcess: releases the locks recorded for an aborting transaction.
 * Walks transinfo->objlocked[0 .. transinfo->numlocked), looks each oid up in
 * the main object store and releases it with read_unlock() while
 * useWriteUnlock is 0, otherwise with write_unlock().
 * NOTE(review): the body of the "== -1" branch, the error handling after a
 * failed mhashSearch(), and the return value are not visible in this excerpt.
 * Presumably the -1 entry is a marker that switches useWriteUnlock on --
 * confirm against the full file.
 */
1488 /* This function completes the ABORT process if the transaction is aborting */
1489 int transAbortProcess(trans_commit_data_t *transinfo) {
1491 unsigned int *objlocked;
1494 numlocked = transinfo->numlocked;
1495 objlocked = transinfo->objlocked;
1497 int useWriteUnlock = 0;
1498 for (i = 0; i < numlocked; i++) {
/* objlocked[] holds unsigned int, so the -1 below is converted to the
 * all-ones unsigned value before the comparison. */
1499 if(objlocked[i] == -1) {
1503 if((header = mhashSearch(objlocked[i])) == NULL) {
1504 printf("mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1507 if(!useWriteUnlock) {
1508 read_unlock(STATUSPTR(header));
1510 write_unlock(STATUSPTR(header));
/*
 * transComProcess: applies a committing transaction to the local main object
 * store. Visible steps, in order:
 *   1. For each oid in tdata->oidmod: find the stored object (mhashSearch) and
 *      the transaction-cache copy (t_chashSearch), copy ___cachedCode___ and
 *      ___cachedHash___ plus the payload that follows the struct ___Object___
 *      header, increment header->version, and call notifyAll() when
 *      header->notifylist is non-NULL.
 *   2. For each oid in tdata->oidcreated: find the transaction-cache copy,
 *      allocate room in mainobjstore under mainobjstore_mutex, initialize its
 *      locks, copy it, and register it with mhashInsert() and
 *      lhashInsert(oid, myIpAddr).
 *   3. Release every lock listed in transinfo->objlocked.
 * NOTE(review): error returns, loop closing braces and the final return value
 * are not visible in this excerpt -- confirm against the full file.
 */
1517 /* This function completes the COMMIT process if the transaction is committing */
1518 int transComProcess(trans_req_data_t *tdata, trans_commit_data_t *transinfo) {
1519 objheader_t *header, *tcptr;
1520 int i, nummod, tmpsize, numcreated, numlocked;
1521 unsigned int *oidmod, *oidcreated, *oidlocked;
1524 nummod = tdata->f.nummod;
1525 oidmod = tdata->oidmod;
1526 numcreated = tdata->f.numcreated;
1527 oidcreated = tdata->oidcreated;
1528 numlocked = transinfo->numlocked;
1529 oidlocked = transinfo->objlocked;
1531 for (i = 0; i < nummod; i++) {
1532 if((header = (objheader_t *) mhashSearch(oidmod[i])) == NULL) {
1533 printf("Error: transComProcess() mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1536 /* Copy from transaction cache -> main object store */
1537 if ((tcptr = ((objheader_t *) t_chashSearch(oidmod[i]))) == NULL) {
1538 printf("Error: transComProcess() chashSearch returned NULL at %s, %d\n", __FILE__, __LINE__);
1541 GETSIZE(tmpsize, header);
1542 char *tmptcptr = (char *) tcptr;
/* dst/src point at the object body, i.e. just past each objheader_t. */
1544 struct ___Object___ *dst=(struct ___Object___*)((char*)header+sizeof(objheader_t));
1545 struct ___Object___ *src=(struct ___Object___*)((char*)tmptcptr+sizeof(objheader_t));
1546 dst->___cachedCode___=src->___cachedCode___;
1547 dst->___cachedHash___=src->___cachedHash___;
/* &dst[1] / &src[1] skip the struct ___Object___ prefix, so only the
 * remaining tmpsize - sizeof(struct ___Object___) bytes are copied. */
1549 memcpy(&dst[1], &src[1], tmpsize-sizeof(struct ___Object___));
1555 header->version += 1;
1556 if(header->notifylist != NULL) {
1557 notifyAll(&header->notifylist, OID(header), header->version);
1560 /* If object is newly created inside transaction then commit it */
1561 for (i = 0; i < numcreated; i++) {
1562 if ((header = ((objheader_t *) t_chashSearch(oidcreated[i]))) == NULL) {
1563 printf("Error: transComProcess() chashSearch returned NULL for oid = %x at %s, %d\n", oidcreated[i], __FILE__, __LINE__);
1566 GETSIZE(tmpsize, header);
/* Allocation size covers the object body plus its objheader_t. */
1567 tmpsize += sizeof(objheader_t);
1568 pthread_mutex_lock(&mainobjstore_mutex);
1569 if ((ptrcreate = objstrAlloc(&mainobjstore, tmpsize)) == NULL) {
1570 printf("Error: transComProcess() failed objstrAlloc %s, %d\n", __FILE__, __LINE__);
1571 pthread_mutex_unlock(&mainobjstore_mutex);
1574 pthread_mutex_unlock(&mainobjstore_mutex);
1575 /* Initialize read and write locks */
/* The locks are initialized on the transaction-cache copy, which is then
 * copied wholesale into the newly allocated slot. */
1576 initdsmlocks(STATUSPTR(header));
1577 memcpy(ptrcreate, header, tmpsize);
1578 mhashInsert(oidcreated[i], ptrcreate);
1579 lhashInsert(oidcreated[i], myIpAddr);
1581 /* Unlock locked objects */
1582 int useWriteUnlock = 0;
1583 for(i = 0; i < numlocked; i++) {
/* oidlocked[] holds unsigned int, so -1 compares as the all-ones value.
 * NOTE(review): the body of this branch is not visible here. */
1584 if(oidlocked[i] == -1) {
1588 if((header = (objheader_t *) mhashSearch(oidlocked[i])) == NULL) {
1589 printf("mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1592 if(!useWriteUnlock) {
1593 read_unlock(STATUSPTR(header));
1595 write_unlock(STATUSPTR(header));
/*
 * foundLocal: decodes numprefetches prefetch records starting at ptr and
 * resolves as much of each tuple as possible locally.
 * For every tuple, the offsets arryfields[baseindex .. endindex) are followed
 * from the tuple's starting oid with lookupObject(). A tuple whose walk
 * reaches endindex and passes checkoid() is treated as entirely local;
 * otherwise the remaining offsets are queued with insertPile() for the
 * machine returned by lhashSearch(oid).
 * After each record handleDynPrefetching() is called and ptr is advanced to
 * the next record.
 * NOTE(review): the return statement is not visible; `head` is the pile list
 * built through insertPile() and is presumably what is returned. `mysiteid`
 * is not used in any visible line -- confirm against the full file.
 */
1601 prefetchpile_t *foundLocal(char *ptr, int numprefetches, int mysiteid) {
1604 prefetchpile_t * head=NULL;
1606 for(j=0; j<numprefetches; j++) {
1607 int siteid = *(GET_SITEID(ptr));
1608 int ntuples = *(GET_NTUPLES(ptr));
1609 unsigned int * oidarray = GET_PTR_OID(ptr);
1610 unsigned short * endoffsets = GET_PTR_EOFF(ptr, ntuples);
1611 short * arryfields = GET_PTR_ARRYFLD(ptr, ntuples);
1614 for(i=0; i<ntuples; i++) {
/* endoffsets[i] is the exclusive end of tuple i's slice of arryfields;
 * the slice starts where the previous tuple's slice ended. */
1615 unsigned short baseindex=(i==0) ? 0 : endoffsets[i-1];
1616 unsigned short endindex=endoffsets[i];
1617 unsigned int oid=oidarray[i];
1620 int countInvalidObj=0;
1626 //Look up fields locally
1630 for(newbase=baseindex; newbase<endindex; newbase++) {
1631 if(newbase==(endindex-1))
/* lookupObject() replaces oid with the oid stored at this offset. */
1633 if (!lookupObject(&oid,arryfields[newbase],&countInvalidObj)) {
1636 //Ended in a null pointer...
1643 //Entire prefetch is local
1644 if (newbase==endindex&&checkoid(oid,isLastOffset)) {
1649 //Add to remote requests
1650 machinenum=lhashSearch(oid);
/* Only the offsets not yet resolved locally are forwarded. */
1651 insertPile(machinenum, oid, siteid,endindex-newbase, &arryfields[newbase], &head);
1656 /* handle dynamic prefetching */
1657 handleDynPrefetching(numLocal, ntuples, siteid);
/* Next record starts sizeof(int) bytes past the last used arryfields slot. */
1658 ptr=((char *)&arryfields[endoffsets[ntuples-1]])+sizeof(int);
/*
 * checkoid: checks whether oid is available on this machine, first in the
 * main object store (mhashSearch) and then in the prefetch cache
 * (prehashSearch). For a prefetch-cache hit, the DIRTY status bit combined
 * with isLastOffset selects a separate branch.
 * NOTE(review): the return values of each branch are not visible in this
 * excerpt -- confirm against the full file.
 */
1664 int checkoid(unsigned int oid, int isLastOffset) {
1665 objheader_t *header;
1666 if ((header=mhashSearch(oid))!=NULL) {
1669 } else if ((header=prehashSearch(oid))!=NULL) {
1670 //if the last offset then prefetch object
1671 if((STATUS(header) & DIRTY) && isLastOffset) {
/*
 * lookupObject: follows one field/element offset from the object named by
 * *oid and stores the oid found there back into *oid.
 * The object is looked up in the main object store, then in the prefetch
 * cache. A DIRTY prefetch-cache entry increments *countInvalidObj, and a
 * count above 1 takes a separate branch.
 * Objects whose TYPE() is >= NUMCLASSES are handled as arrays: `offset` is an
 * element index, bounds-checked against ___length___ and scaled by
 * classsize[TYPE(header)]. Otherwise `offset` is a byte offset into the
 * object body.
 * NOTE(review): the return values and the not-found branch are not visible in
 * this excerpt -- confirm against the full file.
 */
1681 int lookupObject(unsigned int * oid, short offset, int *countInvalidObj) {
1682 objheader_t *header;
1683 if ((header=mhashSearch(*oid))!=NULL) {
1686 } else if ((header=prehashSearch(*oid))!=NULL) {
1688 if(STATUS(header) & DIRTY) { //Read an oid that is an old entry in the cache;
1689 //only once because later old entries may still cause unnecessary roundtrips during prefetching
1690 (*countInvalidObj)+=1;
1691 if(*countInvalidObj > 1) {
1699 if(TYPE(header) >= NUMCLASSES) {
1700 int elementsize = classsize[TYPE(header)];
1701 struct ArrayObject *ao = (struct ArrayObject *) (((char *)header) + sizeof(objheader_t));
1702 int length = ao->___length___;
1703 /* Check if array out of bounds */
1704 if(offset < 0 || offset >= length) {
1705 //if yes treat the object as found
/* Array elements start right after the struct ArrayObject header. */
1709 (*oid) = *((unsigned int *)(((char *)ao) + sizeof(struct ArrayObject) + (elementsize*offset)));
/* Non-array case: offset is applied in bytes from the end of objheader_t. */
1712 (*oid) = *((unsigned int *)(((char *)header) + sizeof(objheader_t) + offset));
/*
 * transPrefetch: thread entry point that services the prefetch queue.
 * Visible steps: take the tail node of the queue (gettail), ask
 * numavailable() how many requests it holds, and run foundLocal() over them.
 * If foundLocal() returns a non-NULL pile list, each pile node is sent with
 * sendPrefetchReq() over a socket obtained from transPrefetchSockPool for
 * that node's machine id, and the list is released with mcdealloc().
 * NOTE(review): the enclosing loop, the list advance inside the while loop,
 * the queue-node release and the return value are not visible in this
 * excerpt; parameter `t` is not used in any visible line.
 */
1718 /* This function is called by the thread calling transPrefetch */
1719 void *transPrefetch(void *t) {
1721 /* read from prefetch queue */
1722 void *node=gettail();
1723 /* Check if the tuples are found locally, if yes then reduce them further*/
1724 /* and group requests by remote machine ids by calling the makePreGroups() */
1725 int count=numavailable();
1726 prefetchpile_t *pilehead = foundLocal(node, count, 0);
1728 if (pilehead!=NULL) {
1729 // Get sock from shared pool
1731 /* Send Prefetch Request */
1732 prefetchpile_t *ptr = pilehead;
1733 while(ptr != NULL) {
1735 int sd = getSock2(transPrefetchSockPool, ptr->mid);
1736 sendPrefetchReq(ptr, sd,globalid);
1740 /* Release socket */
1741 // freeSock(transPrefetchSockPool, pilehead->mid, sd);
1743 /* Deallocated pilehead */
1744 mcdealloc(pilehead);
1746 // Deallocate the prefetch queue pile node
/*
 * sendPrefetchReqnew: serializes every object pile of mcpilenode into one
 * buffer and sends it on sd with a single send_data() call.
 * The total size is sizeof(char) + sizeof(int) plus, per pile entry,
 * sizeof(int) + 2 * sizeof(unsigned int) + numoffset * sizeof(short).
 * Per entry the visible writes are: oid, myIpAddr, then the offset array.
 * NOTE(review): the allocation of buf/buft, the writes that fill the leading
 * int and the per-entry `len` field, and the buffer release are not visible
 * in this excerpt -- confirm against the full file.
 */
1751 void sendPrefetchReqnew(prefetchpile_t *mcpilenode, int sd) {
1754 int size=sizeof(char)+sizeof(int);
/* First pass: compute the total message size. */
1755 for(tmp=mcpilenode->objpiles; tmp!=NULL; tmp=tmp->next) {
1756 size += sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
1761 *buf=TRANS_PREFETCH;
/* Second pass: append one record per pile entry. */
1764 for(tmp=mcpilenode->objpiles; tmp!=NULL; tmp=tmp->next) {
1765 int len = sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
1768 *((unsigned int *)buf)=tmp->oid;
1769 buf+=sizeof(unsigned int);
1770 *((unsigned int *)(buf)) = myIpAddr;
1771 buf+=sizeof(unsigned int);
1772 memcpy(buf, tmp->offset, tmp->numoffset*sizeof(short));
1773 buf+=tmp->numoffset*sizeof(short);
1776 send_data(sd, buft, size);
1781 * parameters: mcpilenode -> pile node to traverse to assemble pref requests
1783 * gid -> global identifier for each prefetch request sent, starts with 0
/*
 * sendPrefetchReq: sends one prefetch request message per object pile entry
 * of mcpilenode over sd, using a buffered writer (struct writestruct).
 * Each message is assembled in a stack buffer and carries, in the visible
 * writes: the TRANS_PREFETCH control byte, numoffset, oid, myIpAddr and the
 * offset array. An int -1 is also written at oidnoffset[len].
 * Messages are handed to send_buf() or forcesend_buf().
 * NOTE(review): the pointer advances between some fields, the gid write, the
 * condition that chooses send_buf() versus forcesend_buf(), and the list
 * advance are not visible in this excerpt -- confirm against the full file.
 */
1785 void sendPrefetchReq(prefetchpile_t *mcpilenode, int sd, int gid) {
1789 struct writestruct writebuffer;
1790 writebuffer.offset=0;
1793 /* Send TRANS_PREFETCH control message */
1796 /* Send Oids and offsets in pairs */
1797 tmp = mcpilenode->objpiles;
1798 while(tmp != NULL) {
1799 len = sizeof(int)+sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
/* The 5 extra bytes give room for the trailing int stored at
 * oidnoffset[len] below. */
1800 char oidnoffset[len+5];
1801 char *buf=oidnoffset;
1803 *buf=TRANS_PREFETCH;
1807 *((int*)buf) = tmp->numoffset;
1809 *((unsigned int *)buf) = tmp->oid;
1810 LOGOIDTYPE("S",tmp->oid,tmp->numoffset,myrdtsc());
1814 buf+=sizeof(unsigned int);
1815 *((unsigned int *)buf) = myIpAddr;
1816 buf+= sizeof(unsigned int);
1819 memcpy(buf, tmp->offset, (tmp->numoffset)*sizeof(short));
1822 *((int *)(&oidnoffset[len]))=-1;
1826 send_buf(sd, &writebuffer, oidnoffset, len);
1828 forcesend_buf(sd, &writebuffer, oidnoffset, len);
1830 LOGOIDTYPE("SREQ",0,0,myrdtsc());
1832 LOGTIME('S',0,0,myrdtsc(),gid); //after sending
/*
 * getPrefetchResponse: reads one prefetch response from sd through the
 * buffered reader and, when it carries an object, stores that object in the
 * prefetch cache.
 * Wire layout as decoded below: int length, then (length - sizeof(int))
 * bytes consisting of a control char, an unsigned int oid, an int gid and,
 * for OBJECT_FOUND, the object bytes.
 *   - OBJECT_FOUND: the object bytes are copied into memory obtained from
 *     prefetchobjstrAlloc() (allocated under prefetchcache_mutex) and
 *     inserted into the prefetch hash when no entry exists or the existing
 *     entry has a lower version.
 *   - OBJECT_NOT_FOUND: oid and gid are decoded and logged only.
 *   - any other control value: an error is printed.
 * NOTE(review): `length` comes straight off the socket and sizes the stack
 * array recvbuffer[]; no range check is visible in this excerpt. Return
 * values are not visible either -- confirm against the full file.
 */
1836 int getPrefetchResponse(int sd, struct readstruct *readbuffer) {
1837 int gid,length = 0, size = 0;
1840 void *modptr, *oldptr;
1842 recv_data_buf(sd, readbuffer, &length, sizeof(int));
/* length includes its own 4-byte field; size is what remains to be read. */
1843 size = length - sizeof(int);
1844 char recvbuffer[size];
1848 LOGTIME('K',0,0, myrdtsc(),0); //log time after first recv
1850 recv_data_buf(sd, readbuffer, recvbuffer, size);
1851 control = *((char *) recvbuffer);
1852 if(control == OBJECT_FOUND) {
1853 oid = *((unsigned int *)(recvbuffer + sizeof(char)));
1854 gid = *((int *) (recvbuffer+sizeof(char)+sizeof(unsigned int)));
1855 LOGTIME('G',oid,0, myrdtsc(),gid); //log time after first recv
/* Strip control + oid + gid; size is now the object's byte count. */
1856 size = size - (sizeof(char) + sizeof(unsigned int) + sizeof(int));
1857 pthread_mutex_lock(&prefetchcache_mutex);
1858 if ((modptr = prefetchobjstrAlloc(size)) == NULL) {
1859 printf("Error: objstrAlloc error for copying into prefetch cache %s, %d\n", __FILE__, __LINE__);
1860 pthread_mutex_unlock(&prefetchcache_mutex);
1863 pthread_mutex_unlock(&prefetchcache_mutex);
1864 memcpy(modptr, recvbuffer + sizeof(char) + sizeof(unsigned int)+sizeof(int), size);
1868 /* Insert the oid and its address into the prefetch hash lookup table */
1869 /* Do a version comparison if the oid exists */
1870 if((oldptr = prehashSearch(oid)) != NULL) {
1871 /* If older version then update with new object ptr */
1872 if(((objheader_t *)oldptr)->version < ((objheader_t *)modptr)->version) {
1873 prehashInsert(oid, modptr);
1875 } else { /* Else add the object ptr to hash table*/
1876 prehashInsert(oid, modptr);
1878 LOGOIDTYPE("GR",oid, TYPE(modptr),myrdtsc());
1879 LOGTIME('Z',oid, TYPE(modptr), myrdtsc(),gid); //log time after copying it into the prefetch cache
1880 } else if(control == OBJECT_NOT_FOUND) {
1881 oid = *((unsigned int *)(recvbuffer + sizeof(char)));
1882 gid = *((int *) (recvbuffer+sizeof(char)+sizeof(unsigned int)));
1883 LOGOIDTYPE("NF",oid,0,myrdtsc());
1884 LOGTIME('F',oid, 0, myrdtsc(),gid); //log time after copying it into the prefetch cache
1885 /* TODO: For each object not found query DHT for new location and retrieve the object */
1886 /* Throw an error */
1887 //printf("OBJECT %x NOT FOUND.... THIS SHOULD NOT HAPPEN...TERMINATE PROGRAM\n", oid);
1890 printf("Error: in decoding the control value %d, %s, %d\n",control, __FILE__, __LINE__);
/*
 * getObjType: returns the TYPE() of the object identified by oid.
 * Lookup order: main object store (mhashSearch), prefetch cache
 * (prehashSearch), then a remote read. The remote read sends READ_REQUEST
 * followed by the oid to the machine given by lhashSearch(oid), using a
 * socket from transReadSockPool, and reads back a control byte, an int size
 * and the object itself.
 * Two ways of receiving the object are visible: into memory from
 * prefetchobjstrAlloc() (then inserted with prehashInsert), and into a
 * calloc()'d buffer.
 * NOTE(review): what selects between those two paths, the error returns, the
 * socket release and the release of `buffer` are not visible in this
 * excerpt -- confirm against the full file.
 */
1896 unsigned short getObjType(unsigned int oid) {
1897 objheader_t *objheader;
1898 unsigned short numoffset[] ={0};
1899 short fieldoffset[] ={};
1901 if ((objheader = (objheader_t *) mhashSearch(oid)) == NULL) {
1903 if ((objheader = (objheader_t *) prehashSearch(oid)) == NULL) {
1905 unsigned int mid = lhashSearch(oid);
1906 int sd = getSock2(transReadSockPool, mid);
/* Request layout: 1 control byte followed by the oid. */
1907 char remotereadrequest[sizeof(char)+sizeof(unsigned int)];
1908 remotereadrequest[0] = READ_REQUEST;
1909 *((unsigned int *)(&remotereadrequest[1])) = oid;
1910 send_data(sd, remotereadrequest, sizeof(remotereadrequest));
1912 /* Read response from the Participant */
1914 recv_data(sd, &control, sizeof(char));
1916 if (control==OBJECT_NOT_FOUND) {
1917 printf("Error: in %s() THIS SHOULD NOT HAPPEN.....EXIT PROGRAM\n", __func__);
1921 /* Read object if found into local cache */
1923 recv_data(sd, &size, sizeof(int));
1925 pthread_mutex_lock(&prefetchcache_mutex);
1926 if ((objheader = prefetchobjstrAlloc(size)) == NULL) {
1927 printf("Error: %s() objstrAlloc error for copying into prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
1930 pthread_mutex_unlock(&prefetchcache_mutex);
1931 recv_data(sd, objheader, size);
1932 prehashInsert(oid, objheader);
1933 return TYPE(objheader);
1936 if((buffer = calloc(1, size)) == NULL) {
1937 printf("%s() Calloc Error %s at line %d\n", __func__, __FILE__, __LINE__);
1941 recv_data(sd, buffer, size);
1942 objheader = (objheader_t *)buffer;
1943 unsigned short type = TYPE(objheader);
1952 return TYPE(objheader);
/*
 * startRemoteThread: opens a TCP connection to machine `mid` on LISTEN_PORT
 * and sends a START_REMOTE_THREAD message carrying `oid`.
 * `mid` is passed through htonl() before being stored in sin_addr, so it is
 * expected in host byte order.
 * The message is 1 control byte followed by the unsigned int oid.
 * NOTE(review): error returns, closing of the socket and the final return
 * value are not visible in this excerpt -- confirm against the full file.
 */
1955 int startRemoteThread(unsigned int oid, unsigned int mid) {
1957 struct sockaddr_in remoteAddr;
1958 char msg[1 + sizeof(unsigned int)];
1962 if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
1963 perror("startRemoteThread():socket()");
1967 bzero(&remoteAddr, sizeof(remoteAddr));
1968 remoteAddr.sin_family = AF_INET;
1969 remoteAddr.sin_port = htons(LISTEN_PORT);
1970 remoteAddr.sin_addr.s_addr = htonl(mid);
1972 if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
1973 printf("startRemoteThread():error %d connecting to %s:%d\n", errno,
1974 inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
1978 msg[0] = START_REMOTE_THREAD;
1979 *((unsigned int *) &msg[1]) = oid;
1980 send_data(sock, msg, 1 + sizeof(unsigned int));
/*
 * getNewOID: hands out a new object id from this machine's oid range.
 * The file-scope counter `id` starts at 0xFFFFFFFF and is checked against
 * the [oidMin, oidMax] range set up by processConfigFile().
 * NOTE(review): the counter update, the out-of-range handling and the return
 * statement are not visible in this excerpt -- confirm against the full file.
 */
1987 //TODO: when reusing oids, make sure they are not already in use!
1988 static unsigned int id = 0xFFFFFFFF;
1989 unsigned int getNewOID(void) {
1991 if (id > oidMax || id < oidMin) {
/*
 * processConfigFile: reads the host list from CONFIG_FILENAME and derives
 * this machine's identity and oid range.
 *   - Each line is cut at the first '#', split on spaces/tabs/newlines, and
 *     every token is parsed with inet_addr() and passed to addHost().
 *   - At least one host must have been added.
 *   - myIpAddr is obtained from getMyIpAddr() and located in the host array
 *     with findHost(); the resulting index selects the oid block.
 *   - The 32-bit oid space is divided into numHostsInSystem blocks of
 *     oidsPerBlock ids; the last host's range is extended to 0xFFFFFFFF.
 * NOTE(review): error returns, fclose(), the condition choosing between the
 * "en1" and "eth0" interface names, the `else` pairing for oidMax and the
 * final return value are not visible in this excerpt.
 */
1997 int processConfigFile() {
1999 const int maxLineLength = 200;
2000 char lineBuffer[maxLineLength];
2002 const char *delimiters = " \t\n";
2006 configFile = fopen(CONFIG_FILENAME, "r");
2007 if (configFile == NULL) {
2008 printf("error opening %s:\n", CONFIG_FILENAME);
2013 numHostsInSystem = 0;
2014 sizeOfHostArray = 8;
2015 hostIpAddrs = calloc(sizeOfHostArray, sizeof(unsigned int));
2017 while(fgets(lineBuffer, maxLineLength, configFile) != NULL) {
/* Truncate the line at a '#' so trailing comments are ignored. */
2018 commentBegin = strchr(lineBuffer, '#');
2019 if (commentBegin != NULL)
2020 *commentBegin = '\0';
2021 token = strtok(lineBuffer, delimiters);
2022 while (token != NULL) {
2023 tmpAddr = inet_addr(token);
/* inet_addr() reports a parse failure with an all-ones value, which
 * equals -1 after the cast to int. */
2024 if ((int)tmpAddr == -1) {
2025 printf("error in %s: bad token:%s\n", CONFIG_FILENAME, token);
/* inet_addr() yields network byte order; the htonl() here serves as the
 * byte swap so the stored address is in host order (presumably matching
 * the htonl(mid) applied when connecting -- confirm). */
2029 addHost(htonl(tmpAddr));
2030 token = strtok(NULL, delimiters);
2036 if (numHostsInSystem < 1) {
2037 printf("error in %s: no IP Adresses found\n", CONFIG_FILENAME);
2041 myIpAddr = getMyIpAddr("en1");
2043 myIpAddr = getMyIpAddr("eth0");
2045 myIndexInHostArray = findHost(myIpAddr);
2046 if (myIndexInHostArray == -1) {
2047 printf("error in %s: IP Address of eth0 not found\n", CONFIG_FILENAME);
/* NOTE(review): assuming 32-bit unsigned int, a single-host configuration
 * makes this expression wrap to 0; the last-host case below still assigns
 * oidMax = 0xFFFFFFFF. */
2050 oidsPerBlock = (0xFFFFFFFF / numHostsInSystem) + 1;
2051 oidMin = oidsPerBlock * myIndexInHostArray;
2052 if (myIndexInHostArray == numHostsInSystem - 1)
2053 oidMax = 0xFFFFFFFF;
2055 oidMax = oidsPerBlock * (myIndexInHostArray + 1) - 1;
/*
 * addHost: appends hostIp to the global hostIpAddrs array.
 * findHost() is consulted first so an address already present takes a
 * separate branch. When the array is full (numHostsInSystem ==
 * sizeOfHostArray) a zeroed array of twice the capacity is allocated and the
 * existing entries are copied into it before the append.
 * NOTE(review): the calloc() result is not checked in the visible lines, and
 * the early return for duplicates, the release of the old array and the
 * update of sizeOfHostArray are not visible in this excerpt -- confirm
 * against the full file.
 */
2060 void addHost(unsigned int hostIp) {
2061 unsigned int *tmpArray;
2063 if (findHost(hostIp) != -1)
2066 if (numHostsInSystem == sizeOfHostArray) {
2067 tmpArray = calloc(sizeOfHostArray * 2, sizeof(unsigned int));
2068 memcpy(tmpArray, hostIpAddrs, sizeof(unsigned int) * numHostsInSystem);
2070 hostIpAddrs = tmpArray;
2073 hostIpAddrs[numHostsInSystem++] = hostIp;
/*
 * findHost: linear search of hostIpAddrs[0 .. numHostsInSystem) for hostIp.
 * Callers in this file compare the result against -1 to detect "not found".
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the matching index is returned on success -- confirm against
 * the full file.
 */
2078 int findHost(unsigned int hostIp) {
2080 for (i = 0; i < numHostsInSystem; i++)
2081 if (hostIpAddrs[i] == hostIp)
2088 /* This function sends notification request per thread waiting on object(s) whose version
// reqNotify: asks the machine that owns the watched objects to notify this
// thread when their versions change, then blocks until the notification
// arrives.
// Visible steps:
//   1. Resolve the target machine with lhashSearch(oid) and create a TCP socket.
//   2. Allocate a notifydata_t, fill it with numoid, threadid, the caller's
//      oid/version arrays and the condition variable/mutex, and register it
//      with notifyhashInsert(threadid, ndata).
//   3. Connect and send: THREAD_NOTIFY_REQUEST, numoid, the oids, the
//      versions, myIpAddr and threadid.
//   4. Wait on ndata->threadcond with ndata->threadnotify held.
// NOTE(review): ndata receives struct copies of the local threadcond and
// threadnotify; the wait uses the copies while the destroy calls at the end
// target the locals. POSIX leaves the use of copied mutex/condition objects
// undefined -- worth confirming and, if so, initializing the objects in place
// inside ndata.
// NOTE(review): the assignment that sets `oid` before lhashSearch(oid), the
// threadid generation, error returns, socket close and the final return value
// are not visible in this excerpt -- confirm against the full file.
2090 int reqNotify(unsigned int *oidarry, unsigned short *versionarry, unsigned int numoid) {
2092 objheader_t *objheader;
2093 struct sockaddr_in remoteAddr;
// Buffer size: 1 control byte + numoid (oid, version) pairs + three unsigned
// ints (numoid, myIpAddr, threadid).
2094 char msg[1 + numoid * (sizeof(unsigned short) + sizeof(unsigned int)) + 3 * sizeof(unsigned int)];
2098 unsigned short version;
2099 unsigned int oid,mid;
2100 static unsigned int threadid = 0;
2101 pthread_mutex_t threadnotify = PTHREAD_MUTEX_INITIALIZER; //Lock and condition var for threadjoin and notification
2102 pthread_cond_t threadcond = PTHREAD_COND_INITIALIZER;
2103 notifydata_t *ndata;
2106 if((mid = lhashSearch(oid)) == 0) {
2107 printf("Error: %s() No such machine found for oid =%x\n",__func__, oid);
2111 if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
2112 perror("reqNotify():socket()");
2116 bzero(&remoteAddr, sizeof(remoteAddr));
2117 remoteAddr.sin_family = AF_INET;
2118 remoteAddr.sin_port = htons(LISTEN_PORT);
2119 remoteAddr.sin_addr.s_addr = htonl(mid);
2121 /* Generate unique threadid */
2124 /* Save threadid, numoid, oidarray, versionarray, pthread_cond_variable for later processing */
2125 if((ndata = calloc(1, sizeof(notifydata_t))) == NULL) {
2126 printf("Calloc Error %s, %d\n", __FILE__, __LINE__);
2129 ndata->numoid = numoid;
2130 ndata->threadid = threadid;
// ndata keeps pointers to the caller's arrays; they are not copied.
2131 ndata->oidarry = oidarry;
2132 ndata->versionarry = versionarry;
2133 ndata->threadcond = threadcond;
2134 ndata->threadnotify = threadnotify;
2135 if((status = notifyhashInsert(threadid, ndata)) != 0) {
2136 printf("reqNotify(): Insert into notify hash table not successful %s, %d\n", __FILE__, __LINE__);
2141 /* Send number of oids, oidarry, version array, machine id and threadid */
2142 if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
2143 printf("reqNotify():error %d connecting to %s:%d\n", errno,
2144 inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
2148 msg[0] = THREAD_NOTIFY_REQUEST;
2149 *((unsigned int *)(&msg[1])) = numoid;
2150 /* Send array of oids */
// `size` is the running write offset relative to &msg[1].
2151 size = sizeof(unsigned int);
2153 for(i = 0; i < numoid; i++) {
2155 *((unsigned int *)(&msg[1] + size)) = oid;
2156 size += sizeof(unsigned int);
2159 /* Send array of version */
2160 for(i = 0; i < numoid; i++) {
2161 version = versionarry[i];
2162 *((unsigned short *)(&msg[1] + size)) = version;
2163 size += sizeof(unsigned short);
2166 *((unsigned int *)(&msg[1] + size)) = myIpAddr; size += sizeof(unsigned int);
2167 *((unsigned int *)(&msg[1] + size)) = threadid;
// The mutex is taken before the request is sent and stays held until
// pthread_cond_wait() releases it.
2168 pthread_mutex_lock(&(ndata->threadnotify));
2169 size = 1 + numoid * (sizeof(unsigned int) + sizeof(unsigned short)) + 3 * sizeof(unsigned int);
2170 send_data(sock, msg, size);
2171 pthread_cond_wait(&(ndata->threadcond), &(ndata->threadnotify));
2172 pthread_mutex_unlock(&(ndata->threadnotify));
2175 pthread_cond_destroy(&threadcond);
2176 pthread_mutex_destroy(&threadnotify);
/*
 * threadNotify: handles an incoming notification that object `oid` reached
 * `version`, addressed to the waiting thread `tid`.
 * Visible steps: look up the thread's notifydata_t with notifyhashSearch(),
 * search its oidarry for `oid`, complain when the oid is absent or when
 * `version` is not greater than the recorded version, look the oid up in the
 * prefetch cache, and finally signal ndata->threadcond while holding
 * ndata->threadnotify.
 * NOTE(review): the assignments to `index` and `objIsFound`, the early
 * returns after the error messages, and the body of the prefetch-cache
 * branch are not visible in this excerpt -- confirm against the full file.
 */
2182 void threadNotify(unsigned int oid, unsigned short version, unsigned int tid) {
2183 notifydata_t *ndata;
2184 int i, objIsFound = 0, index;
2187 //Look up the tid and call the corresponding pthread_cond_signal
2188 if((ndata = notifyhashSearch(tid)) == NULL) {
2189 printf("threadnotify(): No such threadid is present %s, %d\n", __FILE__, __LINE__);
2192 for(i = 0; i < ndata->numoid; i++) {
2193 if(ndata->oidarry[i] == oid) {
2198 if(objIsFound == 0) {
2199 printf("threadNotify(): Oid not found %s, %d\n", __FILE__, __LINE__);
2202 if(version <= ndata->versionarry[index]) {
2203 printf("threadNotify(): New version %d has not changed since last version for oid = %d, %s, %d\n", version, oid, __FILE__, __LINE__);
2207 /* Clear from prefetch cache and free thread related data structure */
2208 if((ptr = prehashSearch(oid)) != NULL) {
/* Signal under the same mutex the waiter passes to pthread_cond_wait(). */
2212 pthread_mutex_lock(&(ndata->threadnotify));
2213 pthread_cond_signal(&(ndata->threadcond));
2214 pthread_mutex_unlock(&(ndata->threadnotify));
/*
 * notifyAll: sends a THREAD_NOTIFY_RESPONSE for object `oid` at `version` to
 * every entry of the waiting-thread list *head.
 * For each entry a new TCP connection is made to LISTEN_PORT on the entry's
 * machine and a message of 1 control byte, the unsigned int oid, an unsigned
 * short version and the entry's unsigned int threadid is sent.
 * NOTE(review): `version` is an unsigned int parameter but is stored through
 * an unsigned short pointer, so only its low 16 bits are transmitted
 * (assuming 16-bit short).
 * NOTE(review): the assignments to `ptr` and `mid`, the list advance/removal,
 * error returns, socket close and the final return value are not visible in
 * this excerpt -- confirm against the full file.
 */
2221 int notifyAll(threadlist_t **head, unsigned int oid, unsigned int version) {
2224 struct sockaddr_in remoteAddr;
2225 char msg[1 + sizeof(unsigned short) + 2*sizeof(unsigned int)];
2226 int sock, status, size;
2229 while(*head != NULL) {
2232 //create a socket connection to that machine
2233 if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
2234 perror("notifyAll():socket()");
2238 bzero(&remoteAddr, sizeof(remoteAddr));
2239 remoteAddr.sin_family = AF_INET;
2240 remoteAddr.sin_port = htons(LISTEN_PORT);
2241 remoteAddr.sin_addr.s_addr = htonl(mid);
2242 //send Thread Notify response and threadid to that machine
2243 if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
2244 printf("notifyAll():error %d connecting to %s:%d\n", errno,
2245 inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
2249 bzero(msg, (1+sizeof(unsigned short) + 2*sizeof(unsigned int)));
2250 msg[0] = THREAD_NOTIFY_RESPONSE;
2251 *((unsigned int *)&msg[1]) = oid;
/* `size` is the running write offset relative to &msg[1]. */
2252 size = sizeof(unsigned int);
2253 *((unsigned short *)(&msg[1]+ size)) = version;
2254 size+= sizeof(unsigned short);
2255 *((unsigned int *)(&msg[1]+ size)) = ptr->threadid;
2257 size = 1 + 2*sizeof(unsigned int) + sizeof(unsigned short);
2258 send_data(sock, msg, size);
2271 removetransactionhash();
2273 objstrDelete(t_cache);
/*
 * pInsert: records the object described by headeraddr in the pile node for
 * machine `mid`.
 * The pile list is searched for a node whose mid matches. The object is then
 * classified by its STATUS() bits:
 *   - NEW:   oid stored in oidcreated[]; sum_bytes grows by
 *            sizeof(objheader_t) plus the object's size.
 *   - DIRTY: oid stored in oidmod[]; sum_bytes grows the same way.
 *   - otherwise: (oid, version) is packed into objread as an unsigned int
 *            followed by a short, at slot numread.
 * When no node matches, a new one is obtained from pCreate(num_objs) and the
 * same classification is applied to it. STATUS(headeraddr) is cleared to 0
 * near the end.
 * NOTE(review): the counter increments (numcreated / nummod / numread), the
 * linking of a new node into the list, the pCreate() failure path and the
 * return value are not visible in this excerpt -- confirm against the full
 * file.
 */
2278 /* This function inserts necessary information into
2279 * a machine pile data structure */
2280 plistnode_t *pInsert(plistnode_t *pile, objheader_t *headeraddr, unsigned int mid, int num_objs) {
2281 plistnode_t *ptr, *tmp;
2282 int found = 0, offset = 0;
2285 //Add oid into a machine that is already present in the pile linked list structure
2286 while(tmp != NULL) {
2287 if (tmp->mid == mid) {
2290 if (STATUS(headeraddr) & NEW) {
2291 tmp->oidcreated[tmp->numcreated] = OID(headeraddr);
2293 GETSIZE(tmpsize, headeraddr);
2294 tmp->sum_bytes += sizeof(objheader_t) + tmpsize;
2295 } else if (STATUS(headeraddr) & DIRTY) {
2296 tmp->oidmod[tmp->nummod] = OID(headeraddr);
2298 GETSIZE(tmpsize, headeraddr);
2299 tmp->sum_bytes += sizeof(objheader_t) + tmpsize;
/* objread is a packed byte array of (unsigned int oid, short version)
 * records; this computes the byte offset of record number numread. */
2301 offset = (sizeof(unsigned int) + sizeof(short)) * tmp->numread;
2302 *((unsigned int *)(((char *)tmp->objread) + offset))=OID(headeraddr);
2303 offset += sizeof(unsigned int);
2304 *((short *)(((char *)tmp->objread) + offset)) = headeraddr->version;
2312 //Add oid for any new machine
2315 if((ptr = pCreate(num_objs)) == NULL) {
2319 if (STATUS(headeraddr) & NEW) {
2320 ptr->oidcreated[ptr->numcreated] = OID(headeraddr);
2322 GETSIZE(tmpsize, headeraddr);
2323 ptr->sum_bytes += sizeof(objheader_t) + tmpsize;
2324 } else if (STATUS(headeraddr) & DIRTY) {
2325 ptr->oidmod[ptr->nummod] = OID(headeraddr);
2327 GETSIZE(tmpsize, headeraddr);
2328 ptr->sum_bytes += sizeof(objheader_t) + tmpsize;
/* First read record of a fresh node goes at the start of objread. */
2330 *((unsigned int *)ptr->objread)=OID(headeraddr);
2331 offset = sizeof(unsigned int);
2332 *((short *)(((char *)ptr->objread) + offset)) = headeraddr->version;
2340 STATUS(headeraddr) =0;
2346 // relocate the position of myIp pile to end of list
2347 plistnode_t *sortPiles(plistnode_t *pileptr) {
2348 plistnode_t *ptr, *tail;
2351 /* Get tail pointer and myIp pile ptr */
2355 while(tail->next != NULL) {
2356 if(tail->mid == myIpAddr)
2361 // if ptr is null, then myIp pile is already at tail
2363 /* Arrange local machine processing at the end of the pile list */
2364 tail->next = pileptr;
2365 pileptr = ptr->next;
2370 /* get to this point iff myIpAddr pile is at tail */