3 #include "machinepile.h"
8 #include "threadnotify.h"
10 #include "addUdpEnhance.h"
11 #include "addPrefetchEnhance.h"
19 #include "abortreaders.h"
26 #include <sys/select.h>
31 #define CONFIG_FILENAME "dstm.conf"
33 /* Thread transaction variables */
/* Per-thread transaction object cache: created with objstrCreate() at
 * transaction start, filled through objstrAlloc(&t_cache, ...) as objects are
 * read or created, and released with objstrDelete() on abort and after the
 * commit decision. */
35 __thread objstr_t *t_cache;
36 __thread struct ___Object___ *revertlist;
/* Jump buffer used by the read path: transRead()/transRead2() call
 * _longjmp(aborttrans, 1) after tearing down the transaction state. */
39 __thread jmp_buf aborttrans;
42 /* Global Variables */
43 extern int classsize[];
/* Prefetch statistics table returned by initPrefetchStats(); prefetch()
 * indexes it by site id to bump callcount. */
44 pfcstats_t *evalPrefetch;
45 extern int numprefetchsites; //Global variable containing number of prefetch sites
46 extern pthread_mutex_t mainobjstore_mutex; // Mutex to lock main Object store
47 pthread_mutex_t prefetchcache_mutex; // Mutex to lock Prefetch Cache
48 pthread_mutexattr_t prefetchcache_mutex_attr; /* Attribute for lock to make it a recursive lock */
49 extern prehashtable_t pflookup; //Global Prefetch cache's lookup table
50 pthread_t wthreads[NUM_THREADS]; //Worker threads for working on the prefetch queue
51 pthread_t tPrefetch; /* Primary Prefetch thread that processes the prefetch queue */
52 extern objstr_t *mainobjstore;
/* Compared against pile->mid during commit to decide whether a pile is
 * handled locally, and against `leader` when restoring duplication state. */
53 unsigned int myIpAddr;
54 unsigned int *hostIpAddrs;
57 int myIndexInHostArray;
58 unsigned int oidsPerBlock;
/* Socket pools, each created with createSockPool(..., DEFAULTSOCKPOOLSIZE)
 * in dstmStartup(). */
62 sockPoolHashTable_t *transReadSockPool;
63 sockPoolHashTable_t *transPrefetchSockPool;
64 sockPoolHashTable_t *transRequestSockPool;
65 pthread_mutex_t notifymutex;
66 pthread_mutex_t atomicObjLock;
68 /***********************************
69 * Global Variables for statistics
70 **********************************/
71 int numTransCommit = 0;
72 int numTransAbort = 0;
75 int nprehashSearch = 0;
81 int sendRemoteReq = 0;
85 /***********************************
86 * Global variables for Duplication
87 ***********************************/
89 int numLiveHostsInSystem;
90 unsigned int *locateObjHosts;
93 /* variables to clear dead threads */
95 unsigned int waitThreadID;
98 unsigned int transIDMin;
99 unsigned int transIDMax;
101 char ip[16]; // for debugging purpose
103 /******************************
104 * Global variables for Paxos
105 ******************************/
111 unsigned int origleader;
112 unsigned int temp_v_a;
/* Forward declarations for helpers defined later in this file. */
116 void printhex(unsigned char *, int);
117 plistnode_t *createPiles();
118 plistnode_t *sortPiles(plistnode_t *pileptr);
/* 16 MiB statically allocated buffer.
 * NOTE(review): presumably the backing store for the LOGEVENT macro that
 * follows -- confirm. */
121 char bigarray[16*1024*1024];
123 #define LOGEVENT(x) { \
124 int tmp=bigindex++; \
131 /*******************************
132 * Send and Recv function calls
133 *******************************/
/* send_data: push the contents of buf to socket fd using send(2).
 *
 * The visible code sends from a cursor (`buffer`) inside a while loop and
 * accumulates the byte count in bytesSent; the declarations, the loop header
 * and some return statements are not part of this excerpt.
 *
 * fd      socket descriptor handed to send()
 * buf     start of the data to transmit
 * buflen  NOTE(review): presumably the number of bytes to send -- its use is
 *         on lines not shown here; confirm
 *
 * Returns 0 once the data has been sent.  The values returned on the error
 * branches are not visible in this excerpt. */
134 int send_data(int fd, void *buf, int buflen) {
135 char *buffer = (char *)(buf);
143 numbytes = send(fd, buffer, size, 0);
146 bytesSent += numbytes;
150 else if( numbytes < 0) {
151 // send() returned an error.
152 // Analyze underlying cause
154 printf("%s -> fd : %d errno = %d %s\n",__func__, fd, errno,strerror(errno));
/* Connection reset and would-block/timeout errnos share one branch. */
157 if(errno == ECONNRESET || errno == EAGAIN || errno == EWOULDBLOCK) {
158 // machine has failed
160 // if we see EAGAIN w/o failures, we should record the time
161 // when we start send and finish send see if it is longer
162 // than our threshold
165 printf("%s -> EAGAIN : %s\n",__func__,(errno == EAGAIN)?"TRUE":"FALSE");
175 printf("%s -> Unexpected ERROR!\n",__func__);
181 // Case : numbytes == 0
182 // // machine has failed -- this case probably doesn't occur in reality
184 printf("%s -> SHOULD NOT BE HERE\n",__func__);
194 } // close while loop
196 printf("%s-> Exiting\n", __func__);
198 return 0; // completed sending data
/* recv_data: read data from socket fd into buf using recv(2).
 *
 * fd      socket descriptor handed to recv()
 * buf     destination buffer; `buffer` is the byte cursor into it
 * buflen  NOTE(review): presumably the number of bytes expected -- its use is
 *         on lines not shown here; confirm
 *
 * Returns 0 when all the data was received. */
201 //Returns negative value if receive cannot be completed because of
202 //timeout or machine failure
204 int recv_data(int fd, void *buf, int buflen) {
205 char *buffer = (char *)(buf);
214 numbytes = recv(fd, buffer, size, 0);
221 else if (numbytes<0){
222 //Receive returned an error.
223 //Analyze underlying cause
225 printf("%s-> fd : %d errno = %d %s\n", __func__, fd, errno, strerror(errno));
/* Connection reset and would-block/timeout errnos share one branch. */
227 if(errno == ECONNRESET || errno == EAGAIN || errno == EWOULDBLOCK) {
229 //if we see EAGAIN w/o failures, we should record the time
230 //when we start read and finish read and see if it is longer
233 printf("%s -> EAGAIN : %s\n",__func__,(errno == EAGAIN)?"TRUE":"FALSE");
/* EAGAIN is singled out for retry accounting (see the log line below); the
 * counter update itself is on a line not shown here. */
235 if(errno == EAGAIN) {
238 printf("%s -> TRYcounter increases\n",__func__);
254 printf("%s -> Unexpected ERROR!\n",__func__);
255 printf("%s-> errno = %d %s\n", __func__, errno, strerror(errno));
261 //machine has failed -- this case probably doesn't occur in reality
264 printf("%s -> SHOULD NOT BE HERE\n",__func__);
269 if( numbytes == -1) {
276 printf("%s -> fd = %d Exiting\n",__func__,fd);
278 return 0; // got all the data
/* recv_data_errorcode: variant of recv_data that reads from socket fd into
 * buf with recv(2) and, when recv() yields -1, reports errno both through
 * printf and perror().
 * NOTE(review): the return statements are not visible in this excerpt; judging
 * by the name it presumably hands an error code back to the caller -- confirm. */
281 int recv_data_errorcode(int fd, void *buf, int buflen) {
283 printf("%s-> Start; fd:%d, buflen:%d\n", __func__, fd, buflen);
285 char *buffer = (char *)(buf);
289 numbytes = recv(fd, buffer, size, 0);
291 printf("%s-> numbytes: %d\n", __func__, numbytes);
295 else if (numbytes == -1) {
296 printf("%s -> ERROR NUMBER = %d %s\n",__func__,errno,strerror(errno));
297 perror("recv_data_errorcode");
304 printf("%s-> Exiting\n", __func__);
/* printhex: print numBytes bytes starting at ptr as lowercase hex, each
 * followed by a space.  One of the two printf calls prepends a literal '0';
 * the condition selecting it is not visible here (presumably values that
 * would otherwise print as a single hex digit -- confirm). */
309 void printhex(unsigned char *ptr, int numBytes) {
311 for (i = 0; i < numBytes; i++) {
313 printf("0%x ", ptr[i]);
315 printf("%x ", ptr[i]);
/* arrayLength: walk `array` from index 0 until the first element equal to -1.
 * The array must therefore contain a -1 sentinel.
 * NOTE(review): the return statement is not visible; presumably the index of
 * the sentinel (i.e. the element count) is returned -- confirm. */
321 inline int arrayLength(int *array) {
323 for(i=0 ; array[i] != -1; i++)
/* findmax: iterate over the first `arraylength` elements of `array`.
 * NOTE(review): the loop body and return are not visible; by its name the
 * function presumably returns the largest element -- confirm. */
328 inline int findmax(int *array, int arraylength) {
331 for(i = 0; i < arraylength; i++) {
/* midtoIPString: return a C string for machine id `mid`; callers in this file
 * pass the result to printf("%s") in log messages.
 * NOTE(review): the body is not visible -- ownership/lifetime of the returned
 * buffer (static vs. allocated) must be confirmed before storing the pointer. */
340 char* midtoIPString(unsigned int mid){
345 /* This function is a prefetch call generated by the compiler that
346 * populates the shared primary prefetch queue.
 *
 * siteid       prefetch site id; also indexes evalPrefetch[] for call counting
 * ntuples      number of entries in oids[] and endoffsets[]
 * oids         ntuples object ids
 * endoffsets   ntuples unsigned shorts; the last one gives the number of
 *              shorts in arrayfields
 * arrayfields  endoffsets[ntuples-1] shorts
 *
 * Queue node layout as written below:
 *   int siteid | int ntuples | oids[] | endoffsets[] | arrayfields[]
 *
 * endoffsets[ntuples - 1] is read unconditionally, so the code assumes
 * ntuples >= 1. */
347 void prefetch(int siteid, int ntuples, unsigned int *oids, unsigned short *endoffsets, short *arrayfields) {
348 /* Allocate for the queue node*/
349 int qnodesize = 2*sizeof(int) + ntuples * (sizeof(unsigned short) + sizeof(unsigned int)) + endoffsets[ntuples - 1] * sizeof(short);
351 char * node= getmemory(qnodesize);
/* top = number of shorts to copy from arrayfields. */
352 int top=endoffsets[ntuples-1];
356 /* Set queue node values */
358 /* TODO: Remove this after testing */
359 evalPrefetch[siteid].callcount++;
361 *((int *)(node))=siteid;
362 *((int *)(node + sizeof(int))) = ntuples;
/* NOTE(review): `len` is set on a line not shown; to match qnodesize it
 * presumably equals 2*sizeof(int), the size of the two-int header -- confirm. */
364 memcpy(node+len, oids, ntuples*sizeof(unsigned int));
365 memcpy(node+len+ntuples*sizeof(unsigned int), endoffsets, ntuples*sizeof(unsigned short));
366 memcpy(node+len+ntuples*(sizeof(unsigned int)+sizeof(short)), arrayfields, top*sizeof(short));
368 /* Lock and insert into primary prefetch queue */
372 /* This function starts up the transaction runtime.
 *
 * option  may be NULL; the string "master" sets the local `master` flag.
 *
 * Visible steps: read the configuration via processConfigFile(), create the
 * three socket pools, start the UDP and TCP listeners as detached threads,
 * refresh the live-host view and verify that all hosts are live.  One path
 * calls dstmListen() directly on the calling thread instead.
 *
 * Returns 0 when processConfigFile() fails (see the TODO below); other return
 * values are on lines not shown in this excerpt. */
373 int dstmStartup(const char * option) {
374 pthread_t thread_Listen, udp_thread_Listen;
/* master is 1 only for a non-NULL option equal to "master". */
376 int master=option!=NULL && strcmp(option, "master")==0;
380 if (processConfigFile() != 0)
381 return 0; //TODO: return error value, cause main program to exit
388 printf("Trans stats is on\n");
395 //Initialize socket pool
396 transReadSockPool = createSockPool(transReadSockPool, DEFAULTSOCKPOOLSIZE);
397 transPrefetchSockPool = createSockPool(transPrefetchSockPool, DEFAULTSOCKPOOLSIZE);
398 transRequestSockPool = createSockPool(transRequestSockPool, DEFAULTSOCKPOOLSIZE);
/* Listener threads are created detached, so they are never joined. */
404 pthread_attr_init(&attr);
405 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
/* The socket descriptors are smuggled to the threads through the void*
 * argument by value. */
408 pthread_create(&udp_thread_Listen, &attr, udpListenBroadcast, (void*)udpfd);
411 pthread_create(&thread_Listen, &attr, dstmListen, (void*)fd);
415 updateLiveHostsCommit();
418 if(!allHostsLive()) {
419 printf("Not all hosts live. Exiting.\n");
/* Here dstmListen runs on the calling thread rather than a new one. */
425 dstmListen((void *)fd);
430 //TODO Use this later
/* pCacheAlloc: walk the chain of object stores starting at `store` (following
 * ->next) looking for one with room for `size` more bytes.
 * NOTE(review): allocation and return paths are not visible in this excerpt. */
431 void *pCacheAlloc(objstr_t *store, unsigned int size) {
437 while(ptr->next != NULL) {
438 /* check if store is empty */
/* Bytes already used (top minus the start of the payload after the objstr_t
 * header) plus the request must fit in ptr->size.
 * NOTE(review): the pointers are cast to unsigned int, which truncates on
 * targets where pointers are wider than 32 bits -- confirm target ABI. */
439 if(((unsigned int)ptr->top - (unsigned int)ptr - sizeof(objstr_t) + size) <= ptr->size) {
454 /* This function initiates the prefetch thread. A queue is shared
455 * between the main thread of execution and the prefetch thread to
456 * process the prefetch call. Calls from the compiler populate the shared
457 * queue with prefetch requests while the prefetch thread processes the
458 * prefetch requests.
 *
 * (The signature line of this routine is not part of this excerpt.)
 *
 * Visible steps: allocate the prefetch statistics table, initialise the
 * recursive prefetch-cache mutex plus notifymutex and atomicObjLock, create
 * the prefetch lookup table, then start and detach the primary prefetch
 * thread. */
461 //Create and initialize prefetch cache structure
464 if((evalPrefetch = initPrefetchStats()) == NULL) {
465 printf("%s() Error allocating memory at %s, %d\n", __func__, __FILE__, __LINE__);
470 /* Initialize attributes for mutex */
471 pthread_mutexattr_init(&prefetchcache_mutex_attr);
472 pthread_mutexattr_settype(&prefetchcache_mutex_attr, PTHREAD_MUTEX_RECURSIVE_NP);
474 pthread_mutex_init(&prefetchcache_mutex, &prefetchcache_mutex_attr);
/* The argument previously read "¬ifymutex": the "&not" prefix of
 * "&notifymutex" had been decoded as an HTML entity.  Restored so the global
 * notifymutex is actually initialised. */
475 pthread_mutex_init(&notifymutex, NULL);
476 pthread_mutex_init(&atomicObjLock, NULL);
478 //Create prefetch cache lookup table
479 if(prehashCreate(PHASH_SIZE, PLOADFACTOR)) {
484 //Initialize primary shared queue
486 //Initialize machine pile w/prefetch oids and offsets shared queue
489 //Create the primary prefetch thread
/* Two alternative entry points for the prefetch thread.
 * NOTE(review): presumably selected by a preprocessor conditional on lines
 * not shown -- confirm that only one pthread_create is ever compiled in. */
493 retval=pthread_create(&tPrefetch, NULL, transPrefetchNew, NULL);
497 retval=pthread_create(&tPrefetch, NULL, transPrefetch, NULL);
/* Detached: the thread is never joined. */
500 pthread_detach(tPrefetch);
504 /* This function stops the threads spawned: it requests cancellation of the
 * primary prefetch thread and of every worker in wthreads[].
 * (The signature line is not part of this excerpt.)
 * pthread_cancel only requests cancellation; no join is visible here. */
508 pthread_cancel(tPrefetch);
509 for(t = 0; t < NUM_THREADS; t++)
510 pthread_cancel(wthreads[t]);
516 /* This function inserts random wait delays.
517 * Mostly used when transaction commits retry.
 * (The signature line is not part of this excerpt.)
 * The nanosecond component set below is 1000 + (t % 10000) ns, i.e. about
 * 1-11 microseconds.  NOTE(review): the original header said "in the order of
 * msec"; tv_sec is set on a line not shown -- confirm the intended scale. */
524 req.tv_nsec = (long)(1000 + (t%10000)); //1-11 microsec
525 nanosleep(&req, NULL);
529 /* This function initializes things required in the transaction start:
 * a fresh per-thread object cache of 1048576 bytes (1 MiB) and the
 * per-transaction cache hash table.
 * (The signature line is not part of this excerpt.) */
531 t_cache = objstrCreate(1048576);
532 t_chashCreate(CHASH_SIZE, CLOADFACTOR);
539 /*#define INLINE inline __attribute__((always_inline))
541 INLINE void * chashSearchI(chashtable_t *table, unsigned int key) {
542 //REMOVE HASH FUNCTION CALL TO MAKE SURE IT IS INLINED HERE
543 chashlistnode_t *node = &table->table[(key & table->mask)>>1];
546 if(node->key == key) {
550 } while(node != NULL);
558 /* This function finds the location of the objects involved in a transaction
559 * and returns the pointer to the object if found in a remote location.
 *
 * oid  id of the object to read.
 *
 * Lookup order visible in this excerpt:
 *   1. the transaction's own cache hash (c_table), walked inline;
 *   2. record->lookupTable through chashSearchI();
 *   3. the local machine table (mhashSearch): the object is copied into
 *      t_cache and registered with t_chashInsert();
 *   4. the prefetch cache (prehashSearch): same copy-and-register step;
 *   5. the owning remote machine (lhashSearch + getRemoteObj).
 * Hits in steps 1 and 2 return the address just past the object header.
 *
 * An abort path (its condition is on lines not shown) drops the transaction
 * hash, deletes t_cache and leaves through _longjmp(aborttrans, 1).
 *
 * NOTE(review): the function is declared __attribute__((pure)) although it
 * inserts into the cache and may longjmp -- confirm this is a deliberate
 * optimisation trade-off. */
560 __attribute__((pure)) objheader_t *transRead(unsigned int oid) {
561 unsigned int machinenumber;
562 objheader_t *tmp, *objheader;
563 objheader_t *objcopy;
566 chashlistnode_t *node;
/* Bucket selection is inlined here instead of calling the hash function. */
572 node= &c_table[(oid & c_mask)>>1];
574 if(node->key == oid) {
/* [1] skips the objheader_t so callers get the object payload. */
579 return &((objheader_t*)node->val)[1];
585 } while(node != NULL);
589 if((objheader = chashSearchI(record->lookupTable, oid)) != NULL) {
594 return &objheader[1];
603 //abort this transaction
604 //printf("ABORTING\n");
605 removetransactionhash();
606 objstrDelete(t_cache);
608 _longjmp(aborttrans,1);
613 if ((objheader = (objheader_t *) mhashSearch(oid)) != NULL) {
617 /* Look up in machine lookup table and copy into cache*/
618 GETSIZE(size, objheader);
619 size += sizeof(objheader_t);
620 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
621 memcpy(objcopy, objheader, size);
622 /* Insert into cache's lookup table */
624 t_chashInsert(OID(objheader), objcopy);
632 if((tmp = (objheader_t *) prehashSearch(oid)) != NULL) {
636 /* Look up in prefetch cache */
638 size+=sizeof(objheader_t);
639 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
640 memcpy(objcopy, tmp, size);
641 /* Insert into cache's lookup table */
642 t_chashInsert(OID(tmp), objcopy);
650 /* Get the object from the remote location */
651 if((machinenumber = lhashSearch(oid)) == 0) {
/* Format string repaired: the old "oid =% %s,%dx" left __FILE__/__LINE__
 * unconsumed and printed machinenumber (always 0 on this branch) where the
 * message promises the oid.  The oid is now printed in hex. */
652 printf("Error: %s() No machine found for oid = %x %s,%d\n",__func__, oid, __FILE__, __LINE__);
655 objcopy = getRemoteObj(machinenumber, oid);
657 if(objcopy == NULL) {
658 printf("Error: Object not found in Remote location %s, %d\n", __FILE__, __LINE__);
674 /* This function finds the location of the objects involved in a transaction
675 * and returns the pointer to the object if found in a remote location.
 *
 * oid  id of the object to read.
 *
 * Same copy-into-t_cache scheme as transRead, without the inline cache-hash
 * probe at the top.  Lookup order visible in this excerpt: local machine
 * table (mhashSearch), prefetch cache (prehashSearch), then a remote fetch
 * with getRemoteObj().  One remote path alternates between the primary and
 * the backup machine of the owner using the static flipBit; another path
 * calls restoreDuplicationState() and then retries by calling transRead2()
 * recursively.
 *
 * An abort path (its condition is on lines not shown) drops the transaction
 * hash, deletes t_cache and leaves through _longjmp(aborttrans, 1).
 *
 * NOTE(review): declared __attribute__((pure)) despite side effects and
 * longjmp -- confirm this is intentional. */
676 __attribute__((pure)) objheader_t *transRead2(unsigned int oid) {
677 unsigned int machinenumber;
678 objheader_t *tmp, *objheader;
679 objheader_t *objcopy;
683 printf("%s-> Start, oid:%u\n", __func__, oid);
688 //abort this transaction
689 //printf("ABORTING\n");
690 removetransactionhash();
691 objstrDelete(t_cache);
693 _longjmp(aborttrans,1);
698 if ((objheader = (objheader_t *) mhashSearch(oid)) != NULL) {
700 printf("%s-> Grab from this machine\n", __func__);
705 /* Look up in machine lookup table and copy into cache*/
706 GETSIZE(size, objheader);
707 size += sizeof(objheader_t);
708 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
709 memcpy(objcopy, objheader, size);
710 /* Insert into cache's lookup table */
712 t_chashInsert(OID(objheader), objcopy);
714 printf("%s -> obj type = %d\n",__func__,getObjType(oid));
715 printf("%s -> obj grabbed\n",__func__);
724 if((tmp = (objheader_t *) prehashSearch(oid)) != NULL) {
729 /* Look up in prefetch cache */
731 size+=sizeof(objheader_t);
732 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
733 memcpy(objcopy, tmp, size);
734 /* Insert into cache's lookup table */
735 t_chashInsert(OID(tmp), objcopy);
743 /* Get the object from the remote location */
744 if((machinenumber = lhashSearch(oid)) == 0) {
/* Format string repaired: the old "oid =% %s,%dx" left __FILE__/__LINE__
 * unconsumed and printed machinenumber (always 0 on this branch) where the
 * message promises the oid.  The oid is now printed in hex. */
745 printf("Error: %s() No machine found for oid = %x %s,%d\n",__func__, oid, __FILE__, __LINE__);
749 printf("%s-> Grab from remote machine\n", __func__);
754 unsigned int machinenumber;
755 static int flipBit = 0; // Used to distribute requests between primary and backup evenly
756 // either primary or backup machine
757 machinenumber = (flipBit)?getPrimaryMachine(lhashSearch(oid)):getBackupMachine(lhashSearch(oid));
/* oid is unsigned int, so it is printed with %u (was %d). */
761 printf("mindex:%d, oid:%u, machinenumber:%s\n", mindex, oid, midtoIPString(machinenumber));
765 objcopy = getRemoteObj(machinenumber, oid);
/* Recovery path: repair duplication state for the machine, then retry the
 * whole read. */
769 restoreDuplicationState(machinenumber);
771 printf("%s -> Recall transRead2\n",__func__);
773 return transRead2(oid);
777 if(objcopy == NULL) {
778 printf("Error: Object not found in Remote location %s, %d\n", __FILE__, __LINE__);
793 printf("%s -> Finished!!\n",__func__);
798 /* This function creates objects in the transaction record.
 *
 * size  payload size in bytes; sizeof(objheader_t) is added for the header.
 *
 * Allocates header + payload from t_cache, assigns a fresh oid from
 * getNewOID(), registers the object in the transaction cache hash and returns
 * the address just past the header.
 * NOTE(review): no NULL check on the objstrAlloc() result is visible in this
 * excerpt. */
799 objheader_t *transCreateObj(unsigned int size) {
800 objheader_t *tmp = (objheader_t *) objstrAlloc(&t_cache, (sizeof(objheader_t) + size));
801 OID(tmp) = getNewOID();
806 t_chashInsert(OID(tmp), tmp);
809 return &tmp[1]; //want space after object header
817 /* This function creates machine piles based on all machines involved in a
818 * transaction commit request.
 *
 * Walks every bin of the transaction cache hash (c_table, c_size bins) and
 * each bin's chain, and adds every cached object header to the pile of the
 * machine(s) it belongs to through pInsert().  The pile list head is kept in
 * `pile`.
 *
 * NOTE(review): a second definition of createPiles() follows in this file and
 * this body contains two different grouping strategies (primary/backup vs.
 * single machine); presumably preprocessor conditionals on lines not shown
 * select between them -- confirm. */
819 plistnode_t *createPiles() {
822 printf("%s -> Entering\n",__func__);
826 plistnode_t *pile = NULL;
827 unsigned int machinenum;
828 unsigned int destMachine[2];
829 objheader_t *headeraddr;
830 chashlistnode_t * ptr = c_table;
831 /* Represents number of bins in the chash table */
832 unsigned int size = c_size;
834 for(i = 0; i < size ; i++) {
835 chashlistnode_t * curr = &ptr[i];
836 /* Inner loop to traverse the linked list of the cache lookupTable */
837 while(curr != NULL) {
838 //if the first bin in hash table is empty
841 headeraddr=(objheader_t *) curr->val;
844 oid = OID(headeraddr);
849 mid = lhashSearch(oid);
851 // if the obj is dirty or new
852 if(STATUS(headeraddr) & DIRTY || STATUS(headeraddr) & NEW) {
853 // set flag for backup machine
857 // if the obj is new or local, destination will be my Ip
858 if((mid = lhashSearch(oid)) == 0) {
/* Primary copy goes to the primary machine of mid ... */
862 pile = pInsert(pile, headeraddr, getPrimaryMachine(mid), c_numelements);
865 STATUS(headeraddr) = DIRTY;
/* ... and the same header is also queued for the backup machine. */
868 pile = pInsert(pile, headeraddr, getBackupMachine(mid), c_numelements);
870 // Get machine location for object id (and whether local or not)
871 if (STATUS(headeraddr) & NEW || (mhashSearch(curr->key) != NULL)) {
872 machinenum = myIpAddr;
873 } else if ((machinenum = lhashSearch(curr->key)) == 0) {
874 printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
877 //Make machine groups
878 pile = pInsert(pile, headeraddr, machinenum, c_numelements);
886 /* This function creates machine piles based on all machines involved in a
887 * transaction commit request.
 *
 * Variant that iterates an array of struct chashentry (c_table, c_size
 * entries) without chain traversal.  New objects and objects found in the
 * local machine table are grouped under myIpAddr; everything else is grouped
 * under the machine returned by lhashSearch().
 *
 * NOTE(review): this is the second createPiles() definition in the file;
 * presumably a preprocessor conditional on lines not shown picks one --
 * confirm. */
888 plistnode_t *createPiles() {
890 plistnode_t *pile = NULL;
891 unsigned int machinenum;
892 unsigned int destMachine[2];
893 objheader_t *headeraddr;
894 struct chashentry * ptr = c_table;
895 /* Represents number of bins in the chash table */
896 unsigned int size = c_size;
898 for(i = 0; i < size ; i++) {
899 struct chashentry * curr = & ptr[i];
900 /* Inner loop to traverse the linked list of the cache lookupTable */
901 // if the first bin in hash table is empty
904 headeraddr=(objheader_t *) curr->ptr;
906 //Get machine location for object id (and whether local or not)
907 if (STATUS(headeraddr) & NEW || (mhashSearch(curr->key) != NULL)) {
908 machinenum = myIpAddr;
/* lhashSearch() returning 0 means no owning machine is known. */
909 } else if ((machinenum = lhashSearch(curr->key)) == 0) {
910 printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
914 //Make machine groups
915 pile = pInsert(pile, headeraddr, machinenum, c_numelements);
/* Commit driver (the signature line is not part of this excerpt).
 *
 * Flow visible below, inside a do/while loop:
 *   - an early abort path (condition not shown) that drops the transaction
 *     hash and deletes t_cache;
 *   - createPiles() + sortPiles() group the transaction's objects by machine;
 *   - for every pile a trans_req_data_t is filled in; piles for other
 *     machines are sent over a socket from transRequestSockPool, the pile
 *     whose mid equals myIpAddr is evaluated by handleLocalReq();
 *   - one control byte is read back from every remote participant; on
 *     TRANS_DISAGREE the participant also ships objects that are placed in
 *     the prefetch cache;
 *   - decideResponse() folds all replies into the final response, which is
 *     sent to the participants and applied locally via doLocalProcess();
 *   - sockets are returned to the pool.
 * After the loop t_cache is deleted on both the abort and commit outcomes,
 * and restoreDuplicationState() is called when a dead machine was recorded.
 *
 * NOTE(review): the original header below mentions spawning threads per
 * connection; no thread creation is visible in this excerpt -- participants
 * appear to be contacted sequentially.  Confirm and update the header. */
921 /* This function initiates the transaction commit process
922 * Spawns threads for each of the new connections with Participants
923 * and creates new piles by calling the createPiles(),
924 * Sends a transrequest() to each remote machines for objects found remotely
925 * and calls handleLocalReq() to process objects found locally */
927 unsigned int tot_bytes_mod, *listmid;
928 plistnode_t *pile, *pile_ptr;
929 char treplyretry; /* keeps track of the common response that needs to be sent */
931 trans_commit_data_t transinfo; /* keeps track of objs locked during transaction */
937 unsigned int transID = getNewTransID();
941 printf("%s -> Starts transCommit\n",__func__);
946 //abort this transaction
948 * printf("ABORTING TRANSACTION AT COMMIT\n");
950 removetransactionhash();
951 objstrDelete(t_cache);
954 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
963 /* Look through all the objects in the transaction record and make piles
964 * for each machine involved in the transaction*/
966 pile_ptr = pile = createPiles();
967 pile_ptr = pile = sortPiles(pile);
972 /* Create the packet to be sent in TRANS_REQUEST */
974 /* Count the number of participants */
976 pilecount = pCount(pile);
978 /* Create a list of machine ids(Participants) involved in transaction */
979 listmid = calloc(pilecount, sizeof(unsigned int));
980 pListMid(pile, listmid);
982 /* Create a socket and getReplyCtrl array, initialize */
/* Both arrays are variable-length arrays sized by the participant count and
 * are zeroed by hand below. */
983 int socklist[pilecount];
985 for(loopcount = 0 ; loopcount < pilecount; loopcount++)
986 socklist[loopcount] = 0;
987 char getReplyCtrl[pilecount];
988 for(loopcount = 0 ; loopcount < pilecount; loopcount++)
989 getReplyCtrl[loopcount] = 0;
991 /* Process each machine pile */
993 int localReqsock = -1;
994 trans_req_data_t *tosend;
995 tosend = calloc(pilecount, sizeof(trans_req_data_t));
996 while(pile != NULL) {
998 printf("%s-> New pile:[%s],", __func__, midtoIPString(pile->mid));
999 printf(" myIp:[%s]\n", midtoIPString(myIpAddr));
/* Fixed-size request header for this participant. */
1001 tosend[sockindex].f.control = TRANS_REQUEST;
1002 tosend[sockindex].f.mcount = pilecount;
1003 tosend[sockindex].f.numread = pile->numread;
1004 tosend[sockindex].f.nummod = pile->nummod;
1005 tosend[sockindex].f.numcreated = pile->numcreated;
1006 tosend[sockindex].f.sum_bytes = pile->sum_bytes;
/* Variable-size parts are borrowed from the pile, not copied. */
1007 tosend[sockindex].listmid = listmid;
1008 tosend[sockindex].objread = pile->objread;
1009 tosend[sockindex].oidmod = pile->oidmod;
1010 tosend[sockindex].oidcreated = pile->oidcreated;
1014 if(pile->mid != myIpAddr) {
1015 if((sd = getSockWithLock(transRequestSockPool, pile->mid)) < 0) {
1016 printf("\ntransRequest(): socket create error\n");
1020 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1024 socklist[sockindex] = sd;
1025 /* Send bytes of data with TRANS_REQUEST control message */
1026 send_data(sd, &(tosend[sockindex].f), sizeof(fixed_data_t));
1028 /* Send list of machines involved in the transaction */
1030 int size=sizeof(unsigned int)*(tosend[sockindex].f.mcount);
1031 send_data(sd, tosend[sockindex].listmid, size);
1034 /* Send oids and version number tuples for objects that are read */
/* Each read tuple is an unsigned int oid followed by an unsigned short
 * version. */
1036 int size=(sizeof(unsigned int)+sizeof(unsigned short))*(tosend[sockindex].f.numread);
1037 send_data(sd, tosend[sockindex].objread, size);
1040 /* Send objects that are modified */
/* All modified objects (header + payload) are packed back to back into one
 * buffer of sum_bytes and sent in a single call. */
1042 if((modptr = calloc(1, tosend[sockindex].f.sum_bytes)) == NULL) {
1043 printf("Calloc error for modified objects %s, %d\n", __FILE__, __LINE__);
1050 for(i = 0; i < tosend[sockindex].f.nummod ; i++) {
1052 objheader_t *headeraddr;
1053 if((headeraddr = t_chashSearch(tosend[sockindex].oidmod[i])) == NULL) {
1054 printf("%s() Error: No such oid %s, %d\n", __func__, __FILE__, __LINE__);
1060 GETSIZE(size,headeraddr);
1061 size+=sizeof(objheader_t);
1062 memcpy(modptr+offset, headeraddr, size);
1065 send_data(sd, modptr, tosend[sockindex].f.sum_bytes);
1068 /* send transaction id, number of machine involved, machine ids */
1069 send_data(sd, &transID, sizeof(unsigned int));
1072 } else { //handle request locally
1073 handleLocalReq(&tosend[sockindex], &transinfo, &getReplyCtrl[sockindex]);
1077 } //end of pile processing
1079 /* Recv Ctrl msgs from all machines */
1081 printf("%s-> Finished sending transaction read/mod objects\n",__func__);
1085 for(i = 0; i < pilecount; i++) {
1086 int sd = socklist[i];
1089 int timeout; // a variable to check if the connection is still alive. if it is -1, then need to transcommit again
1090 timeout = recv_data(sd, &control, sizeof(char));
1091 //Update common data structure with new ctrl msg
1092 getReplyCtrl[i] = control;
1093 /* Recv Objects if participant sends TRANS_DISAGREE */
1095 if(control == TRANS_DISAGREE) {
1097 recv_data(sd, &length, sizeof(int));
/* The prefetch-cache mutex guards only the allocation; the receive itself
 * runs after the unlock. */
1099 pthread_mutex_lock(&prefetchcache_mutex);
1100 if ((newAddr = prefetchobjstrAlloc((unsigned int)length)) == NULL) {
1101 printf("Error: %s() objstrAlloc error for copying into prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
1104 pthread_mutex_unlock(&prefetchcache_mutex);
1107 pthread_mutex_unlock(&prefetchcache_mutex);
1108 recv_data(sd, newAddr, length);
/* Walk the received buffer object by object until `length` bytes are used. */
1110 while(length != 0) {
1111 unsigned int oidToPrefetch;
1112 objheader_t * header;
1113 header = (objheader_t *)(((char *)newAddr) + offset);
1114 oidToPrefetch = OID(header);
1117 GETSIZE(size, header);
1118 size += sizeof(objheader_t);
1119 //make an entry in prefetch hash table
/* An existing entry for this oid is replaced by the fresh copy. */
1121 if((oldptr = prehashSearch(oidToPrefetch)) != NULL) {
1122 prehashRemove(oidToPrefetch);
1123 prehashInsert(oidToPrefetch, header);
1125 prehashInsert(oidToPrefetch, header);
1127 length = length - size;
1130 } //end of receiving objs
/* Dead-participant path (its condition is on a line not shown): remember the
 * machine and count its vote as a disagree. */
1135 deadmid = listmid[i];
1138 printf("%s -> Dead Machine ID : %s\n",__func__,midtoIPString(deadmid));
1139 printf("%s -> Dead SD : %d\n",__func__,sd);
1141 getReplyCtrl[i] = TRANS_DISAGREE;
1148 printf("%s-> Decide final response now\n", __func__);
1150 /* Decide the final response */
1151 if((finalResponse = decideResponse(getReplyCtrl, &treplyretry, pilecount)) == 0) {
1152 printf("Error: %s() in updating prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
1158 printf("%s-> Final Response: %d\n", __func__, (int)finalResponse);
1161 /* Send responses to all machines */
1162 for(i = 0; i < pilecount; i++) {
1163 int sd = socklist[i];
1169 if(finalResponse == TRANS_COMMIT) {
1171 /* Update prefetch cache */
1172 if((retval = updatePrefetchCache(&(tosend[i]))) != 0) {
1173 printf("Error: %s() in updating prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
1179 /* Invalidate objects in other machine cache */
1180 if(tosend[i].f.nummod > 0) {
1181 if((retval = invalidateObj(&(tosend[i]))) != 0) {
1182 printf("Error: %s() in invalidating Objects %s, %d\n", __func__, __FILE__, __LINE__);
1189 removetransaction(tosend[i].oidmod,tosend[i].f.nummod);
1190 removethisreadtransaction(tosend[i].objread, tosend[i].f.numread);
/* Hard abort (no retry pending): only this transaction's entries are
 * removed. */
1194 else if (!treplyretry) {
1195 removethistransaction(tosend[i].oidmod,tosend[i].f.nummod);
1196 removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
1200 send_data(sd,&finalResponse,sizeof(char));
1202 printf("%s -> Decision Sent to %s\n",__func__,midtoIPString(listmid[i]));
1206 /* Complete local processing */
1208 thashInsert(transID,finalResponse);
1210 doLocalProcess(finalResponse, &(tosend[i]), &transinfo);
1213 if(finalResponse == TRANS_COMMIT) {
1214 removetransaction(tosend[i].oidmod,tosend[i].f.nummod);
1215 removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
1216 } else if (!treplyretry) {
1217 removethistransaction(tosend[i].oidmod,tosend[i].f.nummod);
1218 removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
/* Slots left at 0 (e.g. the local pile) never received a socket and are
 * skipped. */
1228 for(i = 0; i< pilecount; i++) {
1229 if(socklist[i] > 0) {
1230 freeSockWithLock(transRequestSockPool,listmid[i], socklist[i]);
1234 /* Free resources */
1239 /* wait a random amount of time before retrying to commit transaction*/
/* NOTE(review): the loop repeats only while a retry is requested AND deadmid
 * differs from -1; confirm that `!=` (rather than `==`) is the intended
 * condition. */
1248 } while (treplyretry && deadmid != -1);
1250 if(finalResponse == TRANS_ABORT) {
1254 /* Free Resources */
1255 objstrDelete(t_cache);
1258 if(deadmid != -1) { /* if deadmid is greater than or equal to 0,
1259 then there is dead machine. */
1261 printf("%s -> Dead machine Detected : %s\n",__func__,midtoIPString(deadmid));
1263 restoreDuplicationState(deadmid);
1267 } else if(finalResponse == TRANS_COMMIT) {
1271 /* Free Resources */
1272 objstrDelete(t_cache);
1276 //TODO Add other cases
1277 printf("Error: in %s() THIS SHOULD NOT HAPPEN.....EXIT PROGRAM\n", __func__);
1281 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1286 /* This function handles the local objects involved in a transaction
1287 * committing process. It also makes a decision if this local machine
1288 * sends AGREE or DISAGREE or SOFT_ABORT to coordinator.
 *
 * tdata         request data for the local pile (read tuples, modified oids)
 * transinfo     out: receives the locked / not-found oid arrays and their
 *               counts; the arrays are heap-allocated here and released by
 *               doLocalProcess()
 * getReplyCtrl  out: the control byte this machine votes with
 *
 * Read objects are checked with commitCountForObjRead(), modified objects
 * with commitCountForObjMod(); the v_* counters filled by those helpers drive
 * the vote at the end. */
1289 void handleLocalReq(trans_req_data_t *tdata, trans_commit_data_t *transinfo, char *getReplyCtrl) {
1290 unsigned int *oidnotfound = NULL, *oidlocked = NULL;
1291 int numoidnotfound = 0, numoidlocked = 0;
1292 int v_nomatch = 0, v_matchlock = 0, v_matchnolock = 0;
1295 unsigned short version;
1297 /* Counters and arrays to formulate decision on control message to be sent */
1298 oidnotfound = (unsigned int *) calloc((tdata->f.numread + tdata->f.nummod), sizeof(unsigned int));
1299 oidlocked = (unsigned int *) calloc((tdata->f.numread + tdata->f.nummod +1), sizeof(unsigned int)); // calloc one additional element for
1300 //setting a divider between read and write locks
1301 numread = tdata->f.numread;
1302 /* Process each oid in the machine pile/ group per thread */
1303 for (i = 0; i < tdata->f.numread + tdata->f.nummod; i++) {
1304 if (i < tdata->f.numread) {
1305 int incr = sizeof(unsigned int) + sizeof(unsigned short); // Offset that points to next position in the objread array
/* objread is a packed array of (unsigned int oid, unsigned short version)
 * tuples.  NOTE(review): as shown, incr is the size of one tuple; the scaling
 * by i is presumably on the line not visible here -- confirm. */
1307 oid = *((unsigned int *)(((char *)tdata->objread) + incr));
1308 version = *((unsigned short *)(((char *)tdata->objread) + incr + sizeof(unsigned int)));
1309 commitCountForObjRead(getReplyCtrl, oidnotfound, oidlocked, &numoidnotfound, &numoidlocked, &v_nomatch, &v_matchlock, &v_matchnolock, oid, version);
1310 } else { // Objects Modified
/* First modified object: store -1 (all bits set in the unsigned slot) as the
 * divider between read locks and write locks. */
1311 if(i == tdata->f.numread) {
1312 oidlocked[numoidlocked++] = -1;
1315 objheader_t *headptr;
1316 headptr = (objheader_t *) t_chashSearch(tdata->oidmod[i-numread]);
1317 if (headptr == NULL) {
1318 printf("Error: handleLocalReq() returning NULL, no such oid %s, %d\n", __FILE__, __LINE__);
1322 version = headptr->version;
1323 commitCountForObjMod(getReplyCtrl, oidnotfound, oidlocked, &numoidnotfound, &numoidlocked, &v_nomatch, &v_matchlock, &v_matchnolock, oid, version);
1327 /* Fill out the trans_commit_data_t data structure. This is required for a trans commit process
1328 * if Participant receives a TRANS_COMMIT */
1329 transinfo->objlocked = oidlocked;
1330 transinfo->objnotfound = oidnotfound;
1331 transinfo->modptr = NULL;
1332 transinfo->numlocked = numoidlocked;
1333 transinfo->numnotfound = numoidnotfound;
1335 /* Condition to send TRANS_AGREE */
/* Agree only when every read and modified object matched its version and
 * could be locked. */
1336 if(v_matchnolock == tdata->f.numread + tdata->f.nummod) {
1337 *getReplyCtrl = TRANS_AGREE;
1339 /* Condition to send TRANS_SOFT_ABORT */
/* Soft abort: no version mismatch, but something was locked or not found. */
1340 if((v_matchlock > 0 && v_nomatch == 0) || (numoidnotfound > 0 && v_nomatch == 0)) {
1341 *getReplyCtrl = TRANS_SOFT_ABORT;
/* doLocalProcess: apply the coordinator's final decision to the local pile.
 *
 * finalResponse  TRANS_ABORT or TRANS_COMMIT; anything else is reported as
 *                "No Decision"
 * tdata          request data for the local pile
 * transinfo      lock bookkeeping produced by handleLocalReq(); its
 *                objlocked and objnotfound arrays are freed here
 *
 * On abort transAbortProcess() is run; on commit remote caches are
 * invalidated when objects were modified and transComProcess() is run. */
1345 void doLocalProcess(char finalResponse, trans_req_data_t *tdata, trans_commit_data_t *transinfo) {
1347 if(finalResponse == TRANS_ABORT) {
1348 if(transAbortProcess(transinfo) != 0) {
1349 printf("Error in transAbortProcess() %s,%d\n", __FILE__, __LINE__);
1353 } else if(finalResponse == TRANS_COMMIT) {
1355 /* Invalidate objects in other machine cache */
1356 if(tdata->f.nummod > 0) {
1358 if((retval = invalidateObj(tdata)) != 0) {
1359 printf("Error: %s() in invalidating Objects %s, %d\n", __func__, __FILE__, __LINE__);
1364 if(transComProcess(tdata, transinfo) != 0) {
1365 printf("Error in transComProcess() %s,%d\n", __FILE__, __LINE__);
1370 printf("ERROR...No Decision\n");
/* Release the arrays allocated by handleLocalReq(). */
1374 if (transinfo->objlocked != NULL) {
1375 free(transinfo->objlocked);
1377 if (transinfo->objnotfound != NULL) {
1378 free(transinfo->objnotfound);
1382 /* This function decides the response that needs to be sent to
1383 * all Participant machines after the TRANS_REQUEST protocol.
 *
 * getReplyCtrl  one control byte per participant
 * treplyretry   out: retry indicator consumed by the commit loop
 * pilecount     number of participants
 *
 * Replies are tallied per kind; unknown control bytes are treated as
 * TRANS_DISAGREE.  Any disagree takes precedence; TRANS_COMMIT is returned
 * only when every participant agreed; the remaining case is the soft-abort
 * path (abort now, retry the commit). */
1384 char decideResponse(char *getReplyCtrl, char *treplyretry, int pilecount) {
1385 int i, transagree = 0, transdisagree = 0, transsoftabort = 0; /* Counters to formulate decision of what
1387 for (i = 0 ; i < pilecount; i++) {
1389 control = getReplyCtrl[i];
1393 printf("%s-> Participant sent unknown message, i:%d, Control: %d\n", __func__, i, (int)control);
1396 /* treat as disagree, pass thru */
1397 case TRANS_DISAGREE:
1400 printf("%s-> Participant sent TRANS_DISAGREE, i:%d, Control: %d\n", __func__, i, (int)control);
1407 printf("%s-> Participant sent TRANS_AGREE, i:%d, Control: %d\n", __func__, i, (int)control);
1411 case TRANS_SOFT_ABORT:
1414 printf("%s-> Participant sent TRANS_SOFT_ABORT, i:%d, Control: %d\n", __func__, i, (int)control);
1420 if(transdisagree > 0) {
1425 /* clear objects from prefetch cache */
1428 } else if(transagree == pilecount) {
1431 return TRANS_COMMIT;
1433 /* Send Abort in soft abort case followed by retry committing transaction again*/
1440 /* This function opens a connection, places an object read request to
1441 * the remote machine, reads the control message and object if
1442 * available and copies the object and its header to the local
 * transaction cache.
 *
 * mnum  machine to ask; used to pick a socket from transRequestSockPool
 * oid   id of the requested object
 *
 * Wire format of the request: one byte READ_REQUEST followed by the oid in
 * host byte order (sizeof(unsigned int) bytes, no padding).
 *
 * On OBJECT_FOUND the object size and then the object itself are received
 * into memory allocated from t_cache, and the copy is registered in the
 * transaction cache hash.  objcopy starts out NULL; the return statements and
 * the handling of failed recv_data() calls are on lines not shown here. */
1445 void *getRemoteObj(unsigned int mnum, unsigned int oid) {
1447 printf("%s -> entering\n",__func__);
1450 struct sockaddr_in serv_addr;
1453 void *objcopy = NULL;
1455 int sd = getSock2(transRequestSockPool, mnum);
1456 char readrequest[sizeof(char)+sizeof(unsigned int)];
1457 readrequest[0] = READ_REQUEST;
/* The oid sits at byte offset 1, which is not suitably aligned for an
 * unsigned int; memcpy replaces the former store through a cast pointer,
 * which was undefined behaviour and faults on strict-alignment targets.
 * The bytes on the wire are unchanged. */
1458 memcpy(&readrequest[1], &oid, sizeof(oid));
1459 send_data(sd, readrequest, sizeof(readrequest));
1461 /* Read response from the Participant */
1462 if(recv_data(sd, &control, sizeof(char)) < 0) {
1467 if (control==OBJECT_NOT_FOUND) {
1469 } else if(control==OBJECT_FOUND) {
1471 /* Read object if found into local cache */
1473 if(recv_data(sd, &size, sizeof(int)) < 0) {
1478 objcopy = objstrAlloc(&t_cache, size);
1480 if(recv_data(sd, objcopy, size) < 0) {
1486 /* Insert into cache's lookup table */
1487 t_chashInsert(oid, objcopy);
1489 totalObjSize += size;
1496 /* ask machines if they received decision
 *
 * transID  id of the transaction whose outcome is being queried
 * nummid   number of entries in listmid
 * listmid  machine ids to ask, in order
 *
 * Each machine is sent ASK_COMMIT followed by transID; the loop stops at the
 * first machine that answers.  Returns the received response, or TRANS_ABORT
 * when response is still -1 after the loop.
 * NOTE(review): the initial value of `response` is set on a line not shown
 * (presumably -1), and the socket of the machine that answered is not
 * released on the visible `break` path -- confirm both. */
1497 char receiveDecisionFromBackup(unsigned int transID,int nummid,unsigned int *listmid)
1500 printf("%s -> Entering\n",__func__);
1503 int sd; // socket id
1507 for(i = 0; i < nummid; i++) {
1508 if((sd = getSock(transPrefetchSockPool, listmid[i])) < 0) {
/* The "%s" conversion previously had no matching argument (undefined
 * behaviour); __func__ is supplied like in every other log line here. */
1509 printf("%s -> socket Error!!\n", __func__);
1512 char control = ASK_COMMIT;
1514 send_data(sd,&control, sizeof(char));
1515 send_data(sd,&transID, sizeof(unsigned int));
1517 // return -1 if it didn't receive the response
1518 int timeout = recv_data(sd,&response, sizeof(char));
1521 if(timeout == 0 || response > 0)
1522 break; // received response
1524 // else check next machine
1525 freeSock(transPrefetchSockPool, listmid[i],sd);
1529 printf("%s -> response : %d\n",__func__,response);
1532 return (response==-1)?TRANS_ABORT:response;
/* restoreDuplicationState: react to the failure of machine `deadHost`.
 *
 * deadHost  machine id believed to be dead
 *
 * Visible behaviour:
 *   - returns early when liveHosts[] already marks the host as not live
 *     (someone fixed it already);
 *   - if the dead host was the leader, a new leader is chosen (on a line not
 *     shown);
 *   - when this machine is the leader it repairs the state itself under
 *     leaderFixing_mutex: duplicateLostObjects() followed by
 *     updateLiveHostsCommit();
 *   - otherwise it sends REMOTE_RESTORE_DUPLICATED_STATE plus the dead host
 *     id to the leader over a pooled socket.
 *
 * NOTE(review): leaderFixing_mutex is locked and unlocked several times
 * below; what each critical section protects (presumably a "leader is
 * fixing" flag) is on lines not shown -- confirm. */
1537 void restoreDuplicationState(unsigned int deadHost) {
1541 if(!liveHosts[findHost(deadHost)]) { // if it is already fixed
1546 if(deadHost == leader) // if leader is dead, then pick a new leader
1550 printf("%s-> leader?:%s, me?:%s\n", __func__, midtoIPString(leader), (myIpAddr == leader)?"LEADER":"NOT LEADER");
1553 if(leader == myIpAddr) {
1554 pthread_mutex_lock(&leaderFixing_mutex);
1557 pthread_mutex_unlock(&leaderFixing_mutex);
/* Re-check after taking the lock: another thread may have repaired the host
 * in the meantime. */
1559 if(!liveHosts[findHost(deadHost)]) { // if it is already fixed
1561 printf("%s -> already fixed\n",__func__);
1563 pthread_mutex_lock(&leaderFixing_mutex);
1565 pthread_mutex_unlock(&leaderFixing_mutex);
1567 else { // if i am the leader
1569 duplicateLostObjects(deadHost);
1571 if(updateLiveHostsCommit() != 0) {
1572 printf("%s -> error updateLiveHostsCommit()\n",__func__);
1575 pthread_mutex_lock(&leaderFixing_mutex);
1577 pthread_mutex_unlock(&leaderFixing_mutex);
1581 pthread_mutex_unlock(&leaderFixing_mutex);
1583 printf("%s (REMOTE_RESTORE_DUPLICATED_STATE -> LEADER is already fixing\n",__func__);
1588 else { // request leader to fix the situation
1589 if((sd = getSockWithLock(transPrefetchSockPool, leader)) < 0) {
1590 printf("%s -> socket create error\n",__func__);
/* Request = one control byte followed by the dead host id; no reply is read
 * in the visible code before the socket is returned to the pool. */
1593 ctrl = REMOTE_RESTORE_DUPLICATED_STATE;
1594 send_data(sd, &ctrl, sizeof(char));
1595 send_data(sd, &deadHost, sizeof(unsigned int));
1596 freeSockWithLock(transPrefetchSockPool,leader,sd);
1600 printf("%s -> Finished!\n",__func__);
/*
 * Validation step for one object that the transaction modified.
 *
 * If oid is no longer in the main object store it is appended to
 * oidnotfound and *numoidnotfound is incremented. Otherwise a write lock is
 * attempted on the object: when it is obtained the oid is appended to
 * oidlocked whether or not `version` equals the stored version, and on a
 * mismatch *getReplyCtrl becomes TRANS_DISAGREE. When the lock cannot be
 * obtained, a version mismatch likewise sets TRANS_DISAGREE.
 *
 * v_nomatch, v_matchlock and v_matchnolock are presumably tallies of those
 * outcomes -- TODO confirm.
 */
1605 /* Commit info for objects modified */
1606 void commitCountForObjMod(char *getReplyCtrl, unsigned int *oidnotfound, unsigned int *oidlocked, int *numoidnotfound,
1607 int *numoidlocked, int *v_nomatch, int *v_matchlock, int *v_matchnolock, unsigned int oid, unsigned short version) {
1609 /* Check if object is still present in the machine since the beginning of TRANS_REQUEST */
1610 /* Save the oids not found and number of oids not found for later use */
1611 if ((mobj = mhashSearch(oid)) == NULL) { /* Obj not found */
1612 /* Save the oids not found and number of oids not found for later use */
1613 oidnotfound[*numoidnotfound] = oid;
1614 (*numoidnotfound)++;
1615 } else { /* If Obj found in machine (i.e. has not moved) */
1616 /* Check if Obj is locked by any previous transaction */
1617 if (write_trylock(STATUSPTR(mobj))) { // Can acquire write lock
1618 if (version == ((objheader_t *)mobj)->version) { /* match versions */
1620 //Keep track of what is locked
1621 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1622 } else { /* If versions don't match ...HARD ABORT */
1624 /* Send TRANS_DISAGREE to Coordinator */
1625 *getReplyCtrl = TRANS_DISAGREE;
1627 //Keep track of what is locked
1628 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1629 //printf("%s() oid = %d, type = %d\t", __func__, OID(mobj), TYPE((objheader_t *)mobj));
1632 } else { //A lock is acquired some place else
1633 if (version == ((objheader_t *)mobj)->version) { /* Check if versions match */
1635 } else { /* If versions don't match ...HARD ABORT */
1637 /* Send TRANS_DISAGREE to Coordinator */
1638 *getReplyCtrl = TRANS_DISAGREE;
1639 //printf("%s() oid = %d, type = %d\t", __func__, OID(mobj), TYPE((objheader_t *)mobj));
/*
 * Validation step for one object that the transaction only read.
 *
 * If oid is no longer in the main object store it is appended to
 * oidnotfound and *numoidnotfound is incremented. Otherwise a read lock is
 * attempted on the object: when it is obtained the oid is appended to
 * oidlocked whether or not `version` equals the stored version, and on a
 * mismatch *getReplyCtrl becomes TRANS_DISAGREE. When the lock cannot be
 * obtained, a version mismatch likewise sets TRANS_DISAGREE.
 *
 * v_nomatch, v_matchlock and v_matchnolock are presumably tallies of those
 * outcomes -- TODO confirm.
 */
1646 /* Commit info for objects read */
1647 void commitCountForObjRead(char *getReplyCtrl, unsigned int *oidnotfound, unsigned int *oidlocked, int *numoidnotfound,
1648 int *numoidlocked, int *v_nomatch, int *v_matchlock, int *v_matchnolock, unsigned int oid, unsigned short version) {
1650 /* Check if object is still present in the machine since the beginning of TRANS_REQUEST */
1651 /* Save the oids not found and number of oids not found for later use */
1652 if ((mobj = mhashSearch(oid)) == NULL) { /* Obj not found */
1653 /* Save the oids not found and number of oids not found for later use */
1654 oidnotfound[*numoidnotfound] = oid;
1655 (*numoidnotfound)++;
1656 } else { /* If Obj found in machine (i.e. has not moved) */
1657 /* Check if Obj is locked by any previous transaction */
1658 if (read_trylock(STATUSPTR(mobj))) { // Can further acquire read locks
1659 if (version == ((objheader_t *)mobj)->version) { /* If locked then match versions */
1661 //Keep track of what is locked
1662 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1663 } else { /* If versions don't match ...HARD ABORT */
1665 /* Send TRANS_DISAGREE to Coordinator */
1666 *getReplyCtrl = TRANS_DISAGREE;
1667 //Keep track of what is locked
1668 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1669 //printf("%s() oid = %d, type = %d\t", __func__, OID(mobj), TYPE((objheader_t *)mobj));
1672 } else { //Has reached max number of readers or some other transaction
1673 //has acquired a lock on this object
1674 if (version == ((objheader_t *)mobj)->version) { /* Check if versions match */
1676 } else { /* If versions don't match ...HARD ABORT */
1678 /* Send TRANS_DISAGREE to Coordinator */
1679 *getReplyCtrl = TRANS_DISAGREE;
1680 //printf("%s() oid = %d, type = %d\t", __func__, OID(mobj), TYPE((objheader_t *)mobj));
/*
 * Releases the locks recorded in transinfo->objlocked (transinfo->numlocked
 * entries) when a transaction aborts.
 *
 * objlocked holds unsigned ints, so the comparison with -1 matches the value
 * UINT_MAX; such an entry is handled separately from real oids and
 * presumably switches useWriteUnlock on for the entries that follow --
 * TODO confirm. Every other oid is looked up with mhashSearch() and released
 * with read_unlock() while useWriteUnlock is 0, write_unlock() otherwise.
 */
1687 /* This function completes the ABORT process if the transaction is aborting */
1688 int transAbortProcess(trans_commit_data_t *transinfo) {
1690 unsigned int *objlocked;
1693 numlocked = transinfo->numlocked;
1694 objlocked = transinfo->objlocked;
1696 int useWriteUnlock = 0;
1697 for (i = 0; i < numlocked; i++) {
1698 if(objlocked[i] == -1) {
1702 if((header = mhashSearch(objlocked[i])) == NULL) {
1703 printf("mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1706 if(!useWriteUnlock) {
1707 read_unlock(STATUSPTR(header));
1709 write_unlock(STATUSPTR(header));
/*
 * Applies a committing transaction to the main object store in three passes:
 *
 *  1. Modified objects (tdata->oidmod): each is located in the main store
 *     and in the transaction cache; ___cachedCode___ and ___cachedHash___
 *     are copied individually, the bytes after the struct ___Object___
 *     header are copied with memcpy(), the version is incremented and
 *     waiters on a non-empty notifylist are notified (or the list cleared).
 *  2. Created objects (tdata->oidcreated): the cached copy gets its version
 *     incremented, space for header + object is allocated from mainobjstore
 *     under mainobjstore_mutex, the locks are initialised, and the copy is
 *     registered with mhashInsert() and lhashInsert() (location myIpAddr).
 *  3. Locked objects (transinfo->objlocked): released with read_unlock()
 *     while useWriteUnlock is 0 and write_unlock() otherwise; entries equal
 *     to (unsigned)-1 are handled separately, presumably to flip that flag
 *     -- TODO confirm.
 *
 * NOTE(review): the branch taken for header->isBackup != 0 is commented as
 * the "primary obj" case; confirm which sense of the flag is intended.
 */
1716 /*This function completes the COMMIT process if the transaction is committing*/
1717 int transComProcess(trans_req_data_t *tdata, trans_commit_data_t *transinfo) {
1718 objheader_t *header, *tcptr;
1719 int i, nummod, tmpsize, numcreated, numlocked;
1720 unsigned int *oidmod, *oidcreated, *oidlocked;
1723 printf("%s-> Entering transComProcess, trans.c\n", __func__);
1726 nummod = tdata->f.nummod;
1727 oidmod = tdata->oidmod;
1728 numcreated = tdata->f.numcreated;
1729 oidcreated = tdata->oidcreated;
1730 numlocked = transinfo->numlocked;
1731 oidlocked = transinfo->objlocked;
1733 for (i = 0; i < nummod; i++) {
1734 if((header = (objheader_t *) mhashSearch(oidmod[i])) == NULL) {
1735 printf("Error: transComProcess() mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1738 /* Copy from transaction cache -> main object store */
1739 if ((tcptr = ((objheader_t *) t_chashSearch(oidmod[i]))) == NULL) {
1740 printf("Error: transComProcess() chashSearch returned NULL at %s, %d\n", __FILE__, __LINE__);
1743 GETSIZE(tmpsize, header);
1744 char *tmptcptr = (char *) tcptr;
1746 struct ___Object___ *dst=(struct ___Object___*)((char*)header+sizeof(objheader_t));
1747 struct ___Object___ *src=(struct ___Object___*)((char*)tmptcptr+sizeof(objheader_t));
1748 dst->___cachedCode___=src->___cachedCode___;
1749 dst->___cachedHash___=src->___cachedHash___;
1751 memcpy(&dst[1], &src[1], tmpsize-sizeof(struct ___Object___));
1754 header->version += 1;
1755 //printf("oid: %u, new header version: %d\n", oidmod[i], header->version);
1756 if(header->notifylist != NULL) {
1758 if(header->isBackup != 0) // if it is primary obj, notify
1759 notifyAll(&header->notifylist, OID(header), header->version);
1760 else // if not, just clear the notification list
1761 clearNotifyList(OID(header));
1763 notifyAll(&header->notifylist, OID(header), header->version);
1767 /* If object is newly created inside transaction then commit it */
1768 for (i = 0; i < numcreated; i++) {
1769 if ((header = ((objheader_t *) t_chashSearch(oidcreated[i]))) == NULL) {
1770 printf("Error: transComProcess() chashSearch returned NULL for oid = %x at %s, %d\n", oidcreated[i], __FILE__, __LINE__);
1773 header->version += 1;
1774 GETSIZE(tmpsize, header);
1775 tmpsize += sizeof(objheader_t);
1776 pthread_mutex_lock(&mainobjstore_mutex);
1777 if ((ptrcreate = objstrAlloc(&mainobjstore, tmpsize)) == NULL) {
1778 printf("Error: transComProcess() failed objstrAlloc %s, %d\n", __FILE__, __LINE__);
1779 pthread_mutex_unlock(&mainobjstore_mutex);
1782 pthread_mutex_unlock(&mainobjstore_mutex);
1783 /* Initialize read and write locks */
1784 initdsmlocks(STATUSPTR(header));
1785 memcpy(ptrcreate, header, tmpsize);
1786 mhashInsert(oidcreated[i], ptrcreate);
1787 lhashInsert(oidcreated[i], myIpAddr);
1788 // printf("oid created : %u\n",oidcreated[i]);
1790 /* Unlock locked objects */
1791 int useWriteUnlock = 0;
1792 for(i = 0; i < numlocked; i++) {
1793 if(oidlocked[i] == -1) {
1797 if((header = (objheader_t *) mhashSearch(oidlocked[i])) == NULL) {
1798 printf("mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1801 if(!useWriteUnlock) {
1802 read_unlock(STATUSPTR(header));
1804 write_unlock(STATUSPTR(header));
/*
 * Splits one prefetch request into the part that resolves locally and the
 * part that has to be requested from other machines.
 *
 * ptr is decoded with the GET_* macros into a site id, a tuple count, the
 * base oids, the per-tuple end offsets and the field/array offsets. Tuple i
 * owns the offsets from endoffsets[i-1] (0 for the first tuple) up to
 * endoffsets[i]. For each tuple the offsets are followed with
 * lookupObject(), whose result is tested for zero. A tuple whose whole
 * chain was walked and whose final oid passes checkoid() is the "entire
 * prefetch is local" case; the remaining offsets of other tuples are queued
 * with insertPile() on the list `head`, keyed by the machine that
 * lhashSearch() reports for the oid reached so far.
 * handleDynPrefetching() is finally given numLocal, ntuples and siteid.
 */
1810 prefetchpile_t *foundLocal(char *ptr) {
1811 int siteid = *(GET_SITEID(ptr));
1812 int ntuples = *(GET_NTUPLES(ptr));
1813 unsigned int * oidarray = GET_PTR_OID(ptr);
1814 unsigned short * endoffsets = GET_PTR_EOFF(ptr, ntuples);
1815 short * arryfields = GET_PTR_ARRYFLD(ptr, ntuples);
1816 prefetchpile_t * head=NULL;
1820 for(i=0; i<ntuples; i++) {
1821 unsigned short baseindex=(i==0) ? 0 : endoffsets[i-1];
1822 unsigned short endindex=endoffsets[i];
1823 unsigned int oid=oidarray[i];
1828 //Look up fields locally
1829 for(newbase=baseindex; newbase<endindex; newbase++) {
1830 if (!lookupObject(&oid, arryfields[newbase]))
1832 //Ended in a null pointer...
1836 //Entire prefetch is local
1837 if (newbase==endindex&&checkoid(oid)) {
1841 //Add to remote requests
1842 machinenum=lhashSearch(oid);
1843 insertPile(machinenum, oid, endindex-newbase, &arryfields[newbase], &head);
1848 /* handle dynamic prefetching */
1849 handleDynPrefetching(numLocal, ntuples, siteid);
/*
 * Looks oid up in the main object store (mhashSearch) and, failing that, in
 * the prefetch cache (prehashSearch). foundLocal() uses the result as a
 * truth value; presumably non-zero means the object is available locally --
 * TODO confirm.
 */
1853 int checkoid(unsigned int oid) {
1854 objheader_t *header;
1855 if ((header=mhashSearch(oid))!=NULL) {
1858 } else if ((header=prehashSearch(oid))!=NULL) {
/*
 * Follows one prefetch offset starting from the object named by *oid.
 *
 * The object header is searched in the main store and then in the prefetch
 * cache. A header whose TYPE is >= NUMCLASSES is treated as an array:
 * offset is an element index checked against ___length___, and *oid is
 * replaced by the unsigned int read at
 * sizeof(struct ArrayObject) + classsize[type] * offset past the array
 * header. For any other type, offset is a byte offset past the object header
 * and *oid is replaced by the unsigned int stored there.
 * foundLocal() tests the return value for zero.
 */
1866 int lookupObject(unsigned int * oid, short offset) {
1867 objheader_t *header;
1868 if ((header=mhashSearch(*oid))!=NULL) {
1871 } else if ((header=prehashSearch(*oid))!=NULL) {
1878 if(TYPE(header) >= NUMCLASSES) {
1879 int elementsize = classsize[TYPE(header)];
1880 struct ArrayObject *ao = (struct ArrayObject *) (((char *)header) + sizeof(objheader_t));
1881 int length = ao->___length___;
1882 /* Check if array out of bounds */
1883 if(offset < 0 || offset >= length) {
1884 //if yes treat the object as found
1888 (*oid) = *((unsigned int *)(((char *)ao) + sizeof(struct ArrayObject) + (elementsize*offset)));
1891 (*oid) = *((unsigned int *)(((char *)header) + sizeof(objheader_t) + offset));
/*
 * Prefetch worker body. A node is taken from the prefetch queue with
 * gettail() and handed to foundLocal(), which returns the list of per-machine
 * piles that still need remote data. For every pile a socket for ptr->mid
 * is obtained from transPrefetchSockPool and the request is sent with
 * sendPrefetchReq(); the pile list is then released with mcdealloc().
 * The freeSock() call is commented out, so sockets are not handed back here.
 */
1897 /* This function is called by the thread calling transPrefetch */
1898 void *transPrefetch(void *t) {
1900 /* read from prefetch queue */
1901 void *node=gettail();
1902 /* Check if the tuples are found locally, if yes then reduce them further*/
1903 /* and group requests by remote machine ids by calling the makePreGroups() */
1904 prefetchpile_t *pilehead = foundLocal(node);
1906 if (pilehead!=NULL) {
1907 // Get sock from shared pool
1909 /* Send Prefetch Request */
1910 prefetchpile_t *ptr = pilehead;
1911 while(ptr != NULL) {
1912 int sd = getSock2(transPrefetchSockPool, ptr->mid);
1913 sendPrefetchReq(ptr, sd);
1917 /* Release socket */
1918 // freeSock(transPrefetchSockPool, pilehead->mid, sd);
1920 /* Deallocated pilehead */
1921 mcdealloc(pilehead);
1923 // Deallocate the prefetch queue pile node
/*
 * Builds a complete TRANS_PREFETCH request for the piles of mcpilenode in a
 * single buffer and sends it on sd with one send_data() call.
 *
 * The total size is one control byte plus one int, plus for every object
 * pile an int, two unsigned ints and numoffset shorts. The buffer starts
 * with the TRANS_PREFETCH byte; each pile then contributes its oid,
 * myIpAddr and its offset array, in that order.
 */
1928 void sendPrefetchReqnew(prefetchpile_t *mcpilenode, int sd) {
1931 int size=sizeof(char)+sizeof(int);
1932 for(tmp=mcpilenode->objpiles; tmp!=NULL; tmp=tmp->next) {
1933 size += sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
1938 *buf=TRANS_PREFETCH;
1941 for(tmp=mcpilenode->objpiles; tmp!=NULL; tmp=tmp->next) {
1942 int len = sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
1945 *((unsigned int *)buf)=tmp->oid;
1946 buf+=sizeof(unsigned int);
1947 *((unsigned int *)(buf)) = myIpAddr;
1948 buf+=sizeof(unsigned int);
1949 memcpy(buf, tmp->offset, tmp->numoffset*sizeof(short));
1950 buf+=tmp->numoffset*sizeof(short);
1953 send_data(sd, buft, size);
/*
 * Sends a prefetch request for the piles of mcpilenode on socket sd as a
 * sequence of messages: first the TRANS_PREFETCH control byte, then one
 * record per object pile, then an int-sized end marker (endpair).
 *
 * Each record is assembled in a variable-length stack array of
 * sizeof(int) + 2 * sizeof(unsigned int) + numoffset * sizeof(short) bytes
 * and holds the pile's numoffset, oid, myIpAddr and offset array.
 */
1957 void sendPrefetchReq(prefetchpile_t *mcpilenode, int sd) {
1962 /* Send TRANS_PREFETCH control message */
1963 control = TRANS_PREFETCH;
1964 send_data(sd, &control, sizeof(char));
1966 /* Send Oids and offsets in pairs */
1967 tmp = mcpilenode->objpiles;
1968 while(tmp != NULL) {
1969 len = sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
1970 char oidnoffset[len];
1971 char *buf=oidnoffset;
1972 *((int*)buf) = tmp->numoffset;
1974 *((unsigned int *)buf) = tmp->oid;
1975 buf+=sizeof(unsigned int);
1976 *((unsigned int *)buf) = myIpAddr;
1977 buf += sizeof(unsigned int);
1978 memcpy(buf, tmp->offset, (tmp->numoffset)*sizeof(short));
1979 send_data(sd, oidnoffset, len);
1983 /* Send a special char -1 to represent the end of sending oids + offset pair to remote machine */
1985 send_data(sd, &endpair, sizeof(int));
/*
 * Receives one prefetch reply on sd and stores the object in the prefetch
 * cache.
 *
 * The reply starts with an int length; length - sizeof(int) further bytes
 * are read into a variable-length stack buffer whose first byte is the
 * control code and whose next unsigned int is the oid.
 *  - OBJECT_FOUND: the remaining bytes are copied into space obtained from
 *    prefetchobjstrAlloc() (called under prefetchcache_mutex). If the oid is
 *    already in the prefetch hash table the new copy is inserted only when
 *    the old version is <= the new one; otherwise it is inserted
 *    unconditionally. Waiters on pflookup.cond are then woken with a
 *    broadcast issued under pflookup.lock.
 *  - OBJECT_NOT_FOUND: only the oid is decoded.
 *  - anything else: an error is printed.
 *
 * NOTE(review): both recv_data() results are ignored and `length` comes from
 * the peer; a failed read or a length below sizeof(int) yields a
 * zero/negative or uninitialised VLA size. Validate before declaring
 * recvbuffer.
 */
1990 int getPrefetchResponse(int sd) {
1991 int length = 0, size = 0;
1994 void *modptr, *oldptr;
1996 recv_data((int)sd, &length, sizeof(int));
1997 size = length - sizeof(int);
1998 char recvbuffer[size];
2000 recv_data((int)sd, recvbuffer, size);
2001 control = *((char *) recvbuffer);
2002 if(control == OBJECT_FOUND) {
2003 oid = *((unsigned int *)(recvbuffer + sizeof(char)));
2004 size = size - (sizeof(char) + sizeof(unsigned int));
2005 pthread_mutex_lock(&prefetchcache_mutex);
2006 if ((modptr = prefetchobjstrAlloc(size)) == NULL) {
2007 printf("Error: objstrAlloc error for copying into prefetch cache %s, %d\n", __FILE__, __LINE__);
2008 pthread_mutex_unlock(&prefetchcache_mutex);
2011 pthread_mutex_unlock(&prefetchcache_mutex);
2012 memcpy(modptr, recvbuffer + sizeof(char) + sizeof(unsigned int), size);
2015 /* Insert the oid and its address into the prefetch hash lookup table */
2016 /* Do a version comparison if the oid exists */
2017 if((oldptr = prehashSearch(oid)) != NULL) {
2018 /* If older version then update with new object ptr */
2019 if(((objheader_t *)oldptr)->version <= ((objheader_t *)modptr)->version) {
2021 prehashInsert(oid, modptr);
2023 } else { /* Else add the object ptr to hash table*/
2024 prehashInsert(oid, modptr);
2026 /* Lock the Prefetch Cache look up table*/
2027 pthread_mutex_lock(&pflookup.lock);
2028 /* Broadcast signal on prefetch cache condition variable */
2029 pthread_cond_broadcast(&pflookup.cond);
2030 /* Unlock the Prefetch Cache look up table*/
2031 pthread_mutex_unlock(&pflookup.lock);
2032 } else if(control == OBJECT_NOT_FOUND) {
2033 oid = *((unsigned int *)(recvbuffer + sizeof(char)));
2034 /* TODO: For each object not found query DHT for new location and retrieve the object */
2035 /* Throw an error */
2036 //printf("OBJECT %x NOT FOUND.... THIS SHOULD NOT HAPPEN...TERMINATE PROGRAM\n", oid);
2039 printf("Error: in decoding the control value %d, %s, %d\n",control, __FILE__, __LINE__);
/*
 * Returns the TYPE() of object oid.
 *
 * The header is searched in the main store and then in the prefetch cache.
 * When neither has it, the object is read from a remote machine: the
 * location comes from lhashSearch(oid), a socket is taken from
 * transReadSockPool, a READ_REQUEST (control byte + oid) is sent and a
 * control byte is read back. OBJECT_NOT_FOUND is reported as an error;
 * otherwise the size and then the object are received.
 *
 * Two alternatives appear twice in this function and are presumably selected
 * by preprocessor conditionals -- TODO confirm:
 *  - the target is either the primary/backup machine chosen by flipBit or
 *    the machine returned by lhashSearch() directly;
 *  - the object is received either into prefetch-cache space (allocated
 *    under prefetchcache_mutex and registered with prehashInsert()) or into
 *    a calloc()'d temporary buffer.
 *
 * NOTE(review): the recv_data() results are not checked here.
 */
2045 unsigned short getObjType(unsigned int oid) {
2046 objheader_t *objheader;
2047 unsigned short numoffset[] ={0};
2048 short fieldoffset[] ={};
2050 if ((objheader = (objheader_t *) mhashSearch(oid)) == NULL) {
2052 if ((objheader = (objheader_t *) prehashSearch(oid)) == NULL) {
2056 unsigned int mid = lhashSearch(oid);
2057 unsigned int machineID;
2059 machineID = (flipBit)?(getPrimaryMachine(mid)):(getBackupMachine(mid));
2060 int sd = getSock2(transReadSockPool, machineID);
2062 unsigned int mid = lhashSearch(oid);
2063 int sd = getSock2(transReadSockPool, mid);
2065 char remotereadrequest[sizeof(char)+sizeof(unsigned int)];
2066 remotereadrequest[0] = READ_REQUEST;
2067 *((unsigned int *)(&remotereadrequest[1])) = oid;
2068 send_data(sd, remotereadrequest, sizeof(remotereadrequest));
2070 /* Read response from the Participant */
2072 recv_data(sd, &control, sizeof(char));
2074 if (control==OBJECT_NOT_FOUND) {
2075 printf("Error: in %s() THIS SHOULD NOT HAPPEN.....EXIT PROGRAM\n", __func__);
2079 /* Read object if found into local cache */
2081 recv_data(sd, &size, sizeof(int));
2083 pthread_mutex_lock(&prefetchcache_mutex);
2084 if ((objheader = prefetchobjstrAlloc(size)) == NULL) {
2085 printf("Error: %s() objstrAlloc error for copying into prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
2088 pthread_mutex_unlock(&prefetchcache_mutex);
2089 recv_data(sd, objheader, size);
2090 prehashInsert(oid, objheader);
2091 return TYPE(objheader);
2094 if((buffer = calloc(1, size)) == NULL) {
2095 printf("%s() Calloc Error %s at line %d\n", __func__, __FILE__, __LINE__);
2099 recv_data(sd, buffer, size);
2100 objheader = (objheader_t *)buffer;
2101 unsigned short type = TYPE(objheader);
2110 return TYPE(objheader);
/*
 * Asks machine mid to start a thread for object oid. A fresh TCP socket is
 * created and connected to mid on LISTEN_PORT (mid is passed through
 * htonl(), so it is held in host byte order), and a message made of the
 * START_REMOTE_THREAD byte followed by the oid is sent.
 * Failures of socket() and connect() are reported with perror()/printf().
 */
2113 int startRemoteThread(unsigned int oid, unsigned int mid) {
2115 struct sockaddr_in remoteAddr;
2116 char msg[1 + sizeof(unsigned int)];
2120 if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
2121 perror("startRemoteThread():socket()");
2125 bzero(&remoteAddr, sizeof(remoteAddr));
2126 remoteAddr.sin_family = AF_INET;
2127 remoteAddr.sin_port = htons(LISTEN_PORT);
2128 remoteAddr.sin_addr.s_addr = htonl(mid);
2130 if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
2131 printf("startRemoteThread():error %d connecting to %s:%d\n", errno,
2132 inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
2136 msg[0] = START_REMOTE_THREAD;
2137 *((unsigned int *) &msg[1]) = oid;
2138 send_data(sock, msg, 1 + sizeof(unsigned int));
/*
 * Hands out object ids from the static counter `id`, which starts at
 * 0xFFFFFFFF. The counter is checked against [oidMin, oidMax], the range
 * processConfigFile() assigns to this host; a value outside the range is
 * presumably reset into it -- TODO confirm.
 */
2145 //TODO: when reusing oids, make sure they are not already in use!
2146 static unsigned int id = 0xFFFFFFFF;
2147 unsigned int getNewOID(void) {
2149 if (id > oidMax || id < oidMin) {
/*
 * Hands out transaction ids from the static counter `tid`, which starts at
 * 0xFFFFFFFF. When the counter lies outside [transIDMin, transIDMax] it is
 * reset to transIDMin with the lowest bit forced to 1.
 */
2156 static unsigned int tid = 0xFFFFFFFF;
2157 unsigned int getNewTransID(void) {
2159 if (tid > transIDMax || tid < transIDMin) {
2160 tid = (transIDMin | 1);
/*
 * Reads the host list from CONFIG_FILENAME and derives this host's id
 * ranges.
 *
 * The host tables start with room for 8 hosts (locateObjHosts holds two
 * entries per host). Each line of the file is cut at the first '#', split on
 * blanks/tabs/newlines, and every token is parsed with inet_addr() and
 * registered through addHost() in host byte order; a token that parses to
 * -1 is reported as bad. At least one address is required.
 *
 * myIpAddr is taken from interface "en1" or "eth0" (alternatives presumably
 * chosen at build time -- TODO confirm) and located in the host table. The
 * 32-bit oid space is divided into numHostsInSystem blocks of oidsPerBlock
 * ids; this host owns block myIndexInHostArray, with the last host's block
 * extended to 0xFFFFFFFF. The transaction id range is set to the same
 * bounds.
 *
 * NOTE(review): liveHosts[myIndexInHostArray] is written before
 * myIndexInHostArray is compared with -1, i.e. an out-of-bounds write when
 * the local address is missing from the file.
 * NOTE(review): strtok() keeps static state; strtok_r() is the reentrant
 * form.
 */
2166 int processConfigFile() {
2168 const int maxLineLength = 200;
2169 char lineBuffer[maxLineLength];
2171 const char *delimiters = " \t\n";
2176 configFile = fopen(CONFIG_FILENAME, "r");
2177 if (configFile == NULL) {
2178 printf("error opening %s:\n", CONFIG_FILENAME);
2183 numHostsInSystem = 0;
2184 sizeOfHostArray = 8;
2185 hostIpAddrs = calloc(sizeOfHostArray, sizeof(unsigned int));
2187 liveHosts = calloc(sizeOfHostArray, sizeof(unsigned int));
2188 locateObjHosts = calloc(sizeOfHostArray*2, sizeof(unsigned int));
2191 while(fgets(lineBuffer, maxLineLength, configFile) != NULL) {
2192 commentBegin = strchr(lineBuffer, '#');
2193 if (commentBegin != NULL)
2194 *commentBegin = '\0';
2195 token = strtok(lineBuffer, delimiters);
2196 while (token != NULL) {
2197 tmpAddr = inet_addr(token);
2198 if ((int)tmpAddr == -1) {
2199 printf("error in %s: bad token:%s\n", CONFIG_FILENAME, token);
2203 addHost(htonl(tmpAddr));
2204 token = strtok(NULL, delimiters);
2210 if (numHostsInSystem < 1) {
2211 printf("error in %s: no IP Adresses found\n", CONFIG_FILENAME);
2215 myIpAddr = getMyIpAddr("en1");
2217 myIpAddr = getMyIpAddr("eth0");
2219 myIndexInHostArray = findHost(myIpAddr);
2221 liveHosts[myIndexInHostArray] = 1;
2223 if (myIndexInHostArray == -1) {
2224 printf("error in %s: IP Address of eth0 not found\n", CONFIG_FILENAME);
2227 oidsPerBlock = (0xFFFFFFFF / numHostsInSystem) + 1;
2228 oidMin = oidsPerBlock * myIndexInHostArray;
2229 if (myIndexInHostArray == numHostsInSystem - 1)
2230 oidMax = 0xFFFFFFFF;
2232 oidMax = oidsPerBlock * (myIndexInHostArray + 1) - 1;
2234 transIDMin = oidMin;
2235 transIDMax = oidMax;
/*
 * Reverse lookup in locateObjHosts: scans the odd (backup) slots of every
 * host pair for mid and returns the even (primary) slot of the first pair
 * that matches, i.e. a machine that lists mid as its backup.
 */
2244 unsigned int getDuplicatedPrimaryMachine(unsigned int mid) {
2246 for(i = 0; i < numHostsInSystem; i++) {
2247 if(mid == locateObjHosts[(i*2)+1]) {
2248 return locateObjHosts[i*2];
/*
 * Reads the primary-machine entry for host mid: the value stored at
 * locateObjHosts[2 * findHost(mid)], read while holding liveHosts_mutex.
 */
2254 unsigned int getPrimaryMachine(unsigned int mid) {
2256 int pmidindex = 2*findHost(mid);
2259 printf("What!!!\n");
2261 pthread_mutex_lock(&liveHosts_mutex);
2262 pmid = locateObjHosts[pmidindex];
2263 pthread_mutex_unlock(&liveHosts_mutex);
/*
 * Reads the backup-machine entry for host mid: the value stored at
 * locateObjHosts[2 * findHost(mid) + 1], read while holding
 * liveHosts_mutex.
 */
2267 unsigned int getBackupMachine(unsigned int mid) {
2269 int bmidindex = 2*findHost(mid)+1;
2274 pthread_mutex_lock(&liveHosts_mutex);
2275 bmid = locateObjHosts[bmidindex];
2276 pthread_mutex_unlock(&liveHosts_mutex);
/*
 * Returns liveHosts[mid] after printing the host's address and LIVE/DEAD.
 * Despite its name, mid is an index into hostIpAddrs/liveHosts here, not an
 * IP address.
 */
2280 int getStatus(int mid) {
2282 printf("%s -> host %s : %s\n",__func__,midtoIPString(hostIpAddrs[mid]),(liveHosts[mid] == 1)?"LIVE":"DEAD");
2284 return liveHosts[mid];
/*
 * Probes the configured hosts to refresh the liveness information. The slot
 * for this machine (i == myIndexInHostArray) is handled separately; for the
 * others a RESPOND_LIVE control byte is sent over a socket from
 * transPrefetchSockPool and a one-byte response is read, with LIVE marking
 * a live host. The count gathered in tmpNumLiveHosts is stored in
 * numLiveHostsInSystem.
 *
 * NOTE(review): the recv_data() result is kept in `timeout`, but the test
 * that follows looks only at `response`; confirm response is initialised so
 * a timed-out read cannot be mistaken for LIVE.
 */
2289 // updates the leader's liveHostArray and locateObj
2290 unsigned int updateLiveHosts() {
2292 printf("%s-> Entering updateLiveHosts\n", __func__);
2294 // update everyone's list
2296 //foreach in hostipaddrs, ping -> update list of livemachines
2297 //socket connection?
2300 //liveHosts lock here
2301 int sd = 0, i, j, tmpNumLiveHosts = 0;
2302 for(i = 0; i < numHostsInSystem; i++) {
2303 if(i == myIndexInHostArray)
2309 if((sd = getSockWithLock(transPrefetchSockPool, hostIpAddrs[i])) < 0) {
2320 liverequest = RESPOND_LIVE;
2322 send_data(sd, &liverequest, sizeof(char));
2325 int timeout = recv_data(sd, &response, sizeof(char));
2328 //if timeout, dead host
2329 if(response == LIVE) {
2339 freeSockWithLock(transPrefetchSockPool,hostIpAddrs[i],sd);
2342 numLiveHostsInSystem = tmpNumLiveHosts;
2344 printf("numLiveHostsInSystem:%d\n", numLiveHostsInSystem);
2346 //have updated list of live machines
2349 printf("%s-> Exiting updateLiveHosts\n", __func__);
/*
 * Walks all numHostsInSystem host slots with a counter that starts at 0;
 * presumably the counter is incremented for live hosts and returned --
 * TODO confirm.
 */
2355 int getNumLiveHostsInSystem() {
2356 int count = 0, i = 0;
2357 for(; i<numHostsInSystem; i++) {
/*
 * Broadcasts the current liveHosts and locateObjHosts tables to every live
 * host other than this one.
 *
 * Message layout: one UPDATE_LIVE_HOSTS byte, then numHostsInSystem ints
 * (liveHosts), then 2 * numHostsInSystem unsigned ints (locateObjHosts).
 * Each transfer uses a socket from transPrefetchSockPool that is released
 * right after the send. restoreDuplicationState() treats a non-zero return
 * as an error.
 *
 * NOTE(review): the buffer is sized with sizeof(int)/sizeof(unsigned int)
 * but filled with the literal stride 4; the two agree only where those
 * types are 4 bytes wide. The stores at offset 1 are also unaligned.
 */
2364 int updateLiveHostsCommit() {
2366 printf("%s -> Enter\n",__func__);
2370 char updaterequest[sizeof(char)+sizeof(int)*numHostsInSystem+sizeof(unsigned int)*(numHostsInSystem*2)];
2372 updaterequest[0] = UPDATE_LIVE_HOSTS;
2373 for(i = 0; i < numHostsInSystem; i++) {
2374 *((int *)(&updaterequest[i*4+1])) = liveHosts[i]; // clean this up later
2377 for(i = 0; i < numHostsInSystem*2; i++) {
2378 *((unsigned int *)(&updaterequest[i*4+(numHostsInSystem*4)+1])) = locateObjHosts[i]; //ditto
2381 //for each machine send data
2382 for(i = 0; i < numHostsInSystem; i++) { // hard define num of retries
2383 if(i == myIndexInHostArray)
2385 if(liveHosts[i] == 1) {
2386 if((sd = getSockWithLock(transPrefetchSockPool, hostIpAddrs[i])) < 0) {
2387 printf("%s -> socket create error, attempt %d\n",__func__, i);
2390 send_data(sd, updaterequest, sizeof(updaterequest));
2391 freeSockWithLock(transPrefetchSockPool,hostIpAddrs[i],sd);
2396 printf("%s -> Finish\n",__func__);
/*
 * Fills locateObjHosts from the host list and the liveHosts flags. For host
 * slot i the even entry (i*2) receives the address found at
 * (i + validIndex) % numHostsInSystem after stepping over hosts whose
 * liveHosts flag is 0; the odd entry (i*2 + 1) receives the address found
 * by a second such search. How validIndex advances between the two
 * searches is presumably "next live host after the primary" -- TODO confirm.
 *
 * NOTE(review): both while loops spin for as long as the probed flag is 0;
 * confirm at least one host is live before calling.
 */
2404 void setLocateObjHosts() {
2405 int i = 0, validIndex = 0;
2407 //check num hosts even valid first
2409 for(i = 0;i < numHostsInSystem; i++) {
2411 while(liveHosts[(i+validIndex)%numHostsInSystem] == 0) {
2414 locateObjHosts[i*2] = hostIpAddrs[(i+validIndex)%numHostsInSystem];
2416 printf("%s-> locateObjHosts[%d]:%s\n", __func__, i*2, midtoIPString(locateObjHosts[(i*2)]));
2420 while(liveHosts[(i+validIndex)%numHostsInSystem] == 0) {
2424 printf("%s-> validIndex:%d, this mid is: [%s]\n", __func__, validIndex, midtoIPString(hostIpAddrs[(i+validIndex)%numHostsInSystem]));
2426 locateObjHosts[(i*2)+1] = hostIpAddrs[(i+validIndex)%numHostsInSystem];
2430 printf("%s-> locateObjHosts[%d]:%s\n", __func__, i*2+1, midtoIPString(locateObjHosts[(i*2)+1]));
/*
 * Rewrites locateObjHosts after host mid has died. With backupMachine the
 * backup of mid and newPrimary the machine that used mid as its backup:
 * newPrimary's backup slot becomes backupMachine, mid's primary slot
 * becomes newPrimary, and every remaining occurrence of mid in the table is
 * replaced (primary slots by newPrimary, backup slots by backupMachine).
 *
 * NOTE(review): backupMachine and newPrimary hold machine addresses returned
 * as unsigned int but are declared int, and mid itself is an int parameter;
 * the comparisons against the unsigned table rely on implicit conversion.
 */
2435 void setReLocateObjHosts(int mid)
2437 int mIndex = findHost(mid);
2438 int backupMachine = getBackupMachine(mid);
2439 int newPrimary = getDuplicatedPrimaryMachine(mid);
2440 int newPrimaryIndex = findHost(newPrimary);
2443 /* duplicateLostObject example
2454 locateObjHosts[2*newPrimaryIndex+1] = backupMachine;
2455 locateObjHosts[2*mIndex] = newPrimary;
2457 /* relocate the objects of the machines already dead */
2458 for(i=0; i<numHostsInSystem *2; i+=2) {
2459 if(locateObjHosts[i] == mid)
2460 locateObjHosts[i] = newPrimary;
2461 if(locateObjHosts[i+1] == mid)
2462 locateObjHosts[i+1] = backupMachine;
/*
 * Debug dump of the host tables: for every configured host prints its
 * address with LIVE or DEAD, followed by the "original" (even) and "backup"
 * (odd) entries of its locateObjHosts pair.
 */
2468 void printHostsStatus() {
2470 printf("%s-> *printing live machines and backups*\n", __func__);
2471 for(i = 0; i < numHostsInSystem; i++) {
2473 printf("%s-> [%s]: LIVE\n", __func__, midtoIPString(hostIpAddrs[i]));
2476 printf("%s-> [%s]: DEAD\n", __func__, midtoIPString(hostIpAddrs[i]));
2478 printf("%s-> original:\t[%s]\n", __func__, midtoIPString(locateObjHosts[i*2]));
2479 printf("%s-> backup:\t[%s]\n", __func__, midtoIPString(locateObjHosts[i*2+1]));
/*
 * Iterates over all numHostsInSystem host slots; presumably reports whether
 * every host is currently marked live -- TODO confirm.
 */
2483 int allHostsLive() {
2485 for(i = 0; i < numHostsInSystem; i++) {
/*
 * Restores redundancy after host mid has died.
 *
 * backupMid is the backup machine of mid and originalMid the machine that
 * used mid as its backup; both are read before setReLocateObjHosts(mid)
 * rewrites the location table. Two copy steps follow:
 *  - originalMid re-creates its backup data on backupMid: done locally with
 *    duplicateLocalOriginalObjects() when originalMid is this machine,
 *    otherwise requested with DUPLICATE_ORIGINAL + backupMid;
 *  - backupMid re-creates its data on originalMid: done locally with
 *    duplicateLocalBackupObjects() when backupMid is this machine, otherwise
 *    requested with DUPLICATE_BACKUP + originalMid.
 * Remote requests use a socket from transPrefetchSockPool and wait for a
 * one-byte response, which is compared with DUPLICATION_COMPLETE for
 * logging.
 *
 * NOTE(review): the "attempt %d" messages print j, but the only visible
 * mention of j is its declaration without an initialiser; confirm it is
 * assigned before use.
 */
2494 void duplicateLostObjects(unsigned int mid){
2496 printf("%s-> Start, mid: [%s]\n", __func__, midtoIPString(mid));
2498 //this needs to be changed.
2499 unsigned int backupMid = getBackupMachine(mid); // get backup machine of dead machine
2500 unsigned int originalMid = getDuplicatedPrimaryMachine(mid); // get primary machine that used deadmachine as backup machine.
2503 printf("%s-> backupMid: [%s], ", __func__, midtoIPString(backupMid));
2504 printf("originalMid: [%s]\n", midtoIPString(originalMid));
2508 setReLocateObjHosts(mid);
2510 //connect to these machines
2511 //go through their object store copying necessary (in a transaction)
2512 int sd = 0, i, j, tmpNumLiveHosts = 0;
2514 /* duplicateLostObject example
2525 if(originalMid == myIpAddr) { // copy local machine's backup data, make it as primary data of backup machine.
2526 duplicateLocalOriginalObjects(backupMid);
2528 else if((sd = getSockWithLock(transPrefetchSockPool, originalMid)) < 0) {
2529 printf("%s -> socket create error, attempt %d\n", __func__,j);
2533 else { // if original is not local
2535 duperequest = DUPLICATE_ORIGINAL;
2536 send_data(sd, &duperequest, sizeof(char));
2538 printf("%s-> SD : %d Sent DUPLICATE_ORIGINAL request to %s\n", __func__,sd,midtoIPString(originalMid));
2540 send_data(sd, &backupMid, sizeof(unsigned int));
2543 recv_data(sd, &response, sizeof(char));
2545 printf("%s (DUPLICATE_ORIGINAL) -> Received %s\n", __func__,(response==DUPLICATION_COMPLETE)?"DUPLICATION_COMPLETE":"DUPLICATION_FAIL");
2548 freeSockWithLock(transPrefetchSockPool, originalMid, sd);
2551 if(backupMid == myIpAddr) { // copy local machine's primary data, and make it as backup data of original machine.
2552 duplicateLocalBackupObjects(originalMid);
2554 else if((sd = getSockWithLock(transPrefetchSockPool, backupMid)) < 0) {
2555 printf("updateLiveHosts(): socket create error, attempt %d\n", j);
2560 duperequest = DUPLICATE_BACKUP;
2561 send_data(sd, &duperequest, sizeof(char));
2563 printf("%s-> SD : %d Sent DUPLICATE_BACKUP request to %s\n", __func__,sd,midtoIPString(backupMid));
2565 send_data(sd, &originalMid, sizeof(unsigned int));
2568 recv_data(sd, &response, sizeof(char));
2570 printf("%s (DUPLICATE_BACKUP) -> Received %s\n", __func__,(response==DUPLICATION_COMPLETE)?"DUPLICATION_COMPLETE":"DUPLICATION_FAIL");
2573 freeSockWithLock(transPrefetchSockPool, backupMid, sd);
2577 printf("%s-> End\n", __func__);
/*
 * Ships a duplicate of this machine's objects to machine mid.
 *
 * mhashGetDuplicate() is called with selector 1 and yields a buffer
 * (dupeptr) of tempsize bytes. A RECEIVE_DUPES control byte and then the
 * buffer are sent over a socket from transPrefetchSockPool, a one-byte
 * response is read, the socket is released, and any response other than
 * DUPLICATION_COMPLETE is reported as DUPLICATION_FAIL.
 * Which objects selector 1 picks is presumably the primary copies (see the
 * caller's comment) -- TODO confirm.
 */
2581 void duplicateLocalBackupObjects(unsigned int mid) {
2584 char *dupeptr, ctrl, response;
2586 printf("%s-> Start; backup mid:%s\n", __func__, midtoIPString(mid));
2589 //copy code from dstmserver here
2590 tempsize = mhashGetDuplicate((void**)&dupeptr, 1);
2593 printf("tempsize:%d, dupeptrfirstvalue:%d\n", tempsize, *((unsigned int *)(dupeptr)));
2595 //send control and dupes after
2596 ctrl = RECEIVE_DUPES;
2597 if((sd = getSockWithLock(transPrefetchSockPool, mid)) < 0) {
2598 printf("duplicatelocalbackup: socket create error\n");
2602 printf("%s -> sd:%d, tempsize:%d, dupeptrfirstvalue:%d\n", __func__,sd, tempsize, *((unsigned int *)(dupeptr)));
2604 send_data(sd, &ctrl, sizeof(char));
2605 send_data(sd, dupeptr, tempsize);
2607 recv_data(sd, &response, sizeof(char));
2608 freeSockWithLock(transPrefetchSockPool,mid,sd);
2611 printf("%s ->response : %d - %d\n",__func__,response,DUPLICATION_COMPLETE);
2614 if(response != DUPLICATION_COMPLETE) {
2616 printf("%s -> DUPLICATION_FAIL\n",__func__);
2623 printf("%s-> End\n", __func__);
/*
 * Ships a duplicate of this machine's objects to machine mid.
 *
 * mhashGetDuplicate() is called with selector 0 and yields a buffer
 * (dupeptr) of tempsize bytes. A RECEIVE_DUPES control byte and then the
 * buffer are sent over a socket from transPrefetchSockPool, a one-byte
 * response is read, the socket is released, and any response other than
 * DUPLICATION_COMPLETE is reported as DUPLICATION_FAIL.
 * Which objects selector 0 picks is presumably the backup copies (see the
 * caller's comment) -- TODO confirm.
 */
2628 void duplicateLocalOriginalObjects(unsigned int mid) {
2630 char *dupeptr, ctrl, response;
2633 printf("%s-> Start\n", __func__);
2635 //copy code fom dstmserver here
2637 tempsize = mhashGetDuplicate((void**)&dupeptr, 0);
2639 //send control and dupes after
2640 ctrl = RECEIVE_DUPES;
2642 if((sd = getSockWithLock(transPrefetchSockPool, mid)) < 0) {
2643 printf("DUPLICATE_ORIGINAL: socket create error\n");
2647 printf("sd:%d, tempsize:%d, dupeptrfirstvalue:%d\n", sd, tempsize, *((unsigned int *)(dupeptr)));
2650 send_data(sd, &ctrl, sizeof(char));
2651 send_data(sd, dupeptr, tempsize);
2653 recv_data(sd, &response, sizeof(char));
2654 freeSockWithLock(transPrefetchSockPool,mid,sd);
2657 printf("%s ->response : %d - %d\n",__func__,response,DUPLICATION_COMPLETE);
2660 if(response != DUPLICATION_COMPLETE) {
2663 printf("%s -> DUPLICATION_FAIL\n",__func__);
2671 printf("%s-> End\n", __func__);
/*
 * Registers hostIp in the host tables unless findHost() already knows it.
 *
 * When the tables are full (numHostsInSystem == sizeOfHostArray) all three
 * are reallocated at twice the capacity with calloc() and the old contents
 * copied over; locateObjHosts always holds two entries per host. The new
 * host is stored at index numHostsInSystem with liveHosts set to 0 and its
 * own address as the even (primary) locateObjHosts entry.
 *
 * NOTE(review): the locateObjHosts copy transfers numHostsInSystem entries
 * although that table holds 2 * numHostsInSystem of them, so the upper half
 * would be lost on growth -- confirm and copy 2 * numHostsInSystem.
 * NOTE(review): the calloc() results are used without a NULL check.
 */
2678 void addHost(unsigned int hostIp) {
2679 unsigned int *tmpArray;
2680 int *tmpliveHostsArray;
2681 unsigned int *tmplocateObjHostsArray;
2683 if (findHost(hostIp) != -1)
2686 if (numHostsInSystem == sizeOfHostArray) {
2687 tmpArray = calloc(sizeOfHostArray * 2, sizeof(unsigned int));
2688 memcpy(tmpArray, hostIpAddrs, sizeof(unsigned int) * numHostsInSystem);
2690 hostIpAddrs = tmpArray;
2693 tmpliveHostsArray = calloc(sizeOfHostArray * 2, sizeof(unsigned int));
2694 memcpy(tmpliveHostsArray, liveHosts, sizeof(unsigned int) * numHostsInSystem);
2696 liveHosts = tmpliveHostsArray;
2698 tmplocateObjHostsArray = calloc(sizeOfHostArray * 2 * 2, sizeof(unsigned int));
2699 memcpy(tmplocateObjHostsArray, locateObjHosts, sizeof(unsigned int) * numHostsInSystem);
2700 free(locateObjHosts);
2701 locateObjHosts = tmplocateObjHostsArray;
2703 sizeOfHostArray *= 2;
2706 hostIpAddrs[numHostsInSystem] = hostIp;
2709 liveHosts[numHostsInSystem] = 0;
2710 locateObjHosts[numHostsInSystem*2] = hostIp;
/*
 * Linear search of hostIpAddrs[0 .. numHostsInSystem-1] for hostIp.
 * Callers compare the result with -1 to detect an unknown address, so the
 * match is presumably reported as its index -- TODO confirm.
 */
2717 int findHost(unsigned int hostIp) {
2719 for (i = 0; i < numHostsInSystem; i++)
2720 if (hostIpAddrs[i] == hostIp)
/*
 * reqNotify: registers the calling thread for change notification on a set
 * of objects and blocks until it is woken.
 *
 * Two signatures are present (with and without waitmid), and socket setup,
 * connect and send are each written in a primary-only and a primary+backup
 * form; these are presumably selected by preprocessor conditionals --
 * TODO confirm.
 *
 * Steps: the home machine of the oid is found with lhashSearch() (0 is an
 * error) and mapped to its primary/backup machines; TCP sockets are created
 * for LISTEN_PORT; a notifydata_t holding numoid, threadid, the oid and
 * version arrays and a mutex/condition pair is stored with
 * notifyhashInsert(threadid, ...); the request is sent and the thread waits
 * on ndata->threadcond under ndata->threadnotify.
 *
 * Message layout (offsets after the control byte): THREAD_NOTIFY_REQUEST,
 * numoid, numoid oids, numoid unsigned short versions, myIpAddr, threadid;
 * total 1 + numoid * (4 + 2) + 3 * sizeof(unsigned int) bytes.
 *
 * NOTE(review): the mutex and condition variable are initialised as locals
 * and then copied by assignment into ndata; POSIX only defines operations
 * on the original objects, not on copies. The destroy calls at the end also
 * act on the locals, not on the ndata members that were actually waited on.
 * NOTE(review): threadid is a function-level static shared by all callers;
 * confirm its update is synchronised.
 * NOTE(review): pthread_cond_wait() is not wrapped in a predicate loop, so a
 * spurious wakeup returns early.
 */
2727 /* This function sends notification request per thread waiting on object(s) whose version
2730 int reqNotify(unsigned int *oidarry, unsigned short *versionarry, unsigned int numoid, int waitmid) {
2732 int reqNotify(unsigned int *oidarry, unsigned short *versionarry, unsigned int numoid) {
2735 objheader_t *objheader;
2736 struct sockaddr_in premoteAddr;
2737 char msg[1 + numoid * (sizeof(unsigned short) + sizeof(unsigned int)) + 3 * sizeof(unsigned int)];
2740 unsigned short version;
2741 unsigned int oid,mid;
2742 static unsigned int threadid = 0;
2743 pthread_mutex_t threadnotify = PTHREAD_MUTEX_INITIALIZER; //Lock and condition var for threadjoin and notification
2744 pthread_cond_t threadcond = PTHREAD_COND_INITIALIZER;
2745 notifydata_t *ndata;
2749 struct sockaddr_in bremoteAddr;
2754 if((mid = lhashSearch(oid)) == 0) {
2755 printf("Error: %s() No such machine found for oid =%x\n",__func__, oid);
2759 int pmid = getPrimaryMachine(mid);
2760 int bmid = getBackupMachine(mid);
2766 if ((psock = socket(AF_INET, SOCK_STREAM, 0)) < 0 ||
2767 (bsock = socket(AF_INET, SOCK_STREAM, 0)) < 0 ) {
2769 if ((psock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
2771 perror("reqNotify():socket()");
2775 /* for primary machine */
2776 bzero(&premoteAddr, sizeof(premoteAddr));
2777 premoteAddr.sin_family = AF_INET;
2778 premoteAddr.sin_port = htons(LISTEN_PORT);
2779 premoteAddr.sin_addr.s_addr = htonl(pmid);
2782 /* for backup machine */
2783 bzero(&bremoteAddr, sizeof(bremoteAddr));
2784 bremoteAddr.sin_family = AF_INET;
2785 bremoteAddr.sin_port = htons(LISTEN_PORT);
2786 bremoteAddr.sin_addr.s_addr = htonl(bmid);
2788 /* Generate unique threadid */
2791 /* Save threadid, numoid, oidarray, versionarray, pthread_cond_variable for later processing */
2792 if((ndata = calloc(1, sizeof(notifydata_t))) == NULL) {
2793 printf("Calloc Error %s, %d\n", __FILE__, __LINE__);
2796 ndata->numoid = numoid;
2797 ndata->threadid = threadid;
2798 ndata->oidarry = oidarry;
2799 ndata->versionarry = versionarry;
2800 ndata->threadcond = threadcond;
2801 ndata->threadnotify = threadnotify;
2802 if((status = notifyhashInsert(threadid, ndata)) != 0) {
2803 printf("reqNotify(): Insert into notify hash table not successful %s, %d\n", __FILE__, __LINE__);
2808 /* Send number of oids, oidarry, version array, machine id and threadid */
2810 if ((connect(psock, (struct sockaddr *)&premoteAddr, sizeof(premoteAddr))< 0) ||
2811 (connect(bsock, (struct sockaddr *)&bremoteAddr, sizeof(bremoteAddr))< 0)) {
2813 if ((connect(psock, (struct sockaddr *)&premoteAddr, sizeof(premoteAddr))< 0)) {
2815 printf("reqNotify():error %d connecting to %s:%d\n", errno,
2816 inet_ntoa(premoteAddr.sin_addr), LISTEN_PORT);
2822 printf("%s -> Pmid = %s\n",__func__,midtoIPString(pmid));
2824 printf("%s -> Bmid = %s\n",__func__,midtoIPString(bmid));
2828 msg[0] = THREAD_NOTIFY_REQUEST;
2830 *((unsigned int *)(&msg[1])) = numoid;
2831 /* Send array of oids */
2832 size = sizeof(unsigned int);
2834 for(i = 0;i < numoid; i++) {
2837 printf("%s -> oid[%d] = %d\n",__func__,i,oidarry[i]);
2839 *((unsigned int *)(&msg[1] + size)) = oid;
2840 size += sizeof(unsigned int);
2843 /* Send array of version */
2844 for(i = 0;i < numoid; i++) {
2845 version = versionarry[i];
2846 *((unsigned short *)(&msg[1] + size)) = version;
2847 size += sizeof(unsigned short);
2850 *((unsigned int *)(&msg[1] + size)) = myIpAddr;
2851 size += sizeof(unsigned int);
2852 *((unsigned int *)(&msg[1] + size)) = threadid;
2854 waitThreadMid = waitmid;
2855 waitThreadID = threadid;
2857 printf("%s -> This Thread is waiting for %s\n",__func__,midtoIPString(waitmid));
2861 size = 1 + numoid * (sizeof(unsigned int) + sizeof(unsigned short)) + 3 * sizeof(unsigned int);
2862 pthread_mutex_lock(&(ndata->threadnotify));
2863 send_data(psock, msg, size);
2865 send_data(bsock, msg, size);
2867 pthread_cond_wait(&(ndata->threadcond), &(ndata->threadnotify));
2868 pthread_mutex_unlock(&(ndata->threadnotify));
2871 pthread_cond_destroy(&threadcond);
2872 pthread_mutex_destroy(&threadnotify);
/*
 * threadNotify - process a version notification for one object on behalf of
 * a waiting thread.
 *
 * oid:     id of the object the notification refers to
 * version: version number reported for that object
 * tid:     key used to look up the notifydata_t of the waiting thread in the
 *          notify hash table
 *
 * The visible statements look up the notifydata_t stored under tid, scan its
 * oidarry for oid, compare version with the matching versionarry entry, look
 * oid up with prehashSearch() and signal ndata->threadcond while holding
 * ndata->threadnotify.
 * NOTE(review): several statements of this function (including the bodies of
 * the error branches) are not visible in this excerpt, so the exact control
 * flow between the steps below should be confirmed against the full file.
 */
2883 void threadNotify(unsigned int oid, unsigned short version, unsigned int tid) {
2884 notifydata_t *ndata;
/* objIsFound flags a match in oidarry; index is later used to select the
 * versionarry entry (its assignment is not visible here, TODO confirm). */
2885 int i, objIsFound = 0, index = -1;
2888 printf("%s -> oid = %d vesion = %d tid = %d\n",__func__,oid,version,tid);
2891 //Look up the tid and call the corresponding pthread_cond_signal
2892 if((ndata = notifyhashSearch(tid)) == NULL) {
2893 printf("threadnotify(): No such threadid is present %s, %d\n", __FILE__, __LINE__);
/* Scan the oid array stored in ndata for the reported oid. */
2896 for(i = 0; i < ndata->numoid; i++) {
2897 if(ndata->oidarry[i] == oid) {
2903 if(objIsFound == 0) {
2904 printf("threadNotify(): Oid not found %s, %d\n", __FILE__, __LINE__);
/* NOTE(review): version is an unsigned short, so the version >= 0 test is
 * always true; only the comparison with the stored version has any effect. */
2908 if(version <= ndata->versionarry[index] && version >= 0) {
2909 printf("threadNotify(): New version %d has not changed since last version for oid = %d, %s, %d\n", version, oid, __FILE__, __LINE__);
2913 /* Clear from prefetch cache and free thread related data structure */
/* NOTE(review): the declaration of ptr is not visible in this excerpt. */
2914 if((ptr = prehashSearch(oid)) != NULL) {
/* Signal the condition variable while holding the mutex stored next to it. */
2918 pthread_mutex_lock(&(ndata->threadnotify));
2919 pthread_cond_signal(&(ndata->threadcond));
2920 pthread_mutex_unlock(&(ndata->threadnotify));
2926 printf("%s -> Finished\n",__func__);
/*
 * notifyAll - send a THREAD_NOTIFY_RESPONSE message for (oid, version).
 *
 * head:    address of the head pointer of a threadlist_t list; the loop runs
 *          while *head is non-NULL
 * oid:     object id placed in the message
 * version: object version placed in the message
 *
 * In each iteration the visible code creates an AF_INET/SOCK_STREAM socket,
 * connects to LISTEN_PORT on the host identified by mid, builds the message
 * described below and sends it with send_data().
 * NOTE(review): the declarations of ptr and mid, the list advance, any socket
 * cleanup and the return statements are not visible in this excerpt.
 */
2931 int notifyAll(threadlist_t **head, unsigned int oid, unsigned int version) {
2934 struct sockaddr_in remoteAddr;
/* Buffer sized for: 1 control byte + oid + version (unsigned short) + threadid. */
2935 char msg[1 + sizeof(unsigned short) + 2*sizeof(unsigned int)];
2936 int sock, status, size, bytesSent;
2938 printf("%s -> Entering \n",__func__);
2941 while(*head != NULL) {
2945 //create a socket connection to that machine
2946 if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
2947 perror("notifyAll():socket()");
/* mid is converted with htonl(), so it is treated here as a host-order
 * IPv4 address. */
2951 bzero(&remoteAddr, sizeof(remoteAddr));
2952 remoteAddr.sin_family = AF_INET;
2953 remoteAddr.sin_port = htons(LISTEN_PORT);
2954 remoteAddr.sin_addr.s_addr = htonl(mid);
2955 //send Thread Notify response and threadid to that machine
2956 if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
2957 printf("notifyAll():error %d connecting to %s:%d\n", errno,
2958 inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
/* Message layout, packed without padding:
 *   msg[0]           THREAD_NOTIFY_RESPONSE
 *   next 4 bytes     oid (unsigned int)
 *   next 2 bytes     version (unsigned short)
 *   next 4 bytes     ptr->threadid (unsigned int)
 * NOTE(review): the fields are stored through cast pointers starting at
 * &msg[1]; such stores may be misaligned on some targets. */
2962 bzero(msg, (1+sizeof(unsigned short) + 2*sizeof(unsigned int)));
2963 msg[0] = THREAD_NOTIFY_RESPONSE;
2964 *((unsigned int *)&msg[1]) = oid;
2965 size = sizeof(unsigned int);
/* NOTE(review): version is an unsigned int parameter but is stored as an
 * unsigned short, so larger values are truncated. */
2966 *((unsigned short *)(&msg[1]+ size)) = version;
2967 size+= sizeof(unsigned short);
2968 *((unsigned int *)(&msg[1]+ size)) = ptr->threadid;
/* size is recomputed as the total message length before sending. */
2970 size = 1 + 2*sizeof(unsigned int) + sizeof(unsigned short);
2971 send_data(sock, msg, size);
2985 removetransactionhash();
2987 objstrDelete(t_cache);
2991 /* This function inserts necessary information into
2992 * a machine pile data structure */
/*
 * pile:       presumably the head of the pile linked list (TODO confirm; the
 *             statement that initialises tmp is not visible here)
 * headeraddr: header of the object being recorded; its STATUS is reset to 0
 *             by the last visible statement
 * mid:        machine id compared against the mid of each pile node
 * num_objs:   passed to pCreate() when a new node is created
 *
 * Objects flagged NEW are stored in oidcreated and objects flagged DIRTY in
 * oidmod; both cases add sizeof(objheader_t) plus the object size to
 * sum_bytes. The remaining statements write an (oid, version) record into
 * objread.
 * NOTE(review): the counter increments, the declaration of tmpsize, the list
 * linking and the return statements are not visible in this excerpt.
 */
2993 plistnode_t *pInsert(plistnode_t *pile, objheader_t *headeraddr, unsigned int mid, int num_objs) {
2994 plistnode_t *ptr, *tmp;
2995 int found = 0, offset = 0;
2998 //Add oid into a machine that is already present in the pile linked list structure
2999 while(tmp != NULL) {
3000 if (tmp->mid == mid) {
3003 if (STATUS(headeraddr) & NEW) {
3004 tmp->oidcreated[tmp->numcreated] = OID(headeraddr);
3006 GETSIZE(tmpsize, headeraddr);
3007 tmp->sum_bytes += sizeof(objheader_t) + tmpsize;
3008 } else if (STATUS(headeraddr) & DIRTY) {
3009 tmp->oidmod[tmp->nummod] = OID(headeraddr);
3011 GETSIZE(tmpsize, headeraddr);
3012 tmp->sum_bytes += sizeof(objheader_t) + tmpsize;
3013 /* midtoIP(tmp->mid, ip);
3014 printf("pp; Redo? pile->mid: %s, oid: %d, header version: %d\n", ip, OID(headeraddr), headeraddr->version);*/
/* objread is written as packed records of (unsigned int oid, short version);
 * offset selects record number numread. Presumably this is the branch for
 * objects that are neither NEW nor DIRTY (the else line is not visible). */
3016 offset = (sizeof(unsigned int) + sizeof(short)) * tmp->numread;
3017 *((unsigned int *)(((char *)tmp->objread) + offset))=OID(headeraddr);
3018 offset += sizeof(unsigned int);
3019 *((short *)(((char *)tmp->objread) + offset)) = headeraddr->version;
3027 //Add oid for any new machine
3030 if((ptr = pCreate(num_objs)) == NULL) {
3031 printf("pCreate Error\n");
/* Same three-way bookkeeping as above, applied to the freshly created node. */
3036 if (STATUS(headeraddr) & NEW) {
3037 ptr->oidcreated[ptr->numcreated] = OID(headeraddr);
3039 GETSIZE(tmpsize, headeraddr);
3040 ptr->sum_bytes += sizeof(objheader_t) + tmpsize;
3041 } else if (STATUS(headeraddr) & DIRTY) {
3042 ptr->oidmod[ptr->nummod] = OID(headeraddr);
3044 GETSIZE(tmpsize, headeraddr);
3045 ptr->sum_bytes += sizeof(objheader_t) + tmpsize;
/* First (oid, version) record of the new node goes at the start of objread. */
3047 *((unsigned int *)ptr->objread)=OID(headeraddr);
3048 offset = sizeof(unsigned int);
3049 *((short *)(((char *)ptr->objread) + offset)) = headeraddr->version;
/* Clear the status flags of the object header. */
3058 STATUS(headeraddr) = 0;
/*
 * sortPiles - rearrange the pile list so that the node of the local machine
 * (mid equal to myIpAddr) is placed at the end.
 *
 * pileptr: head of the pile list
 *
 * NOTE(review): only part of this function is visible in this excerpt. The
 * visible statements compare node mids with myIpAddr, unlink a matching node
 * through prev->next and relink through tail->next; the loop advance, the
 * remaining relinking steps and the return value are not visible and should
 * be confirmed against the full file.
 */
3063 plistnode_t *sortPiles(plistnode_t *pileptr) {
3064 plistnode_t *head, *ptr, *tail;
3067 /* Get tail pointer */
3073 plistnode_t *prev = pileptr;
3074 /* Arrange local machine processing at the end of the pile list */
3075 while(ptr != NULL) {
/* Local node found somewhere after the first position tracked by prev. */
3078 if(ptr->mid == myIpAddr && (prev != pileptr)) {
3079 prev->next = ptr->next;
/* Local node found while prev still equals the list head. */
3084 if((ptr->mid == myIpAddr) && (prev == pileptr)) {
3085 prev->next = ptr->next;
3092 if((ptr->mid == myIpAddr))
3094 tail->next = pileptr;
3095 pileptr = ptr->next;
3108 * Executes when the known leader has failed.
3109 * Guarantees consensus on next leader among all live hosts. */
/* NOTE(review): the header of the enclosing function is not visible in this
 * excerpt. The visible statements save the current round and leader, then run
 * the prepare, accept and learn phases inside a do-while loop that repeats
 * while ret is -1, and finally print the resulting leader. */
3112 int origRound = paxosRound;
3113 origleader = leader;
3116 printf(">> Debug : Starting paxos..\n");
3120 ret = paxosPrepare(); // phase 1
3122 ret = paxosAccept(); // phase 2
3124 paxosLearn(); // phase 3
3128 // Paxos not successful; wait and retry if new leader is not yet selected
/* A changed paxosRound is tested here; the action taken is not visible. */
3130 if(paxosRound != origRound)
3132 } while (ret == -1);
3135 printf("\n>> Debug : Leader : [%s]\t[%u]\n", midtoIPString(leader),leader);
/* NOTE(review): the header of the enclosing function is not visible in this
 * excerpt; the error message below suggests it is paxosPrepare(). The visible
 * statements send PAXOS_PREPARE together with my_n to every entry of
 * hostIpAddrs, read a one-byte reply, and on PAXOS_PREPARE_OK read remote_n
 * and remote_v from the peer. */
3144 //int origleader = leader;
3155 printf("[Prepare]...\n");
3158 temp_v_a = myIpAddr; // if no other value is proposed, make this machine the new leader
3160 for (i = 0; i < numHostsInSystem; ++i) {
3161 control = PAXOS_PREPARE;
3165 if ((sd = getSockWithLock(transPrefetchSockPool, hostIpAddrs[i])) < 0) {
3166 printf("paxosPrepare(): socket create error\n");
3170 printf("%s-> Send PAXOS_PREPARE to mid [%s] with my_n=%d\n", __func__, midtoIPString(hostIpAddrs[i]), my_n);
/* Request: control byte followed by the proposal number my_n. The reply
 * control byte overwrites control. */
3172 send_data(sd, &control, sizeof(char));
3173 send_data(sd, &my_n, sizeof(int));
3174 int timeout = recv_data(sd, &control, sizeof(char));
/* NOTE(review): sd has already been tested for < 0 and used for I/O above,
 * so the sd == -1 part of this test appears redundant. */
3175 if ((sd == -1) || (timeout < 0)) {
3177 printf("%s-> timeout to machine [%s]\n", __func__, midtoIPString(hostIpAddrs[i]));
3183 case PAXOS_PREPARE_OK:
3185 recv_data(sd, &remote_n, sizeof(int));
3186 recv_data(sd, &remote_v, sizeof(int));
3188 printf("%s-> Received PAXOS_PREPARE_OK from mindex [%d] with remote_v=%s\n", __func__, i, midtoIPString(remote_v));
/* A value reported by a peer replaces temp_v_a only if it differs from the
 * original leader and comes with a proposal number above tmp_n. */
3190 if(remote_v != origleader) {
3191 if (remote_n > tmp_n) {
3193 temp_v_a = remote_v;
3197 case PAXOS_PREPARE_REJECT:
3201 freeSockWithLock(transPrefetchSockPool,hostIpAddrs[i],sd);
3205 printf("%s-> cnt:%d, numLiveHostsInSystem:%d\n", __func__, cnt, numLiveHostsInSystem);
/* NOTE(review): the division is integer division and the comparison is >=,
 * so with an even host count exactly half of the hosts satisfies this test.
 * Confirm this is the intended quorum. */
3208 if (cnt >= (numLiveHostsInSystem / 2)) { // majority of OK replies
/* NOTE(review): the header of the enclosing function is not visible in this
 * excerpt; the error message below suggests it is paxosAccept(). The visible
 * statements send PAXOS_ACCEPT, my_n and the proposed value to every entry of
 * hostIpAddrs and read a one-byte reply from each. */
3222 int remote_v = temp_v_a;
3225 printf("[Accept]...\n");
3227 for (i = 0; i < numHostsInSystem; ++i) {
3228 control = PAXOS_ACCEPT;
3233 if ((sd = getSockWithLock(transPrefetchSockPool, hostIpAddrs[i])) < 0) {
3234 printf("paxosAccept(): socket create error\n");
/* Request: control byte, proposal number my_n, proposed value remote_v. */
3238 send_data(sd, &control, sizeof(char));
3239 send_data(sd, &my_n, sizeof(int));
3240 send_data(sd, &remote_v, sizeof(int));
3242 int timeout = recv_data(sd, &control, sizeof(char));
/* NOTE(review): sd has already been tested for < 0 and used for I/O above,
 * so the sd == -1 part of this test appears redundant. */
3243 if ((sd == -1) || (timeout < 0)) {
3245 printf("%s-> timeout to machine [%s]\n", __func__, midtoIPString(hostIpAddrs[i]));
3251 case PAXOS_ACCEPT_OK:
3254 case PAXOS_ACCEPT_REJECT:
3258 printf(">> Debug : Accept - n_h [%d], n_a [%d], v_a [%s]\n", n_h, n_a, midtoIPString(v_a));
3260 freeSockWithLock(transPrefetchSockPool,hostIpAddrs[i],sd);
/* NOTE(review): integer division combined with >= means exactly half of an
 * even host count satisfies this test. Confirm the intended quorum. */
3263 if (cnt >= (numLiveHostsInSystem / 2)) {
/* NOTE(review): the header of the enclosing function is not visible in this
 * excerpt; the commented-out message below suggests it is paxosLearn(). The
 * visible statements send PAXOS_LEARN followed by v_a to the entries of
 * hostIpAddrs, with a separate branch for the entry equal to myIpAddr. */
3278 printf("[Learn]...\n");
3281 control = PAXOS_LEARN;
3283 for (i = 0; i < numHostsInSystem; ++i) {
/* The local host is handled separately; the statements of that branch are
 * only partly visible here. */
3286 if(hostIpAddrs[i] == myIpAddr)
3291 printf("This is my leader!!!: [%s]\n", midtoIPString(leader));
3295 if ((sd = getSockWithLock(transPrefetchSockPool, hostIpAddrs[i])) < 0) {
3297 // printf("paxosLearn(): socket create error, attemp\n");
/* Message: control byte followed by the value v_a; no reply is read in the
 * visible code. */
3300 send_data(sd, &control, sizeof(char));
3301 send_data(sd, &v_a, sizeof(int));
3303 freeSockWithLock(transPrefetchSockPool,hostIpAddrs[i],sd);
/*
 * clearDeadThreadsNotification - wake the local waiter if the machine it is
 * waiting on is marked dead.
 *
 * Uses the globals waitThreadID and waitThreadMid. When a wait is pending
 * and liveHosts[] holds 0 for the awaited machine, the visible code looks up
 * the notifydata_t stored under waitThreadID, calls clearNotifyList() for
 * each registered oid and signals the condition variable of the waiter.
 * NOTE(review): some statements (braces, the declaration of i, error
 * handling) are not visible in this excerpt.
 */
3311 void clearDeadThreadsNotification()
3315 printf("%s -> Entered\n",__func__);
3317 // clear all the threadnotify request first
/* NOTE(review): waitThreadID is an unsigned int, so -1 is converted to the
 * largest unsigned value for this comparison. */
3319 if(waitThreadID != -1) {
3321 printf("%s -> I was waitng for %s\n",__func__,midtoIPString(waitThreadMid));
/* NOTE(review): the result of findHost() is used as an array index below
 * without a visible range check. */
3323 int waitThreadIndex = findHost(waitThreadMid);
3325 notifydata_t *ndata;
3327 if(liveHosts[waitThreadIndex] == 0) // the thread waiting for is dead
3329 if((ndata = (notifydata_t*)notifyhashSearch(waitThreadID)) == NULL) {
3333 for(i =0 ; i < ndata->numoid; i++) {
3334 clearNotifyList(ndata->oidarry[i]); // clear thread object's notifylist
/* Signal the waiter while holding the mutex stored next to the condition
 * variable. */
3337 pthread_mutex_lock(&(ndata->threadnotify));
3338 pthread_cond_signal(&(ndata->threadcond));
3339 pthread_mutex_unlock(&(ndata->threadnotify));
3347 printf("%s -> Finished\n",__func__);
3351 /* request the primary and the backup machines to clear
3352 thread obj's notify list */
/*
 * oid: id of the object whose notify list should be cleared
 *
 * The visible code resolves oid to a machine id with lhashSearch(), derives
 * the primary and backup machines from it, opens one AF_INET/SOCK_STREAM
 * socket to each on LISTEN_PORT and sends both the same message: one
 * CLEAR_NOTIFY_LIST control byte followed by oid.
 * NOTE(review): the declarations of mid, pmid, bmid, psock and bsock, the
 * bodies of the error branches and any socket cleanup are not visible in
 * this excerpt.
 */
3353 void reqClearNotifyList(unsigned int oid)
3357 objheader_t *objheader;
3358 struct sockaddr_in premoteAddr, bremoteAddr;
3359 char msg[1 + sizeof(unsigned int)];
/* A result of 0 from lhashSearch() is treated as no machine found. */
3361 if((mid = lhashSearch(oid)) == 0) {
3362 printf("%s -> No such machine found for oid %x\n",__func__,oid);
3366 pmid = getPrimaryMachine(mid);
3367 bmid = getBackupMachine(mid);
/* NOTE(review): if the first socket() succeeds and the second fails, psock
 * is already open when this branch is taken; confirm that the (not visible)
 * error path closes it. */
3369 if((psock = socket(AF_INET, SOCK_STREAM, 0)) < 0 ||
3370 (bsock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
3371 perror("clearNotifyList() : socket()");
3375 /* for primary machine */
3376 bzero(&premoteAddr, sizeof(premoteAddr));
3377 premoteAddr.sin_family = AF_INET;
3378 premoteAddr.sin_port = htons(LISTEN_PORT);
3379 premoteAddr.sin_addr.s_addr = htonl(pmid);
3381 /* for backup machine */
3382 bzero(&bremoteAddr, sizeof(bremoteAddr));
3383 bremoteAddr.sin_family = AF_INET;
3384 bremoteAddr.sin_port = htons(LISTEN_PORT);
3385 bremoteAddr.sin_addr.s_addr = htonl(bmid);
3387 /* send message to both the primary and the backup */
/* Because of the short-circuit ||, the backup connect is attempted only if
 * the primary connect succeeded; a failure of either one takes this branch. */
3388 if((connect(psock, (struct sockaddr *)&premoteAddr, sizeof(premoteAddr)) < 0) ||
3389 (connect(bsock, (struct sockaddr *)&bremoteAddr, sizeof(bremoteAddr)) < 0)) {
3390 printf("%s -> error in connecting\n",__func__);
3394 printf("%s -> Pmid = %s\n",__func__,midtoIPString(pmid));
3395 printf("%s -> Bmid = %s\n",__func__,midtoIPString(bmid));
/* Message layout: msg[0] is the control byte, the following
 * sizeof(unsigned int) bytes hold oid.
 * NOTE(review): the store through a cast pointer at &msg[1] may be
 * misaligned on some targets. */
3397 msg[0] = CLEAR_NOTIFY_LIST;
3398 *((unsigned int *)(&msg[1])) = oid;
3400 send_data(psock, &msg, sizeof(char) + sizeof(unsigned int));
3401 send_data(bsock, &msg, sizeof(char) + sizeof(unsigned int));
/*
 * checkiftheMachineDead - return the status of the machine identified by mid.
 *
 * mid: machine id that is translated to a host index with findHost()
 *
 * Returns whatever getStatus() reports for that index.
 * NOTE(review): which return value means dead or alive is not visible here,
 * and the index from findHost() is passed on without a visible validity
 * check; confirm both against getStatus() and findHost().
 */
3410 int checkiftheMachineDead(unsigned int mid) {
3411 int mIndex = findHost(mid);
3412 return getStatus(mIndex);