1 #include "machinepile.h"
6 #include "threadnotify.h"
8 #include "addUdpEnhance.h"
9 #include "addPrefetchEnhance.h"
17 #include "abortreaders.h"
24 #include <sys/select.h>
29 #define CONFIG_FILENAME "dstm.conf"
31 /* Thread transaction variables */
/* Per-thread object store of the running transaction: it is assigned from
 * objstrCreate() at transaction start and handed to objstrAlloc() whenever
 * an object copy is cached. */
33 __thread objstr_t *t_cache;
34 __thread struct ___Object___ *revertlist;
/* Per-thread jump buffer; the read paths call _longjmp(aborttrans,1) to
 * unwind an aborted transaction. */
37 __thread jmp_buf aborttrans;
40 /* Global Variables */
41 extern int classsize[];
/* Prefetch statistics table; set from initPrefetchStats() and indexed by
 * prefetch site id in prefetch(). */
42 pfcstats_t *evalPrefetch;
43 extern int numprefetchsites; //Global variable containing number of prefetch sites
44 extern pthread_mutex_t mainobjstore_mutex; // Mutex to lock main Object store
45 pthread_mutex_t prefetchcache_mutex; // Mutex to lock Prefetch Cache
46 pthread_mutexattr_t prefetchcache_mutex_attr; /* Attribute for lock to make it a recursive lock */
47 extern prehashtable_t pflookup; //Global Prefetch cache's lookup table
48 pthread_t wthreads[NUM_THREADS]; //Worker threads for working on the prefetch queue
49 pthread_t tPrefetch; /* Primary Prefetch thread that processes the prefetch queue */
50 extern objstr_t *mainobjstore;
/* Machine id of this host; piles whose mid equals it are handled locally
 * during commit. */
51 unsigned int myIpAddr;
52 unsigned int *hostIpAddrs;
55 int myIndexInHostArray;
56 unsigned int oidsPerBlock;
/* Socket pools, one per traffic class; all three are created with
 * createSockPool() during startup. */
60 sockPoolHashTable_t *transReadSockPool;
61 sockPoolHashTable_t *transPrefetchSockPool;
62 sockPoolHashTable_t *transRequestSockPool;
63 pthread_mutex_t notifymutex;
64 pthread_mutex_t atomicObjLock;
66 /***********************************
67 * Global Variables for statistics
68 **********************************/
69 int numTransCommit = 0;
70 int numTransAbort = 0;
73 int nprehashSearch = 0;
80 /***********************************
81 * Global variables for Duplication
82 ***********************************/
85 int numLiveHostsInSystem;
86 int flipBit; // Used to distribute requests between primary and backup evenly
/* Indexed as locateObjHosts[2*mindex+flipBit] in the read path, i.e. two
 * entries per host index. */
87 unsigned int *locateObjHosts;
/* Per-thread flag; detectMachineFailure() treats the value 1 as "a socket
 * call reported a timeout or reset". */
88 __thread int timeoutFlag;
89 extern int leaderFixing;
90 extern pthread_mutex_t leaderFixing_mutex;
91 extern pthread_mutex_t liveHosts_mutex;
/* Slots for in-flight transaction ids. The commit path selects a slot with
 * (transIDIndex++)%25 and zeroes it on every exit, so the 25 here must stay
 * in sync with that modulus. */
93 unsigned int liveTransactions[25];
94 unsigned int transIDMax;
95 unsigned int transIDMin;
96 unsigned int transIDIndex;
101 /******************************
102 * Global variables for Paxos
103 ******************************/
109 unsigned int origleader;
110 unsigned int temp_v_a;
/* Forward declarations for helpers defined later in this file. */
113 void printhex(unsigned char *, int);
114 plistnode_t *createPiles();
115 plistnode_t *sortPiles(plistnode_t *pileptr);
117 /*******************************
118 * Send and Recv function calls
119 *******************************/
/* Sends data from buf over socket fd with send() and adds the value returned
 * by send() to the bytesSent counter.
 *
 * fd     - socket descriptor passed to send()
 * buf    - start of the data to transmit
 * buflen - caller-supplied length (the lines deriving `size` from it are not
 *          visible in this excerpt)
 *
 * The visible branches report ECONNRESET, EAGAIN/EWOULDBLOCK and a -1 return
 * as a dead or timed-out peer and announce that the timeout flag is set.
 *
 * NOTE(review): bytesSent is updated directly after send(), before any -1
 * check, so a failed send() subtracts one from the counter.
 * NOTE(review): errno is only meaningful right after a call has failed;
 * confirm the errno tests are guarded by numbytes == -1, otherwise a stale
 * errno from an earlier call can be mistaken for a timeout. */
120 void send_data(int fd, void *buf, int buflen) {
122 // printf("%s-> Start; fd:%d, buflen:%d\n", __func__, fd, buflen);
124 char *buffer = (char *)(buf);
128 numbytes = send(fd, buffer, size, 0);
129 bytesSent = bytesSent + numbytes;
132 // printf("%s-> numbytes: %d\n", __func__, numbytes);
134 if(errno == ECONNRESET) { // EINT/EPIPE??; Connection reset, possible disconnected machine
136 printf("%s-> errno = ECONNRESET; connection reset\n", __func__);
137 printf("***SETTING TIMEOUTFLAG***\n");
143 else if(errno == EAGAIN || errno == EWOULDBLOCK) {
145 printf("%s-> errno = EAGAIN|EWOULDBLOCK; socket timeout\n", __func__);
146 printf("***SETTING TIMEOUTFLAG***\n");
152 else if(numbytes == -1) {
154 printf("%s-> numbytes = -1; socket timeout\n", __func__);
155 printf("***SETTING TIMEOUTFLAG***\n");
161 if (numbytes == -1) {
170 // printf("%s-> Exiting\n", __func__);
/* Receives data from socket fd into buf with recv() and adds the value
 * returned by recv() to the bytesRecv counter.
 *
 * fd     - socket descriptor passed to recv()
 * buf    - destination buffer
 * buflen - caller-supplied length (the lines deriving `size` from it are not
 *          visible in this excerpt)
 *
 * Mirrors send_data(): ECONNRESET, EAGAIN/EWOULDBLOCK and a -1 return are
 * each reported as a dead or timed-out peer.
 *
 * NOTE(review): bytesRecv is updated directly after recv(), before any -1
 * check, so a failed recv() subtracts one from the counter.
 * NOTE(review): errno is only meaningful right after a call has failed;
 * confirm the errno tests are guarded by numbytes == -1. */
174 void recv_data(int fd, void *buf, int buflen) {
176 // printf("%s-> Start; fd:%d, buflen:%d\n", __func__, fd, buflen);
178 char *buffer = (char *)(buf);
182 numbytes = recv(fd, buffer, size, 0);
183 bytesRecv = bytesRecv + numbytes;
186 // printf("%s-> numbytes: %d\n", __func__, numbytes);
188 if(errno == ECONNRESET) {
190 printf("%s-> errno = ECONNRESET; connection reset\n", __func__);
191 printf("***SETTING TIMEOUTFLAG***\n");
197 else if(errno == EAGAIN || errno == EWOULDBLOCK) {
199 printf("%s-> errno = EAGAIN|EWOULDBLOCK; socket timeout\n", __func__);
200 printf("***SETTING TIMEOUTFLAG***\n");
206 else if(numbytes == -1) {
208 printf("%s-> numbytes = -1; socket timeout\n", __func__);
209 printf("***SETTING TIMEOUTFLAG***\n");
215 if (numbytes == -1) {
224 // printf("%s-> Exiting\n", __func__);
/* Variant of recv_data() with its trace printf calls enabled. It receives
 * from fd into buf with recv(), tests errno for EAGAIN/EWOULDBLOCK, and adds
 * the received count to bytesRecv after that test.
 *
 * NOTE(review): the body of the EAGAIN/EWOULDBLOCK branch is not visible;
 * presumably it retries rather than flagging a timeout - confirm. */
228 void recv_data_block(int fd, void *buf, int buflen) {
230 printf("%s-> Start; fd:%d, buflen:%d\n", __func__, fd, buflen);
232 char *buffer = (char *)(buf);
236 numbytes = recv(fd, buffer, size, 0);
238 printf("%s-> numbytes: %d\n", __func__, numbytes);
240 if(errno == EAGAIN || errno == EWOULDBLOCK) {
244 bytesRecv = bytesRecv + numbytes;
250 printf("%s-> Exiting\n", __func__);
/* Receives from fd into buf with recv() and reports the outcome through its
 * int return value instead of a flag. A -1 result from recv() is logged with
 * perror().
 *
 * NOTE(review): the actual return values are on lines not visible in this
 * excerpt; check them before relying on a particular convention. */
254 int recv_data_errorcode(int fd, void *buf, int buflen) {
256 printf("%s-> Start; fd:%d, buflen:%d\n", __func__, fd, buflen);
258 char *buffer = (char *)(buf);
262 numbytes = recv(fd, buffer, size, 0);
264 printf("%s-> numbytes: %d\n", __func__, numbytes);
268 else if (numbytes == -1) {
269 perror("recv_data_errorcode");
276 printf("%s-> Exiting\n", __func__);
/* Debug helper: prints numBytes bytes starting at ptr to stdout as
 * space-separated lowercase hex. One branch prefixes a literal '0'
 * (presumably for values below 0x10, so every byte shows two digits - the
 * selecting condition is not visible here). */
281 void printhex(unsigned char *ptr, int numBytes) {
283 for (i = 0; i < numBytes; i++) {
285 printf("0%x ", ptr[i]);
287 printf("%x ", ptr[i]);
/* Walks `array` from index 0 until it meets the sentinel value -1. The array
 * must therefore contain a -1 terminator, or the scan runs off the end.
 * NOTE(review): plain `inline` without `static` has C99 linkage pitfalls;
 * confirm an external definition exists if this is called across files. */
293 inline int arrayLength(int *array) {
295 for(i=0 ; array[i] != -1; i++)
/* Scans the first `arraylength` entries of `array`; by its name it
 * presumably returns the largest value - the comparison and return lines are
 * not visible in this excerpt. */
300 inline int findmax(int *array, int arraylength) {
303 for(i = 0; i < arraylength; i++) {
/* Converts machine id `mid` to a string; callers in this file print the
 * result with %s. The body is not visible in this excerpt, so the ownership
 * and lifetime of the returned buffer must be checked at the definition. */
311 char* midtoIPString(unsigned int mid){
315 /* This function is a prefetch call generated by the compiler that
316 * populates the shared primary prefetch queue*/
/* siteid      - prefetch site; also indexes evalPrefetch[] for call counting
 * ntuples     - number of entries in oids[] and endoffsets[]
 * oids        - ntuples object ids
 * endoffsets  - ntuples end offsets; the last one gives the number of
 *               entries in arrayfields[]
 * arrayfields - endoffsets[ntuples-1] short values
 *
 * Queue node layout written below:
 *   int siteid | int ntuples | oids[ntuples] | endoffsets[ntuples] |
 *   arrayfields[endoffsets[ntuples-1]]
 * `len` is presumably the 2*sizeof(int) header size; its assignment is not
 * visible in this excerpt.
 *
 * NOTE(review): ntuples == 0 would read endoffsets[-1]; callers must pass at
 * least one tuple. */
317 void prefetch(int siteid, int ntuples, unsigned int *oids, unsigned short *endoffsets, short *arrayfields) {
318 /* Allocate for the queue node*/
319 int qnodesize = 2*sizeof(int) + ntuples * (sizeof(unsigned short) + sizeof(unsigned int)) + endoffsets[ntuples - 1] * sizeof(short);
321 char * node= getmemory(qnodesize);
322 int top=endoffsets[ntuples-1];
326 /* Set queue node values */
328 /* TODO: Remove this after testing */
329 evalPrefetch[siteid].callcount++;
331 *((int *)(node))=siteid;
332 *((int *)(node + sizeof(int))) = ntuples;
334 memcpy(node+len, oids, ntuples*sizeof(unsigned int));
335 memcpy(node+len+ntuples*sizeof(unsigned int), endoffsets, ntuples*sizeof(unsigned short));
336 memcpy(node+len+ntuples*(sizeof(unsigned int)+sizeof(short)), arrayfields, top*sizeof(short));
338 /* Lock and insert into primary prefetch queue */
342 /* This function starts up the transaction runtime. */
/* option - NULL, or a mode string; only "master" is recognised (sets the
 *          local `master` flag).
 *
 * Visible steps: read the configuration with processConfigFile(), create the
 * three socket pools, start detached UDP and TCP listener threads, refresh
 * the live-host view with updateLiveHostsCommit(), and verify that all hosts
 * are live. One path calls dstmListen() directly on the calling thread.
 *
 * Returns 0 when the configuration cannot be processed (see the TODO below:
 * this is not distinguishable from other 0 returns). */
343 int dstmStartup(const char * option) {
344 pthread_t thread_Listen, udp_thread_Listen;
346 int master=option!=NULL && strcmp(option, "master")==0;
350 if (processConfigFile() != 0)
351 return 0; //TODO: return error value, cause main program to exit
358 printf("Trans stats is on\n");
365 //Initialize socket pool
366 transReadSockPool = createSockPool(transReadSockPool, DEFAULTSOCKPOOLSIZE);
367 transPrefetchSockPool = createSockPool(transPrefetchSockPool, DEFAULTSOCKPOOLSIZE);
368 transRequestSockPool = createSockPool(transRequestSockPool, DEFAULTSOCKPOOLSIZE);
/* Listener threads are created detached, so they are never joined. */
374 pthread_attr_init(&attr);
375 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
378 pthread_create(&udp_thread_Listen, &attr, udpListenBroadcast, (void*)udpfd);
381 pthread_create(&thread_Listen, &attr, dstmListen, (void*)fd);
385 updateLiveHostsCommit();
387 if(!allHostsLive()) {
388 printf("Not all hosts live. Exiting.\n");
394 dstmListen((void *)fd);
399 //TODO Use this later
/* Looks for `size` bytes of room in the chain of object stores starting at
 * `store`, walking the ->next links. The fit test compares the bytes already
 * used (top minus the start of the store minus the header) plus `size`
 * against the store's capacity.
 * NOTE(review): the pointer-to-unsigned-int casts truncate on platforms with
 * 64-bit pointers; uintptr_t or a char* difference would be safer. */
400 void *pCacheAlloc(objstr_t *store, unsigned int size) {
406 while(ptr->next != NULL) {
407 /* check if store is empty */
408 if(((unsigned int)ptr->top - (unsigned int)ptr - sizeof(objstr_t) + size) <= ptr->size) {
423 /* This function initiates the prefetch thread. A queue is shared
424 * between the main thread of execution and the prefetch thread to
425 * process the prefetch call. The call from the compiler populates the shared
426 * queue with prefetch requests while the prefetch thread processes the
427 * prefetch requests. */
/* Visible steps: allocate the prefetch statistics table, initialise the
 * recursive prefetch-cache mutex and the two plain mutexes, create the
 * prefetch lookup table, then start and detach the primary prefetch thread.
 * The function header and the error-return lines are not visible in this
 * excerpt. */
430 //Create and initialize prefetch cache structure
433 if((evalPrefetch = initPrefetchStats()) == NULL) {
434 printf("%s() Error allocating memory at %s, %d\n", __func__, __FILE__, __LINE__);
439 /* Initialize attributes for mutex */
440 pthread_mutexattr_init(&prefetchcache_mutex_attr);
441 pthread_mutexattr_settype(&prefetchcache_mutex_attr, PTHREAD_MUTEX_RECURSIVE_NP);
443 pthread_mutex_init(&prefetchcache_mutex, &prefetchcache_mutex_attr);
/* Fixed: the argument had been corrupted to a "not sign" character followed
 * by "ifymutex"; the intended operand is the address of notifymutex. */
444 pthread_mutex_init(&notifymutex, NULL);
445 pthread_mutex_init(&atomicObjLock, NULL);
447 //Create prefetch cache lookup table
448 if(prehashCreate(PHASH_SIZE, PLOADFACTOR)) {
453 //Initialize primary shared queue
455 //Initialize machine pile w/prefetch oids and offsets shared queue
458 //Create the primary prefetch thread
/* Two alternative thread entry points follow; presumably only one is
 * compiled in (the selecting preprocessor lines are not visible). */
462 retval=pthread_create(&tPrefetch, NULL, transPrefetchNew, NULL);
466 retval=pthread_create(&tPrefetch, NULL, transPrefetch, NULL);
469 pthread_detach(tPrefetch);
473 /* This function stops the threads spawned */
/* Requests cancellation of the primary prefetch thread and of every worker
 * in wthreads[]. pthread_cancel() only posts the request; nothing visible
 * here waits for the threads to finish. */
477 pthread_cancel(tPrefetch);
478 for(t = 0; t < NUM_THREADS; t++)
479 pthread_cancel(wthreads[t]);
485 /* This function inserts random wait delays on the order of microseconds
486 * (tv_nsec is 1000 + t%10000, i.e. 1000..10999 ns). Mostly used when transaction commits retry*/
493 req.tv_nsec = (long)(1000 + (t%10000)); //1-11 microsec
494 nanosleep(&req, NULL);
498 /* This function initializes things required in the transaction start*/
/* Creates a fresh per-thread object store (1048576 bytes = 1 MiB requested
 * from objstrCreate) and a fresh per-transaction lookup table. */
500 t_cache = objstrCreate(1048576);
501 t_chashCreate(CHASH_SIZE, CLOADFACTOR);
508 // Search for an address for a given oid
509 /*#define INLINE inline __attribute__((always_inline))
511 INLINE void * chashSearchI(chashtable_t *table, unsigned int key) {
512 //REMOVE HASH FUNCTION CALL TO MAKE SURE IT IS INLINED HERE
513 chashlistnode_t *node = &table->table[(key & table->mask)>>1];
516 if(node->key == key) {
520 } while(node != NULL);
528 /* This function finds the location of the objects involved in a transaction
529 * and returns the pointer to the object if found in a remote location */
/* Lookup order visible below:
 *   1. the per-transaction cache table (c_table / record->lookupTable);
 *   2. the local machine table (mhashSearch) - the object is copied into
 *      t_cache and registered with t_chashInsert;
 *   3. the prefetch cache (prehashSearch) - copied and registered likewise;
 *   4. the remote owner found by lhashSearch, fetched with getRemoteObj.
 * One path aborts the transaction: it drops the transaction hash, deletes
 * t_cache and long-jumps to aborttrans.
 *
 * Cache hits return the address just past the object header (&hdr[1]).
 *
 * NOTE(review): the function is marked pure although it allocates, inserts
 * into the cache and may longjmp; confirm the compiler is allowed to merge
 * or drop repeated calls. */
530 __attribute__((pure)) objheader_t *transRead(unsigned int oid) {
531 unsigned int machinenumber;
532 objheader_t *tmp, *objheader;
533 objheader_t *objcopy;
536 chashlistnode_t *node;
542 node= &c_table[(oid & c_mask)>>1];
544 if(node->key == oid) {
549 return &((objheader_t*)node->val)[1];
555 } while(node != NULL);
559 if((objheader = chashSearchI(record->lookupTable, oid)) != NULL) {
564 return &objheader[1];
573 //abort this transaction
574 //printf("ABORTING\n");
575 removetransactionhash();
576 objstrDelete(t_cache);
578 _longjmp(aborttrans,1);
583 if ((objheader = (objheader_t *) mhashSearch(oid)) != NULL) {
587 /* Look up in machine lookup table and copy into cache*/
588 GETSIZE(size, objheader);
589 size += sizeof(objheader_t);
590 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
591 memcpy(objcopy, objheader, size);
592 /* Insert into cache's lookup table */
594 t_chashInsert(OID(objheader), objcopy);
602 if((tmp = (objheader_t *) prehashSearch(oid)) != NULL) {
606 /* Look up in prefetch cache */
608 size+=sizeof(objheader_t);
609 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
610 memcpy(objcopy, tmp, size);
611 /* Insert into cache's lookup table */
612 t_chashInsert(OID(tmp), objcopy);
620 /* Get the object from the remote location */
621 if((machinenumber = lhashSearch(oid)) == 0) {
/* Fixed format string: the old "% %s,%dx" was malformed and passed an
 * unsigned int to %s. machinenumber is always 0 on this path, so the oid
 * that could not be located is printed instead. */
622 printf("Error: %s() No machine found for oid = %x %s,%d\n",__func__, oid, __FILE__, __LINE__);
625 objcopy = getRemoteObj(machinenumber, oid);
627 if(objcopy == NULL) {
628 printf("Error: Object not found in Remote location %s, %d\n", __FILE__, __LINE__);
644 /* This function finds the location of the objects involved in a transaction
645 * and returns the pointer to the object if found in a remote location */
/* Retry-path variant of transRead() with tracing enabled; getRemoteObj()
 * calls it after a suspected machine failure. It skips the per-transaction
 * cache probe and goes: local machine table, prefetch cache, remote owner.
 * The remote host is picked from locateObjHosts[2*mindex+flipBit]
 * (presumably the primary/backup pair of the owning host index).
 *
 * NOTE(review): the function is marked pure although it allocates, inserts
 * into the cache and may longjmp. */
646 __attribute__((pure)) objheader_t *transRead2(unsigned int oid) {
647 unsigned int machinenumber;
648 objheader_t *tmp, *objheader;
649 objheader_t *objcopy;
653 printf("%s-> Start, oid:%u\n", __func__, oid);
658 //abort this transaction
659 //printf("ABORTING\n");
660 removetransactionhash();
661 objstrDelete(t_cache);
663 _longjmp(aborttrans,1);
668 if ((objheader = (objheader_t *) mhashSearch(oid)) != NULL) {
670 printf("%s-> Grab from this machine\n", __func__);
675 /* Look up in machine lookup table and copy into cache*/
676 GETSIZE(size, objheader);
677 size += sizeof(objheader_t);
678 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
679 memcpy(objcopy, objheader, size);
680 /* Insert into cache's lookup table */
682 t_chashInsert(OID(objheader), objcopy);
/* Fixed: a stray ", TYPE(header)" fragment had been spliced in front of this
 * statement. */
690 if((tmp = (objheader_t *) prehashSearch(oid)) != NULL) {
694 /* Look up in prefetch cache */
696 size+=sizeof(objheader_t);
697 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
698 memcpy(objcopy, tmp, size);
699 /* Insert into cache's lookup table */
700 t_chashInsert(OID(tmp), objcopy);
708 /* Get the object from the remote location */
710 printf("%s-> Grab from remote machine\n", __func__);
713 //while(!liveHostsValid) {
715 /*if(!liveHostsValid){
718 unsigned int mindex = findHost(lhashSearch(oid));
719 machinenumber = locateObjHosts[2*mindex+flipBit];
721 printf("mindex:%d, oid:%d, machinenumber:%s\n", mindex, oid, midtoIPString(machinenumber));
723 if((machinenumber = lhashSearch(oid)) == 0) {
// Fixed format string: the old "% %s,%dx" was malformed and passed an
// unsigned int to %s; the oid that could not be located is printed instead.
724 printf("Error: %s() No machine found for oid = %x %s,%d\n",__func__, oid, __FILE__, __LINE__);
728 objcopy = getRemoteObj(machinenumber, oid);
730 if(objcopy == NULL) {
731 printf("Error: Object not found in Remote location %s, %d\n", __FILE__, __LINE__);
746 /* This function creates objects in the transaction record */
/* size - payload size in bytes, excluding the object header.
 *
 * Allocates header + payload from t_cache, assigns a new oid with
 * getNewOID(), registers the object in the transaction lookup table and
 * returns the address just past the header.
 * NOTE(review): the objstrAlloc() result is dereferenced by OID(tmp) with no
 * NULL check visible. */
747 objheader_t *transCreateObj(unsigned int size) {
748 objheader_t *tmp = (objheader_t *) objstrAlloc(&t_cache, (sizeof(objheader_t) + size));
749 OID(tmp) = getNewOID();
754 t_chashInsert(OID(tmp), tmp);
757 return &tmp[1]; //want space after object header
765 /* This function creates machine piles based on all machines involved in a
766 * transaction commit request */
/* Walks every bucket of the transaction lookup table (c_table, c_size bins)
 * and every node chained off it, classifies each object as new, local or
 * remote and as read or modified, and files it with pInsert() under the
 * machine(s) that must take part in the commit. Returns the head of the
 * resulting pile list.
 *
 * Two insertion schemes are visible: one files dirty/new objects under both
 * the primary and the backup machine of the oid, the other files each object
 * under a single machine (this host for new or locally owned objects).
 * Presumably they are selected by preprocessor lines not visible here.
 * Another definition of createPiles follows this one in the file. */
767 plistnode_t *createPiles() {
770 plistnode_t *pile = NULL;
771 unsigned int machinenum;
772 unsigned int destMachine[2];
773 objheader_t *headeraddr;
774 chashlistnode_t * ptr = c_table;
775 /* Represents number of bins in the chash table */
776 unsigned int size = c_size;
778 for(i = 0; i < size ; i++) {
779 chashlistnode_t * curr = &ptr[i];
780 /* Inner loop to traverse the linked list of the cache lookupTable */
781 while(curr != NULL) {
782 //if the first bin in hash table is empty
785 headeraddr=(objheader_t *) curr->val;
788 oid = OID(headeraddr);
790 printf("%s-> oid:%u, version:%d, status:%d, type:%d\n", __func__, OID(headeraddr), headeraddr->version, STATUS(headeraddr), TYPE(headeraddr));
792 if (STATUS(headeraddr) & NEW) { // new/local object
793 printf("%s-> new/local object\n", __func__);
795 else if ((mhashSearch(curr->key) != NULL)) { //local/nonnew
796 if(STATUS(headeraddr) & DIRTY) { // modified
797 printf("%s-> old/local/mod object\n", __func__);
800 printf("%s-> old/local/read object\n", __func__);
802 } else if ((machinenum = lhashSearch(curr->key)) != 0) { // remote/nonnew object
803 if(STATUS(headeraddr) & DIRTY) { //modified
804 printf("%s-> remote/local/mod object\n", __func__);
807 printf("%s-> remote/local/read object\n", __func__);
810 printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
813 unsigned int pmid = getPrimaryMachine(lhashSearch(oid));
814 unsigned int bmid = getBackupMachine(lhashSearch(oid));
815 printf("%s-> Primary Machine: [%s], ", __func__, midtoIPString(pmid));
816 printf("Backup Machine: [%s]\n", midtoIPString(bmid));
819 if(STATUS(headeraddr) & DIRTY || STATUS(headeraddr) & NEW) {
822 pile = pInsert(pile, headeraddr, getPrimaryMachine(lhashSearch(oid)), c_numelements);
/* The status is overwritten with DIRTY before the object is filed under the
 * backup machine, so any NEW bit is gone for that second insert. */
825 STATUS(headeraddr) = DIRTY;
827 pile = pInsert(pile, headeraddr, getBackupMachine(lhashSearch(oid)), c_numelements);
829 // Get machine location for object id (and whether local or not)
830 if (STATUS(headeraddr) & NEW || (mhashSearch(curr->key) != NULL)) {
831 machinenum = myIpAddr;
832 } else if ((machinenum = lhashSearch(curr->key)) == 0) {
833 printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
837 //Make machine groups
838 pile = pInsert(pile, headeraddr, machinenum, c_numelements);
846 /* This function creates machine piles based on all machines involved in a
847 * transaction commit request */
/* Second definition of createPiles, written for a lookup table made of
 * struct chashentry slots (no chained-list loop is visible). Each entry is
 * filed with pInsert() under this host when the object is new or locally
 * owned, otherwise under the machine returned by lhashSearch().
 * Presumably selected instead of the preceding definition by preprocessor
 * lines not visible in this excerpt. */
848 plistnode_t *createPiles() {
850 plistnode_t *pile = NULL;
851 unsigned int machinenum;
852 unsigned int destMachine[2];
853 objheader_t *headeraddr;
854 struct chashentry * ptr = c_table;
855 /* Represents number of bins in the chash table */
856 unsigned int size = c_size;
858 for(i = 0; i < size ; i++) {
859 struct chashentry * curr = & ptr[i];
860 /* Inner loop to traverse the linked list of the cache lookupTable */
861 // if the first bin in hash table is empty
864 headeraddr=(objheader_t *) curr->ptr;
866 //Get machine location for object id (and whether local or not)
867 if (STATUS(headeraddr) & NEW || (mhashSearch(curr->key) != NULL)) {
868 machinenum = myIpAddr;
869 } else if ((machinenum = lhashSearch(curr->key)) == 0) {
870 printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
874 //Make machine groups
875 pile = pInsert(pile, headeraddr, machinenum, c_numelements);
881 /* This function initiates the transaction commit process
882 * Spawns threads for each of the new connections with Participants
883 * and creates new piles by calling the createPiles(),
884 * Sends a transrequest() to each remote machines for objects found remotely
885 * and calls handleLocalReq() to process objects found locally */
/* Outline of the visible flow (the function header and many braces are not
 * visible in this excerpt):
 *   1. claim a liveTransactions[] slot and store a new transaction id in it;
 *   2. build and sort the per-machine piles;
 *   3. per pile, either send a TRANS_REQUEST (fixed header, machine list,
 *      read set, modified objects) over a pooled socket, or call
 *      handleLocalReq() when the pile belongs to this host;
 *   4. read one control byte per remote participant; on TRANS_DISAGREE also
 *      read fresh object copies into the prefetch cache;
 *   5. combine the replies with decideResponse() and send the final response
 *      to each remote socket, or run doLocalProcess() for the local pile;
 *   6. return the sockets to the pool and repeat while treplyretry is set.
 * Every exit path visible here clears the claimed liveTransactions[] slot.
 *
 * NOTE(review): transIDIndex++ on a shared global is not atomic; concurrent
 * commits could claim the same slot - confirm external locking.
 * NOTE(review): the calloc() results for listmid and tosend are used with no
 * NULL check visible. */
887 unsigned int tot_bytes_mod, *listmid;
888 plistnode_t *pile, *pile_ptr;
890 char treplyretry; /* keeps track of the common response that needs to be sent */
892 trans_commit_data_t transinfo; /* keeps track of objs locked during transaction */
895 int tmpTransIndex = (transIDIndex++)%25;
896 liveTransactions[tmpTransIndex] = getNewTransID();
899 printf("%s-> Start, transID:%d\n", __func__, liveTransactions[tmpTransIndex]);
904 //abort this transaction
906 * printf("ABORTING TRANSACTION AT COMMIT\n");
908 removetransactionhash();
909 objstrDelete(t_cache);
912 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
915 liveTransactions[tmpTransIndex] = 0;
926 /* Look through all the objects in the transaction record and make piles
927 * for each machine involved in the transaction*/
929 pile_ptr = pile = createPiles();
930 pile_ptr = pile = sortPiles(pile);
935 /* Create the packet to be sent in TRANS_REQUEST */
937 /* Count the number of participants */
939 pilecount = pCount(pile);
941 /* Create a list of machine ids(Participants) involved in transaction */
942 listmid = calloc(pilecount, sizeof(unsigned int));
943 pListMid(pile, listmid);
945 /* Create a socket and getReplyCtrl array, initialize */
/* Both arrays are variable-length and sized by the participant count; a
 * socklist entry of 0 means "no socket taken for this pile". */
946 int socklist[pilecount];
948 for(loopcount = 0 ; loopcount < pilecount; loopcount++)
949 socklist[loopcount] = 0;
950 char getReplyCtrl[pilecount];
951 for(loopcount = 0 ; loopcount < pilecount; loopcount++)
952 getReplyCtrl[loopcount] = 0;
954 /* Process each machine pile */
956 int localReqsock = -1;
957 trans_req_data_t *tosend;
958 tosend = calloc(pilecount, sizeof(trans_req_data_t));
959 while(pile != NULL) {
961 printf("%s-> New pile:[%s],", __func__, midtoIPString(pile->mid));
962 printf(" myIp:[%s]\n", midtoIPString(myIpAddr));
964 tosend[sockindex].f.control = TRANS_REQUEST;
965 tosend[sockindex].f.mcount = pilecount;
966 tosend[sockindex].f.numread = pile->numread;
967 tosend[sockindex].f.nummod = pile->nummod;
968 tosend[sockindex].f.numcreated = pile->numcreated;
970 printf("%s-> numread:%d, nummod:%d, numcreated:%d\n", __func__, pile->numread, pile->nummod, pile->numcreated);
972 tosend[sockindex].f.sum_bytes = pile->sum_bytes;
973 tosend[sockindex].listmid = listmid;
974 tosend[sockindex].objread = pile->objread;
975 tosend[sockindex].oidmod = pile->oidmod;
976 tosend[sockindex].oidcreated = pile->oidcreated;
978 if(pile->mid != myIpAddr) {
980 if((sd = getSockWithLock(transRequestSockPool, pile->mid)) < 0) {
982 if((sd = getSock2WithLock(transRequestSockPool, pile->mid)) < 0) {
984 printf("\ntransRequest(): socket create error\n");
988 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
991 liveTransactions[tmpTransIndex] = 0;
995 socklist[sockindex] = sd;
996 /* Send bytes of data with TRANS_REQUEST control message */
997 send_data(sd, &(tosend[sockindex].f), sizeof(fixed_data_t));
999 printf("send_data: remote machine dead, line:%d\n", __LINE__);
1003 /* Send list of machines involved in the transaction */
1005 int size=sizeof(unsigned int)*(tosend[sockindex].f.mcount);
1006 send_data(sd, tosend[sockindex].listmid, size);
1009 /* Send oids and version number tuples for objects that are read */
1011 int size=(sizeof(unsigned int)+sizeof(unsigned short))*(tosend[sockindex].f.numread);
1012 send_data(sd, tosend[sockindex].objread, size);
1015 /* Send objects that are modified */
1017 if((modptr = calloc(1, tosend[sockindex].f.sum_bytes)) == NULL) {
1018 printf("Calloc error for modified objects %s, %d\n", __FILE__, __LINE__);
1022 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1025 liveTransactions[tmpTransIndex] = 0;
1031 for(i = 0; i < tosend[sockindex].f.nummod ; i++) {
1033 objheader_t *headeraddr;
1034 if((headeraddr = t_chashSearch(tosend[sockindex].oidmod[i])) == NULL) {
1035 printf("%s() Error: No such oid %s, %d\n", __func__, __FILE__, __LINE__);
1040 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1043 liveTransactions[tmpTransIndex] = 0;
1047 GETSIZE(size,headeraddr);
1048 size+=sizeof(objheader_t);
1049 memcpy(modptr+offset, headeraddr, size);
1052 send_data(sd, modptr, tosend[sockindex].f.sum_bytes);
1054 } else { //handle request locally
1055 localReqsock = sockindex;
1056 handleLocalReq(&tosend[sockindex], &transinfo, &getReplyCtrl[sockindex]);
1060 } //end of pile processing
1062 /* Recv Ctrl msgs from all machines */
1064 printf("%s-> Finished sending transaction read/mod objects\n",__func__);
1067 for(i = 0; i < pilecount; i++) {
1068 printf("i:%d\n", i);
1069 if(i == localReqsock)
1071 int sd = socklist[i];
1074 recv_data(sd, &control, sizeof(char));
1076 printf("recv_data: remote machine dead, timeoutFlag:%d, timeoutFlag:%d, line:%d\n", timeoutFlag, timeoutFlag, __LINE__);
1080 //Update common data structure with new ctrl msg
1081 getReplyCtrl[i] = control;
1082 /* Recv Objects if participant sends TRANS_DISAGREE */
1083 //printf("getReplyCtrl[%d] = %d\n", i, (int)getReplyCtrl[i]);
1085 if(control == TRANS_DISAGREE) {
1087 recv_data(sd, &length, sizeof(int));
/* The prefetch cache lock is held only around the allocation; the receive
 * into the new buffer happens after it is released. */
1089 pthread_mutex_lock(&prefetchcache_mutex);
1090 if ((newAddr = prefetchobjstrAlloc((unsigned int)length)) == NULL) {
1091 printf("Error: %s() objstrAlloc error for copying into prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
1094 pthread_mutex_unlock(&prefetchcache_mutex);
1096 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1099 liveTransactions[tmpTransIndex] = 0;
1103 pthread_mutex_unlock(&prefetchcache_mutex);
1104 recv_data(sd, newAddr, length);
/* Walk the received buffer object by object (header + payload each) and
 * point the prefetch hash table at the fresh copies. */
1106 while(length != 0) {
1107 unsigned int oidToPrefetch;
1108 objheader_t * header;
1109 header = (objheader_t *)(((char *)newAddr) + offset);
1110 oidToPrefetch = OID(header);
1113 GETSIZE(size, header);
1114 size += sizeof(objheader_t);
1115 //make an entry in prefetch hash table
1117 if((oldptr = prehashSearch(oidToPrefetch)) != NULL) {
1118 prehashRemove(oidToPrefetch);
1119 prehashInsert(oidToPrefetch, header);
1121 prehashInsert(oidToPrefetch, header);
1123 length = length - size;
1126 } //end of receiving objs
1131 printf("%s-> Decide final response now\n", __func__);
1133 /* Decide the final response */
1134 if((finalResponse = decideResponse(getReplyCtrl, &treplyretry, pilecount)) == 0) {
1135 printf("Error: %s() in updating prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
1139 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1142 liveTransactions[tmpTransIndex] = 0;
1147 printf("%s-> Final Response: %d\n", __func__, (int)finalResponse);
1149 /* Send responses to all machines */
1150 for(i = 0; i < pilecount; i++) {
1151 int sd = socklist[i];
1154 if(finalResponse == TRANS_COMMIT) {
1156 /* Update prefetch cache */
1157 if((retval = updatePrefetchCache(&(tosend[i]))) != 0) {
1158 printf("Error: %s() in updating prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
1162 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1165 liveTransactions[tmpTransIndex] = 0;
1171 /* Invalidate objects in other machine cache */
1172 if(tosend[i].f.nummod > 0) {
1173 if((retval = invalidateObj(&(tosend[i]))) != 0) {
1174 printf("Error: %s() in invalidating Objects %s, %d\n", __func__, __FILE__, __LINE__);
1178 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1181 liveTransactions[tmpTransIndex] = 0;
1187 removetransaction(tosend[i].oidmod,tosend[i].f.nummod);
1188 removethisreadtransaction(tosend[i].objread, tosend[i].f.numread);
1192 else if (!treplyretry) {
1193 removethistransaction(tosend[i].oidmod,tosend[i].f.nummod);
1194 removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
1198 send_data(sd, &finalResponse, sizeof(char));
1200 /* Complete local processing */
1201 doLocalProcess(finalResponse, &(tosend[i]), &transinfo);
1203 if(finalResponse == TRANS_COMMIT) {
1204 removetransaction(tosend[i].oidmod,tosend[i].f.nummod);
1205 removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
1206 } else if (!treplyretry) {
1207 removethistransaction(tosend[i].oidmod,tosend[i].f.nummod);
1208 removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
1216 printf("%s-> Free sockets\n", __func__);
1218 for(i = 0; i < pilecount; i++) {
1219 if(socklist[i] != 0) {
1220 freeSockWithLock(transRequestSockPool, listmid[i], socklist[i]);
1224 /* Free resources */
1229 /* wait a random amount of time before retrying to commit transaction*/
1236 /* Retry trans commit procedure during soft_abort case */
1237 } while (treplyretry);
1239 if(finalResponse == TRANS_ABORT) {
1240 //printf("Aborting trans\n");
1244 /* Free Resources */
1245 objstrDelete(t_cache);
1248 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1251 liveTransactions[tmpTransIndex] = 0;
1254 } else if(finalResponse == TRANS_COMMIT) {
1258 /* Free Resources */
1259 objstrDelete(t_cache);
1262 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1265 liveTransactions[tmpTransIndex] = 0;
1269 //TODO Add other cases
1270 printf("Error: in %s() THIS SHOULD NOT HAPPEN.....EXIT PROGRAM\n", __func__);
1272 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1275 liveTransactions[tmpTransIndex] = 0;
1280 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1283 liveTransactions[tmpTransIndex] = 0;
1288 /* This function handles the local objects involved in a transaction
1289 * committing process. It also makes a decision if this local machine
1290 * sends AGREE or DISAGREE or SOFT_ABORT to coordinator */
/* tdata        - request for the local pile (read set, modified oids, counts)
 * transinfo    - filled in with the locked / not-found oid arrays and their
 *                counts; modptr is set to NULL
 * getReplyCtrl - receives this machine's vote
 *
 * The oidlocked and oidnotfound arrays are allocated here and handed to
 * transinfo; doLocalProcess() frees them.
 * NOTE(review): neither calloc() result is checked in the visible lines.
 * NOTE(review): in the modified-object branch `oid` is passed to
 * commitCountForObjMod(), but its assignment in that branch is not visible
 * in this excerpt - confirm it is set from tdata->oidmod[i-numread]. */
1291 void handleLocalReq(trans_req_data_t *tdata, trans_commit_data_t *transinfo, char *getReplyCtrl) {
1292 unsigned int *oidnotfound = NULL, *oidlocked = NULL;
1293 int numoidnotfound = 0, numoidlocked = 0;
1294 int v_nomatch = 0, v_matchlock = 0, v_matchnolock = 0;
1297 unsigned short version;
1299 /* Counters and arrays to formulate decision on control message to be sent */
1300 oidnotfound = (unsigned int *) calloc((tdata->f.numread + tdata->f.nummod), sizeof(unsigned int));
1301 oidlocked = (unsigned int *) calloc((tdata->f.numread + tdata->f.nummod +1), sizeof(unsigned int)); // calloc one additional element for
1302 //setting a divider between read and write locks
1303 numread = tdata->f.numread;
1304 /* Process each oid in the machine pile/ group per thread */
1305 for (i = 0; i < tdata->f.numread + tdata->f.nummod; i++) {
1306 if (i < tdata->f.numread) {
/* objread is a packed array of (unsigned int oid, unsigned short version)
 * records, so it is addressed byte-wise. */
1307 int incr = sizeof(unsigned int) + sizeof(unsigned short); // Offset that points to next position in the objread array
1309 oid = *((unsigned int *)(((char *)tdata->objread) + incr));
1310 version = *((unsigned short *)(((char *)tdata->objread) + incr + sizeof(unsigned int)));
1311 commitCountForObjRead(getReplyCtrl, oidnotfound, oidlocked, &numoidnotfound, &numoidlocked, &v_nomatch, &v_matchlock, &v_matchnolock, oid, version);
1312 } else { // Objects Modified
/* On the first modified object, store the divider value -1 so read locks
 * and write locks can be told apart in oidlocked[]. */
1313 if(i == tdata->f.numread) {
1314 oidlocked[numoidlocked++] = -1;
1317 objheader_t *headptr;
1318 headptr = (objheader_t *) t_chashSearch(tdata->oidmod[i-numread]);
1319 if (headptr == NULL) {
1320 printf("Error: handleLocalReq() returning NULL, no such oid %s, %d\n", __FILE__, __LINE__);
1324 version = headptr->version;
1325 commitCountForObjMod(getReplyCtrl, oidnotfound, oidlocked, &numoidnotfound, &numoidlocked, &v_nomatch, &v_matchlock, &v_matchnolock, oid, version);
1329 /* Fill out the trans_commit_data_t data structure. This is required for a trans commit process
1330 * if Participant receives a TRANS_COMMIT */
1331 transinfo->objlocked = oidlocked;
1332 transinfo->objnotfound = oidnotfound;
1333 transinfo->modptr = NULL;
1334 transinfo->numlocked = numoidlocked;
1335 transinfo->numnotfound = numoidnotfound;
1337 /* Condition to send TRANS_AGREE */
1338 if(v_matchnolock == tdata->f.numread + tdata->f.nummod) {
1339 *getReplyCtrl = TRANS_AGREE;
1341 /* Condition to send TRANS_SOFT_ABORT */
1342 if((v_matchlock > 0 && v_nomatch == 0) || (numoidnotfound > 0 && v_nomatch == 0)) {
1343 *getReplyCtrl = TRANS_SOFT_ABORT;
/* Applies the coordinator's final decision to the objects held on this
 * machine.
 *
 * finalResponse - TRANS_ABORT runs transAbortProcess(); TRANS_COMMIT
 *                 invalidates remote copies when objects were modified and
 *                 then runs transComProcess(); anything else is reported as
 *                 "No Decision"
 * tdata         - the local pile's request data
 * transinfo     - lock bookkeeping produced by handleLocalReq(); its
 *                 objlocked and objnotfound arrays are freed here */
1347 void doLocalProcess(char finalResponse, trans_req_data_t *tdata, trans_commit_data_t *transinfo) {
1348 if(finalResponse == TRANS_ABORT) {
1349 if(transAbortProcess(transinfo) != 0) {
1350 printf("Error in transAbortProcess() %s,%d\n", __FILE__, __LINE__);
1354 } else if(finalResponse == TRANS_COMMIT) {
1356 /* Invalidate objects in other machine cache */
1357 if(tdata->f.nummod > 0) {
1359 if((retval = invalidateObj(tdata)) != 0) {
1360 printf("Error: %s() in invalidating Objects %s, %d\n", __func__, __FILE__, __LINE__);
1365 if(transComProcess(tdata, transinfo) != 0) {
1366 printf("Error in transComProcess() %s,%d\n", __FILE__, __LINE__);
1371 printf("ERROR...No Decision\n");
/* NOTE(review): the pointers are not reset after free(); confirm no caller
 * reuses transinfo afterwards. */
1375 if (transinfo->objlocked != NULL) {
1376 free(transinfo->objlocked);
1378 if (transinfo->objnotfound != NULL) {
1379 free(transinfo->objnotfound);
/* getReplyCtrl - one control byte per participant
 * treplyretry  - out-parameter; the commit loop repeats while it is non-zero
 * pilecount    - number of participants
 *
 * Tallies agree / disagree / soft-abort votes. Any disagree takes the
 * disagree path; unanimous agreement returns TRANS_COMMIT; the remaining
 * case is the soft-abort path, which the caller follows with a retry.
 * Unknown control values are counted as a disagree. The caller treats a
 * return value of 0 as an error. */
1383 /* This function decides the response that needs to be sent to
1384 * all Participant machines after the TRANS_REQUEST protocol */
1385 char decideResponse(char *getReplyCtrl, char *treplyretry, int pilecount) {
1386 int i, transagree = 0, transdisagree = 0, transsoftabort = 0; /* Counters to formulate decision of what
1388 for (i = 0 ; i < pilecount; i++) {
1390 control = getReplyCtrl[i];
1394 printf("%s-> Participant sent unknown message, i:%d, Control: %d\n", __func__, i, (int)control);
1397 /* treat as disagree, pass thru */
1398 case TRANS_DISAGREE:
1401 printf("%s-> Participant sent TRANS_DISAGREE, i:%d, Control: %d\n", __func__, i, (int)control);
1408 printf("%s-> Participant sent TRANS_AGREE, i:%d, Control: %d\n", __func__, i, (int)control);
1412 case TRANS_SOFT_ABORT:
1415 printf("%s-> Participant sent TRANS_SOFT_ABORT, i:%d, Control: %d\n", __func__, i, (int)control);
1421 if(transdisagree > 0) {
1426 /* clear objects from prefetch cache */
1429 } else if(transagree == pilecount) {
1432 return TRANS_COMMIT;
1434 /* Send Abort in soft abort case followed by retry committing transaction again*/
/* mnum - machine id of the host to ask
 * oid  - object id to fetch
 *
 * Sends a READ_REQUEST (one control byte followed by the oid) on a socket
 * from transReadSockPool and reads a one-byte reply. On OBJECT_FOUND it
 * reads the size and the object into t_cache and registers it in the
 * transaction lookup table. When a machine failure is detected it restores
 * the duplication state and retries through transRead2(), stepping the
 * result back by one header so the header is included in what is returned.
 *
 * NOTE(review): the descriptor returned by getSock2() is used without a
 * failure check. */
1441 /* This function opens a connection, places an object read request to
1442 * the remote machine, reads the control message and object if
1443 * available and copies the object and its header to the local
1446 void *getRemoteObj(unsigned int mnum, unsigned int oid) {
1448 struct sockaddr_in serv_addr;
1452 void *objcopy = NULL;
1454 int sd = getSock2(transReadSockPool, mnum);
1455 char readrequest[sizeof(char)+sizeof(unsigned int)];
1456 readrequest[0] = READ_REQUEST;
// Copy the oid byte-wise: &readrequest[1] is not suitably aligned for an
// unsigned int store, so writing through a cast pointer is undefined
// behaviour. The bytes placed on the wire are unchanged.
1457 memcpy(&readrequest[1], &oid, sizeof(oid));
1458 send_data(sd, readrequest, sizeof(readrequest));
1460 /* Read response from the Participant */
1461 recv_data(sd, &control, sizeof(char));
1463 if (control==OBJECT_NOT_FOUND) {
1465 } else if(control==OBJECT_FOUND) {
1466 /* Read object if found into local cache */
1467 recv_data(sd, &size, sizeof(int));
1468 objcopy = objstrAlloc(&t_cache, size);
1469 recv_data(sd, objcopy, size);
1471 /* Insert into cache's lookup table */
1472 t_chashInsert(oid, objcopy);
1474 totalObjSize += size;
1479 if( detectMachineFailure(mnum) ) { //check for timeouts
1480 printf("looking for oid:%d\n", oid);
1481 restoreDuplicationState(mnum); // suspect machine failure, restore state
1484 temp = transRead2(oid); // retry transRead
1486 temp -= 1; // return object w/ objheader
1488 return (void *)temp;
// Checks the thread-local timeoutFlag and, when it equals 1, logs a suspected
// failure of machine mid. The code that sets or clears the flag and the
// function's return statements are not visible in this excerpt.
1494 int detectMachineFailure(unsigned int mid) {
1495 if(timeoutFlag == 1) {
1497 printf("%s-> Suspect machine failure: [%s]\n", __func__, midtoIPString(mid));
// Reacts to a suspected failure of deadHost.
// If this node is the leader (leader == myIpAddr) it works under
// leaderFixing_mutex and, once liveHosts marks deadHost as dead, calls
// duplicateLostObjects(deadHost) followed by updateLiveHostsCommit().
// Otherwise it sends REMOTE_RESTORE_DUPLICATED_STATE plus deadHost to the
// leader over a transRequestSockPool socket and reads a one-byte reply.
// NOTE(review): findHost() results index liveHosts without a range check;
// processConfigFile tests findHost's result against -1, so -1 looks possible.
1506 void restoreDuplicationState(unsigned int deadHost) {
1510 if(!liveHosts[findHost(deadHost)]) {
1514 if(deadHost == leader)
1518 printf("%s-> leader?:%s, me?:%d\n", __func__, midtoIPString(leader), (myIpAddr == leader));
1521 if(leader == myIpAddr) {
1522 pthread_mutex_lock(&leaderFixing_mutex);
1525 pthread_mutex_unlock(&leaderFixing_mutex);
1529 if(!liveHosts[findHost(deadHost)]) { //confirmed dead
1530 duplicateLostObjects(deadHost);
1532 if(updateLiveHostsCommit() != 0) {
1533 printf("error updateLiveHostsCommit()\n");
1536 pthread_mutex_lock(&leaderFixing_mutex);
1538 pthread_mutex_unlock(&leaderFixing_mutex);
1541 pthread_mutex_unlock(&leaderFixing_mutex);
1543 //while(leaderFixing);
// Non-leader path: forward the request to the leader.
1548 if((sd = getSock2WithLock(transRequestSockPool, leader)) < 0) {
1549 printf("restoreDuplicationState(): socket create error\n");
1552 ctrl = REMOTE_RESTORE_DUPLICATED_STATE;
1553 send_data(sd, &ctrl, sizeof(char));
1554 send_data(sd, &deadHost, sizeof(unsigned int));
// ctrl is reused as the buffer for the leader's one-byte reply.
1555 recv_data(sd, &ctrl, sizeof(char));
1561 /* Commit info for objects modified */
// Validates one modified object (oid, version) against the main object store.
// getReplyCtrl:   set to TRANS_DISAGREE when the stored version differs.
// oidnotfound /
// numoidnotfound: array and running count of oids missing from mhashSearch.
// oidlocked /
// numoidlocked:   array and running count of oids whose lock was taken here.
// v_nomatch, v_matchlock, v_matchnolock: counters whose updates are not on any
//                 line visible in this excerpt.
// NOTE(review): neither array is bounds-checked here; the caller presumably
// sizes them for the whole transaction -- confirm.
1562 void commitCountForObjMod(char *getReplyCtrl, unsigned int *oidnotfound, unsigned int *oidlocked, int *numoidnotfound,
1563 int *numoidlocked, int *v_nomatch, int *v_matchlock, int *v_matchnolock, unsigned int oid, unsigned short version) {
1565 /* Check if object is still present in the machine since the beginning of TRANS_REQUEST */
1566 /* Save the oids not found and number of oids not found for later use */
1567 if ((mobj = mhashSearch(oid)) == NULL) { /* Obj not found */
1568 /* Save the oids not found and number of oids not found for later use */
1569 oidnotfound[*numoidnotfound] = oid;
1570 (*numoidnotfound)++;
1571 } else { /* If Obj found in machine (i.e. has not moved) */
1572 /* Check if Obj is locked by any previous transaction */
1573 if (write_trylock(STATUSPTR(mobj))) { // Can acquire write lock
1574 if (version == ((objheader_t *)mobj)->version) { /* match versions */
1576 //Keep track of what is locked
1577 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1578 } else { /* If versions don't match ...HARD ABORT */
1580 /* Send TRANS_DISAGREE to Coordinator */
1581 *getReplyCtrl = TRANS_DISAGREE;
// The write lock was acquired even though the version mismatched, so the oid
// is still recorded in oidlocked.
1583 //Keep track of what is locked
1584 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1585 //printf("%s() oid = %d, type = %d\t", __func__, OID(mobj), TYPE((objheader_t *)mobj));
1588 } else { //A lock is acquired some place else
// Lock not obtained: nothing is added to oidlocked on the visible lines.
1589 if (version == ((objheader_t *)mobj)->version) { /* Check if versions match */
1591 } else { /* If versions don't match ...HARD ABORT */
1593 /* Send TRANS_DISAGREE to Coordinator */
1594 *getReplyCtrl = TRANS_DISAGREE;
1595 //printf("%s() oid = %d, type = %d\t", __func__, OID(mobj), TYPE((objheader_t *)mobj));
1602 /* Commit info for objects read */
// Read-set counterpart of commitCountForObjMod: same parameters and the same
// not-found / version-mismatch handling, but it takes a read lock
// (read_trylock) instead of a write lock.
// v_nomatch, v_matchlock, v_matchnolock: updates are not on any line visible in
// this excerpt.
1603 void commitCountForObjRead(char *getReplyCtrl, unsigned int *oidnotfound, unsigned int *oidlocked, int *numoidnotfound,
1604 int *numoidlocked, int *v_nomatch, int *v_matchlock, int *v_matchnolock, unsigned int oid, unsigned short version) {
1606 /* Check if object is still present in the machine since the beginning of TRANS_REQUEST */
1607 /* Save the oids not found and number of oids not found for later use */
1608 if ((mobj = mhashSearch(oid)) == NULL) { /* Obj not found */
1609 /* Save the oids not found and number of oids not found for later use */
1610 oidnotfound[*numoidnotfound] = oid;
1611 (*numoidnotfound)++;
1612 } else { /* If Obj found in machine (i.e. has not moved) */
1613 /* Check if Obj is locked by any previous transaction */
1614 if (read_trylock(STATUSPTR(mobj))) { // Can further acquire read locks
1615 if (version == ((objheader_t *)mobj)->version) { /* If locked then match versions */
1617 //Keep track of what is locked
1618 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1619 } else { /* If versions don't match ...HARD ABORT */
1621 /* Send TRANS_DISAGREE to Coordinator */
1622 *getReplyCtrl = TRANS_DISAGREE;
// The read lock was acquired even though the version mismatched, so the oid
// is still recorded in oidlocked.
1623 //Keep track of what is locked
1624 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1625 //printf("%s() oid = %d, type = %d\t", __func__, OID(mobj), TYPE((objheader_t *)mobj));
1628 } else { //Has reached max number of readers or some other transaction
1629 //has acquired a lock on this object
1630 if (version == ((objheader_t *)mobj)->version) { /* Check if versions match */
1632 } else { /* If versions don't match ...HARD ABORT */
1634 /* Send TRANS_DISAGREE to Coordinator */
1635 *getReplyCtrl = TRANS_DISAGREE;
1636 //printf("%s() oid = %d, type = %d\t", __func__, OID(mobj), TYPE((objheader_t *)mobj));
1643 /* This function completes the ABORT process if the transaction is aborting */
// Walks transinfo->objlocked (transinfo->numlocked entries) and releases the
// lock held on each object found through mhashSearch.
// An entry equal to -1 is treated specially; presumably it separates
// read-locked oids from write-locked ones by switching useWriteUnlock -- the
// body of that branch is not visible here, confirm.
// The return value is not visible in this excerpt.
1644 int transAbortProcess(trans_commit_data_t *transinfo) {
1646 unsigned int *objlocked;
1649 numlocked = transinfo->numlocked;
1650 objlocked = transinfo->objlocked;
1652 int useWriteUnlock = 0;
1653 for (i = 0; i < numlocked; i++) {
// objlocked[i] is unsigned, so -1 is converted and this matches 0xFFFFFFFF.
1654 if(objlocked[i] == -1) {
1658 if((header = mhashSearch(objlocked[i])) == NULL) {
1659 printf("mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1662 if(!useWriteUnlock) {
1663 read_unlock(STATUSPTR(header));
1665 write_unlock(STATUSPTR(header));
1672 /*This function completes the COMMIT process if the transaction is committing*/
// tdata:     supplies the modified oids (oidmod, f.nummod) and the created
//            oids (oidcreated, f.numcreated).
// transinfo: supplies the oids locked during the request phase.
// Three phases are visible:
//   1. copy each modified object from the transaction cache into the main
//      object store, bump its version and notify waiters;
//   2. allocate each created object in mainobjstore and register it in the
//      mhash / lhash tables;
//   3. release the locks listed in transinfo->objlocked.
// The return value is not visible in this excerpt.
1673 int transComProcess(trans_req_data_t *tdata, trans_commit_data_t *transinfo) {
1674 objheader_t *header, *tcptr;
1675 int i, nummod, tmpsize, numcreated, numlocked;
1676 unsigned int *oidmod, *oidcreated, *oidlocked;
1679 printf("%s-> Entering transComProcess, trans.c\n", __func__);
1682 nummod = tdata->f.nummod;
1683 oidmod = tdata->oidmod;
1684 numcreated = tdata->f.numcreated;
1685 oidcreated = tdata->oidcreated;
1686 numlocked = transinfo->numlocked;
1687 oidlocked = transinfo->objlocked;
1691 printf("%s-> nummod: %d, numcreated: %d, numlocked: %d\n", __func__, nummod, numcreated, numlocked);
// Phase 1: modified objects.
1694 for (i = 0; i < nummod; i++) {
1695 if((header = (objheader_t *) mhashSearch(oidmod[i])) == NULL) {
1696 printf("Error: transComProcess() mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1699 /* Copy from transaction cache -> main object store */
1700 if ((tcptr = ((objheader_t *) t_chashSearch(oidmod[i]))) == NULL) {
1701 printf("Error: transComProcess() chashSearch returned NULL at %s, %d\n", __FILE__, __LINE__);
1704 GETSIZE(tmpsize, header);
1705 char *tmptcptr = (char *) tcptr;
// dst/src point just past the object headers. The two cached fields are
// copied individually and the remainder of the payload (everything after the
// leading struct ___Object___) is copied with memcpy.
1707 struct ___Object___ *dst=(struct ___Object___*)((char*)header+sizeof(objheader_t));
1708 struct ___Object___ *src=(struct ___Object___*)((char*)tmptcptr+sizeof(objheader_t));
1709 dst->___cachedCode___=src->___cachedCode___;
1710 dst->___cachedHash___=src->___cachedHash___;
1712 memcpy(&dst[1], &src[1], tmpsize-sizeof(struct ___Object___));
1715 header->version += 1;
1716 //printf("oid: %u, new header version: %d\n", oidmod[i], header->version);
1717 if(header->notifylist != NULL) {
1718 notifyAll(&header->notifylist, OID(header), header->version);
// Phase 2: created objects.
1721 /* If object is newly created inside transaction then commit it */
1722 for (i = 0; i < numcreated; i++) {
1723 if ((header = ((objheader_t *) t_chashSearch(oidcreated[i]))) == NULL) {
1724 printf("Error: transComProcess() chashSearch returned NULL for oid = %x at %s, %d\n", oidcreated[i], __FILE__, __LINE__);
1727 header->version += 1;
1728 //printf("oid: %u, new header version: %d\n", oidcreated[i], header->version);
// tmpsize becomes payload size plus header size: the whole record is copied.
1729 GETSIZE(tmpsize, header);
1730 tmpsize += sizeof(objheader_t);
// Only the allocation is done under mainobjstore_mutex; the copy and the hash
// inserts below happen after the unlock.
1731 pthread_mutex_lock(&mainobjstore_mutex);
1732 if ((ptrcreate = objstrAlloc(&mainobjstore, tmpsize)) == NULL) {
1733 printf("Error: transComProcess() failed objstrAlloc %s, %d\n", __FILE__, __LINE__);
1734 pthread_mutex_unlock(&mainobjstore_mutex);
1737 pthread_mutex_unlock(&mainobjstore_mutex);
1738 /* Initialize read and write locks */
// Locks are initialised on the cached copy first, so the memcpy carries the
// initialised lock state into the main store.
1739 initdsmlocks(STATUSPTR(header));
1740 memcpy(ptrcreate, header, tmpsize);
1741 mhashInsert(oidcreated[i], ptrcreate);
1742 lhashInsert(oidcreated[i], myIpAddr);
// Phase 3: release locks; same -1 sentinel scheme as transAbortProcess.
1744 /* Unlock locked objects */
1745 int useWriteUnlock = 0;
1746 for(i = 0; i < numlocked; i++) {
1747 if(oidlocked[i] == -1) {
1751 if((header = (objheader_t *) mhashSearch(oidlocked[i])) == NULL) {
1752 printf("mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1755 if(!useWriteUnlock) {
1756 read_unlock(STATUSPTR(header));
1758 write_unlock(STATUSPTR(header));
// Processes one prefetch request record (ptr) and builds the list of remote
// requests still needed.
// The record is decoded with the GET_* macros into: site id, tuple count, an
// oid per tuple, cumulative end offsets per tuple, and a flat array of field
// offsets.
// For every tuple the field chain is followed locally with lookupObject; the
// unresolved remainder is appended to a per-machine pile through insertPile.
// Presumably returns `head` (the pile list) -- the return statement is not
// visible in this excerpt.
1764 prefetchpile_t *foundLocal(char *ptr) {
1765 int siteid = *(GET_SITEID(ptr));
1766 int ntuples = *(GET_NTUPLES(ptr));
1767 unsigned int * oidarray = GET_PTR_OID(ptr);
1768 unsigned short * endoffsets = GET_PTR_EOFF(ptr, ntuples);
1769 short * arryfields = GET_PTR_ARRYFLD(ptr, ntuples);
1770 prefetchpile_t * head=NULL;
1774 for(i=0; i<ntuples; i++) {
// endoffsets is cumulative: tuple i owns arryfields[baseindex .. endindex-1].
1775 unsigned short baseindex=(i==0) ? 0 : endoffsets[i-1];
1776 unsigned short endindex=endoffsets[i];
1777 unsigned int oid=oidarray[i];
1782 //Look up fields locally
// lookupObject overwrites oid with the next oid in the chain on success.
1783 for(newbase=baseindex; newbase<endindex; newbase++) {
1784 if (!lookupObject(&oid, arryfields[newbase]))
1786 //Ended in a null pointer...
1790 //Entire prefetch is local
1791 if (newbase==endindex&&checkoid(oid)) {
1795 //Add to remote requests
// Only the offsets that were not resolved locally are forwarded.
1796 machinenum=lhashSearch(oid);
1797 insertPile(machinenum, oid, endindex-newbase, &arryfields[newbase], &head);
1802 /* handle dynamic prefetching */
1803 handleDynPrefetching(numLocal, ntuples, siteid);
// Looks oid up first in the main object store (mhashSearch) and then in the
// prefetch cache (prehashSearch). The values returned for each case are not
// visible in this excerpt.
1807 int checkoid(unsigned int oid) {
1808 objheader_t *header;
1809 if ((header=mhashSearch(oid))!=NULL) {
1812 } else if ((header=prehashSearch(oid))!=NULL) {
// Follows one reference out of the object *oid and stores the referenced oid
// back into *oid.
// oid:    in/out; object to start from, replaced by the oid read from it.
// offset: for arrays (TYPE >= NUMCLASSES) an element index; otherwise it is
//         added as a byte offset past the object header.
// The object is searched in the main store and then in the prefetch cache.
// Return values are not visible in this excerpt; foundLocal treats 0 as
// "could not continue locally".
1820 int lookupObject(unsigned int * oid, short offset) {
1821 objheader_t *header;
1822 if ((header=mhashSearch(*oid))!=NULL) {
1825 } else if ((header=prehashSearch(*oid))!=NULL) {
1832 if(TYPE(header) >= NUMCLASSES) {
// For array types classsize[] is used as the per-element size.
1833 int elementsize = classsize[TYPE(header)];
1834 struct ArrayObject *ao = (struct ArrayObject *) (((char *)header) + sizeof(objheader_t));
1835 int length = ao->___length___;
1836 /* Check if array out of bounds */
1837 if(offset < 0 || offset >= length) {
1838 //if yes treat the object as found
1842 (*oid) = *((unsigned int *)(((char *)ao) + sizeof(struct ArrayObject) + (elementsize*offset)));
1845 (*oid) = *((unsigned int *)(((char *)header) + sizeof(objheader_t) + offset));
1851 /* This function is called by the thread calling transPrefetch */
// Thread entry point (void *(*)(void *) signature); the argument t is not used
// on any line visible here.
// Takes a request from the prefetch queue (gettail), reduces it with
// foundLocal, sends one prefetch request per remote machine in the resulting
// pile list, then frees the pile with mcdealloc.
// Any enclosing loop and the return value are not visible in this excerpt.
1852 void *transPrefetch(void *t) {
1854 /* read from prefetch queue */
1855 void *node=gettail();
1856 /* Check if the tuples are found locally, if yes then reduce them further*/
1857 /* and group requests by remote machine ids by calling the makePreGroups() */
1858 prefetchpile_t *pilehead = foundLocal(node);
1860 if (pilehead!=NULL) {
1861 // Get sock from shared pool
1863 /* Send Prefetch Request */
1864 prefetchpile_t *ptr = pilehead;
1865 while(ptr != NULL) {
// NOTE(review): sd is not checked for failure on the visible lines.
1866 int sd = getSock2(transPrefetchSockPool, ptr->mid);
1867 sendPrefetchReq(ptr, sd);
1871 /* Release socket */
1872 // freeSock(transPrefetchSockPool, pilehead->mid, sd);
1874 /* Deallocated pilehead */
1875 mcdealloc(pilehead);
1877 // Deallocate the prefetch queue pile node
// Variant of sendPrefetchReq that serialises the whole request for one machine
// into a single buffer and sends it with one send_data call.
// First pass: compute the total size (1 control byte + one int, plus one
// record per object pile). Second pass: write the records.
// Each record carries an int, the oid, this node's address (myIpAddr) and
// numoffset shorts.
// The allocation of buf/buft and the writes of the leading int fields are not
// visible in this excerpt.
1882 void sendPrefetchReqnew(prefetchpile_t *mcpilenode, int sd) {
1885 int size=sizeof(char)+sizeof(int);
1886 for(tmp=mcpilenode->objpiles; tmp!=NULL; tmp=tmp->next) {
1887 size += sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
1892 *buf=TRANS_PREFETCH;
1895 for(tmp=mcpilenode->objpiles; tmp!=NULL; tmp=tmp->next) {
1896 int len = sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
1899 *((unsigned int *)buf)=tmp->oid;
1900 buf+=sizeof(unsigned int);
1901 *((unsigned int *)(buf)) = myIpAddr;
1902 buf+=sizeof(unsigned int);
1903 memcpy(buf, tmp->offset, tmp->numoffset*sizeof(short));
1904 buf+=tmp->numoffset*sizeof(short);
// buft presumably still points at the start of the buffer that buf walked
// through -- its definition is not visible here.
1907 send_data(sd, buft, size);
// Sends the prefetch request for one machine pile over socket sd as a sequence
// of messages: the TRANS_PREFETCH control byte, then one record per object
// pile, then a terminating int (endpair).
// Record layout as written below: numoffset (int), oid, myIpAddr, followed by
// numoffset shorts.
1911 void sendPrefetchReq(prefetchpile_t *mcpilenode, int sd) {
1916 /* Send TRANS_PREFETCH control message */
1917 control = TRANS_PREFETCH;
1918 send_data(sd, &control, sizeof(char));
1920 /* Send Oids and offsets in pairs */
1921 tmp = mcpilenode->objpiles;
1922 while(tmp != NULL) {
1923 len = sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
// Stack buffer sized per record (variable-length array).
1924 char oidnoffset[len];
1925 char *buf=oidnoffset;
1926 *((int*)buf) = tmp->numoffset;
1928 *((unsigned int *)buf) = tmp->oid;
1929 buf+=sizeof(unsigned int);
1930 *((unsigned int *)buf) = myIpAddr;
1931 buf += sizeof(unsigned int);
1932 memcpy(buf, tmp->offset, (tmp->numoffset)*sizeof(short));
1933 send_data(sd, oidnoffset, len);
1937 /* Send a special char -1 to represent the end of sending oids + offset pair to remote machine */
// The terminator is sent as sizeof(int) bytes, not as a single char.
1939 send_data(sd, &endpair, sizeof(int));
// Reads one prefetch reply from socket sd and, for OBJECT_FOUND, stores the
// object in the prefetch cache.
// Wire format as read below: an int length, then (length - sizeof(int)) bytes
// made of a control byte, the oid, and for OBJECT_FOUND the object bytes.
// Return values are not visible in this excerpt.
// NOTE(review): `size` comes from the peer and directly sizes a stack VLA; no
// validation is visible (a negative or very large value is not rejected here).
1944 int getPrefetchResponse(int sd) {
1945 int length = 0, size = 0;
1948 void *modptr, *oldptr;
1950 recv_data((int)sd, &length, sizeof(int));
1951 size = length - sizeof(int);
1952 char recvbuffer[size];
1954 recv_data((int)sd, recvbuffer, size);
1955 control = *((char *) recvbuffer);
1956 if(control == OBJECT_FOUND) {
1957 oid = *((unsigned int *)(recvbuffer + sizeof(char)));
// size now counts only the object bytes that follow the control byte and oid.
1958 size = size - (sizeof(char) + sizeof(unsigned int));
1959 pthread_mutex_lock(&prefetchcache_mutex);
1960 if ((modptr = prefetchobjstrAlloc(size)) == NULL) {
1961 printf("Error: objstrAlloc error for copying into prefetch cache %s, %d\n", __FILE__, __LINE__);
1962 pthread_mutex_unlock(&prefetchcache_mutex);
1965 pthread_mutex_unlock(&prefetchcache_mutex);
1966 memcpy(modptr, recvbuffer + sizeof(char) + sizeof(unsigned int), size);
1969 /* Insert the oid and its address into the prefetch hash lookup table */
1970 /* Do a version comparison if the oid exists */
1971 if((oldptr = prehashSearch(oid)) != NULL) {
1972 /* If older version then update with new object ptr */
// An equal version also replaces the cached entry (comparison is <=).
1973 if(((objheader_t *)oldptr)->version <= ((objheader_t *)modptr)->version) {
1975 prehashInsert(oid, modptr);
1977 } else { /* Else add the object ptr to hash table*/
1978 prehashInsert(oid, modptr);
1980 /* Lock the Prefetch Cache look up table*/
1981 pthread_mutex_lock(&pflookup.lock);
1982 /* Broadcast signal on prefetch cache condition variable */
1983 pthread_cond_broadcast(&pflookup.cond);
1984 /* Unlock the Prefetch Cache look up table*/
1985 pthread_mutex_unlock(&pflookup.lock);
1986 } else if(control == OBJECT_NOT_FOUND) {
1987 oid = *((unsigned int *)(recvbuffer + sizeof(char)));
1988 /* TODO: For each object not found query DHT for new location and retrieve the object */
1989 /* Throw an error */
1990 //printf("OBJECT %x NOT FOUND.... THIS SHOULD NOT HAPPEN...TERMINATE PROGRAM\n", oid);
1993 printf("Error: in decoding the control value %d, %s, %d\n",control, __FILE__, __LINE__);
// Returns the type (TYPE(objheader)) of the object identified by oid.
// Lookup order: main object store, then prefetch cache, then a READ_REQUEST to
// the machine returned by lhashSearch(oid).
// Two remote receive paths are visible: one stores the object in the prefetch
// cache (prefetchobjstrAlloc + prehashInsert) and one receives into a calloc'd
// buffer; presumably they are alternative builds selected by the preprocessor
// -- the selecting lines are not visible, confirm.
1999 unsigned short getObjType(unsigned int oid) {
2000 objheader_t *objheader;
// NOTE(review): numoffset and fieldoffset are not referenced on any visible
// line; `short fieldoffset[] ={}` (empty initializer) is not standard before C23.
2001 unsigned short numoffset[] ={0};
2002 short fieldoffset[] ={};
2004 if ((objheader = (objheader_t *) mhashSearch(oid)) == NULL) {
2006 if ((objheader = (objheader_t *) prehashSearch(oid)) == NULL) {
2008 unsigned int mid = lhashSearch(oid);
2009 int sd = getSock2(transReadSockPool, mid);
// Same request layout as getRemoteObj: control byte followed by the oid.
2010 char remotereadrequest[sizeof(char)+sizeof(unsigned int)];
2011 remotereadrequest[0] = READ_REQUEST;
2012 *((unsigned int *)(&remotereadrequest[1])) = oid;
2013 send_data(sd, remotereadrequest, sizeof(remotereadrequest));
2015 /* Read response from the Participant */
2017 recv_data(sd, &control, sizeof(char));
2019 if (control==OBJECT_NOT_FOUND) {
2020 printf("Error: in %s() THIS SHOULD NOT HAPPEN.....EXIT PROGRAM\n", __func__);
2024 /* Read object if found into local cache */
2026 recv_data(sd, &size, sizeof(int));
2028 pthread_mutex_lock(&prefetchcache_mutex);
2029 if ((objheader = prefetchobjstrAlloc(size)) == NULL) {
2030 printf("Error: %s() objstrAlloc error for copying into prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
2033 pthread_mutex_unlock(&prefetchcache_mutex);
2034 recv_data(sd, objheader, size);
2035 prehashInsert(oid, objheader);
2036 return TYPE(objheader);
2039 if((buffer = calloc(1, size)) == NULL) {
2040 printf("%s() Calloc Error %s at line %d\n", __func__, __FILE__, __LINE__);
2044 recv_data(sd, buffer, size);
2045 objheader = (objheader_t *)buffer;
2046 unsigned short type = TYPE(objheader);
// NOTE(review): no free(buffer) is visible in this excerpt for the calloc path.
2055 return TYPE(objheader);
// Opens a fresh TCP connection to machine mid on LISTEN_PORT and sends a
// START_REMOTE_THREAD message carrying oid.
// mid is passed through htonl, so it is expected in host byte order.
// Message layout: 1 control byte followed by the 4-byte oid.
// Return values and the closing of `sock` are not visible in this excerpt.
2058 int startRemoteThread(unsigned int oid, unsigned int mid) {
2060 struct sockaddr_in remoteAddr;
2061 char msg[1 + sizeof(unsigned int)];
2065 if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
2066 perror("startRemoteThread():socket()");
2070 bzero(&remoteAddr, sizeof(remoteAddr));
2071 remoteAddr.sin_family = AF_INET;
2072 remoteAddr.sin_port = htons(LISTEN_PORT);
2073 remoteAddr.sin_addr.s_addr = htonl(mid);
2075 if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
2076 printf("startRemoteThread():error %d connecting to %s:%d\n", errno,
2077 inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
2081 msg[0] = START_REMOTE_THREAD;
2082 *((unsigned int *) &msg[1]) = oid;
2083 send_data(sock, msg, 1 + sizeof(unsigned int));
2090 //TODO: when reusing oids, make sure they are not already in use!
// Hands out object ids from this host's range [oidMin, oidMax]
// (processConfigFile computes the range). The counter `id` is file-static and
// starts at 0xFFFFFFFF; when it is outside the range it is reset -- the
// increment and reset statements are not visible in this excerpt.
// NOTE(review): no locking is visible around the shared static counter.
2091 static unsigned int id = 0xFFFFFFFF;
2092 unsigned int getNewOID(void) {
2094 if (id > oidMax || id < oidMin) {
// Hands out transaction ids from [transIDMin, transIDMax]. When the file-static
// counter `tid` is outside that range it restarts at (transIDMin | 1), i.e.
// the lower bound with its lowest bit forced on. The increment and the return
// statement are not visible in this excerpt.
2100 static unsigned int tid = 0xFFFFFFFF;
2101 unsigned int getNewTransID(void) {
2103 if (tid > transIDMax || tid < transIDMin) {
2104 tid = (transIDMin | 1);
// Reads CONFIG_FILENAME ("dstm.conf"), registers every IPv4 address found in it
// through addHost, determines this machine's address and index in the host
// array, and derives the oid / transaction-id range owned by this host.
// File syntax as parsed below: whitespace-separated dotted addresses; anything
// after '#' on a line is ignored.
// Return values are not visible in this excerpt.
2109 int processConfigFile() {
2111 const int maxLineLength = 200;
2112 char lineBuffer[maxLineLength];
2114 const char *delimiters = " \t\n";
2118 configFile = fopen(CONFIG_FILENAME, "r");
2119 if (configFile == NULL) {
2120 printf("error opening %s:\n", CONFIG_FILENAME);
2125 numHostsInSystem = 0;
2126 sizeOfHostArray = 8;
// NOTE(review): the three calloc results are not NULL-checked on visible lines.
2127 hostIpAddrs = calloc(sizeOfHostArray, sizeof(unsigned int));
2129 liveHosts = calloc(sizeOfHostArray, sizeof(unsigned int));
// locateObjHosts holds two entries per host, at indices 2*i and 2*i+1.
2130 locateObjHosts = calloc(sizeOfHostArray*2, sizeof(unsigned int));
2134 while(fgets(lineBuffer, maxLineLength, configFile) != NULL) {
// Truncate the line at the first '#'.
2135 commentBegin = strchr(lineBuffer, '#');
2136 if (commentBegin != NULL)
2137 *commentBegin = '\0';
2138 token = strtok(lineBuffer, delimiters);
2139 while (token != NULL) {
2140 tmpAddr = inet_addr(token);
// inet_addr signals failure with INADDR_NONE, which this compares as -1.
2141 if ((int)tmpAddr == -1) {
2142 printf("error in %s: bad token:%s\n", CONFIG_FILENAME, token);
// Addresses are kept in host byte order (htonl on a network-order value
// performs the same swap as ntohl).
2146 addHost(htonl(tmpAddr));
2147 token = strtok(NULL, delimiters);
2153 if (numHostsInSystem < 1) {
2154 printf("error in %s: no IP Adresses found\n", CONFIG_FILENAME);
// Two interface names appear; presumably selected per platform by
// preprocessor lines that are not visible here.
2158 myIpAddr = getMyIpAddr("en1");
2160 myIpAddr = getMyIpAddr("eth0");
2162 myIndexInHostArray = findHost(myIpAddr);
// NOTE(review): in the visible order liveHosts[myIndexInHostArray] is written
// before the index is compared with -1 below.
2164 liveHosts[myIndexInHostArray] = 1;
2165 //locateObjHosts[myIndexInHostArray] = myIpAddr;
2167 if (myIndexInHostArray == -1) {
2168 printf("error in %s: IP Address of eth0 not found\n", CONFIG_FILENAME);
// The 32-bit id space is split into numHostsInSystem equal blocks; the last
// host's block is extended up to 0xFFFFFFFF.
2171 oidsPerBlock = (0xFFFFFFFF / numHostsInSystem) + 1;
2172 oidMin = oidsPerBlock * myIndexInHostArray;
2173 if (myIndexInHostArray == numHostsInSystem - 1)
2174 oidMax = 0xFFFFFFFF;
2176 oidMax = oidsPerBlock * (myIndexInHostArray + 1) - 1;
// Transaction ids share the same per-host range as object ids.
2178 transIDMin = oidMin;
2179 transIDMax = oidMax;
// Scans locateObjHosts for a pair whose second entry (index 2*i+1) equals mid
// and returns that pair's first entry (index 2*i). The value returned when no
// pair matches is not visible in this excerpt.
// NOTE(review): unlike getPrimaryMachine/getBackupMachine, no liveHosts_mutex
// locking is visible here.
2184 unsigned int getDuplicatedPrimaryMachine(unsigned int mid) {
2186 for(i = 0; i < numHostsInSystem; i++) {
2187 if(mid == locateObjHosts[(i*2)+1]) {
2188 return locateObjHosts[i*2];
// Reads, under liveHosts_mutex, the first entry (index 2*findHost(mid)) of
// mid's pair in locateObjHosts. Presumably returns pmid -- the return statement
// is not visible in this excerpt.
// NOTE(review): findHost's result is not checked before being used as an index.
2194 unsigned int getPrimaryMachine(unsigned int mid) {
2196 int pmidindex = 2*findHost(mid);
2198 pthread_mutex_lock(&liveHosts_mutex);
2199 pmid = locateObjHosts[pmidindex];
2200 pthread_mutex_unlock(&liveHosts_mutex);
// Reads, under liveHosts_mutex, the second entry (index 2*findHost(mid)+1) of
// mid's pair in locateObjHosts. Presumably returns bmid -- the return statement
// is not visible in this excerpt.
// NOTE(review): findHost's result is not checked before being used as an index.
2204 unsigned int getBackupMachine(unsigned int mid) {
2206 int bmidindex = 2*findHost(mid)+1;
2208 pthread_mutex_lock(&liveHosts_mutex);
2209 bmid = locateObjHosts[bmidindex];
2210 pthread_mutex_unlock(&liveHosts_mutex);
2214 // updates the leader's liveHostArray and locateObj
// Probes every other host: up to 5 attempts to obtain a socket, then a
// one-byte RESPOND_LIVE request whose reply is compared with LIVE.
// numLiveHostsInSystem is set from the local tally at the end.
// The statements that update liveHosts[] and tmpNumLiveHosts are not visible
// in this excerpt.
2215 void updateLiveHosts() {
2217 printf("%s-> Entering updateLiveHosts\n", __func__);
2219 // update everyone's list
2221 //int *tmpLiveHosts = calloc(sizeOfHostArray, sizeof(unsigned int));
2222 //foreach in hostipaddrs, ping -> update list of livemachines
2223 //socket connection?
2225 //liveHosts lock here
2226 int sd = 0, i, j, tmpNumLiveHosts = 0;
2227 for(i = 0; i < numHostsInSystem; i++) {
2228 if(i == myIndexInHostArray)
2233 for(j = 0; j < 5; j++) { // hard define num of retries
2234 if((sd = getSock2WithLock(transRequestSockPool, hostIpAddrs[i])) < 0) {
// NOTE(review): the format string has two conversions (%s, %d) but three
// arguments are passed, so __func__ is printed as the address and the string
// pointer from midtoIPString is consumed by %d; j is never printed.
2235 printf("updateLiveHosts(): Cannot create socket connection to [%s], attempt %d\n", __func__, midtoIPString(hostIpAddrs[i]), j);
2241 char liverequest[sizeof(char)];
2242 liverequest[0] = RESPOND_LIVE;
2244 send_data(sd, &liverequest[0], sizeof(liverequest));
2246 recv_data(sd, &response, sizeof(response));
2249 //if timeout, dead host
2250 printf("YES received %d\n", response);
2251 if(response == LIVE) {
2252 printf("must enter here\n");
2255 //locateObjHosts[i*2] = hostIpAddrs[i];
2258 printf("or here\n");
2265 if(liveHosts[i] == 0)
2266 printf("updateLiveHosts(): cannot make connection to machine %s\n", midtoIPString(hostIpAddrs[i]));
2268 numLiveHostsInSystem = tmpNumLiveHosts;
2269 printf("numLiveHostsInSystem:%d\n", numLiveHostsInSystem);
2270 //have updated list of live machines
2272 printf("%s-> Exiting updateLiveHosts\n", __func__);
// Iterates over all numHostsInSystem hosts with a counter `count`; presumably
// counts the entries of liveHosts that are set and returns the total -- the
// loop body and return statement are not visible in this excerpt.
2277 int getNumLiveHostsInSystem() {
2278 int count = 0, i = 0;
2279 for(; i<numHostsInSystem; i++) {
// Broadcasts the current liveHosts and locateObjHosts tables to other live
// hosts.
// Message layout: 1 control byte (UPDATE_LIVE_HOSTS), numHostsInSystem ints
// from liveHosts, then 2*numHostsInSystem unsigned ints from locateObjHosts.
// restoreDuplicationState treats a non-zero return as an error; the return
// statements themselves are not visible in this excerpt.
2286 int updateLiveHostsCommit() {
2289 char updaterequest[sizeof(char)+sizeof(int)*numHostsInSystem+sizeof(unsigned int)*(numHostsInSystem*2)];
2290 updaterequest[0] = UPDATE_LIVE_HOSTS;
// NOTE(review): the stride is the literal 4 although the buffer is sized with
// sizeof(int); the stores also land on addresses offset by 1 from the start of
// a char array.
2292 for(i = 0; i < numHostsInSystem; i++) {
2293 *((int *)(&updaterequest[i*4+1])) = liveHosts[i]; // clean this up later
2296 for(i = 0; i < numHostsInSystem*2; i++) {
2297 *((unsigned int *)(&updaterequest[i*4+(numHostsInSystem*4)+1])) = locateObjHosts[i]; //ditto
2300 //for each machine send data
// NOTE(review): the loop starts at index 1, so host 0 is never sent the update
// even when it is not this machine -- confirm whether that is intended. The
// trailing "num of retries" comment does not describe this loop.
2301 for(i = 1; i < numHostsInSystem; i++) { // hard define num of retries
2302 if(i == myIndexInHostArray)
2304 if(liveHosts[i] == 1) {
2305 if((sd = getSock2WithLock(transRequestSockPool, hostIpAddrs[i])) < 0) {
// The message names updateLiveHosts() and prints the host index as "attempt".
2306 printf("updateLiveHosts(): socket create error, attempt %d\n", i);
2309 send_data(sd, updaterequest, sizeof(updaterequest));
2317 /*void updateLocateObjHosts(unsigned int failedmid) {
2318 int failedmidIndex = findHost(failedmid);
2319 int i = 0, validIndex = 0;
2321 for(; i < numHostsInSystem; i++) {
2322 if(locateObjHosts[(i*2)] == failedmid) {
2323 while(liveHosts[(i+validIndex)%numHostsInSystem] == 0)
2325 locateObjHosts[(i*2)] = hostIpAddrs[(i+validIndex)%numHostsInSystem];
2327 while(liveHosts[(i+validIndex)%numHostsInSystem] == 0)
2329 locateObjHosts[(i*2)+1] = hostIpAddrs[(i+validIndex)%numHostsInSystem];
2331 else if(locateObjHosts[(i*2)+1] == failedmid) {
2332 while(liveHosts[(i+validIndex)%numHostsInSystem] == 0)
2334 locateObjHosts[(i*2)+1] = hostIpAddrs[(i+validIndex)%numHostsInSystem];
// Rebuilds locateObjHosts from liveHosts. For each host index i it writes two
// entries, locateObjHosts[2*i] and locateObjHosts[2*i+1], each taken from
// hostIpAddrs at (i+validIndex) modulo numHostsInSystem after skipping hosts
// whose liveHosts entry is 0.
// How validIndex is advanced or reset between the two searches is not visible
// in this excerpt.
// NOTE(review): if every liveHosts entry is 0 the skip loops have no visible
// exit condition.
2340 void setLocateObjHosts() {
2341 int i = 0, validIndex = 0;
2343 //check num hosts even valid first
2345 for(;i < numHostsInSystem; i++) {
2347 printf("%s-> i:%d\n", __func__, i);
2350 while(liveHosts[(i+validIndex)%numHostsInSystem] == 0) {
2353 locateObjHosts[i*2] = hostIpAddrs[(i+validIndex)%numHostsInSystem];
2355 printf("%s-> locateObjHosts[%d]:%s\n", __func__, i*2, midtoIPString(locateObjHosts[(i*2)]));
2359 while(liveHosts[(i+validIndex)%numHostsInSystem] == 0) {
2363 printf("%s-> validIndex:%d, this mid is: [%s]\n", __func__, validIndex, midtoIPString(hostIpAddrs[(i+validIndex)%numHostsInSystem]));
2365 locateObjHosts[(i*2)+1] = hostIpAddrs[(i+validIndex)%numHostsInSystem];
2369 printf("%s-> locateObjHosts[%d]:%s\n", __func__, i*2+1, midtoIPString(locateObjHosts[(i*2)+1]));
// Debug dump: for every configured host prints LIVE or DEAD and the two
// locateObjHosts entries of its pair, labelled "original" and "backup".
// The condition choosing between the LIVE and DEAD lines is not visible in
// this excerpt.
2375 void printHostsStatus() {
2378 printf("%s-> *printing live machines and backups*\n", __func__);
2380 for(i = 0; i < numHostsInSystem; i++) {
2383 printf("%s-> [%s]: LIVE\n", __func__, midtoIPString(hostIpAddrs[i]));
2388 printf("%s-> [%s]: DEAD\n", __func__, midtoIPString(hostIpAddrs[i]));
2392 printf("%s-> original:\t[%s]\n", __func__, midtoIPString(locateObjHosts[i*2]));
2393 printf("%s-> backup:\t[%s]\n", __func__, midtoIPString(locateObjHosts[i*2+1]));
// Iterates over all numHostsInSystem hosts; presumably reports whether every
// liveHosts entry is set -- the loop body and return statements are not
// visible in this excerpt.
2398 int allHostsLive() {
2400 for(i = 0; i < numHostsInSystem; i++) {
// Re-creates the object copies that were lost when machine mid died.
// Order of operations visible below:
//   1. look up, using the tables as they were before the failure, mid's backup
//      entry (backupMid) and the primary entry of the pair that lists mid as
//      its backup (originalMid);
//   2. recompute locateObjHosts with setLocateObjHosts();
//   3. have originalMid send its objects to the new getPrimaryMachine(mid):
//      done locally if originalMid is this machine, otherwise requested with
//      DUPLICATE_ORIGINAL;
//   4. likewise have backupMid send to the new getBackupMachine(mid), locally
//      or with DUPLICATE_BACKUP.
2407 void duplicateLostObjects(unsigned int mid){
2410 printf("%s-> Start, mid: [%s]\n", __func__, midtoIPString(mid));
2413 //this needs to be changed.
2414 unsigned int backupMid = getBackupMachine(mid);
2415 unsigned int originalMid = getDuplicatedPrimaryMachine(mid);
2418 printf("%s-> backupMid: [%s], ", __func__, midtoIPString(backupMid));
2419 printf("originalMid: [%s]\n", midtoIPString(originalMid));
2422 setLocateObjHosts();
2425 //connect to these machines
2426 //go through their object store copying necessary (in a transaction)
2427 //transRequestSockPool = createSockPool(transRequestSockPool, DEFAULTSOCKPOOLSIZE);
// NOTE(review): j is declared without an initializer and no assignment to it
// is visible in this excerpt, yet it is printed in both error messages below.
2428 int sd = 0, i, j, tmpNumLiveHosts = 0;
2430 if(originalMid == myIpAddr) {
// originalMid is reused: from here on it holds the destination machine.
2431 originalMid = getPrimaryMachine(mid);
2432 printf("originalMid: [%s]\n", midtoIPString(originalMid));
2433 duplicateLocalOriginalObjects(originalMid);
2435 else if((sd = getSock2WithLock(transRequestSockPool, originalMid)) < 0) {
2436 printf("updateLiveHosts(): socket create error, attempt %d\n", j);
2441 duperequest = DUPLICATE_ORIGINAL;
2442 send_data(sd, &duperequest, sizeof(char));
2444 printf("%s-> Sent DUPLICATE_ORIGINAL request\n", __func__);
2446 originalMid = getPrimaryMachine(mid);
2447 printf("originalMid: [%s]\n", midtoIPString(originalMid));
2448 send_data(sd, &originalMid, sizeof(unsigned int));
2450 printf("%s-> Sent originalMid\n", __func__);
2453 recv_data_block(sd, &response, sizeof(char));
2454 printf("YES! Received %d\n", response);
2457 if(backupMid == myIpAddr) {
// backupMid is reused the same way: it becomes the destination machine.
2458 backupMid = getBackupMachine(mid);
2459 duplicateLocalBackupObjects(backupMid);
2461 else if((sd = getSock2WithLock(transRequestSockPool, backupMid)) < 0) {
2462 printf("updateLiveHosts(): socket create error, attempt %d\n", j);
2467 duperequest = DUPLICATE_BACKUP;
2468 send_data(sd, &duperequest, sizeof(char));
2470 printf("%s-> Sent DUPLICATE_BACKUP request\n", __func__);
2472 backupMid = getBackupMachine(mid);
2473 send_data(sd, &backupMid, sizeof(unsigned int));
2475 printf("%s-> Sent backupMid\n", __func__);
2479 recv_data_block(sd, &response, sizeof(char));
2480 printf("YES! Received %d\n", response);
2484 printf("%s-> End\n", __func__);
// Sends this machine's objects selected by mhashGetDuplicate(&dupeptr, 1) to
// machine mid: a RECEIVE_DUPES control byte, then tempsize bytes from dupeptr,
// then waits for a one-byte reply that is compared with DUPLICATION_COMPLETE.
// The socket is returned to the pool with freeSockWithLock.
// NOTE(review): dupeptr is dereferenced for the debug printf without a visible
// NULL / tempsize check, and no free(dupeptr) is visible in this excerpt.
2488 void duplicateLocalBackupObjects(unsigned int mid) {
2490 char *dupeptr, ctrl, response;
2493 printf("%s-> Start; backup mid:%s\n", __func__, midtoIPString(mid));
2495 //copy code from dstmserver here
2496 tempsize = mhashGetDuplicate(&dupeptr, 1);
2498 printf("tempsize:%d, dupeptrfirstvalue:%d\n", tempsize, *((unsigned int *)(dupeptr)));
2499 //send control and dupes after
2500 ctrl = RECEIVE_DUPES;
2501 if((sd = getSockWithLock(transRequestSockPool, mid)) < 0) {
2502 printf("duplicatelocalbackup: socket create error\n");
2506 printf("sd:%d, tempsize:%d, dupeptrfirstvalue:%d\n", sd, tempsize, *((unsigned int *)(dupeptr)));
2507 send_data(sd, &ctrl, sizeof(char));
2508 send_data(sd, dupeptr, tempsize);
2509 recv_data(sd, &response, sizeof(char));
2510 if(response != DUPLICATION_COMPLETE) {
2514 freeSockWithLock(transRequestSockPool, mid, sd);
2516 printf("%s-> End\n", __func__);
// Counterpart of duplicateLocalBackupObjects: identical protocol
// (RECEIVE_DUPES, payload, one-byte DUPLICATION_COMPLETE reply) but the payload
// is selected with mhashGetDuplicate(&dupeptr, 0).
// NOTE(review): dupeptr is dereferenced for the debug printf without a visible
// NULL / tempsize check, and no free(dupeptr) is visible in this excerpt.
2521 void duplicateLocalOriginalObjects(unsigned int mid) {
2523 char *dupeptr, ctrl, response;
2526 printf("%s-> Start\n", __func__);
2528 //copy code fom dstmserver here
2530 tempsize = mhashGetDuplicate(&dupeptr, 0);
2532 //send control and dupes after
2533 ctrl = RECEIVE_DUPES;
2535 if((sd = getSockWithLock(transRequestSockPool, mid)) < 0) {
2536 printf("DUPLICATE_ORIGINAL: socket create error\n");
2539 printf("sd:%d, tempsize:%d, dupeptrfirstvalue:%d\n", sd, tempsize, *((unsigned int *)(dupeptr)));
2541 send_data(sd, &ctrl, sizeof(char));
2542 send_data(sd, dupeptr, tempsize);
2544 recv_data(sd, &response, sizeof(char));
2545 if(response != DUPLICATION_COMPLETE) {
2548 freeSockWithLock(transRequestSockPool, mid, sd);
2551 printf("%s-> End\n", __func__);
// Appends hostIp to the host tables unless findHost already knows it.
// When the tables are full (numHostsInSystem == sizeOfHostArray) all three
// arrays are reallocated at twice the capacity and the old contents copied.
// A new host starts with liveHosts = 0 and with itself as the first entry of
// its locateObjHosts pair.
// The increment of numHostsInSystem is not visible in this excerpt.
2556 void addHost(unsigned int hostIp) {
2557 unsigned int *tmpArray;
2558 int *tmpliveHostsArray;
2559 unsigned int *tmplocateObjHostsArray;
2561 if (findHost(hostIp) != -1)
2564 if (numHostsInSystem == sizeOfHostArray) {
// NOTE(review): none of the calloc results below is NULL-checked.
2565 tmpArray = calloc(sizeOfHostArray * 2, sizeof(unsigned int));
2566 memcpy(tmpArray, hostIpAddrs, sizeof(unsigned int) * numHostsInSystem);
2568 hostIpAddrs = tmpArray;
2570 tmpliveHostsArray = calloc(sizeOfHostArray * 2, sizeof(unsigned int));
2571 memcpy(tmpliveHostsArray, liveHosts, sizeof(unsigned int) * numHostsInSystem);
2573 liveHosts = tmpliveHostsArray;
// NOTE(review): locateObjHosts uses two slots per host (see the index
// numHostsInSystem*2 below), but only numHostsInSystem elements are copied
// here, i.e. half of the populated entries.
2575 tmplocateObjHostsArray = calloc(sizeOfHostArray * 2 * 2, sizeof(unsigned int));
2576 memcpy(tmplocateObjHostsArray, locateObjHosts, sizeof(unsigned int) * numHostsInSystem);
2577 free(locateObjHosts);
2578 locateObjHosts = tmplocateObjHostsArray;
2580 sizeOfHostArray *= 2;
2583 hostIpAddrs[numHostsInSystem] = hostIp;
2584 liveHosts[numHostsInSystem] = 0;
2585 locateObjHosts[numHostsInSystem*2] = hostIp;
// Linear search of hostIpAddrs[0..numHostsInSystem-1] for hostIp.
// Callers compare the result with -1 (addHost, processConfigFile), so it
// presumably returns the matching index or -1 -- the return statements are not
// visible in this excerpt.
2591 int findHost(unsigned int hostIp) {
2593 for (i = 0; i < numHostsInSystem; i++)
2594 if (hostIpAddrs[i] == hostIp)
2601 /* This function sends notification request per thread waiting on object(s) whose version
// oidarry / versionarry: numoid object ids and the versions the caller has.
// Registers a notifydata_t under a thread id in the notify hash table, sends a
// THREAD_NOTIFY_REQUEST to the machine found with lhashSearch over a fresh TCP
// connection, then blocks on the condition variable until threadNotify signals.
// Message layout: control byte, numoid, numoid oids, numoid versions, myIpAddr,
// threadid.
// Return values are not visible in this excerpt.
2603 int reqNotify(unsigned int *oidarry, unsigned short *versionarry, unsigned int numoid) {
2605 objheader_t *objheader;
2606 struct sockaddr_in remoteAddr;
// Stack buffer whose size depends on the caller-supplied numoid (VLA).
2607 char msg[1 + numoid * (sizeof(unsigned short) + sizeof(unsigned int)) + 3 * sizeof(unsigned int)];
2611 unsigned short version;
2612 unsigned int oid,mid;
// NOTE(review): threadid is function-static and shared by all callers; the
// statement that advances it and any locking around it are not visible here.
2613 static unsigned int threadid = 0;
2614 pthread_mutex_t threadnotify = PTHREAD_MUTEX_INITIALIZER; //Lock and condition var for threadjoin and notification
2615 pthread_cond_t threadcond = PTHREAD_COND_INITIALIZER;
2616 notifydata_t *ndata;
// The assignment that gives oid its value before this lookup is not visible
// in this excerpt.
2619 if((mid = lhashSearch(oid)) == 0) {
2620 printf("Error: %s() No such machine found for oid =%x\n",__func__, oid);
2624 if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
2625 perror("reqNotify():socket()");
2629 bzero(&remoteAddr, sizeof(remoteAddr));
2630 remoteAddr.sin_family = AF_INET;
2631 remoteAddr.sin_port = htons(LISTEN_PORT);
2632 remoteAddr.sin_addr.s_addr = htonl(mid);
2634 /* Generate unique threadid */
2637 /* Save threadid, numoid, oidarray, versionarray, pthread_cond_variable for later processing */
2638 if((ndata = calloc(1, sizeof(notifydata_t))) == NULL) {
2639 printf("Calloc Error %s, %d\n", __FILE__, __LINE__);
2642 ndata->numoid = numoid;
2643 ndata->threadid = threadid;
// The caller's arrays are stored by pointer, not copied.
2644 ndata->oidarry = oidarry;
2645 ndata->versionarry = versionarry;
// NOTE(review): the mutex and condition variable are copied by value into
// ndata; POSIX leaves the use of copies of these objects undefined.
2646 ndata->threadcond = threadcond;
2647 ndata->threadnotify = threadnotify;
2648 if((status = notifyhashInsert(threadid, ndata)) != 0) {
2649 printf("reqNotify(): Insert into notify hash table not successful %s, %d\n", __FILE__, __LINE__);
2654 /* Send number of oids, oidarry, version array, machine id and threadid */
2655 if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
2656 printf("reqNotify():error %d connecting to %s:%d\n", errno,
2657 inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
2661 msg[0] = THREAD_NOTIFY_REQUEST;
2662 *((unsigned int *)(&msg[1])) = numoid;
2663 /* Send array of oids */
// size is the running write offset relative to &msg[1].
2664 size = sizeof(unsigned int);
2666 for(i = 0;i < numoid; i++) {
2668 *((unsigned int *)(&msg[1] + size)) = oid;
2669 size += sizeof(unsigned int);
2672 /* Send array of version */
2673 for(i = 0;i < numoid; i++) {
2674 version = versionarry[i];
2675 *((unsigned short *)(&msg[1] + size)) = version;
2676 size += sizeof(unsigned short);
2679 *((unsigned int *)(&msg[1] + size)) = myIpAddr; size += sizeof(unsigned int);
2680 *((unsigned int *)(&msg[1] + size)) = threadid;
// The mutex is taken before the request is sent, so the waiter holds it from
// before the send until pthread_cond_wait releases it.
2681 pthread_mutex_lock(&(ndata->threadnotify));
2682 size = 1 + numoid * (sizeof(unsigned int) + sizeof(unsigned short)) + 3 * sizeof(unsigned int);
2683 send_data(sock, msg, size);
// NOTE(review): the wait is not wrapped in a predicate loop on the visible
// lines, so a spurious wakeup would be treated as a notification.
2684 pthread_cond_wait(&(ndata->threadcond), &(ndata->threadnotify));
2685 pthread_mutex_unlock(&(ndata->threadnotify));
// NOTE(review): the local objects are destroyed here, not the copies inside
// ndata that were actually locked and waited on.
2688 pthread_cond_destroy(&threadcond);
2689 pthread_mutex_destroy(&threadnotify);
/* threadNotify: handle a change notification for one object and wake the
 * thread that registered interest under tid.
 *
 * oid     - id of the object reported as changed
 * version - version number reported for that object
 * tid     - key passed to notifyhashSearch() to find the waiter record
 *
 * Visible steps: look up the notifydata_t record for tid, scan its oidarry
 * for oid, compare version against the matching versionarry entry, look the
 * oid up in the prefetch hash, then signal the record condition variable.
 *
 * NOTE(review): several lines of this function are not visible in the
 * reviewed excerpt (the bodies of the error branches, the assignment of
 * index and objIsFound, the declaration of ptr). Each error branch is
 * presumed to return after printing - confirm against the full file.
 */
2695 void threadNotify(unsigned int oid, unsigned short version, unsigned int tid) {
2696 notifydata_t *ndata;
2697 int i, objIsFound = 0, index;
2700 //Look up the tid and call the corresponding pthread_cond_signal
2701 if((ndata = notifyhashSearch(tid)) == NULL) {
2702 printf("threadnotify(): No such threadid is present %s, %d\n", __FILE__, __LINE__);
// Scan the oids watched by this waiter for the reported oid
2705 for(i = 0; i < ndata->numoid; i++) {
2706 if(ndata->oidarry[i] == oid) {
2711 if(objIsFound == 0) {
2712 printf("threadNotify(): Oid not found %s, %d\n", __FILE__, __LINE__);
// A reported version that is not greater than the recorded one is treated as no change.
// NOTE(review): index is only meaningful if the scan above matched - confirm the
// not-found branch exits before reaching this comparison.
2715 if(version <= ndata->versionarry[index]) {
2716 printf("threadNotify(): New version %d has not changed since last version for oid = %d, %s, %d\n", version, oid, __FILE__, __LINE__);
2720 /* Clear from prefetch cache and free thread related data structure */
2721 if((ptr = prehashSearch(oid)) != NULL) {
// Signal the condition variable while holding the mutex stored in the same record
2725 pthread_mutex_lock(&(ndata->threadnotify));
2726 pthread_cond_signal(&(ndata->threadcond));
2727 pthread_mutex_unlock(&(ndata->threadnotify));
/* notifyAll: walk the thread list at *head and send a THREAD_NOTIFY_RESPONSE
 * message for (oid, version) to the machine of each entry.
 *
 * head    - address of the list head; the loop runs while *head is non-NULL
 * oid     - object id placed in the message
 * version - version placed in the message; it is stored through an
 *           unsigned short pointer, so only the low-order part is sent
 *
 * Message layout as built below (fields are stored as-is, the visible code
 * applies no byte order conversion to the payload):
 *   1 byte          THREAD_NOTIFY_RESPONSE
 *   unsigned int    oid
 *   unsigned short  version
 *   unsigned int    ptr->threadid
 *
 * NOTE(review): the declarations of ptr and mid, the list advance, socket
 * cleanup and the return statement are not visible in the reviewed excerpt;
 * the return value convention cannot be stated from here.
 */
2734 int notifyAll(threadlist_t **head, unsigned int oid, unsigned int version) {
2737 struct sockaddr_in remoteAddr;
// Buffer sized for exactly one message: type byte + version + oid + threadid
2738 char msg[1 + sizeof(unsigned short) + 2*sizeof(unsigned int)];
2739 int sock, status, size, bytesSent;
2741 while(*head != NULL) {
2744 //create a socket connection to that machine
2745 if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
2746 perror("notifyAll():socket()");
// Target address: machine id converted with htonl, fixed LISTEN_PORT
2750 bzero(&remoteAddr, sizeof(remoteAddr));
2751 remoteAddr.sin_family = AF_INET;
2752 remoteAddr.sin_port = htons(LISTEN_PORT);
2753 remoteAddr.sin_addr.s_addr = htonl(mid);
2754 //send Thread Notify response and threadid to that machine
2755 if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
2756 printf("notifyAll():error %d connecting to %s:%d\n", errno,
2757 inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
// Build the message; size is used as the running offset past the type byte
2761 bzero(msg, (1+sizeof(unsigned short) + 2*sizeof(unsigned int)));
2762 msg[0] = THREAD_NOTIFY_RESPONSE;
2763 *((unsigned int *)&msg[1]) = oid;
2764 size = sizeof(unsigned int);
2765 *((unsigned short *)(&msg[1]+ size)) = version;
2766 size+= sizeof(unsigned short);
2767 *((unsigned int *)(&msg[1]+ size)) = ptr->threadid;
// size is now reused as the total message length handed to send_data
2769 size = 1 + 2*sizeof(unsigned int) + sizeof(unsigned short);
2770 send_data(sock, msg, size);
2783 removetransactionhash();
2785 objstrDelete(t_cache);
2789 /* This function inserts necessary information into
2790 * a machine pile data structure */
/* pInsert: record the object described by headeraddr in the pile node that
 * belongs to machine mid.
 *
 * pile       - head of the pile list to search
 * headeraddr - header of the object; its STATUS flags select the list
 * mid        - machine id the object is filed under
 * num_objs   - passed to pCreate() when a new pile node is needed
 *
 * Classification as visible below:
 *   STATUS has NEW    -> oid stored in oidcreated[], sum_bytes grows by
 *                        sizeof(objheader_t) plus the size from GETSIZE
 *   STATUS has DIRTY  -> oid stored in oidmod[], sum_bytes grows the same way
 *   remaining case    -> (oid, version) written into the packed objread buffer
 *
 * The same classification appears twice: once for a node whose mid already
 * matches, once for a node freshly obtained from pCreate().
 *
 * NOTE(review): counter increments, the found handling, linking of the new
 * node, the pCreate failure branch and the return statement are not visible
 * in the reviewed excerpt; the returned pointer is presumably the list head -
 * confirm against the full file.
 */
2791 plistnode_t *pInsert(plistnode_t *pile, objheader_t *headeraddr, unsigned int mid, int num_objs) {
2792 plistnode_t *ptr, *tmp;
2793 int found = 0, offset = 0;
2796 //Add oid into a machine that is already present in the pile linked list structure
2797 while(tmp != NULL) {
2798 if (tmp->mid == mid) {
// Newly created object: file its oid under the created list of this machine
2801 if (STATUS(headeraddr) & NEW) {
2802 tmp->oidcreated[tmp->numcreated] = OID(headeraddr);
// GETSIZE presumably stores the object payload size in tmpsize - confirm macro definition
2804 GETSIZE(tmpsize, headeraddr);
2805 tmp->sum_bytes += sizeof(objheader_t) + tmpsize;
2806 /*if(numHostsInSystem > 1) {
2807 STATUS(headeraddr) = DIRTY;
2808 //printf("Redo pInsert for oid %d, now modified\n", OID(headeraddr));
2809 //printf("this machine: %d\n", mid);
2810 midtoIP(tmp->mid, ip);
2811 pile = pInsert(tmp, headeraddr, locateBackupMachine(headeraddr), num_objs);
2813 // printf("header version: %d\n", headeraddr->version);
2814 //printf("Finished Redo pInsert for oid %d, now modified\n", OID(headeraddr));
2816 } else if (STATUS(headeraddr) & DIRTY) {
2817 tmp->oidmod[tmp->nummod] = OID(headeraddr);
2819 GETSIZE(tmpsize, headeraddr);
2820 tmp->sum_bytes += sizeof(objheader_t) + tmpsize;
2821 /* midtoIP(tmp->mid, ip);
2822 printf("pp; Redo? pile->mid: %s, oid: %d, header version: %d\n", ip, OID(headeraddr), headeraddr->version);*/
// Read entries are packed back to back as (unsigned int oid, short version);
// the byte offset of the next free slot is the entry size times numread
2824 offset = (sizeof(unsigned int) + sizeof(short)) * tmp->numread;
2825 *((unsigned int *)(((char *)tmp->objread) + offset))=OID(headeraddr);
2826 offset += sizeof(unsigned int);
2827 *((short *)(((char *)tmp->objread) + offset)) = headeraddr->version;
2835 //Add oid for any new machine
// Obtain a fresh pile node sized for num_objs objects
2838 if((ptr = pCreate(num_objs)) == NULL) {
// Same classification as above, applied to the new node
2842 if (STATUS(headeraddr) & NEW) {
2843 ptr->oidcreated[ptr->numcreated] = OID(headeraddr);
2845 GETSIZE(tmpsize, headeraddr);
2846 ptr->sum_bytes += sizeof(objheader_t) + tmpsize;
2847 /*if(numHostsInSystem > 1) {
2848 STATUS(headeraddr) = DIRTY;
2849 midtoIP(ptr->mid, ip);
2851 printf("np; ptr->mid: %s, oid: %d, header version: %d\n", ip, OID(headeraddr), headeraddr->version);
2852 //printf("header version: %d\n", headeraddr->version);
2853 pile = pInsert(tmp, headeraddr, locateBackupMachine(headeraddr), num_objs);
2854 //printf("header version: %d\n", headeraddr->version);
2856 } else if (STATUS(headeraddr) & DIRTY) {
2857 ptr->oidmod[ptr->nummod] = OID(headeraddr);
2859 GETSIZE(tmpsize, headeraddr);
2860 ptr->sum_bytes += sizeof(objheader_t) + tmpsize;
2861 //printf("Redo oid %d?\n", OID(headeraddr));
2862 /* midtoIP(ptr->mid, ip);
2863 printf("np; Redo? ptr->mid: %s, oid: %d, header version: %d\n", ip, OID(headeraddr), headeraddr->version);*/
// First read entry of the new node is written at the start of objread
2865 *((unsigned int *)ptr->objread)=OID(headeraddr);
2866 offset = sizeof(unsigned int);
2867 *((short *)(((char *)ptr->objread) + offset)) = headeraddr->version;
// Reset the status flags stored in the object header
2875 STATUS(headeraddr) = 0;
/* sortPiles: reorder the pile list so that the node whose mid equals
 * myIpAddr (the local machine) is processed last.
 *
 * pileptr - head of the pile list
 *
 * Two cases are distinguished in the visible code: the local node found
 * behind another node (prev differs from pileptr) and the local node found
 * while prev still equals pileptr.
 *
 * NOTE(review): the tail search, the re-linking at the tail, the loop
 * advance and the return statement are not visible in the reviewed excerpt;
 * the function presumably returns the possibly changed list head - confirm.
 */
2880 plistnode_t *sortPiles(plistnode_t *pileptr) {
2881 plistnode_t *head, *ptr, *tail;
2884 /* Get tail pointer */
2890 plistnode_t *prev = pileptr;
2891 /* Arrange local machine processing at the end of the pile list */
2892 while(ptr != NULL) {
// Local node in the middle of the list: unlink it from its predecessor
2894 if(ptr->mid == myIpAddr && (prev != pileptr)) {
2895 prev->next = ptr->next;
// Local node met while prev still equals the original head pointer
2900 if((ptr->mid == myIpAddr) && (prev == pileptr)) {
2914 * Executes when the known leader has failed.
2915 * Guarantees consensus on next leader among all live hosts. */
// NOTE(review): the header of this function is not visible in the reviewed
// excerpt; it is presumably paxos(), the driver that runs the three phases
// below in a retry loop - confirm against the full file.
// Remember the round number and the leader that were current on entry
2918 int origRound = paxosRound;
2919 origleader = leader;
2922 printf(">> Debug : Starting paxos..\n");
// Phases run in order; how the result of one phase gates the next is not
// visible in the reviewed excerpt
2926 ret = paxosPrepare(); // phase 1
2928 ret = paxosAccept(); // phase 2
2930 paxosLearn(); // phase 3
2934 // Paxos not successful; wait and retry if new leader is not yet selected
// A changed paxosRound means the round advanced since this function was entered
2936 if(paxosRound != origRound)
2938 } while (ret == -1);
2941 printf("\n>> Debug : Leader : [%s]\n", midtoIPString(leader));
2950 //int origleader = leader;
// Prepare phase: send PAXOS_PREPARE with proposal number my_n to every
// entry of hostIpAddrs and process the replies.
// NOTE(review): the function header, local declarations, counter updates
// and return statements are not visible in the reviewed excerpt.
2961 printf("[Prepare]...\n");
2964 temp_v_a = myIpAddr; // if no other value is proposed, make this machine the new leader
2966 for (i = 0; i < numHostsInSystem; ++i) {
2967 control = PAXOS_PREPARE;
// Take a socket to host i from the shared request socket pool
2971 if ((sd = getSock2WithLock(transRequestSockPool, hostIpAddrs[i])) < 0) {
2972 printf("paxosPrepare(): socket create error\n");
2976 printf("%s-> Send PAXOS_PREPARE to mid [%s] with my_n=%d\n", __func__, midtoIPString(hostIpAddrs[i]), my_n);
// Request: one control byte followed by my_n; the reply code is read back into control
2978 send_data(sd, &control, sizeof(char));
2979 send_data(sd, &my_n, sizeof(int));
2980 recv_data(sd, &control, sizeof(char));
// A failed exchange is recognised by sd being -1 or the thread-local timeoutFlag being set
2981 if ((sd == -1) || (timeoutFlag == 1)) {
2983 printf("%s-> timeout to machine [%s]\n", __func__, midtoIPString(hostIpAddrs[i]));
2990 case PAXOS_PREPARE_OK:
// An OK reply carries the proposal number and value held by the remote host
2992 recv_data(sd, &remote_n, sizeof(int));
2993 recv_data(sd, &remote_v, sizeof(int));
2995 printf("%s-> Received PAXOS_PREPARE_OK from mindex [%d] with remote_v=%s\n", __func__, i, midtoIPString(remote_v));
// Replies that still name the old leader are ignored; otherwise the value
// from a reply whose number exceeds tmp_n replaces the candidate
2997 if(remote_v != origleader) {
2998 if (remote_n > tmp_n) {
3000 temp_v_a = remote_v;
3004 case PAXOS_PREPARE_REJECT:
3010 printf("%s-> cnt:%d, numLiveHostsInSystem:%d\n", __func__, cnt, numLiveHostsInSystem);
// NOTE(review): with integer division, cnt >= numLiveHostsInSystem / 2 is
// satisfied by exactly half of an even host count. Whether cnt already
// includes this host is not visible here - confirm this is a true majority.
3013 if (cnt >= (numLiveHostsInSystem / 2)) { // majority of OK replies
// Accept phase: send PAXOS_ACCEPT with proposal number my_n and the value
// chosen during prepare to every entry of hostIpAddrs, then count replies.
// NOTE(review): the function header, other local declarations, counter
// updates and return statements are not visible in the reviewed excerpt.
// The proposed value is the candidate left in temp_v_a
3027 int remote_v = temp_v_a;
3030 printf("[Accept]...\n");
3033 for (i = 0; i < numHostsInSystem; ++i) {
3034 control = PAXOS_ACCEPT;
// Take a socket to host i from the shared request socket pool
3038 if ((sd = getSock2WithLock(transRequestSockPool, hostIpAddrs[i])) < 0) {
3039 printf("paxosAccept(): socket create error\n");
// Request: control byte, my_n, proposed value; the reply code is read back into control
3043 send_data(sd, &control, sizeof(char));
3044 send_data(sd, &my_n, sizeof(int));
3045 send_data(sd, &remote_v, sizeof(int));
3047 recv_data(sd, &control, sizeof(char));
// A failed exchange is recognised by sd being -1 or the thread-local timeoutFlag being set
3048 if ((sd == -1) || (timeoutFlag == 1)) {
3050 printf("%s-> timeout to machine [%s]\n", __func__, midtoIPString(hostIpAddrs[i]));
3057 case PAXOS_ACCEPT_OK:
3060 case PAXOS_ACCEPT_REJECT:
3064 printf(">> Debug : Accept - n_h [%d], n_a [%d], v_a [%s]\n", n_h, n_a, midtoIPString(v_a));
// NOTE(review): with integer division, cnt >= numLiveHostsInSystem / 2 is
// satisfied by exactly half of an even host count. Whether cnt already
// includes this host is not visible here - confirm this is a true majority.
3068 if (cnt >= (numLiveHostsInSystem / 2)) {
3083 printf("[Learn]...\n");
3086 control = PAXOS_LEARN;
3087 // transRequestSockPool = createSockPool(transRequestSockPool, DEFAULTSOCKPOOLSIZE);
3089 for (i = 0; i < numHostsInSystem; ++i) {
3092 if(hostIpAddrs[i] == myIpAddr)
3097 printf("This is my leader!!!: [%s]\n", midtoIPString(leader));
3101 if ((sd = getSock2WithLock(transRequestSockPool, hostIpAddrs[i])) < 0) {
3103 // printf("paxosLearn(): socket create error, attemp\n");
3106 send_data(sd, &control, sizeof(char));
3107 send_data(sd, &v_a, sizeof(int));